| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # Copyright 2022 Jeffrey Kegler | 
| 2 |  |  |  |  |  |  | # This file is part of Marpa::R2.  Marpa::R2 is free software: you can | 
| 3 |  |  |  |  |  |  | # redistribute it and/or modify it under the terms of the GNU Lesser | 
| 4 |  |  |  |  |  |  | # General Public License as published by the Free Software Foundation, | 
| 5 |  |  |  |  |  |  | # either version 3 of the License, or (at your option) any later version. | 
| 6 |  |  |  |  |  |  | # | 
| 7 |  |  |  |  |  |  | # Marpa::R2 is distributed in the hope that it will be useful, | 
| 8 |  |  |  |  |  |  | # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 9 |  |  |  |  |  |  | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
| 10 |  |  |  |  |  |  | # Lesser General Public License for more details. | 
| 11 |  |  |  |  |  |  | # | 
| 12 |  |  |  |  |  |  | # You should have received a copy of the GNU Lesser | 
| 13 |  |  |  |  |  |  | # General Public License along with Marpa::R2.  If not, see | 
| 14 |  |  |  |  |  |  | # http://www.gnu.org/licenses/. | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | package Marpa::R2::HTML; | 
| 17 |  |  |  |  |  |  |  | 
| 18 | 8 |  |  | 8 |  | 8203 | use 5.010001; | 
|  | 8 |  |  |  |  | 27 |  | 
| 19 | 8 |  |  | 8 |  | 44 | use strict; | 
|  | 8 |  |  |  |  | 15 |  | 
|  | 8 |  |  |  |  | 168 |  | 
| 20 | 8 |  |  | 8 |  | 35 | use warnings; | 
|  | 8 |  |  |  |  | 17 |  | 
|  | 8 |  |  |  |  | 213 |  | 
| 21 |  |  |  |  |  |  |  | 
| 22 | 8 |  |  | 8 |  | 40 | use vars qw( $VERSION $STRING_VERSION ); | 
|  | 8 |  |  |  |  | 15 |  | 
|  | 8 |  |  |  |  | 625 |  | 
| 23 |  |  |  |  |  |  | $VERSION        = '13.002_000'; | 
| 24 |  |  |  |  |  |  | $STRING_VERSION = $VERSION; | 
| 25 |  |  |  |  |  |  | ## no critic (BuiltinFunctions::ProhibitStringyEval) | 
| 26 |  |  |  |  |  |  | $VERSION = eval $VERSION; | 
| 27 |  |  |  |  |  |  | ## use critic | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | our @EXPORT_OK; | 
| 30 | 8 |  |  | 8 |  | 51 | use base qw(Exporter); | 
|  | 8 |  |  |  |  | 14 |  | 
|  | 8 |  |  |  |  | 913 |  | 
| 31 | 8 |  |  | 8 |  | 279 | BEGIN { @EXPORT_OK = qw(html); } | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | package Marpa::R2::HTML::Internal; | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | # Data::Dumper is used in tracing | 
| 36 | 8 |  |  | 8 |  | 2065 | use Data::Dumper; | 
|  | 8 |  |  |  |  | 20893 |  | 
|  | 8 |  |  |  |  | 446 |  | 
| 37 |  |  |  |  |  |  |  | 
| 38 | 8 |  |  | 8 |  | 3502 | use Marpa::R2::HTML::Internal; | 
|  | 8 |  |  |  |  | 21 |  | 
|  | 8 |  |  |  |  | 246 |  | 
| 39 | 8 |  |  | 8 |  | 3558 | use Marpa::R2::HTML::Config; | 
|  | 8 |  |  |  |  | 26 |  | 
|  | 8 |  |  |  |  | 248 |  | 
| 40 | 8 |  |  | 8 |  | 48 | use Carp (); | 
|  | 8 |  |  |  |  | 14 |  | 
|  | 8 |  |  |  |  | 150 |  | 
| 41 | 8 |  |  | 8 |  | 108 | use HTML::Parser 3.69; | 
|  | 8 |  |  |  |  | 135 |  | 
|  | 8 |  |  |  |  | 189 |  | 
| 42 | 8 |  |  | 8 |  | 40 | use HTML::Entities qw(decode_entities); | 
|  | 8 |  |  |  |  | 16 |  | 
|  | 8 |  |  |  |  | 474 |  | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | # versions below must be coordinated with | 
| 45 |  |  |  |  |  |  | # those required in Build.PL | 
| 46 |  |  |  |  |  |  |  | 
| 47 | 8 |  |  | 8 |  | 45 | use English qw( -no_match_vars ); | 
|  | 8 |  |  |  |  | 13 |  | 
|  | 8 |  |  |  |  | 45 |  | 
| 48 | 8 |  |  | 8 |  | 6347 | use Marpa::R2; | 
|  | 8 |  |  |  |  | 39 |  | 
|  | 8 |  |  |  |  | 827 |  | 
# Load-time guard: $Marpa::R2::VERSION must be defined and numerically
# equal to $Marpa::R2::HTML::VERSION, otherwise loading is aborted.
{
    my $core_version = $Marpa::R2::VERSION;
    if ( not defined $core_version ) {
        die 'Marpa::R2::VERSION not defined';
    }
    my $html_version = $Marpa::R2::HTML::VERSION;
    if ( $core_version != $html_version ) {
        die
            "Marpa::R2::VERSION ($core_version) does not match Marpa::R2::HTML::VERSION ",
            $html_version;
    }
}
| 57 |  |  |  |  |  |  |  | 
| 58 | 8 |  |  | 8 |  | 62 | use Marpa::R2::Thin::Trace; | 
|  | 8 |  |  |  |  | 14 |  | 
|  | 8 |  |  |  |  | 221 |  | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | # constants | 
| 61 |  |  |  |  |  |  |  | 
| 62 | 8 |  |  | 8 |  | 48 | use constant PHYSICAL_TOKEN      => 42; | 
|  | 8 |  |  |  |  | 16 |  | 
|  | 8 |  |  |  |  | 641 |  | 
| 63 | 8 |  |  | 8 |  | 63 | use constant RUBY_SLIPPERS_TOKEN => 43; | 
|  | 8 |  |  |  |  | 28 |  | 
|  | 8 |  |  |  |  | 1206 |  | 
| 64 |  |  |  |  |  |  |  | 
# Record the numeric codes of MARPA_ERR_UNEXPECTED_TOKEN_ID and
# MARPA_ERR_NONE.  A code is the position of the error's name in the
# list returned by Marpa::R2::Thin::error_names().
our @LIBMARPA_ERROR_NAMES = Marpa::R2::Thin::error_names();
our $UNEXPECTED_TOKEN_ID;
our $NO_MARPA_ERROR;
{
    # Error name => the package variable that receives its code
    my %code_slot_for = (
        'MARPA_ERR_UNEXPECTED_TOKEN_ID' => \$UNEXPECTED_TOKEN_ID,
        'MARPA_ERR_NONE'                => \$NO_MARPA_ERROR,
    );
    for my $error_code ( 0 .. $#LIBMARPA_ERROR_NAMES ) {
        my $code_slot = $code_slot_for{ $LIBMARPA_ERROR_NAMES[$error_code] };
        next if not defined $code_slot;
        ${$code_slot} = $error_code;
    }
}
| 79 |  |  |  |  |  |  |  | 
| 80 | 8 |  |  | 8 |  | 4165 | use Marpa::R2::HTML::Callback; | 
|  | 8 |  |  |  |  | 34 |  | 
|  | 8 |  |  |  |  | 55086 |  | 
# Load-time guard: $Marpa::R2::HTML::Callback::VERSION must be defined and
# numerically equal to $Marpa::R2::HTML::VERSION, otherwise loading is
# aborted.
{
    my $callback_version = $Marpa::R2::HTML::Callback::VERSION;
    if ( not defined $callback_version ) {
        die 'Marpa::R2::HTML::Callback::VERSION not defined';
    }
    my $html_version = $Marpa::R2::HTML::VERSION;
    if ( $callback_version != $html_version ) {
        die
            "Marpa::R2::HTML::Callback::VERSION ($callback_version) does not match Marpa::R2::HTML::VERSION ",
            $html_version;
    }
}
| 90 |  |  |  |  |  |  |  | 
# Look up the line and column recorded for an earleme.
#
#   $self    -- parser state hash; {earleme_to_html_token_ix} and {tokens}
#               are read
#   $earleme -- index into the {earleme_to_html_token_ix} array
#
# The token used is the one at index (table entry for $earleme) + 1.
# Returns a two-element list: that token's LINE and COLUMN fields.
# Dies if the table has no entry for $earleme.
sub earleme_to_linecol {
    my ( $self, $earleme ) = @_;
    my $html_parser_tokens = $self->{tokens};
    my $mapped_token_ix = $self->{earleme_to_html_token_ix}->[$earleme];

    # The check must come before the arithmetic: undef + 1 evaluates
    # to 1, so testing the sum for definedness could never fail.
    die if not defined $mapped_token_ix;
    my $html_token_ix = $mapped_token_ix + 1;

    return @{ $html_parser_tokens->[$html_token_ix] }[
        Marpa::R2::HTML::Internal::Token::LINE(),
        Marpa::R2::HTML::Internal::Token::COLUMN(),
    ];

} ## end sub earleme_to_linecol
| 104 |  |  |  |  |  |  |  | 
# Look up the end offset recorded for an earleme.
#
#   $self    -- parser state hash; {earleme_to_html_token_ix} and {tokens}
#               are read
#   $earleme -- index into the {earleme_to_html_token_ix} array
#
# The token used is the one at index (table entry for $earleme) + 1.
# Returns that token's END_OFFSET field.
# Dies if the table has no entry for $earleme.
sub earleme_to_offset {
    my ( $self, $earleme ) = @_;
    my $html_parser_tokens = $self->{tokens};
    my $mapped_token_ix = $self->{earleme_to_html_token_ix}->[$earleme];

    # The check must come before the arithmetic: undef + 1 evaluates
    # to 1, so testing the sum for definedness could never fail.
    die if not defined $mapped_token_ix;
    my $html_token_ix = $mapped_token_ix + 1;

    return $html_parser_tokens->[$html_token_ix]
        ->[ Marpa::R2::HTML::Internal::Token::END_OFFSET() ];

} ## end sub earleme_to_offset
| 116 |  |  |  |  |  |  |  | 
# Register one handler given in "long form".
#
#   $self                -- parser state hash; {handler_by_species} or
#                           {handler_by_element_and_class} is updated
#   $handler_description -- ref to a hash with the optional keys
#                           element, class, pseudoclass and action
#
# The action must be a CODE ref.  When a pseudoclass is given the
# handler is stored under it and element/class are ignored.  Otherwise
# it is stored under "element;class", where a false element or an
# undefined class becomes "*" and the element is lower-cased.
# Throws via Marpa::R2::exception() on a non-hash description, unknown
# keys, or a non-CODE action.  Returns 1.
sub add_handler {
    my ( $self, $handler_description ) = @_;
    my $ref_type = ref $handler_description || 'not a reference';
    Marpa::R2::exception(
        "Long form handler description should be ref to hash, but it is $ref_type"
    ) if $ref_type ne 'HASH';

    # Take the known keys out of a shallow copy, so that the caller's
    # hash is left as it was passed in.
    my %description = %{$handler_description};
    my ( $element, $class, $pseudoclass, $action ) =
        delete @description{qw(element class pseudoclass action)};

    # Anything still present is a key we do not know
    if ( my @unknown_options = keys %description ) {
        Marpa::R2::exception(
            'Unknown option(s) in Long form handler description: ',
            ( join q{ }, @unknown_options ) );
    }

    Marpa::R2::exception('Handler action must be CODE ref')
        if ref $action ne 'CODE';

    if ( defined $pseudoclass ) {
        $self->{handler_by_species}->{$pseudoclass} = $action;
        return 1;
    }

    $element = q{*} if not $element;
    $element = lc $element;
    $class //= q{*};
    $self->{handler_by_element_and_class}->{ join q{;}, $element, $class } =
        $action;
    return 1;
} ## end sub add_handler
| 147 |  |  |  |  |  |  |  | 
# Register a list of "long form" handler descriptions.
#
#   $self          -- parser state hash, passed through to add_handler()
#   $handler_specs -- ref to an array; each element is handed to
#                     add_handler() as a handler description
#
# Throws via Marpa::R2::exception() if $handler_specs is not an ARRAY
# ref.  Returns 1.
sub add_handlers_from_hashes {
    my ( $self, $handler_specs ) = @_;
    my $ref_type = ref $handler_specs || 'not a reference';
    Marpa::R2::exception(
        "handlers arg must be ref to ARRAY, it is $ref_type")
        if $ref_type ne 'ARRAY';

    # The argument was just checked to be an array ref, so walk its
    # elements; dereferencing it as a hash would die.
    for my $handler_spec ( @{$handler_specs} ) {
        add_handler( $self, $handler_spec );
    }
    return 1;
} ## end sub add_handlers_from_hashes
| 159 |  |  |  |  |  |  |  | 
# Register handlers given in "short form".
#
#   $self          -- parser state hash, passed through to add_handler()
#   $handler_specs -- ref to a hash mapping a specifier string to an action
#
# A specifier is "element.class", "element:pseudoclass" or a bare
# element name; the "." form is tried first.  A pseudoclass must be one
# of the names in the list below and may not be combined with an element.
# Violations are thrown via Marpa::R2::exception().  Returns 1.
sub add_handlers {
    my ( $self, $handler_specs ) = @_;

    state $allowed_pseudoclasses =
        { map { ( $_, 1 ) }
            qw(TOP PI DECL COMMENT PROLOG TRAILER WHITESPACE CDATA PCDATA CRUFT)
        };

    HANDLER_SPEC: for my $specifier ( keys %{$handler_specs} ) {
        my $action = $handler_specs->{$specifier};

        # Split the specifier into its parts
        my ( $element, $class, $pseudoclass );
        if ( $specifier =~ /\A ([^.]*) [.] (.*) \z/oxms ) {
            ( $element, $class ) = ( $1, $2 );
        }
        elsif ( $specifier =~ /\A ([^:]*) [:] (.*) \z/oxms ) {
            ( $element, $pseudoclass ) = ( $1, $2 );
        }
        else {
            $element = $specifier;
        }

        if ($pseudoclass) {
            Marpa::R2::exception(
                qq{pseudoclass "$pseudoclass" is not known:\n},
                "Specifier was $specifier\n" )
                if not exists $allowed_pseudoclasses->{$pseudoclass};
            Marpa::R2::exception(
                qq{pseudoclass "$pseudoclass" may not have an element specified:\n},
                "Specifier was $specifier\n"
            ) if $element;
        } ## end if ($pseudoclass)

        my %long_form = (
            element     => $element,
            class       => $class,
            pseudoclass => $pseudoclass,
            action      => $action,
        );
        add_handler( $self, \%long_form );
    } ## end HANDLER_SPEC: for my $specifier ( keys %{$handler_specs} )

    return 1;
} ## end sub add_handlers
| 198 |  |  |  |  |  |  |  | 
# If we factor this package, this will be the constructor.
#
# Build the parser state hash from the argument list.  Each argument is
# either
#   * a ref to a hash of short-form handler specifiers, which is passed
#     to add_handlers(), or
#   * a ref to a ref to a hash of options.
# The "handlers" option is passed to add_handlers_from_hashes().  Every
# other option must be in the allowed list below and is copied into the
# state hash.  If the "compile" option is present it must be a SCALAR
# ref and the configuration is built from it; otherwise a default
# configuration object is created.
# Throws via Marpa::R2::exception() on a malformed argument or an
# unknown option.  Returns the (unblessed) state hash ref.
## no critic (Subroutines::RequireArgUnpacking)
sub create {

    ## use critic
    my $self = {};
    $self->{trace_fh} = \*STDERR;
    ARG: for my $arg (@_) {
        my $ref_type = ref $arg || 'not a reference';
        if ( $ref_type eq 'HASH' ) {
            Marpa::R2::HTML::Internal::add_handlers( $self, $arg );
            next ARG;
        }
        Marpa::R2::exception(
            "Argument must be hash or refs to hash: it is $ref_type")
            if $ref_type ne 'REF';
        my $option_hash = ${$arg};
        $ref_type = ref $option_hash || 'not a reference';
        Marpa::R2::exception(
            "Argument must be hash or refs to hash: it is ref to $ref_type")
            if $ref_type ne 'HASH';
        OPTION: for my $option ( keys %{$option_hash} ) {
            if ( $option eq 'handlers' ) {
                add_handlers_from_hashes( $self, $option_hash->{$option} );

                # "handlers" is fully dealt with here.  It is not in the
                # allowed list, so falling through would reject it as
                # an unknown option.
                next OPTION;
            } ## end if ( $option eq 'handlers' )
            state $allowed_options = {
                map { ( $_, 1 ) }
                    qw(trace_fh trace_values trace_handlers
                    trace_conflicts
                    trace_terminals trace_cruft
                    dump_AHFA dump_config compile
                    )
            };
            if ( not exists $allowed_options->{$option} ) {
                Marpa::R2::exception("unknown option: $option");
            }
            $self->{$option} = $option_hash->{$option};
        } ## end OPTION: for my $option ( keys %{$option_hash} )
    } ## end ARG: for my $arg (@_)

    my $source_ref = $self->{compile};
    if ( defined $source_ref ) {
        ref $source_ref eq 'SCALAR'
            or Marpa::R2::exception(
            qq{value of "compile" option must be a SCALAR});
        $self->{config} =
            Marpa::R2::HTML::Config->new_from_compile($source_ref);
    } ## end if ( defined $source_ref )
    else {
        $self->{config} = Marpa::R2::HTML::Config->new();
    }

    return $self;
} ## end sub create
| 252 |  |  |  |  |  |  |  | 
# Choose the handler for a rule.
#
#   $self    -- parser state hash; {action_by_rule_id}, {handler_by_species},
#               {handler_by_element_and_class} and {trace_handlers} are read
#   $rule_id -- index into {action_by_rule_id}
#   $class   -- class to match; "*" when undefined
#
# An action starting with "SPE_" is looked up by the rest of its name in
# {handler_by_species}.  Any other defined action is taken to start with
# a four-character prefix ("ELE_"); the rest of the name is the element,
# and the keys "element;class", "*;class", "element;*" and "*;*" are
# tried in that order.
# Returns the handler found, or the string 'default_handler'.  When
# {trace_handlers} is true the choice is reported on STDERR.
sub handler_find {
    my ( $self, $rule_id, $class ) = @_;
    $class //= q{*};
    my $tracing = $self->{trace_handlers};
    my $action  = $self->{action_by_rule_id}->[$rule_id];
    my $handler;

    if ( not defined $action ) {

        # No action for this rule: only the default handler can apply
    }
    elsif ( index( $action, 'SPE_' ) == 0 ) {
        my $species = substr $action, 4;
        $handler = $self->{handler_by_species}->{$species};
        if ( $tracing and defined $handler ) {
            say {*STDERR}
                qq{Rule $rule_id: Found handler by species: "$species"}
                or Carp::croak("Cannot print: $ERRNO");
        }
    } ## end elsif ( index( $action, 'SPE_' ) == 0 )
    else {

        ## At this point action always is defined
        ## and starts with 'ELE_'
        my $element = substr $action, 4;

        # Candidate keys, most specific first
        my @handler_keys = ();
        for my $class_part ( $class, q{*} ) {
            for my $element_part ( $element, q{*} ) {
                push @handler_keys, join q{;}, $element_part, $class_part;
            }
        }

        ($handler) =
            grep {defined}
            @{ $self->{handler_by_element_and_class} }{@handler_keys};

        if ( $tracing and defined $handler ) {
            my ($matched_key) =
                grep { defined $self->{handler_by_element_and_class}->{$_} }
                @handler_keys;
            say {*STDERR}
                qq{Rule $rule_id: Found handler by action and class: "},
                $matched_key, q{"}
                or Carp::croak("Cannot print: $ERRNO");
        } ## end if ( $tracing and defined $handler )
    } ## end else [ if ( not defined $action ) ]

    return $handler if defined $handler;

    if ($tracing) {
        say {*STDERR} qq{Rule $rule_id: Using default handler for action "},
            ( $action // q{*} ), qq{" and class: "$class"}
            or Carp::croak("Cannot print: $ERRNO");
    }

    return 'default_handler';
} ## end sub handler_find
| 303 |  |  |  |  |  |  |  | 
# The text of the original document covered by a range of tokens,
# unchanged.
#
#   $self           -- parser state hash; {document} (a ref to the document
#                      string) and {tokens} are read
#   $first_token_ix -- index of the first token of the range
#   $last_token_ix  -- index of the last token of the range
#
# The text runs from the START_OFFSET of the first token up to the
# END_OFFSET of the last one.  The result is returned as a reference,
# because it may be very long.  When $first_token_ix is undefined the
# result is a reference to the empty string.
sub token_range_to_original {
    my ( $self, $first_token_ix, $last_token_ix ) = @_;

    if ( not defined $first_token_ix ) {
        return \q{};
    }

    my $token_list = $self->{tokens};
    my $from_offset =
        $token_list->[$first_token_ix]
        ->[ Marpa::R2::HTML::Internal::Token::START_OFFSET() ];
    my $upto_offset =
        $token_list->[$last_token_ix]
        ->[ Marpa::R2::HTML::Internal::Token::END_OFFSET() ];
    my $length = $upto_offset - $from_offset;

    my $original = substr ${ $self->{document} }, $from_offset, $length;
    return \$original;
} ## end sub token_range_to_original
| 323 |  |  |  |  |  |  |  | 
# "Original" value of a tdesc item -- that is, the corresponding
# text of the original document, unchanged.
#
#   $self       -- parser state hash, passed to token_range_to_original()
#   $tdesc_item -- ref to an array whose first element is the item type
#
# For PHYSICAL_TOKEN and VALUED_SPAN items the text is that of the
# token range START_TOKEN .. END_TOKEN of the item.  For any other type,
# or when the type is undefined, it is the empty string.
# Always returned as a reference to a string, because the text may be
# very long; callers can dereference the result on every path.
sub tdesc_item_to_original {
    my ( $self, $tdesc_item ) = @_;

    my $tdesc_item_type = $tdesc_item->[0];
    return \q{} if not defined $tdesc_item_type;

    # Only these two types are mapped back to a token range
    if (   $tdesc_item_type eq 'PHYSICAL_TOKEN'
        or $tdesc_item_type eq 'VALUED_SPAN' )
    {
        return token_range_to_original(
            $self,
            $tdesc_item->[ Marpa::R2::HTML::Internal::TDesc::START_TOKEN() ],
            $tdesc_item->[ Marpa::R2::HTML::Internal::TDesc::END_TOKEN() ],
        );
    } ## end if ( $tdesc_item_type eq 'PHYSICAL_TOKEN' or ...)

    return \q{};
} ## end sub tdesc_item_to_original
| 353 |  |  |  |  |  |  |  | 
# Given a token range and a tdesc list,
# return a reference to the literal value.
#
#   $self           -- parser state hash, passed to token_range_to_original()
#   $next_token_ix  -- index of the first token not yet accounted for
#   $final_token_ix -- accepted but not used in this routine
#   $tdesc_list     -- ref to an array of tdesc items
#
# Items without a type, ZERO_SPAN items and RUBY_SLIPPERS_TOKEN items
# are dropped, and the VALUE list of each VALUES item is spliced in.
# The remaining items are then processed in order: original text is
# emitted for tokens lying before an item, followed by the item's value
# if it is a VALUED_SPAN with a defined value, or else by the original
# text of the item's own token range.
sub range_and_values_to_literal {
    my ( $self, $next_token_ix, $final_token_ix, $tdesc_list ) = @_;

    # Pass 1: flatten the list
    my @descriptors = ();
    for my $candidate ( @{$tdesc_list} ) {
        my $kind = $candidate->[0];
        next if not defined $kind;
        next if $kind eq 'ZERO_SPAN';
        next if $kind eq 'RUBY_SLIPPERS_TOKEN';
        push @descriptors,
            $kind eq 'VALUES'
            ? @{ $candidate->[ Marpa::R2::HTML::Internal::TDesc::VALUE() ] }
            : $candidate;
    } ## end for my $candidate ( @{$tdesc_list} )

    # Pass 2: collect references to the pieces of the literal
    my @pieces = ();
    DESCRIPTOR: for my $descriptor (@descriptors) {
        my ( $kind, $span_start, $span_end ) = @{$descriptor};

        if ( not defined $span_start ) {
            ## An element can contain no HTML tokens -- it may contain
            ## only Ruby Slippers tokens.
            ## Treat this as a special case.
            if ( $kind eq 'VALUED_SPAN' ) {
                my $value =
                    $descriptor->[ Marpa::R2::HTML::Internal::TDesc::VALUE() ]
                    // q{};
                push @pieces, \( q{} . $value );
            }
            next DESCRIPTOR;
        } ## end if ( not defined $span_start )

        # Original text of the tokens between the previous item and this one
        if ( $next_token_ix < $span_start ) {
            push @pieces,
                token_range_to_original( $self, $next_token_ix,
                $span_start - 1 );
        }

        # A valued span with no value is treated like an unvalued item
        my $value;
        if ( $kind eq 'VALUED_SPAN' ) {
            $value =
                $descriptor->[ Marpa::R2::HTML::Internal::TDesc::VALUE() ];
        }

        if ( defined $value ) {
            push @pieces, \( q{} . $value );
        }
        elsif ( $span_start <= $span_end ) {
            push @pieces,
                token_range_to_original( $self, $span_start, $span_end );
        }
        $next_token_ix = $span_end + 1;
    } ## end DESCRIPTOR: for my $descriptor (@descriptors)

    return \( join q{}, map { ${$_} } @pieces );

} ## end sub range_and_values_to_literal
| 417 |  |  |  |  |  |  |  | 
# Names of the symbols of a rule, LHS first and then the RHS in order.
#
#   $self    -- parser state hash; {tracer} is read
#   $rule_id -- ID of the rule in the tracer's grammar
#
# Returns the list of names as given by the tracer's symbol_name()
# method, or nothing when the grammar reports no length for the rule.
sub symbol_names_by_rule_id {
    my ( $self, $rule_id ) = @_;
    my $tracer  = $self->{tracer};
    my $grammar = $tracer->grammar();

    my $rhs_count = $grammar->rule_length($rule_id);
    if ( not defined $rhs_count ) {
        return;
    }

    # Collect every symbol ID before any name is looked up
    my @symbol_ids = (
        $grammar->rule_lhs($rule_id),
        map { $grammar->rule_rhs( $rule_id, $_ ) } ( 0 .. $rhs_count - 1 )
    );

    my @symbol_names = ();
    for my $symbol_id (@symbol_ids) {
        push @symbol_names, $tracer->symbol_name($symbol_id);
    }
    return @symbol_names;
} ## end sub symbol_names_by_rule_id
| 429 |  |  |  |  |  |  |  | 
| 430 |  |  |  |  |  |  | sub parse { | 
| 431 | 94 |  |  | 94 | 0 | 186 | my ( $self, $document_ref ) = @_; | 
| 432 |  |  |  |  |  |  |  | 
| 433 | 94 |  |  |  |  | 166 | my %tags = (); | 
| 434 |  |  |  |  |  |  |  | 
| 435 |  |  |  |  |  |  | Marpa::R2::exception( | 
| 436 |  |  |  |  |  |  | "parse() already run on this object\n", | 
| 437 |  |  |  |  |  |  | 'For a new parse, create a new object' | 
| 438 | 94 | 50 |  |  |  | 230 | ) if $self->{document}; | 
| 439 |  |  |  |  |  |  |  | 
| 440 | 94 |  |  |  |  | 160 | my $trace_cruft     = $self->{trace_cruft}; | 
| 441 | 94 |  | 50 |  |  | 326 | my $trace_terminals = $self->{trace_terminals} // 0; | 
| 442 | 94 |  |  |  |  | 174 | my $trace_conflicts = $self->{trace_conflicts}; | 
| 443 | 94 |  |  |  |  | 136 | my $trace_handlers  = $self->{trace_handlers}; | 
| 444 | 94 |  |  |  |  | 132 | my $trace_values    = $self->{trace_values}; | 
| 445 | 94 |  |  |  |  | 148 | my $trace_fh        = $self->{trace_fh}; | 
| 446 | 94 |  |  |  |  | 202 | my $ref_type        = ref $document_ref; | 
| 447 |  |  |  |  |  |  | Marpa::R2::exception('Arg to parse() must be ref to string') | 
| 448 |  |  |  |  |  |  | if not $ref_type | 
| 449 |  |  |  |  |  |  | or $ref_type ne 'SCALAR' | 
| 450 | 94 | 50 | 33 |  |  | 364 | or not defined ${$document_ref}; | 
|  | 94 |  | 33 |  |  | 302 |  | 
| 451 |  |  |  |  |  |  |  | 
| 452 | 94 |  |  |  |  | 220 | my $document = $self->{document} = $document_ref; | 
| 453 |  |  |  |  |  |  |  | 
| 454 |  |  |  |  |  |  | my ($core_rules,   $runtime_tag, | 
| 455 |  |  |  |  |  |  | $rank_by_name, $is_empty_element, | 
| 456 |  |  |  |  |  |  | $primary_group_by_tag | 
| 457 | 94 |  |  |  |  | 288 | ) = $self->{config}->contents(); | 
| 458 | 94 |  |  |  |  | 167 | $self->{is_empty_element} = $is_empty_element; | 
| 459 | 94 | 100 |  |  |  | 212 | if ($self->{dump_config}) { | 
| 460 | 2 |  |  |  |  | 14 | return $self->{config}->as_string(); | 
| 461 |  |  |  |  |  |  | } | 
| 462 | 92 |  |  |  |  | 162 | my @action_by_rule_id = (); | 
| 463 | 92 |  |  |  |  | 179 | $self->{action_by_rule_id} = \@action_by_rule_id; | 
| 464 | 92 |  |  |  |  | 1215 | my $thin_grammar = Marpa::R2::Thin::G->new( { if => 1 } ); | 
| 465 | 92 |  |  |  |  | 453 | my $tracer = Marpa::R2::Thin::Trace->new($thin_grammar); | 
| 466 | 92 |  |  |  |  | 245 | $self->{tracer}                  = $tracer; | 
| 467 |  |  |  |  |  |  |  | 
| 468 | 92 |  |  |  |  | 152 | RULE: for my $rule ( @{$core_rules} ) { | 
|  | 92 |  |  |  |  | 179 |  | 
| 469 | 12788 |  |  |  |  | 19344 | my $lhs    = $rule->{lhs}; | 
| 470 | 12788 |  |  |  |  | 16518 | my $rhs    = $rule->{rhs}; | 
| 471 | 12788 |  |  |  |  | 15623 | my $min    = $rule->{min}; | 
| 472 | 12788 |  |  |  |  | 16031 | my $action = $rule->{action}; | 
| 473 | 12788 |  |  |  |  | 16493 | my @symbol_ids = (); | 
| 474 | 12788 |  |  |  |  | 15844 | for my $symbol_name ( $lhs, @{$rhs} ) { | 
|  | 12788 |  |  |  |  | 20749 |  | 
| 475 | 31648 |  | 100 |  |  | 58484 | push @symbol_ids, | 
| 476 |  |  |  |  |  |  | $tracer->symbol_by_name($symbol_name) | 
| 477 |  |  |  |  |  |  | // $tracer->symbol_new($symbol_name); | 
| 478 |  |  |  |  |  |  | } | 
| 479 | 12788 |  |  |  |  | 21173 | my ($lhs_id, @rhs_ids) = @symbol_ids; | 
| 480 | 12788 |  |  |  |  | 15278 | my $rule_id; | 
| 481 | 12788 | 100 |  |  |  | 19360 | if ( defined $min ) { | 
| 482 | 2116 |  |  |  |  | 7029 | $rule_id = | 
| 483 |  |  |  |  |  |  | $thin_grammar->sequence_new( $lhs_id, $rhs_ids[0], | 
| 484 |  |  |  |  |  |  | { min => $min } ); | 
| 485 |  |  |  |  |  |  | } | 
| 486 |  |  |  |  |  |  | else { | 
| 487 | 10672 |  |  |  |  | 26244 | $rule_id = $thin_grammar->rule_new( $lhs_id, \@rhs_ids ); | 
| 488 |  |  |  |  |  |  | } | 
| 489 | 12788 |  |  |  |  | 29496 | $action_by_rule_id[$rule_id] = $action; | 
| 490 |  |  |  |  |  |  | } ## end RULE: for my $rule ( @{$core_rules} ) | 
| 491 |  |  |  |  |  |  |  | 
| 492 |  |  |  |  |  |  | # Some constants that we will use a lot | 
| 493 | 92 |  |  |  |  | 221 | my $SYMID_CRUFT = $tracer->symbol_by_name('CRUFT'); | 
| 494 | 92 |  |  |  |  | 185 | my $SYMID_CDATA = $tracer->symbol_by_name('CDATA'); | 
| 495 | 92 |  |  |  |  | 208 | my $SYMID_PCDATA = $tracer->symbol_by_name('PCDATA'); | 
| 496 | 92 |  |  |  |  | 181 | my $SYMID_WHITESPACE = $tracer->symbol_by_name('WHITESPACE'); | 
| 497 | 92 |  |  |  |  | 199 | my $SYMID_PI = $tracer->symbol_by_name('PI'); | 
| 498 | 92 |  |  |  |  | 190 | my $SYMID_C = $tracer->symbol_by_name('C'); | 
| 499 | 92 |  |  |  |  | 186 | my $SYMID_D = $tracer->symbol_by_name('D'); | 
| 500 | 92 |  |  |  |  | 224 | my $SYMID_EOF = $tracer->symbol_by_name('EOF'); | 
| 501 |  |  |  |  |  |  |  | 
| 502 | 92 |  |  |  |  | 147 | my @raw_tokens = (); | 
| 503 | 92 |  |  |  |  | 865 | my $p          = HTML::Parser->new( | 
| 504 |  |  |  |  |  |  | api_version => 3, | 
| 505 |  |  |  |  |  |  | start_h     => [ | 
| 506 |  |  |  |  |  |  | \@raw_tokens, q{tagname,'S',line,column,offset,offset_end,is_cdata,attr} | 
| 507 |  |  |  |  |  |  | ], | 
| 508 |  |  |  |  |  |  | end_h => | 
| 509 |  |  |  |  |  |  | [ \@raw_tokens, q{tagname,'E',line,column,offset,offset_end,is_cdata} ], | 
| 510 |  |  |  |  |  |  | text_h => [ | 
| 511 |  |  |  |  |  |  | \@raw_tokens, | 
| 512 |  |  |  |  |  |  | qq{'$SYMID_WHITESPACE','T',line,column,offset,offset_end,is_cdata} | 
| 513 |  |  |  |  |  |  | ], | 
| 514 |  |  |  |  |  |  | comment_h => | 
| 515 |  |  |  |  |  |  | [ \@raw_tokens, qq{'$SYMID_C','C',line,column,offset,offset_end,is_cdata} ], | 
| 516 |  |  |  |  |  |  | declaration_h => | 
| 517 |  |  |  |  |  |  | [ \@raw_tokens, qq{'$SYMID_D','D',line,column,offset,offset_end,is_cdata} ], | 
| 518 |  |  |  |  |  |  | process_h => | 
| 519 |  |  |  |  |  |  | [ \@raw_tokens, qq{'$SYMID_PI','PI',line,column,offset,offset_end,is_cdata} ], | 
| 520 |  |  |  |  |  |  | unbroken_text => 1 | 
| 521 |  |  |  |  |  |  | ); | 
| 522 |  |  |  |  |  |  |  | 
| 523 | 92 |  |  |  |  | 9497 | $p->parse( ${$document} ); | 
|  | 92 |  |  |  |  | 2806 |  | 
| 524 | 92 |  |  |  |  | 478 | $p->eof; | 
| 525 |  |  |  |  |  |  |  | 
| 526 | 92 |  |  |  |  | 171 | my @html_parser_tokens = (); | 
| 527 |  |  |  |  |  |  | HTML_PARSER_TOKEN: | 
| 528 | 92 |  |  |  |  | 198 | for my $raw_token (@raw_tokens) { | 
| 529 |  |  |  |  |  |  | my ( undef, $token_type, $line, $column, $offset, $offset_end, $is_cdata, $attr ) = | 
| 530 | 1379 |  |  |  |  | 1565 | @{$raw_token}; | 
|  | 1379 |  |  |  |  | 2634 |  | 
| 531 |  |  |  |  |  |  |  | 
| 532 |  |  |  |  |  |  | PROCESS_TOKEN_TYPE: { | 
| 533 | 1379 | 50 |  |  |  | 1676 | if ($is_cdata) { | 
|  | 1379 |  |  |  |  | 2147 |  | 
| 534 | 0 |  |  |  |  | 0 | $raw_token->[Marpa::R2::HTML::Internal::Token::TOKEN_ID] = | 
| 535 |  |  |  |  |  |  | $SYMID_CDATA; | 
| 536 | 0 |  |  |  |  | 0 | last PROCESS_TOKEN_TYPE; | 
| 537 |  |  |  |  |  |  | } | 
| 538 | 1379 | 100 |  |  |  | 2212 | if ( $token_type eq 'T' ) { | 
| 539 |  |  |  |  |  |  |  | 
| 540 |  |  |  |  |  |  | # White space as defined in HTML 4.01 | 
| 541 |  |  |  |  |  |  | # space (x20); ASCII tab (x09); ASCII form feed (x0C); Zero-width space (x200B) | 
| 542 |  |  |  |  |  |  | # and the two characters which appear in line breaks: | 
| 543 |  |  |  |  |  |  | # carriage return (x0D) and line feed (x0A) | 
| 544 |  |  |  |  |  |  | # I avoid the Perl character codes because I do NOT want | 
| 545 |  |  |  |  |  |  | # localization | 
| 546 |  |  |  |  |  |  | $raw_token->[Marpa::R2::HTML::Internal::Token::TOKEN_ID] = | 
| 547 |  |  |  |  |  |  | $SYMID_PCDATA if | 
| 548 |  |  |  |  |  |  | substr( | 
| 549 | 635 | 100 |  |  |  | 712 | ${$document}, $offset, ( $offset_end - $offset ) | 
|  | 635 |  |  |  |  | 2378 |  | 
| 550 |  |  |  |  |  |  | ) =~ / [^\x09\x0A\x0C\x0D\x20\x{200B}] /oxms; | 
| 551 |  |  |  |  |  |  |  | 
| 552 | 635 |  |  |  |  | 980 | last PROCESS_TOKEN_TYPE; | 
| 553 |  |  |  |  |  |  | } ## end if ( $token_type eq 'T' ) | 
| 554 | 744 | 100 | 100 |  |  | 1794 | if ( $token_type eq 'E' or $token_type eq 'S' ) { | 
| 555 |  |  |  |  |  |  |  | 
| 556 |  |  |  |  |  |  | # If it's a virtual token from HTML::Parser, | 
| 557 |  |  |  |  |  |  | # pretend it never existed. | 
| 558 |  |  |  |  |  |  | # HTML::Parser supplies missing | 
| 559 |  |  |  |  |  |  | # end tags for title elements, but for no | 
| 560 |  |  |  |  |  |  | # others. | 
| 561 |  |  |  |  |  |  | # This is not helpful and we need to special-case | 
| 562 |  |  |  |  |  |  | # these zero-length tags and throw them away. | 
| 563 | 739 | 100 |  |  |  | 1150 | next HTML_PARSER_TOKEN if $offset_end <= $offset; | 
| 564 |  |  |  |  |  |  |  | 
| 565 | 737 |  |  |  |  | 973 | my $tag_name = $raw_token | 
| 566 |  |  |  |  |  |  | ->[Marpa::R2::HTML::Internal::Token::TAG_NAME]; | 
| 567 | 737 |  |  |  |  | 1118 | my $terminal    = $token_type . q{_} . $tag_name; | 
| 568 | 737 |  |  |  |  | 1440 | my $terminal_id = $tracer->symbol_by_name($terminal); | 
| 569 | 737 | 100 |  |  |  | 1391 | if ( not defined $terminal_id ) { | 
| 570 | 25 |  | 50 |  |  | 89 | my $group_symbol = $primary_group_by_tag->{$tag_name} | 
| 571 |  |  |  |  |  |  | // 'GRP_anywhere'; | 
| 572 | 25 |  | 50 |  |  | 78 | my $contents = $runtime_tag->{$tag_name} // 'FLO_mixed'; | 
| 573 | 25 |  |  |  |  | 106 | my @symbol_names = ( | 
| 574 |  |  |  |  |  |  | $group_symbol, | 
| 575 |  |  |  |  |  |  | 'ELE_' . $tag_name, | 
| 576 |  |  |  |  |  |  | 'S_' . $tag_name, | 
| 577 |  |  |  |  |  |  | $contents, 'E_' . $tag_name | 
| 578 |  |  |  |  |  |  | ); | 
| 579 | 25 |  |  |  |  | 39 | my @symbol_ids = (); | 
| 580 | 25 |  |  |  |  | 55 | SYMBOL: for my $symbol_name (@symbol_names) { | 
| 581 | 125 |  |  |  |  | 234 | my $symbol_id = $tracer->symbol_by_name($symbol_name); | 
| 582 | 125 | 100 |  |  |  | 250 | if ( not defined $symbol_id ) { | 
| 583 | 75 |  |  |  |  | 151 | $symbol_id = $tracer->symbol_new($symbol_name); | 
| 584 |  |  |  |  |  |  | } | 
| 585 | 125 |  |  |  |  | 276 | push @symbol_ids, $symbol_id; | 
| 586 |  |  |  |  |  |  | } ## end SYMBOL: for my $symbol_name (@symbol_names) | 
| 587 | 25 |  |  |  |  | 90 | my ( $top_id, $lhs_id, @rhs_ids ) = @symbol_ids; | 
| 588 | 25 |  |  |  |  | 162 | $thin_grammar->rule_new( $top_id, [$lhs_id] ); | 
| 589 | 25 |  |  |  |  | 96 | my $element_rule_id = | 
| 590 |  |  |  |  |  |  | $thin_grammar->rule_new( $lhs_id, \@rhs_ids ); | 
| 591 | 25 |  |  |  |  | 70 | $action_by_rule_id[$element_rule_id] = 'ELE_' . $tag_name; | 
| 592 | 25 |  |  |  |  | 67 | $terminal_id = $tracer->symbol_by_name($terminal); | 
| 593 |  |  |  |  |  |  |  | 
| 594 |  |  |  |  |  |  | } ## end if ( not defined $terminal_id ) | 
| 595 | 737 |  |  |  |  | 1107 | $raw_token->[Marpa::R2::HTML::Internal::Token::TOKEN_ID] = | 
| 596 |  |  |  |  |  |  | $terminal_id; | 
| 597 | 737 |  |  |  |  | 1020 | last PROCESS_TOKEN_TYPE; | 
| 598 |  |  |  |  |  |  | } ## end if ( $token_type eq 'E' or $token_type eq 'S' ) | 
| 599 |  |  |  |  |  |  | } ## end PROCESS_TOKEN_TYPE: | 
| 600 | 1377 |  |  |  |  | 2304 | push @html_parser_tokens, $raw_token; | 
| 601 |  |  |  |  |  |  | } ## end HTML_PARSER_TOKEN: for my $raw_token (@raw_tokens) | 
| 602 |  |  |  |  |  |  |  | 
| 603 |  |  |  |  |  |  | # Points AFTER the last HTML | 
| 604 |  |  |  |  |  |  | # Parser token. | 
| 605 |  |  |  |  |  |  | # The other logic needs to be ready for this. | 
| 606 |  |  |  |  |  |  | { | 
| 607 | 92 |  |  |  |  | 147 | my $document_length = length ${$document}; | 
|  | 92 |  |  |  |  | 135 |  | 
|  | 92 |  |  |  |  | 163 |  | 
| 608 | 92 |  |  |  |  | 169 | my $last_token      = $html_parser_tokens[-1]; | 
| 609 |  |  |  |  |  |  | push @html_parser_tokens, | 
| 610 |  |  |  |  |  |  | [ | 
| 611 |  |  |  |  |  |  | $SYMID_EOF, 'EOF', | 
| 612 | 92 |  |  |  |  | 160 | @{$last_token}[ | 
|  | 92 |  |  |  |  | 276 |  | 
| 613 |  |  |  |  |  |  | Marpa::R2::HTML::Internal::Token::LINE, | 
| 614 |  |  |  |  |  |  | Marpa::R2::HTML::Internal::Token::COLUMN | 
| 615 |  |  |  |  |  |  | ], | 
| 616 |  |  |  |  |  |  | $document_length, | 
| 617 |  |  |  |  |  |  | $document_length | 
| 618 |  |  |  |  |  |  | ]; | 
| 619 |  |  |  |  |  |  | } | 
| 620 |  |  |  |  |  |  |  | 
| 621 |  |  |  |  |  |  | # conserve memory | 
| 622 | 92 |  |  |  |  | 483 | $p          = undef; | 
| 623 | 92 |  |  |  |  | 223 | @raw_tokens = (); | 
| 624 |  |  |  |  |  |  |  | 
| 625 | 92 |  |  |  |  | 247 | $thin_grammar->start_symbol_set( $tracer->symbol_by_name('document') ); | 
| 626 | 92 |  |  |  |  | 110941 | $thin_grammar->precompute(); | 
| 627 |  |  |  |  |  |  |  | 
| 628 | 92 | 50 |  |  |  | 497 | if ($self->{dump_AHFA}) { | 
| 629 | 0 |  |  |  |  | 0 | return \$tracer->show_AHFA(); | 
| 630 |  |  |  |  |  |  | } | 
| 631 |  |  |  |  |  |  |  | 
| 632 |  |  |  |  |  |  | # Memoize these -- we use highest symbol a lot | 
| 633 | 92 |  |  |  |  | 363 | my $highest_symbol_id = $thin_grammar->highest_symbol_id(); | 
| 634 | 92 |  |  |  |  | 242 | my $highest_rule_id = $thin_grammar->highest_rule_id(); | 
| 635 |  |  |  |  |  |  |  | 
| 636 |  |  |  |  |  |  | # For the Ruby Slippers engine | 
| 637 |  |  |  |  |  |  | # We need to know quickly if a symbol is a start tag; | 
| 638 | 92 |  |  |  |  | 175 | my @is_start_tag = (); | 
| 639 |  |  |  |  |  |  |  | 
| 640 |  |  |  |  |  |  | # Find Ruby slippers ranks, by symbol ID | 
| 641 | 92 |  |  |  |  | 159 | my @ruby_rank_by_id = (); | 
| 642 |  |  |  |  |  |  | { | 
| 643 | 92 |  |  |  |  | 126 | my @non_final_end_tag_ids = (); | 
|  | 92 |  |  |  |  | 163 |  | 
| 644 |  |  |  |  |  |  | SYMBOL: | 
| 645 | 92 |  |  |  |  | 250 | for my $symbol_id ( 0 .. $highest_symbol_id ) { | 
| 646 | 14519 |  |  |  |  | 26486 | my $symbol_name = $tracer->symbol_name($symbol_id); | 
| 647 | 14519 | 100 |  |  |  | 29332 | next SYMBOL if not 0 == index $symbol_name, 'E_'; | 
| 648 |  |  |  |  |  |  | next SYMBOL | 
| 649 | 2785 | 100 | 100 |  |  | 7880 | if $symbol_name eq 'E_body' | 
| 650 |  |  |  |  |  |  | or $symbol_name eq 'E_html'; | 
| 651 | 2601 |  |  |  |  | 4093 | push @non_final_end_tag_ids, $symbol_id; | 
| 652 |  |  |  |  |  |  | } ## end SYMBOL: for my $symbol_id ( 0 .. $highest_symbol_id ) | 
| 653 |  |  |  |  |  |  |  | 
| 654 | 92 |  |  |  |  | 186 | my %ruby_vectors = (); | 
| 655 | 92 |  |  |  |  | 126 | for my $rejected_symbol_name ( keys %{$rank_by_name} ) { | 
|  | 92 |  |  |  |  | 558 |  | 
| 656 | 2760 |  |  |  |  | 18240 | my @ruby_vector_by_id = ( (0) x ( $highest_symbol_id + 1 ) ); | 
| 657 |  |  |  |  |  |  | my $rank_by_candidate_name = | 
| 658 | 2760 |  |  |  |  | 4006 | $rank_by_name->{$rejected_symbol_name}; | 
| 659 |  |  |  |  |  |  | CANDIDATE: | 
| 660 | 2760 |  |  |  |  | 3237 | for my $candidate_name ( keys %{$rank_by_candidate_name} ) { | 
|  | 2760 |  |  |  |  | 6157 |  | 
| 661 | 14076 |  |  |  |  | 18353 | my $rank = $rank_by_candidate_name->{$candidate_name}; | 
| 662 | 14076 | 100 |  |  |  | 23242 | if ( $candidate_name eq '*>' ) { | 
| 663 | 2760 |  |  |  |  | 15515 | $ruby_vector_by_id[$_] = $rank for @non_final_end_tag_ids; | 
| 664 | 2760 |  |  |  |  | 4164 | next CANDIDATE; | 
| 665 |  |  |  |  |  |  | } | 
| 666 | 11316 |  |  |  |  | 19600 | my $candidate_id = $tracer->symbol_by_name($candidate_name); | 
| 667 | 11316 | 50 |  |  |  | 19225 | die "Unknown ruby slippers candidate name: $candidate_name" | 
| 668 |  |  |  |  |  |  | if not defined $candidate_id; | 
| 669 |  |  |  |  |  |  | $ruby_vector_by_id[$candidate_id] = $rank | 
| 670 | 11316 |  |  |  |  | 66425 | for @non_final_end_tag_ids; | 
| 671 |  |  |  |  |  |  | } ## end CANDIDATE: for my $candidate_name ( keys %{...}) | 
| 672 | 2760 |  |  |  |  | 6173 | $ruby_vectors{$rejected_symbol_name} = \@ruby_vector_by_id; | 
| 673 |  |  |  |  |  |  | } ## end for my $rejected_symbol_name ( keys %{$rank_by_name} ) | 
| 674 |  |  |  |  |  |  |  | 
| 675 | 92 |  |  |  |  | 767 | my @no_ruby_slippers_vector = ( (0) x ( $highest_symbol_id + 1 ) ); | 
| 676 | 92 |  |  |  |  | 222 | SYMBOL: for my $rejected_symbol_id ( 0 .. $highest_symbol_id ) { | 
| 677 | 14519 | 100 |  |  |  | 29452 | if ( not $thin_grammar->symbol_is_terminal($rejected_symbol_id) ) | 
| 678 |  |  |  |  |  |  | { | 
| 679 | 8213 |  |  |  |  | 10905 | $ruby_rank_by_id[$rejected_symbol_id] = | 
| 680 |  |  |  |  |  |  | \@no_ruby_slippers_vector; | 
| 681 | 8213 |  |  |  |  | 11832 | next SYMBOL; | 
| 682 |  |  |  |  |  |  | } ## end if ( not $thin_grammar->symbol_is_terminal(...)) | 
| 683 | 6306 |  |  |  |  | 11643 | my $rejected_symbol_name = | 
| 684 |  |  |  |  |  |  | $tracer->symbol_name($rejected_symbol_id); | 
| 685 | 6306 |  |  |  |  | 7986 | my $placement; | 
| 686 |  |  |  |  |  |  | FIND_PLACEMENT: { | 
| 687 | 6306 |  |  |  |  | 7503 | my $prefix = substr $rejected_symbol_name, 0, 2; | 
|  | 6306 |  |  |  |  | 9353 |  | 
| 688 | 6306 | 100 |  |  |  | 10621 | if ( $prefix eq 'S_' ) { | 
| 689 | 2785 |  |  |  |  | 3378 | $placement = ''; | 
| 690 | 2785 |  |  |  |  | 3477 | $is_start_tag[$rejected_symbol_id] = 1; | 
| 691 | 2785 |  |  |  |  | 3748 | last FIND_PLACEMENT; | 
| 692 |  |  |  |  |  |  | } | 
| 693 | 3521 | 100 |  |  |  | 5912 | if ( $prefix eq 'E_' ) { | 
| 694 | 2785 |  |  |  |  | 3622 | $placement = '/'; | 
| 695 |  |  |  |  |  |  | } | 
| 696 |  |  |  |  |  |  | } ## end FIND_PLACEMENT: | 
| 697 | 6306 |  |  |  |  | 9052 | my $ruby_vector = $ruby_vectors{$rejected_symbol_name}; | 
| 698 | 6306 | 100 |  |  |  | 9931 | if ( defined $ruby_vector ) { | 
| 699 | 2300 |  |  |  |  | 3154 | $ruby_rank_by_id[$rejected_symbol_id] = $ruby_vector; | 
| 700 | 2300 |  |  |  |  | 3651 | next SYMBOL; | 
| 701 |  |  |  |  |  |  | } | 
| 702 | 4006 | 100 |  |  |  | 6514 | if ( not defined $placement ) { | 
| 703 | 460 | 100 |  |  |  | 796 | if ( $rejected_symbol_name eq 'CRUFT' ) { | 
| 704 | 92 |  |  |  |  | 156 | $ruby_rank_by_id[$rejected_symbol_id] = | 
| 705 |  |  |  |  |  |  | \@no_ruby_slippers_vector; | 
| 706 | 92 |  |  |  |  | 163 | next SYMBOL; | 
| 707 |  |  |  |  |  |  | } | 
| 708 |  |  |  |  |  |  | $ruby_rank_by_id[$rejected_symbol_id] = | 
| 709 | 368 |  | 50 |  |  | 1122 | $ruby_vectors{'!non_element'} | 
| 710 |  |  |  |  |  |  | // \@no_ruby_slippers_vector; | 
| 711 | 368 |  |  |  |  | 1383 | next SYMBOL; | 
| 712 |  |  |  |  |  |  | } ## end if ( not defined $placement ) | 
| 713 | 3546 |  |  |  |  | 5006 | my $tag = substr $rejected_symbol_name, 2; | 
| 714 | 3546 |  |  |  |  | 5203 | my $primary_group = $primary_group_by_tag->{$tag}; | 
| 715 | 3546 | 100 |  |  |  | 5814 | my $element_type = defined $primary_group ? (substr $primary_group, 4) : 'anywhere'; | 
| 716 |  |  |  |  |  |  | $ruby_vector = | 
| 717 | 3546 |  |  |  |  | 6245 | $ruby_vectors{ q{<} . $placement . q{%} . $element_type . q{>} }; | 
| 718 | 3546 | 100 |  |  |  | 5718 | if ( defined $ruby_vector ) { | 
| 719 | 295 |  |  |  |  | 410 | $ruby_rank_by_id[$rejected_symbol_id] = $ruby_vector; | 
| 720 | 295 |  |  |  |  | 514 | next SYMBOL; | 
| 721 |  |  |  |  |  |  | } | 
| 722 | 3251 |  |  |  |  | 4703 | $ruby_vector = $ruby_vectors{ q{<} . $placement . q{*>} }; | 
| 723 | 3251 | 50 |  |  |  | 5287 | if ( defined $ruby_vector ) { | 
| 724 | 3251 |  |  |  |  | 4099 | $ruby_rank_by_id[$rejected_symbol_id] = $ruby_vector; | 
| 725 | 3251 |  |  |  |  | 5696 | next SYMBOL; | 
| 726 |  |  |  |  |  |  | } | 
| 727 | 0 |  |  |  |  | 0 | $ruby_rank_by_id[$rejected_symbol_id] = \@no_ruby_slippers_vector; | 
| 728 |  |  |  |  |  |  | } ## end SYMBOL: for my $rejected_symbol_id ( 0 .. $highest_symbol_id ) | 
| 729 |  |  |  |  |  |  |  | 
| 730 |  |  |  |  |  |  | } | 
| 731 |  |  |  |  |  |  |  | 
| 732 | 92 |  |  |  |  | 179 | my @empty_element_end_tag = (); | 
| 733 |  |  |  |  |  |  | { | 
| 734 | 92 |  |  |  |  | 136 | TAG: for my $tag (keys %{$is_empty_element}) { | 
|  | 92 |  |  |  |  | 148 |  | 
|  | 92 |  |  |  |  | 421 |  | 
| 735 | 1104 |  |  |  |  | 2682 | my $start_tag_id = $tracer->symbol_by_name('S_' . $tag); | 
| 736 | 1104 | 100 |  |  |  | 2412 | next TAG if not defined $start_tag_id; | 
| 737 | 284 |  |  |  |  | 645 | my $end_tag_id = $tracer->symbol_by_name('E_' . $tag); | 
| 738 | 284 |  |  |  |  | 531 | $empty_element_end_tag[$start_tag_id] = $end_tag_id; | 
| 739 |  |  |  |  |  |  | } | 
| 740 |  |  |  |  |  |  | } | 
| 741 |  |  |  |  |  |  |  | 
| 742 | 92 |  |  |  |  | 1098 | my $recce = Marpa::R2::Thin::R->new($thin_grammar); | 
| 743 | 92 |  |  |  |  | 1409 | $recce->start_input(); | 
| 744 |  |  |  |  |  |  |  | 
| 745 | 92 |  |  |  |  | 254 | $self->{recce}                    = $recce; | 
| 746 | 92 |  |  |  |  | 196 | $self->{tokens}                   = \@html_parser_tokens; | 
| 747 | 92 |  |  |  |  | 229 | $self->{earleme_to_html_token_ix} = [-1]; | 
| 748 |  |  |  |  |  |  |  | 
| 749 |  |  |  |  |  |  | # These variables track virtual start tokens as | 
| 750 |  |  |  |  |  |  | # a protection against infinite loops. | 
| 751 | 92 |  |  |  |  | 170 | my %start_virtuals_used           = (); | 
| 752 | 92 |  |  |  |  | 128 | my $earleme_of_last_start_virtual = -1; | 
| 753 |  |  |  |  |  |  |  | 
| 754 |  |  |  |  |  |  | # first token is a dummy, so that ix is never 0 | 
| 755 |  |  |  |  |  |  | # this is done because 0 has a special meaning as a Libmarpa | 
| 756 |  |  |  |  |  |  | # token value | 
| 757 | 92 |  |  |  |  | 142 | my $latest_html_token = -1; | 
| 758 | 92 |  |  |  |  | 123 | my $token_number      = 0; | 
| 759 | 92 |  |  |  |  | 137 | my $token_count       = scalar @html_parser_tokens; | 
| 760 |  |  |  |  |  |  |  | 
| 761 |  |  |  |  |  |  | # this array tracks the last token number (location) at which | 
| 762 |  |  |  |  |  |  | # the symbol with this number was last read.  It's used | 
| 763 |  |  |  |  |  |  | # to prevent the same Ruby Slippers token being added | 
| 764 |  |  |  |  |  |  | # at the same location more than once. | 
| 765 |  |  |  |  |  |  | # If allowed, this could cause an infinite loop. | 
| 766 |  |  |  |  |  |  | # Note that only start tags are tracked -- the rest of the | 
| 767 |  |  |  |  |  |  | # array stays at -1. | 
| 768 | 92 |  |  |  |  | 584 | my @terminal_last_seen = ( (-1) x ( $highest_symbol_id + 1 ) ); | 
| 769 |  |  |  |  |  |  |  | 
| 770 | 92 |  |  |  |  | 366 | $thin_grammar->throw_set(0); | 
| 771 | 92 |  |  |  |  | 143 | my $empty_element_end_tag; | 
| 772 | 92 |  |  |  |  | 282 | RECCE_RESPONSE: while ( $token_number < $token_count ) { | 
| 773 |  |  |  |  |  |  |  | 
| 774 | 2232 | 100 |  |  |  | 3518 | if ( defined $empty_element_end_tag ) { | 
| 775 | 8 |  |  |  |  | 25 | my $read_result = | 
| 776 |  |  |  |  |  |  | $recce->alternative( $empty_element_end_tag, RUBY_SLIPPERS_TOKEN, | 
| 777 |  |  |  |  |  |  | 1 ); | 
| 778 | 8 | 50 |  |  |  | 19 | if ( $read_result != $NO_MARPA_ERROR ) { | 
| 779 | 0 |  |  |  |  | 0 | die $thin_grammar->error(); | 
| 780 |  |  |  |  |  |  | } | 
| 781 | 8 | 50 |  |  |  | 18 | if ($trace_terminals) { | 
| 782 | 0 | 0 |  |  |  | 0 | say {$trace_fh} 'Virtual end tag accepted: ', | 
|  | 0 |  |  |  |  | 0 |  | 
| 783 |  |  |  |  |  |  | $tracer->symbol_name($empty_element_end_tag) | 
| 784 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 785 |  |  |  |  |  |  | } | 
| 786 | 8 | 50 |  |  |  | 102 | if ( $recce->earleme_complete() < 0 ) { | 
| 787 | 0 |  |  |  |  | 0 | die $thin_grammar->error(); | 
| 788 |  |  |  |  |  |  | } | 
| 789 | 8 |  |  |  |  | 24 | my $current_earleme = $recce->current_earleme(); | 
| 790 | 8 | 50 |  |  |  | 18 | die $thin_grammar->error() if not defined $current_earleme; | 
| 791 | 8 |  |  |  |  | 16 | $self->{earleme_to_html_token_ix}->[$current_earleme] = | 
| 792 |  |  |  |  |  |  | $latest_html_token; | 
| 793 | 8 |  |  |  |  | 10 | $empty_element_end_tag = undef; | 
| 794 | 8 |  |  |  |  | 18 | next RECCE_RESPONSE; | 
| 795 |  |  |  |  |  |  | } ## end if ( defined $empty_element_end_tag ) | 
| 796 |  |  |  |  |  |  |  | 
| 797 | 2224 |  |  |  |  | 2892 | my $token = $html_parser_tokens[$token_number]; | 
| 798 |  |  |  |  |  |  |  | 
| 799 | 2224 |  |  |  |  | 3009 | my $attempted_symbol_id = $token | 
| 800 |  |  |  |  |  |  | ->[Marpa::R2::HTML::Internal::Token::TOKEN_ID]; | 
| 801 | 2224 |  |  |  |  | 4675 | my $read_result = | 
| 802 |  |  |  |  |  |  | $recce->alternative( $attempted_symbol_id, PHYSICAL_TOKEN, 1 ); | 
| 803 | 2224 | 100 |  |  |  | 3985 | if ( $read_result != $UNEXPECTED_TOKEN_ID ) { | 
| 804 | 1469 | 50 |  |  |  | 2413 | if ( $read_result != $NO_MARPA_ERROR ) { | 
| 805 | 0 |  |  |  |  | 0 | die $thin_grammar->error(); | 
| 806 |  |  |  |  |  |  | } | 
| 807 | 1469 | 50 |  |  |  | 2199 | if ($trace_terminals) { | 
| 808 | 0 | 0 |  |  |  | 0 | say {$trace_fh} 'Token accepted: ', | 
|  | 0 |  |  |  |  | 0 |  | 
| 809 |  |  |  |  |  |  | $tracer->symbol_name($attempted_symbol_id) | 
| 810 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 811 |  |  |  |  |  |  | } | 
| 812 | 1469 | 50 |  |  |  | 26233 | if ( $recce->earleme_complete() < 0 ) { | 
| 813 | 0 |  |  |  |  | 0 | die $thin_grammar->error(); | 
| 814 |  |  |  |  |  |  | } | 
| 815 |  |  |  |  |  |  |  | 
| 816 | 1469 |  |  |  |  | 2968 | my $last_html_token_of_marpa_token = $token_number; | 
| 817 | 1469 |  |  |  |  | 1820 | $token_number++; | 
| 818 | 1469 | 50 |  |  |  | 2684 | if ( defined $last_html_token_of_marpa_token ) { | 
| 819 | 1469 |  |  |  |  | 1879 | $latest_html_token = $last_html_token_of_marpa_token; | 
| 820 |  |  |  |  |  |  | } | 
| 821 | 1469 |  |  |  |  | 2898 | my $current_earleme = $recce->current_earleme(); | 
| 822 | 1469 | 50 |  |  |  | 2455 | die $thin_grammar->error() if not defined $current_earleme; | 
| 823 | 1469 |  |  |  |  | 2586 | $self->{earleme_to_html_token_ix}->[$current_earleme] = | 
| 824 |  |  |  |  |  |  | $latest_html_token; | 
| 825 |  |  |  |  |  |  |  | 
| 826 | 1469 |  |  |  |  | 1948 | $empty_element_end_tag = $empty_element_end_tag[$attempted_symbol_id]; | 
| 827 | 1469 |  |  |  |  | 3260 | next RECCE_RESPONSE; | 
| 828 |  |  |  |  |  |  | } ## end if ( $read_result != $UNEXPECTED_TOKEN_ID ) | 
| 829 |  |  |  |  |  |  |  | 
| 830 | 755 | 50 |  |  |  | 1195 | if ($trace_terminals) { | 
| 831 | 0 | 0 |  |  |  | 0 | say {$trace_fh} 'Literal Token not accepted: ', | 
|  | 0 |  |  |  |  | 0 |  | 
| 832 |  |  |  |  |  |  | $tracer->symbol_name($attempted_symbol_id) | 
| 833 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 834 |  |  |  |  |  |  | } | 
| 835 |  |  |  |  |  |  |  | 
| 836 | 755 |  |  |  |  | 1028 | my $highest_candidate_rank = 0; | 
| 837 | 755 |  |  |  |  | 956 | my $virtual_terminal_to_add; | 
| 838 | 755 |  |  |  |  | 992 | my $ruby_vector        = $ruby_rank_by_id[$attempted_symbol_id]; | 
| 839 | 755 |  |  |  |  | 2850 | my @terminals_expected = $recce->terminals_expected(); | 
| 840 | 755 | 50 |  |  |  | 1436 | die $thin_grammar->error() if not defined $terminals_expected[0]; | 
| 841 | 755 |  |  |  |  | 1306 | CANDIDATE: for my $candidate_id (@terminals_expected) { | 
| 842 | 6736 |  |  |  |  | 8199 | my $this_candidate_rank = $ruby_vector->[$candidate_id]; | 
| 843 | 6736 | 50 |  |  |  | 10202 | if ($trace_terminals) { | 
| 844 | 0 | 0 |  |  |  | 0 | say {$trace_fh} 'Considering candidate: ', | 
|  | 0 |  |  |  |  | 0 |  | 
| 845 |  |  |  |  |  |  | $tracer->symbol_name($candidate_id), | 
| 846 |  |  |  |  |  |  | "; rank is $this_candidate_rank; highest rank so far is $highest_candidate_rank" | 
| 847 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 848 |  |  |  |  |  |  | } ## end if ($trace_terminals) | 
| 849 | 6736 | 100 |  |  |  | 11138 | if ( $this_candidate_rank > $highest_candidate_rank ) { | 
| 850 | 792 | 50 |  |  |  | 1270 | if ($trace_terminals) { | 
| 851 | 0 | 0 |  |  |  | 0 | say {$trace_fh} 'Considering candidate: ', | 
|  | 0 |  |  |  |  | 0 |  | 
| 852 |  |  |  |  |  |  | $tracer->symbol_name($candidate_id), | 
| 853 |  |  |  |  |  |  | '; last seen at ', $terminal_last_seen[$candidate_id], | 
| 854 |  |  |  |  |  |  | "; current token number is $token_number" | 
| 855 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 856 |  |  |  |  |  |  | } ## end if ($trace_terminals) | 
| 857 |  |  |  |  |  |  | next CANDIDATE | 
| 858 | 792 | 50 |  |  |  | 1301 | if $terminal_last_seen[$candidate_id] == $token_number; | 
| 859 | 792 | 50 |  |  |  | 1272 | if ($trace_terminals) { | 
| 860 | 0 | 0 |  |  |  | 0 | say {$trace_fh} 'Current best candidate: ', | 
|  | 0 |  |  |  |  | 0 |  | 
| 861 |  |  |  |  |  |  | $tracer->symbol_name($candidate_id), | 
| 862 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 863 |  |  |  |  |  |  | } | 
| 864 | 792 |  |  |  |  | 969 | $highest_candidate_rank  = $this_candidate_rank; | 
| 865 | 792 |  |  |  |  | 1265 | $virtual_terminal_to_add = $candidate_id; | 
| 866 |  |  |  |  |  |  | } ## end if ( $this_candidate_rank > $highest_candidate_rank ) | 
| 867 |  |  |  |  |  |  | } ## end CANDIDATE: for my $candidate_id (@terminals_expected) | 
| 868 |  |  |  |  |  |  |  | 
| 869 | 755 | 100 |  |  |  | 1264 | if ( defined $virtual_terminal_to_add ) { | 
| 870 |  |  |  |  |  |  |  | 
| 871 | 752 | 50 |  |  |  | 1188 | if ($trace_terminals) { | 
| 872 | 0 | 0 |  |  |  | 0 | say {$trace_fh} 'Adding Ruby Slippers token: ', | 
|  | 0 |  |  |  |  | 0 |  | 
| 873 |  |  |  |  |  |  | $tracer->symbol_name($virtual_terminal_to_add), | 
| 874 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 875 |  |  |  |  |  |  | } | 
| 876 |  |  |  |  |  |  |  | 
| 877 | 752 |  |  |  |  | 1645 | my $ruby_slippers_result = | 
| 878 |  |  |  |  |  |  | $recce->alternative( $virtual_terminal_to_add, | 
| 879 |  |  |  |  |  |  | RUBY_SLIPPERS_TOKEN, 1 ); | 
| 880 | 752 | 50 |  |  |  | 1356 | if ( $ruby_slippers_result != $NO_MARPA_ERROR ) { | 
| 881 | 0 |  |  |  |  | 0 | die $thin_grammar->error(); | 
| 882 |  |  |  |  |  |  | } | 
| 883 | 752 | 50 |  |  |  | 8178 | if ( $recce->earleme_complete() < 0 ) { | 
| 884 | 0 |  |  |  |  | 0 | die $thin_grammar->error(); | 
| 885 |  |  |  |  |  |  | } | 
| 886 |  |  |  |  |  |  |  | 
| 887 |  |  |  |  |  |  | # Only keep track of start tags.  We need to be able to add end | 
| 888 |  |  |  |  |  |  | # tags repeatedly. | 
| 889 |  |  |  |  |  |  | # Adding end tags cannot cause an infinite loop, because each | 
| 890 |  |  |  |  |  |  | # one ends an element and only a finite number of elements | 
| 891 |  |  |  |  |  |  | # can have been started. | 
| 892 | 752 | 100 |  |  |  | 1684 | $terminal_last_seen[$virtual_terminal_to_add] = $token_number | 
| 893 |  |  |  |  |  |  | if $is_start_tag[$virtual_terminal_to_add]; | 
| 894 |  |  |  |  |  |  |  | 
| 895 | 752 |  |  |  |  | 1505 | my $current_earleme = $recce->current_earleme(); | 
| 896 | 752 | 50 |  |  |  | 1353 | die $thin_grammar->error() if not defined $current_earleme; | 
| 897 | 752 |  |  |  |  | 1373 | $self->{earleme_to_html_token_ix}->[$current_earleme] = | 
| 898 |  |  |  |  |  |  | $latest_html_token; | 
| 899 |  |  |  |  |  |  |  | 
| 900 | 752 |  |  |  |  | 990 | $empty_element_end_tag = $empty_element_end_tag[$virtual_terminal_to_add]; | 
| 901 |  |  |  |  |  |  |  | 
| 902 | 752 |  |  |  |  | 1864 | next RECCE_RESPONSE; | 
| 903 |  |  |  |  |  |  | } ## end if ( defined $virtual_terminal_to_add ) | 
| 904 |  |  |  |  |  |  |  | 
| 905 |  |  |  |  |  |  | # If we didn't find a token to add, add the | 
| 906 |  |  |  |  |  |  | # current physical token as CRUFT. | 
| 907 |  |  |  |  |  |  |  | 
| 908 | 3 | 50 |  |  |  | 18 | if ($trace_terminals) { | 
| 909 | 0 | 0 |  |  |  | 0 | say {$trace_fh} 'Adding rejected token as cruft: ', | 
|  | 0 |  |  |  |  | 0 |  | 
| 910 |  |  |  |  |  |  | $tracer->symbol_name($attempted_symbol_id) | 
| 911 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 912 |  |  |  |  |  |  | } | 
| 913 |  |  |  |  |  |  |  | 
| 914 | 3 | 50 |  |  |  | 18 | my $fatal_cruft_error = $token->[Marpa::R2::HTML::Internal::Token::TOKEN_ID] | 
| 915 |  |  |  |  |  |  | == $SYMID_CRUFT ? 1 : 0; | 
| 916 |  |  |  |  |  |  |  | 
| 917 | 3 | 50 | 33 |  |  | 24 | if ( $trace_cruft or $fatal_cruft_error ) { | 
| 918 | 0 |  |  |  |  | 0 | my $current_earleme = $recce->current_earleme(); | 
| 919 | 0 | 0 |  |  |  | 0 | die $thin_grammar->error() if not defined $current_earleme; | 
| 920 | 0 |  |  |  |  | 0 | my ( $line, $col ) = | 
| 921 |  |  |  |  |  |  | earleme_to_linecol( $self, $current_earleme ); | 
| 922 |  |  |  |  |  |  |  | 
| 923 |  |  |  |  |  |  | # HTML::Parser uses one-based line numbers, | 
| 924 |  |  |  |  |  |  | # but zero-based column numbers | 
| 925 |  |  |  |  |  |  | # The convention (in vi and cut) is that | 
| 926 |  |  |  |  |  |  | # columns are also one-based. | 
| 927 | 0 |  |  |  |  | 0 | $col++; | 
| 928 |  |  |  |  |  |  |  | 
| 929 | 0 |  |  |  |  | 0 | say {$trace_fh} qq{Cruft at line $line, column $col: "}, | 
| 930 |  |  |  |  |  |  | ${ | 
| 931 | 0 | 0 |  |  |  | 0 | token_range_to_original( | 
|  | 0 |  |  |  |  | 0 |  | 
| 932 |  |  |  |  |  |  | $self, $token_number, $token_number | 
| 933 |  |  |  |  |  |  | ) | 
| 934 |  |  |  |  |  |  | }, | 
| 935 |  |  |  |  |  |  | q{"} | 
| 936 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 937 | 0 | 0 |  |  |  | 0 | die 'Internal error: cruft token was rejected' | 
| 938 |  |  |  |  |  |  | if $fatal_cruft_error; | 
| 939 |  |  |  |  |  |  | } ## end if ( $trace_cruft or $fatal_cruft_error ) | 
| 940 |  |  |  |  |  |  |  | 
| 941 |  |  |  |  |  |  | # Cruft tokens are not virtual. | 
| 942 |  |  |  |  |  |  | # They are the real things, hacked up. | 
| 943 | 3 |  |  |  |  | 12 | $token->[Marpa::R2::HTML::Internal::Token::TOKEN_ID] = $SYMID_CRUFT; | 
| 944 |  |  |  |  |  |  |  | 
| 945 |  |  |  |  |  |  | } ## end RECCE_RESPONSE: while ( $token_number < $token_count ) | 
| 946 | 92 |  |  |  |  | 303 | $thin_grammar->throw_set(1); | 
| 947 |  |  |  |  |  |  |  | 
| 948 | 92 | 50 |  |  |  | 206 | if ($trace_terminals) { | 
| 949 | 0 | 0 |  |  |  | 0 | say {$trace_fh} 'at end of tokens' | 
|  | 0 |  |  |  |  | 0 |  | 
| 950 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 951 |  |  |  |  |  |  | } | 
| 952 |  |  |  |  |  |  |  | 
| 953 | 92 |  |  |  |  | 4827 | $Marpa::R2::HTML::INSTANCE = $self; | 
| 954 | 92 |  |  |  |  | 244 | local $Marpa::R2::HTML::Internal::PARSE_INSTANCE = $self; | 
| 955 | 92 |  |  |  |  | 388 | my $latest_earley_set_ID = $recce->latest_earley_set(); | 
| 956 | 92 |  |  |  |  | 5390 | my $bocage = Marpa::R2::Thin::B->new( $recce, $latest_earley_set_ID ); | 
| 957 | 92 |  |  |  |  | 384 | my $order  = Marpa::R2::Thin::O->new($bocage); | 
| 958 | 92 |  |  |  |  | 348 | my $tree   = Marpa::R2::Thin::T->new($order); | 
| 959 | 92 |  |  |  |  | 640 | $tree->next(); | 
| 960 |  |  |  |  |  |  |  | 
| 961 | 92 |  |  |  |  | 196 | my @stack = (); | 
| 962 | 92 |  |  |  |  | 173 | local $Marpa::R2::HTML::Internal::STACK = \@stack; | 
| 963 | 92 |  |  |  |  | 152 | my %memoized_handlers = (); | 
| 964 |  |  |  |  |  |  |  | 
| 965 | 92 |  |  |  |  | 490 | my $valuator = Marpa::R2::Thin::V->new($tree); | 
| 966 | 92 |  |  |  |  | 178 | local $Marpa::R2::HTML::Internal::RECCE    = $recce; | 
| 967 | 92 |  |  |  |  | 161 | local $Marpa::R2::HTML::Internal::VALUATOR = $valuator; | 
| 968 |  |  |  |  |  |  |  | 
| 969 | 92 |  |  |  |  | 579 | for my $rule_id ( grep { $thin_grammar->rule_length($_); } | 
|  | 12838 |  |  |  |  | 19449 |  | 
| 970 |  |  |  |  |  |  | 0 .. $thin_grammar->highest_rule_id() ) | 
| 971 |  |  |  |  |  |  | { | 
| 972 | 12746 |  |  |  |  | 19994 | $valuator->rule_is_valued_set( $rule_id, 1 ); | 
| 973 |  |  |  |  |  |  | } | 
| 974 | 92 |  |  |  |  | 381 | STEP: while (1) { | 
| 975 | 7044 |  |  |  |  | 21997 | my ( $type, @step_data ) = $valuator->step(); | 
| 976 | 7044 | 100 |  |  |  | 13469 | last STEP if not defined $type; | 
| 977 | 6952 | 100 |  |  |  | 11679 | if ( $type eq 'MARPA_STEP_TOKEN' ) { | 
| 978 | 2229 | 50 | 0 |  |  | 3659 | say {*STDERR} join q{ }, $type, @step_data, | 
|  | 0 |  |  |  |  | 0 |  | 
| 979 |  |  |  |  |  |  | $tracer->symbol_name( $step_data[0] ) | 
| 980 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO") | 
| 981 |  |  |  |  |  |  | if $trace_values; | 
| 982 | 2229 |  |  |  |  | 3579 | my ( undef, $token_value, $arg_n ) = @step_data; | 
| 983 | 2229 | 100 |  |  |  | 3984 | if ( $token_value eq RUBY_SLIPPERS_TOKEN ) { | 
| 984 | 760 |  |  |  |  | 1411 | $stack[$arg_n] = ['RUBY_SLIPPERS_TOKEN']; | 
| 985 | 760 | 50 | 0 |  |  | 1399 | say {*STDERR} "Stack:\n", Data::Dumper::Dumper( \@stack ) | 
|  | 0 |  |  |  |  | 0 |  | 
| 986 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO") | 
| 987 |  |  |  |  |  |  | if $trace_values; | 
| 988 | 760 |  |  |  |  | 1292 | next STEP; | 
| 989 |  |  |  |  |  |  | } ## end if ( $token_value eq RUBY_SLIPPERS_TOKEN ) | 
| 990 | 1469 |  |  |  |  | 3132 | my ( $start_earley_set_id, $end_earley_set_id ) = | 
| 991 |  |  |  |  |  |  | $valuator->location(); | 
| 992 | 1469 |  |  |  |  | 2932 | my $start_earleme = $recce->earleme($start_earley_set_id); | 
| 993 |  |  |  |  |  |  | my $start_html_token_ix = | 
| 994 | 1469 |  |  |  |  | 2148 | $self->{earleme_to_html_token_ix}->[$start_earleme]; | 
| 995 | 1469 |  |  |  |  | 2539 | my $end_earleme = $recce->earleme($end_earley_set_id); | 
| 996 |  |  |  |  |  |  | my $end_html_token_ix = | 
| 997 | 1469 |  |  |  |  | 1949 | $self->{earleme_to_html_token_ix}->[$end_earleme]; | 
| 998 | 1469 |  |  |  |  | 3025 | $stack[$arg_n] = [ | 
| 999 |  |  |  |  |  |  | 'PHYSICAL_TOKEN' => $start_html_token_ix + 1, | 
| 1000 |  |  |  |  |  |  | $end_html_token_ix, | 
| 1001 |  |  |  |  |  |  | ]; | 
| 1002 | 1469 | 50 | 0 |  |  | 2548 | say {*STDERR} "Stack:\n", Data::Dumper::Dumper( \@stack ) | 
|  | 0 |  |  |  |  | 0 |  | 
| 1003 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO") | 
| 1004 |  |  |  |  |  |  | if $trace_values; | 
| 1005 | 1469 |  |  |  |  | 2379 | next STEP; | 
| 1006 |  |  |  |  |  |  | } ## end if ( $type eq 'MARPA_STEP_TOKEN' ) | 
| 1007 | 4723 | 100 |  |  |  | 7818 | if ( $type eq 'MARPA_STEP_RULE' ) { | 
| 1008 | 4168 | 50 | 0 |  |  | 6527 | say {*STDERR} join q{ }, ( $type, @step_data ) | 
|  | 0 |  |  |  |  | 0 |  | 
| 1009 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO") | 
| 1010 |  |  |  |  |  |  | if $trace_values; | 
| 1011 | 4168 |  |  |  |  | 6329 | my ( $rule_id, $arg_0, $arg_n ) = @step_data; | 
| 1012 |  |  |  |  |  |  |  | 
| 1013 | 4168 |  |  |  |  | 5293 | my $attributes = undef; | 
| 1014 | 4168 |  |  |  |  | 4759 | my $class      = undef; | 
| 1015 | 4168 |  |  |  |  | 5655 | my $action     = $action_by_rule_id[$rule_id]; | 
| 1016 | 4168 |  |  |  |  | 5343 | local $Marpa::R2::HTML::Internal::START_TAG_IX   = undef; | 
| 1017 | 4168 |  |  |  |  | 4790 | local $Marpa::R2::HTML::Internal::END_TAG_IX_REF = undef; | 
| 1018 | 4168 |  |  |  |  | 4808 | local $Marpa::R2::HTML::Internal::ELEMENT        = undef; | 
| 1019 | 4168 |  |  |  |  | 5285 | local $Marpa::R2::HTML::Internal::SPECIES        = q{}; | 
| 1020 |  |  |  |  |  |  |  | 
| 1021 | 4168 | 100 | 100 |  |  | 10161 | if ( defined $action and ( index $action, 'ELE_' ) == 0 ) { | 
| 1022 | 747 |  |  |  |  | 1773 | $Marpa::R2::HTML::Internal::SPECIES = | 
| 1023 |  |  |  |  |  |  | $Marpa::R2::HTML::Internal::ELEMENT = substr $action, 4; | 
| 1024 | 747 |  |  |  |  | 1062 | my $start_tag_marpa_token = $stack[$arg_0]; | 
| 1025 |  |  |  |  |  |  |  | 
| 1026 | 747 |  |  |  |  | 1029 | my $start_tag_type = $start_tag_marpa_token | 
| 1027 |  |  |  |  |  |  | ->[Marpa::R2::HTML::Internal::TDesc::TYPE]; | 
| 1028 | 747 | 100 | 66 |  |  | 2223 | if ( defined $start_tag_type | 
| 1029 |  |  |  |  |  |  | and $start_tag_type eq 'PHYSICAL_TOKEN' ) | 
| 1030 |  |  |  |  |  |  | { | 
| 1031 | 401 |  |  |  |  | 612 | my $start_tag_ix    = $start_tag_marpa_token->[1]; | 
| 1032 | 401 |  |  |  |  | 699 | my $start_tag_token = $html_parser_tokens[$start_tag_ix]; | 
| 1033 | 401 | 50 |  |  |  | 1090 | if ( $start_tag_token | 
| 1034 |  |  |  |  |  |  | ->[Marpa::R2::HTML::Internal::Token::TYPE] eq 'S' ) | 
| 1035 |  |  |  |  |  |  | { | 
| 1036 | 401 |  |  |  |  | 518 | $Marpa::R2::HTML::Internal::START_TAG_IX = | 
| 1037 |  |  |  |  |  |  | $start_tag_ix; | 
| 1038 | 401 |  |  |  |  | 754 | $attributes = $start_tag_token | 
| 1039 |  |  |  |  |  |  | ->[Marpa::R2::HTML::Internal::Token::ATTR]; | 
| 1040 |  |  |  |  |  |  | } ## end if ( $start_tag_token->[...]) | 
| 1041 |  |  |  |  |  |  | } ## end if ( defined $start_tag_type and $start_tag_type eq ...) | 
| 1042 |  |  |  |  |  |  | } ## end if ( defined $action and ( index $action, 'ELE_' ) ==...) | 
| 1043 | 4168 | 100 | 100 |  |  | 8738 | if ( defined $action and ( index $action, 'SPE_' ) == 0 ) { | 
| 1044 | 741 |  |  |  |  | 1583 | $Marpa::R2::HTML::Internal::SPECIES = q{:} . substr $action, | 
| 1045 |  |  |  |  |  |  | 4; | 
| 1046 |  |  |  |  |  |  | } | 
| 1047 | 4168 |  |  |  |  | 5075 | local $Marpa::R2::HTML::Internal::ATTRIBUTES = $attributes; | 
| 1048 | 4168 |  | 100 |  |  | 10707 | $class = $attributes->{class} // q{*}; | 
| 1049 | 4168 |  |  |  |  | 5595 | local $Marpa::R2::HTML::Internal::CLASS = $class; | 
| 1050 | 4168 |  |  |  |  | 4988 | local $Marpa::R2::HTML::Internal::ARG_0 = $arg_0; | 
| 1051 | 4168 |  |  |  |  | 4876 | local $Marpa::R2::HTML::Internal::ARG_N = $arg_n; | 
| 1052 |  |  |  |  |  |  |  | 
| 1053 | 4168 |  |  |  |  | 8831 | my ( $start_earley_set_id, $end_earley_set_id ) = | 
| 1054 |  |  |  |  |  |  | $valuator->location(); | 
| 1055 |  |  |  |  |  |  |  | 
| 1056 | 4168 |  |  |  |  | 8374 | my $start_earleme = $recce->earleme($start_earley_set_id); | 
| 1057 |  |  |  |  |  |  | my $start_html_token_ix = | 
| 1058 | 4168 |  |  |  |  | 6139 | $self->{earleme_to_html_token_ix}->[$start_earleme] + 1; | 
| 1059 | 4168 |  |  |  |  | 6361 | my $end_earleme = $recce->earleme($end_earley_set_id); | 
| 1060 |  |  |  |  |  |  | my $end_html_token_ix = | 
| 1061 | 4168 |  |  |  |  | 5421 | $self->{earleme_to_html_token_ix}->[$end_earleme]; | 
| 1062 |  |  |  |  |  |  |  | 
| 1063 | 4168 | 100 |  |  |  | 6877 | if ( $start_html_token_ix > $end_html_token_ix ) { | 
| 1064 | 117 |  |  |  |  | 195 | $start_html_token_ix = $end_html_token_ix = undef; | 
| 1065 |  |  |  |  |  |  | } | 
| 1066 | 4168 |  |  |  |  | 5111 | local $Marpa::R2::HTML::Internal::START_HTML_TOKEN_IX = | 
| 1067 |  |  |  |  |  |  | $start_html_token_ix; | 
| 1068 | 4168 |  |  |  |  | 4913 | local $Marpa::R2::HTML::Internal::END_HTML_TOKEN_IX = | 
| 1069 |  |  |  |  |  |  | $end_html_token_ix; | 
| 1070 |  |  |  |  |  |  |  | 
| 1071 | 4168 |  |  |  |  | 7025 | my $handler_key = | 
| 1072 |  |  |  |  |  |  | $rule_id . q{;} . $Marpa::R2::HTML::Internal::CLASS; | 
| 1073 |  |  |  |  |  |  |  | 
| 1074 | 4168 |  |  |  |  | 5888 | my $handler = $memoized_handlers{$handler_key}; | 
| 1075 |  |  |  |  |  |  |  | 
| 1076 |  |  |  |  |  |  | $trace_handlers | 
| 1077 |  |  |  |  |  |  | and $handler | 
| 1078 | 4168 | 50 | 0 |  |  | 7257 | and say {*STDERR} | 
|  | 0 |  | 33 |  |  | 0 |  | 
| 1079 |  |  |  |  |  |  | qq{Found memoized handler for rule $rule_id, class "}, | 
| 1080 |  |  |  |  |  |  | ( $class // q{*} ), q{"}; | 
| 1081 |  |  |  |  |  |  |  | 
| 1082 | 4168 | 100 |  |  |  | 6865 | if ( not defined $handler ) { | 
| 1083 | 1341 |  |  |  |  | 2375 | $handler = $memoized_handlers{$handler_key} = | 
| 1084 |  |  |  |  |  |  | handler_find( $self, $rule_id, $class ); | 
| 1085 |  |  |  |  |  |  | } | 
| 1086 |  |  |  |  |  |  |  | 
| 1087 |  |  |  |  |  |  | COMPUTE_VALUE: { | 
| 1088 | 4168 | 100 |  |  |  | 4967 | if ( ref $handler ) { | 
|  | 4168 |  |  |  |  | 6838 |  | 
| 1089 | 581 |  |  |  |  | 1485 | $stack[$arg_0] = [ | 
| 1090 |  |  |  |  |  |  | VALUED_SPAN => $start_html_token_ix, | 
| 1091 |  |  |  |  |  |  | $end_html_token_ix, | 
| 1092 |  |  |  |  |  |  | ( scalar $handler->() ), | 
| 1093 |  |  |  |  |  |  | $rule_id | 
| 1094 |  |  |  |  |  |  | ]; | 
| 1095 | 581 |  |  |  |  | 12489 | last COMPUTE_VALUE; | 
| 1096 |  |  |  |  |  |  | } ## end if ( ref $handler ) | 
| 1097 | 3587 |  |  |  |  | 4458 | my @flat_tdesc_list = (); | 
| 1098 |  |  |  |  |  |  | STACK_IX: | 
| 1099 | 3587 |  |  |  |  | 6522 | for my $stack_ix ( $Marpa::R2::HTML::Internal::ARG_0 .. | 
| 1100 |  |  |  |  |  |  | $Marpa::R2::HTML::Internal::ARG_N ) | 
| 1101 |  |  |  |  |  |  | { | 
| 1102 | 5339 |  |  |  |  | 6802 | my $tdesc_item = | 
| 1103 |  |  |  |  |  |  | $Marpa::R2::HTML::Internal::STACK->[$stack_ix]; | 
| 1104 | 5339 |  |  |  |  | 6607 | my $tdesc_type = $tdesc_item->[0]; | 
| 1105 | 5339 | 50 |  |  |  | 8022 | next STACK_IX if not defined $tdesc_type; | 
| 1106 | 5339 | 100 |  |  |  | 8157 | if ( $tdesc_type eq 'VALUES' ) { | 
| 1107 |  |  |  |  |  |  | push @flat_tdesc_list, | 
| 1108 | 471 |  |  |  |  | 529 | @{ $tdesc_item | 
|  | 471 |  |  |  |  | 1161 |  | 
| 1109 |  |  |  |  |  |  | ->[Marpa::R2::HTML::Internal::TDesc::VALUE] }; | 
| 1110 | 471 |  |  |  |  | 778 | next STACK_IX; | 
| 1111 |  |  |  |  |  |  | } ## end if ( $tdesc_type eq 'VALUES' ) | 
| 1112 | 4868 | 100 |  |  |  | 8483 | next STACK_IX if $tdesc_type ne 'VALUED_SPAN'; | 
| 1113 | 3233 |  |  |  |  | 5485 | push @flat_tdesc_list, $tdesc_item; | 
| 1114 |  |  |  |  |  |  | } ## end STACK_IX: for my $stack_ix ( $Marpa::R2::HTML::Internal::ARG_0...) | 
| 1115 | 3587 | 100 |  |  |  | 6341 | if ( scalar @flat_tdesc_list <= 1 ) { | 
| 1116 | 2972 |  |  |  |  | 6657 | $stack[$arg_0] = [ | 
| 1117 |  |  |  |  |  |  | VALUED_SPAN => $start_html_token_ix, | 
| 1118 |  |  |  |  |  |  | $end_html_token_ix, | 
| 1119 |  |  |  |  |  |  | $flat_tdesc_list[0] | 
| 1120 |  |  |  |  |  |  | ->[Marpa::R2::HTML::Internal::TDesc::VALUE], | 
| 1121 |  |  |  |  |  |  | $rule_id | 
| 1122 |  |  |  |  |  |  | ]; | 
| 1123 | 2972 |  |  |  |  | 5380 | last COMPUTE_VALUE; | 
| 1124 |  |  |  |  |  |  | } ## end if ( scalar @flat_tdesc_list <= 1 ) | 
| 1125 | 615 |  |  |  |  | 1817 | $stack[$arg_0] = [ | 
| 1126 |  |  |  |  |  |  | VALUES => $start_html_token_ix, | 
| 1127 |  |  |  |  |  |  | $end_html_token_ix, | 
| 1128 |  |  |  |  |  |  | \@flat_tdesc_list, | 
| 1129 |  |  |  |  |  |  | $rule_id | 
| 1130 |  |  |  |  |  |  | ]; | 
| 1131 |  |  |  |  |  |  | } ## end COMPUTE_VALUE: | 
| 1132 |  |  |  |  |  |  |  | 
| 1133 | 4168 | 50 |  |  |  | 7122 | if ($trace_values) { | 
| 1134 | 0 | 0 |  |  |  | 0 | say {*STDERR} "rule $rule_id: ", join q{ }, | 
|  | 0 |  |  |  |  | 0 |  | 
| 1135 |  |  |  |  |  |  | symbol_names_by_rule_id( $self, $rule_id ) | 
| 1136 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 1137 | 0 | 0 |  |  |  | 0 | say {*STDERR} "Stack:\n", Data::Dumper::Dumper( \@stack ) | 
|  | 0 |  |  |  |  | 0 |  | 
| 1138 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 1139 |  |  |  |  |  |  | } ## end if ($trace_values) | 
| 1140 | 4168 |  |  |  |  | 10374 | next STEP; | 
| 1141 |  |  |  |  |  |  | } ## end if ( $type eq 'MARPA_STEP_RULE' ) | 
| 1142 |  |  |  |  |  |  |  | 
| 1143 | 555 | 50 |  |  |  | 1065 | if ( $type eq 'MARPA_STEP_NULLING_SYMBOL' ) { | 
| 1144 | 555 |  |  |  |  | 934 | my ( $symbol_id, $arg_n ) = @step_data; | 
| 1145 | 555 |  |  |  |  | 1214 | $stack[$arg_n] = ['ZERO_SPAN']; | 
| 1146 |  |  |  |  |  |  |  | 
| 1147 | 555 | 50 |  |  |  | 1028 | if ($trace_values) { | 
| 1148 | 0 | 0 |  |  |  | 0 | say {*STDERR} join q{ }, $type, @step_data, | 
|  | 0 |  |  |  |  | 0 |  | 
| 1149 |  |  |  |  |  |  | $tracer->symbol_name($symbol_id) | 
| 1150 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 1151 | 0 | 0 |  |  |  | 0 | say {*STDERR} "Stack:\n", Data::Dumper::Dumper( \@stack ) | 
|  | 0 |  |  |  |  | 0 |  | 
| 1152 |  |  |  |  |  |  | or Carp::croak("Cannot print: $ERRNO"); | 
| 1153 |  |  |  |  |  |  | } ## end if ($trace_values) | 
| 1154 | 555 |  |  |  |  | 874 | next STEP; | 
| 1155 |  |  |  |  |  |  | } ## end if ( $type eq 'MARPA_STEP_NULLING_SYMBOL' ) | 
| 1156 | 0 |  |  |  |  | 0 | die "Unexpected step type: $type"; | 
| 1157 |  |  |  |  |  |  | } ## end STEP: while (1) | 
| 1158 |  |  |  |  |  |  |  | 
| 1159 | 92 |  |  |  |  | 174 | my $result = $stack[0]; | 
| 1160 | 92 | 50 |  |  |  | 206 | Marpa::R2::exception('No parse: evaler returned undef') | 
| 1161 |  |  |  |  |  |  | if not defined $result; | 
| 1162 |  |  |  |  |  |  |  | 
| 1163 | 92 | 100 |  |  |  | 242 | if ( ref $self->{handler_by_species}->{TOP} ) { | 
| 1164 |  |  |  |  |  |  | ## This is a user-defined handler.  We assume it returns | 
| 1165 |  |  |  |  |  |  | ## a VALUED_SPAN. | 
| 1166 | 4 |  |  |  |  | 12 | $result = $result->[Marpa::R2::HTML::Internal::TDesc::VALUE]; | 
| 1167 |  |  |  |  |  |  | } | 
| 1168 |  |  |  |  |  |  | else { | 
| 1169 |  |  |  |  |  |  | ## The TOP handler was the default handler. | 
| 1170 |  |  |  |  |  |  | ## We now want to "literalize" its result. | 
| 1171 |  |  |  |  |  |  | FIND_LITERALIZEABLE: { | 
| 1172 | 88 |  |  |  |  | 129 | my $type = $result->[Marpa::R2::HTML::Internal::TDesc::TYPE]; | 
|  | 88 |  |  |  |  | 133 |  | 
| 1173 | 88 | 100 |  |  |  | 166 | if ( $type eq 'VALUES' ) { | 
| 1174 | 11 |  |  |  |  | 24 | $result = $result->[Marpa::R2::HTML::Internal::TDesc::VALUE]; | 
| 1175 | 11 |  |  |  |  | 23 | last FIND_LITERALIZEABLE; | 
| 1176 |  |  |  |  |  |  | } | 
| 1177 | 77 | 50 |  |  |  | 171 | if ( $type eq 'VALUED_SPAN' ) { | 
| 1178 | 77 |  |  |  |  | 142 | $result = [$result]; | 
| 1179 | 77 |  |  |  |  | 161 | last FIND_LITERALIZEABLE; | 
| 1180 |  |  |  |  |  |  | } | 
| 1181 | 0 |  |  |  |  | 0 | die 'Internal: TOP result is not literalize-able'; | 
| 1182 |  |  |  |  |  |  | } ## end FIND_LITERALIZEABLE: | 
| 1183 | 88 |  |  |  |  | 333 | $result = range_and_values_to_literal( $self, 0, $#html_parser_tokens, | 
| 1184 |  |  |  |  |  |  | $result ); | 
| 1185 |  |  |  |  |  |  | } ## end else [ if ( ref $self->{handler_by_species}->{TOP} ) ] | 
| 1186 |  |  |  |  |  |  |  | 
| 1187 | 92 |  |  |  |  | 8672 | return $result; | 
| 1188 |  |  |  |  |  |  |  | 
| 1189 |  |  |  |  |  |  | } ## end sub parse | 
| 1190 |  |  |  |  |  |  |  | 
| 1191 |  |  |  |  |  |  | sub Marpa::R2::HTML::html { | 
| 1192 | 94 |  |  | 94 | 0 | 24711 | my ( $document_ref, @args ) = @_; | 
| 1193 | 94 |  |  |  |  | 249 | my $html = Marpa::R2::HTML::Internal::create(@args); | 
| 1194 | 94 |  |  |  |  | 246 | return Marpa::R2::HTML::Internal::parse( $html, $document_ref ); | 
| 1195 |  |  |  |  |  |  | } | 
| 1196 |  |  |  |  |  |  |  | 
| 1197 |  |  |  |  |  |  | 1; | 
| 1198 |  |  |  |  |  |  |  | 
| 1199 |  |  |  |  |  |  | # vim: set expandtab shiftwidth=4: |