| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package HTML::Zoom::Parser::BuiltIn; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 13 |  |  | 13 |  | 27409 | use strictures 1; | 
|  | 13 |  |  |  |  | 94 |  | 
|  | 13 |  |  |  |  | 360 |  | 
| 4 | 13 |  |  | 13 |  | 1057 | use base qw(HTML::Zoom::SubObject); | 
|  | 13 |  |  |  |  | 25 |  | 
|  | 13 |  |  |  |  | 14616 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | sub html_to_events { | 
| 7 | 79 |  |  | 79 | 0 | 148 | my ($self, $text) = @_; | 
| 8 | 79 |  |  |  |  | 106 | my @events; | 
| 9 | 79 |  |  | 1070 |  | 427 | _hacky_tag_parser($text => sub { push @events, $_[0] }); | 
|  | 1070 |  |  |  |  | 6768 |  | 
| 10 | 79 |  |  |  |  | 665 | return \@events; | 
| 11 |  |  |  |  |  |  | } | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | sub html_to_stream { | 
| 14 | 23 |  |  | 23 | 0 | 42 | my ($self, $text) = @_; | 
| 15 | 23 |  |  |  |  | 62 | return $self->_zconfig->stream_utils | 
| 16 | 23 |  |  |  |  | 80 | ->stream_from_array(@{$self->html_to_events($text)}); | 
| 17 |  |  |  |  |  |  | } | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | sub _hacky_tag_parser { | 
| 20 | 79 |  |  | 79 |  | 147 | my ($text, $handler) = @_; | 
| 21 | 79 |  |  |  |  | 1156 | while ( | 
| 22 |  |  |  |  |  |  | $text =~ m{ | 
| 23 |  |  |  |  |  |  | ( | 
| 24 |  |  |  |  |  |  | (?:[^<]*) < (?: | 
| 25 |  |  |  |  |  |  | ( / )? ( [^/!<>\s"'=]+ ) | 
| 26 |  |  |  |  |  |  | ( (?:"[^"]*"|'[^']*'|[^/"'<>])+? )? | 
| 27 |  |  |  |  |  |  | | | 
| 28 |  |  |  |  |  |  | (!-- .*? -- | ![^\-] .*? ) | 
| 29 |  |  |  |  |  |  | ) (\s*/\s*)? > | 
| 30 |  |  |  |  |  |  | ) | 
| 31 |  |  |  |  |  |  | ([^<]*) | 
| 32 |  |  |  |  |  |  | }sxg | 
| 33 |  |  |  |  |  |  | ) { | 
| 34 | 595 |  |  |  |  | 2508 | my ($whole, $is_close, $tag_name, $attributes, $is_special, | 
| 35 |  |  |  |  |  |  | $in_place_close, $content) | 
| 36 |  |  |  |  |  |  | = ($1, $2, $3, $4, $5, $6, $7, $8); | 
| 37 | 595 | 100 |  |  |  | 1073 | if ($is_special) { | 
| 38 | 1 |  |  |  |  | 41 | $handler->({ type => 'SPECIAL', raw => $whole }); | 
| 39 |  |  |  |  |  |  | } else { | 
| 40 | 594 |  |  |  |  | 1201 | $tag_name =~ tr/A-Z/a-z/; | 
| 41 | 594 | 100 |  |  |  | 1234 | if ($is_close) { | 
| 42 | 281 |  |  |  |  | 1257 | $handler->({ type => 'CLOSE', name => $tag_name, raw => $whole }); | 
| 43 |  |  |  |  |  |  | } else { | 
| 44 | 313 | 100 | 100 |  |  | 2662 | $attributes = '' if !defined($attributes) or $attributes =~ /^ +$/; | 
| 45 | 313 |  | 100 |  |  | 687 | $handler->({ | 
| 46 |  |  |  |  |  |  | type => 'OPEN', | 
| 47 |  |  |  |  |  |  | name => $tag_name, | 
| 48 |  |  |  |  |  |  | is_in_place_close => $in_place_close, | 
| 49 |  |  |  |  |  |  | _hacky_attribute_parser($attributes), | 
| 50 |  |  |  |  |  |  | raw_attrs => $attributes||'', | 
| 51 |  |  |  |  |  |  | raw => $whole, | 
| 52 |  |  |  |  |  |  | }); | 
| 53 | 313 | 100 |  |  |  | 824 | if ($in_place_close) { | 
| 54 | 32 |  |  |  |  | 144 | $handler->({ | 
| 55 |  |  |  |  |  |  | type => 'CLOSE', name => $tag_name, raw => '', | 
| 56 |  |  |  |  |  |  | is_in_place_close => 1 | 
| 57 |  |  |  |  |  |  | }); | 
| 58 |  |  |  |  |  |  | } | 
| 59 |  |  |  |  |  |  | } | 
| 60 |  |  |  |  |  |  | } | 
| 61 | 595 | 100 |  |  |  | 7675 | if (length $content) { | 
| 62 | 443 |  |  |  |  | 1661 | $handler->({ type => 'TEXT', raw => $content }); | 
| 63 |  |  |  |  |  |  | } | 
| 64 |  |  |  |  |  |  | } | 
| 65 |  |  |  |  |  |  | } | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | sub _hacky_attribute_parser { | 
| 68 | 313 |  |  | 313 |  | 606 | my ($attr_text) = @_; | 
| 69 | 313 |  |  |  |  | 369 | my (%attrs, @attr_names); | 
| 70 | 313 |  |  |  |  | 1482 | while ( | 
| 71 |  |  |  |  |  |  | $attr_text =~ m{ | 
| 72 |  |  |  |  |  |  | ([^\s\=\"\']+)(\s*=\s*(?:(")(.*?)"|(')(.*?)'|([^'"\s=]+)['"]*))? | 
| 73 |  |  |  |  |  |  | }sgx | 
| 74 |  |  |  |  |  |  | ) { | 
| 75 | 195 |  |  |  |  | 367 | my $key  = $1; | 
| 76 | 195 |  |  |  |  | 550 | my $test = $2; | 
| 77 | 195 | 50 |  |  |  | 675 | my $val  = ( $3 ? $4 : ( $5 ? $6 : $7 )); | 
|  |  | 100 |  |  |  |  |  | 
| 78 | 195 |  |  |  |  | 319 | my $lckey = lc($key); | 
| 79 | 195 | 50 |  |  |  | 367 | if ($test) { | 
| 80 | 195 |  |  |  |  | 386 | $attrs{$lckey} = _simple_unescape($val); | 
| 81 |  |  |  |  |  |  | } else { | 
| 82 | 0 |  |  |  |  | 0 | $attrs{$lckey} = $lckey; | 
| 83 |  |  |  |  |  |  | } | 
| 84 | 195 |  |  |  |  | 1069 | push(@attr_names, $lckey); | 
| 85 |  |  |  |  |  |  | } | 
| 86 | 313 |  |  |  |  | 2994 | (attrs => \%attrs, attr_names => \@attr_names); | 
| 87 |  |  |  |  |  |  | } | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | sub _simple_unescape { | 
| 90 | 195 |  |  | 195 |  | 720 | my $str = shift; | 
| 91 | 195 |  |  |  |  | 279 | $str =~ s/"/"/g; | 
| 92 | 195 |  |  |  |  | 350 | $str =~ s/</</g; | 
| 93 | 195 |  |  |  |  | 324 | $str =~ s/>/>/g; | 
| 94 | 195 |  |  |  |  | 289 | $str =~ s/&/&/g; | 
| 95 | 195 |  |  |  |  | 627 | $str; | 
| 96 |  |  |  |  |  |  | } | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | sub _simple_escape { | 
| 99 | 114 |  |  | 114 |  | 182 | my $str = shift; | 
| 100 | 114 |  |  |  |  | 596 | $str =~ s/&/&/g; | 
| 101 | 114 |  |  |  |  | 175 | $str =~ s/"/"/g; | 
| 102 | 114 |  |  |  |  | 170 | $str =~ s/</</g; | 
| 103 | 114 |  |  |  |  | 167 | $str =~ s/>/>/g; | 
| 104 | 114 |  |  |  |  | 585 | $str; | 
| 105 |  |  |  |  |  |  | } | 
| 106 |  |  |  |  |  |  |  | 
| 107 | 114 |  |  | 114 | 0 | 294 | sub html_escape { _simple_escape($_[1]) } | 
| 108 |  |  |  |  |  |  |  | 
| 109 | 0 |  |  | 0 | 0 |  | sub html_unescape { _simple_unescape($_[1]) } | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | 1; |