| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package HTML::Zoom::Parser::HH5P; |
|
2
|
|
|
|
|
|
|
|
|
3
|
8
|
|
|
8
|
|
72168
|
use 5.008; |
|
|
8
|
|
|
|
|
32
|
|
|
|
8
|
|
|
|
|
378
|
|
|
4
|
8
|
|
|
8
|
|
48
|
use strict; |
|
|
8
|
|
|
|
|
19
|
|
|
|
8
|
|
|
|
|
289
|
|
|
5
|
8
|
|
|
8
|
|
47
|
use warnings; |
|
|
8
|
|
|
|
|
17
|
|
|
|
8
|
|
|
|
|
323
|
|
|
6
|
8
|
|
|
8
|
|
10408
|
use utf8; |
|
|
8
|
|
|
|
|
107
|
|
|
|
8
|
|
|
|
|
74
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
BEGIN {
    # Package metadata. Assigned inside BEGIN so both values are already
    # set while the rest of this file is still being compiled.
    $HTML::Zoom::Parser::HH5P::VERSION   = '0.002';
    $HTML::Zoom::Parser::HH5P::AUTHORITY = 'cpan:TOBYINK';
}
|
12
|
|
|
|
|
|
|
|
|
13
|
8
|
|
|
8
|
|
6601
|
use HTML::HTML5::Parser; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
use HTML::HTML5::Entities; |
|
15
|
|
|
|
|
|
|
use namespace::clean; |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Yes, keep these constants... |
|
18
|
|
|
|
|
|
|
use XML::LibXML 2 ':libxml'; |
|
19
|
|
|
|
|
|
|
# Tags stored in the `type` slot of the event hashes built by _visit.
# EVENT_DTD and EVENT_PI deliberately share the same 'SPECIAL' tag.
use constant EVENT_OPEN_TAG  => 'OPEN';
use constant EVENT_CLOSE_TAG => 'CLOSE';
use constant EVENT_TEXT      => 'TEXT';
use constant EVENT_DTD       => 'SPECIAL';
use constant EVENT_PI        => 'SPECIAL';
use constant EVENT_OTHER     => 'MYSTERYMEAT';
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
use Moo; |
|
29
|
|
|
|
|
|
|
extends qw(HTML::Zoom::SubObject); |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# The configuration object this parser belongs to. Read-only through
# `zconfig`; `with_zconfig` is installed as the writer.
#
# The reference is held weakly. Moo spells that option `weak_ref`; a
# `weaken` key is not among the options Moo documents for `has`, so it
# would leave the reference strong. Presumably the config object also
# holds this parser, in which case a strong reference here would form a
# cycle -- TODO confirm against the code that constructs the parser.
has zconfig => (
    is       => 'ro',
    weak_ref => 1,
    writer   => 'with_zconfig',
);
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# Private spelling of the zconfig accessor: returns whatever
# $self->zconfig returns.
sub _zconfig
{
    my ($self) = @_;
    return $self->zconfig;
}
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Read-write switch consulted by _parser. The default is undef, in which
# case _parser decides for itself by inspecting the start of the input;
# any defined value is used as the fragment/document choice directly.
has parse_as_fragment => (
    default => sub { return undef; },
    is      => 'rw',
);

# Read-write flag consulted by _visit, on by default. When true, elements
# that the HTML5 parser reports as implied produce no OPEN/CLOSE events
# (their children are still visited).
has ignore_implied_elements => (
    default => sub { return 1; },
    is      => 'rw',
);
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Stolen from HTML::Zoom::Parser::HTML::BuiltIn! |
|
53
|
|
|
|
|
|
|
# Parses $text and returns a reference to an array holding, in order,
# every event that _parser passed to its handler callback.
sub html_to_events
{
    my $self   = shift;
    my ($text) = @_;

    my $collected = [];
    my $collector = sub { push @$collected, $_[0] };

    $self->_parser($text, $collector);

    return $collected;
}
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Stolen from HTML::Zoom::Parser::HTML::BuiltIn! |
|
62
|
|
|
|
|
|
|
# Parses $text into events and hands them, as a flat list, to the
# stream_from_array method of the configured stream utilities object.
# Returns whatever stream_from_array returns.
sub html_to_stream
{
    my ($self, $text) = @_;

    # Look up the utilities object first, then parse, matching the order
    # in which a single chained call would evaluate them.
    my $utils  = $self->_zconfig->stream_utils;
    my $events = $self->html_to_events($text);

    return $utils->stream_from_array(@$events);
}
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Parses $text with HTML::HTML5::Parser and walks the result with _visit,
# which reports events to $handler. Returns whatever _visit returns.
#
# A defined parse_as_fragment value decides between fragment and document
# parsing. When it is undef, the first 512 characters of $text are
# searched (case-insensitively) for "<html", "<!doctype" or "<?xml"; the
# input is treated as a fragment only if none of them is found.
sub _parser
{
    my ($self, $text, $handler) = @_;

    my $is_frag = $self->parse_as_fragment;
    unless (defined $is_frag)
    {
        my $head = substr($text, 0, 512);
        $is_frag = !($head =~ /<(html|\!doctype|\?xml)/i);
    }

    my $dom;
    if ($is_frag)
    {
        $dom = HTML::HTML5::Parser::->new->parse_balanced_chunk($text);
    }
    else
    {
        $dom = HTML::HTML5::Parser::->load_html(string => $text);
    }

    return $self->_visit($dom, $handler);
}
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Walks a libxml node depth-first and reports it to $handler as a sequence
# of event hashrefs.
#
#   $node         - node to visit; its nodeType selects the branch taken
#   $handler      - coderef called with a single event hashref
#   $continuation - coderef used to descend into child nodes, called as
#                   $continuation->($self, $child, $handler, $continuation);
#                   defaults to this class's own _visit
#
# Events by node type:
#   element   - an OPEN event, then the children, then a CLOSE event. Both
#               events are suppressed (children are still visited) when
#               ignore_implied_elements is true and the parser reports the
#               element as implied.
#   text      - one TEXT event whose `raw` is the node's data.
#   document  - one DTD event if a system and/or public identifier is
#               known, then the children. No DTD event is emitted when
#               neither identifier is present.
#   fragment  - the children only.
#   other     - a warning plus one OTHER event carrying the serialised node.
sub _visit
{
    my ($self, $node, $handler, $continuation) = @_;
    $continuation ||= $self->can('_visit');

    my $type = $node->nodeType;

    if ($type == XML_ELEMENT_NODE)
    {
        my $ignore = $self->ignore_implied_elements;
        my ($line, $col, $implied);
        if ($ignore)
        {
            # Source position is only looked up when implied elements are
            # being filtered, so `line` and `column` stay undef otherwise.
            ($line, $col, $implied) = HTML::HTML5::Parser::->source_line($node);
            $ignore = 0 unless $implied;
        }

        # %$node relies on the element supporting hash dereference to
        # expose its attributes.
        $handler->({
            type       => EVENT_OPEN_TAG,
            libxml     => $node,
            name       => $node->localname,
            attrs      => +{ %$node },
            attr_names => [ sort keys %$node ],
            line       => $line,
            column     => $col,
        }) unless $ignore;

        $continuation->($self, $_, $handler, $continuation)
            for $node->childNodes;

        $handler->({
            type       => EVENT_CLOSE_TAG,
            libxml     => $node,
            name       => $node->localname,
            attrs      => +{ %$node },
            attr_names => [ sort keys %$node ],
        }) unless $ignore;
    }
    elsif ($type == XML_TEXT_NODE)
    {
        $handler->({
            type   => EVENT_TEXT,
            libxml => $node,
            raw    => $node->data,
        });
    }
    elsif ($type == XML_DOCUMENT_NODE)
    {
        my %dtd;
        for my $bit (qw/ dtd_element dtd_system_id dtd_public_id /) {
            $dtd{$bit} = HTML::HTML5::Parser::->$bit($node);
        }

        # Rebuild the DOCTYPE declaration. Each format consumes exactly the
        # arguments supplied to it; the root element name falls back to
        # HTML when the parser does not report one.
        if ($dtd{dtd_system_id} and $dtd{dtd_public_id}) {
            $dtd{raw} = sprintf(
                qq[<!DOCTYPE %s PUBLIC "%s" "%s">\n],
                uc($dtd{dtd_element} || 'HTML'),
                $dtd{dtd_public_id},
                $dtd{dtd_system_id},
            );
        }
        elsif ($dtd{dtd_system_id}) {
            $dtd{raw} = sprintf(
                qq[<!DOCTYPE %s SYSTEM "%s">\n],
                uc($dtd{dtd_element} || 'HTML'),
                $dtd{dtd_system_id},
            );
        }
        elsif ($dtd{dtd_public_id}) {
            $dtd{raw} = sprintf(
                qq[<!DOCTYPE %s PUBLIC "%s">\n],
                uc($dtd{dtd_element} || 'HTML'),
                $dtd{dtd_public_id},
            );
        }

        # The event carries the three dtd_* values alongside `raw`.
        $handler->({
            type => EVENT_DTD,
            %dtd,
        }) if $dtd{raw};

        $continuation->($self, $_, $handler, $continuation)
            for $node->childNodes;
    }
    elsif ($type == XML_DOCUMENT_FRAG_NODE)
    {
        $continuation->($self, $_, $handler, $continuation)
            for $node->childNodes;
    }
    else
    {
        # Any node type without a dedicated branch above lands here.
        warn "OTHER: $type";
        $handler->({
            type   => EVENT_OTHER,
            libxml => $node,
            raw    => $node->toString,
        });
    }
}
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# Method wrapper around encode_entities: passes the first argument after
# the invocant straight through and returns the result.
sub html_escape
{
    # $_[1] is handed over as-is (not copied into a lexical) so the callee
    # sees exactly the argument the caller supplied.
    return encode_entities($_[1]);
}
|
186
|
|
|
|
|
|
|
# Method wrapper around decode_entities: passes the first argument after
# the invocant straight through and returns the result.
sub html_unescape
{
    # $_[1] is handed over as-is (not copied into a lexical) so the callee
    # sees exactly the argument the caller supplied.
    return decode_entities($_[1]);
}
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
1 |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
__END__ |