line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::Zoom::Parser::HH5P; |
2
|
|
|
|
|
|
|
|
3
|
8
|
|
|
8
|
|
72168
|
use 5.008; |
|
8
|
|
|
|
|
32
|
|
|
8
|
|
|
|
|
378
|
|
4
|
8
|
|
|
8
|
|
48
|
use strict; |
|
8
|
|
|
|
|
19
|
|
|
8
|
|
|
|
|
289
|
|
5
|
8
|
|
|
8
|
|
47
|
use warnings; |
|
8
|
|
|
|
|
17
|
|
|
8
|
|
|
|
|
323
|
|
6
|
8
|
|
|
8
|
|
10408
|
use utf8; |
|
8
|
|
|
|
|
107
|
|
|
8
|
|
|
|
|
74
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# Package metadata. Assigned inside BEGIN so both globals are populated at
# compile time, before the remaining `use` statements of the file are run.
# Each assignment stays on its own line so that tooling which scans for a
# `$VERSION = ...` line can still find it.
BEGIN
{
	$HTML::Zoom::Parser::HH5P::VERSION   = '0.002';
	$HTML::Zoom::Parser::HH5P::AUTHORITY = 'cpan:TOBYINK';
}
12
|
|
|
|
|
|
|
|
13
|
8
|
|
|
8
|
|
6601
|
use HTML::HTML5::Parser; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
use HTML::HTML5::Entities; |
15
|
|
|
|
|
|
|
use namespace::clean; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Yes, keep these constants... |
18
|
|
|
|
|
|
|
use XML::LibXML 2 ':libxml'; |
19
|
|
|
|
|
|
|
# Values placed in the `type` slot of every event hash emitted by _visit.
# EVENT_DTD and EVENT_PI intentionally share the same string, 'SPECIAL'.
use constant EVENT_OPEN_TAG  => 'OPEN';
use constant EVENT_CLOSE_TAG => 'CLOSE';
use constant EVENT_TEXT      => 'TEXT';
use constant EVENT_DTD       => 'SPECIAL';
use constant EVENT_PI        => 'SPECIAL';
use constant EVENT_OTHER     => 'MYSTERYMEAT';
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
use Moo; |
29
|
|
|
|
|
|
|
extends qw(HTML::Zoom::SubObject); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# The configuration object this parser belongs to. Read through `zconfig`
# (or the `_zconfig` alias below) and assigned through `with_zconfig`.
#
# NOTE(review): `weaken` is passed through exactly as before, but Moo's
# documented option for a weak reference is `weak_ref` -- confirm whether
# this option has any effect before relying on the reference being weak.
has(
	'zconfig',
	writer => 'with_zconfig',
	weaken => 1,
	is     => 'ro',
);
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# Alias for the `zconfig` reader under the underscore-prefixed name.
# Returns whatever `zconfig` returns, in the caller's context.
sub _zconfig
{
	my ($self) = @_;
	return $self->zconfig;
}
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Tri-state switch read by _parser:
#   undef (the default) - _parser sniffs the input to decide;
#   true                - always parse as a fragment;
#   defined but false   - always parse as a full document.
has(
	'parse_as_fragment',
	default => sub { return undef },
	is      => 'rw',
);
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# When true (the default), _visit emits no OPEN/CLOSE events for elements
# that the HTML5 parser reports as implied; their children are still visited.
has(
	'ignore_implied_elements',
	default => sub { return 1 },
	is      => 'rw',
);
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Stoled from HTML::Zoom::Parser::HTML::BuiltIn! |
53
|
|
|
|
|
|
|
# Parse $text and return an array reference holding every event, in the
# order in which _parser handed them to the collecting callback.
sub html_to_events
{
	my $self   = shift;
	my ($html) = @_;

	my @collected;
	my $collect = sub { push @collected, $_[0] };

	$self->_parser($html, $collect);
	return \@collected;
}
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Stoled from HTML::Zoom::Parser::HTML::BuiltIn! |
62
|
|
|
|
|
|
|
# Parse $text and wrap the resulting events in a stream, using the
# stream_utils object supplied by this parser's zconfig.
sub html_to_stream
{
	my ($self, $text) = @_;

	# Resolve the stream factory before parsing, matching the order in
	# which a chained method call evaluates its invocant and arguments.
	my $utils  = $self->_zconfig->stream_utils;
	my $events = $self->html_to_events($text);

	return $utils->stream_from_array(@$events);
}
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Parse $text into a libxml DOM and walk it with _visit, which reports
# each event to $handler.
#
# The input is parsed as a fragment when parse_as_fragment is true. When
# that attribute is undefined, the first 512 characters are inspected and
# the input counts as a full document only if they contain `<html`,
# `<!doctype` or `<?xml` (case-insensitively).
sub _parser
{
	my ($self, $text, $handler) = @_;

	my $is_frag = $self->parse_as_fragment;
	unless (defined $is_frag)
	{
		$is_frag = substr($text, 0, 512) !~ /<(html|\!doctype|\?xml)/i;
	}

	# Both parser calls are made in scalar context.
	my $dom;
	if ($is_frag)
	{
		$dom = HTML::HTML5::Parser::->new->parse_balanced_chunk($text);
	}
	else
	{
		$dom = HTML::HTML5::Parser::->load_html(string => $text);
	}

	return $self->_visit($dom, $handler);
}
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Walk a libxml DOM subtree depth-first, reporting one event hash per call
# to $handler.
#
# Arguments:
#   $node         - the libxml node to visit
#   $handler      - code ref invoked with a single hash ref per event
#   $continuation - optional code ref used to recurse into child nodes; it
#                   is called as ($self, $child, $handler, $continuation)
#                   and defaults to this class's own _visit
#
# Events emitted, by node type:
#   element  - EVENT_OPEN_TAG, then the children, then EVENT_CLOSE_TAG.
#              Both tag events are skipped (the children are not) when
#              ignore_implied_elements is true and the parser reports the
#              element as implied.
#   text     - EVENT_TEXT carrying the node data in `raw`.
#   document - EVENT_DTD carrying a reconstructed DOCTYPE in `raw`, but
#              only when a public and/or system identifier is recorded;
#              then the children.
#   fragment - the children only.
#   other    - a warning, then EVENT_OTHER with the serialised node.
sub _visit
{
	my ($self, $node, $handler, $continuation) = @_;
	$continuation ||= $self->can('_visit');

	my $type = $node->nodeType;

	if ($type == XML_ELEMENT_NODE)
	{
		my $ignore = $self->ignore_implied_elements;
		my ($line, $col, $implied);
		if ($ignore)
		{
			# Source positions are only looked up on this path, so `line`
			# and `column` stay undefined when implied elements are kept.
			($line, $col, $implied) = HTML::HTML5::Parser::->source_line($node);
			$ignore = 0 unless $implied;
		}

		$handler->({
			type       => EVENT_OPEN_TAG,
			libxml     => $node,
			name       => $node->localname,
			attrs      => +{ %$node },
			attr_names => [ sort keys %$node ],
			line       => $line,
			column     => $col,
		}) unless $ignore;

		$continuation->($self, $_, $handler, $continuation)
			for $node->childNodes;

		$handler->({
			type       => EVENT_CLOSE_TAG,
			libxml     => $node,
			name       => $node->localname,
			attrs      => +{ %$node },
			attr_names => [ sort keys %$node ],
		}) unless $ignore;
	}
	elsif ($type == XML_TEXT_NODE)
	{
		$handler->({
			type   => EVENT_TEXT,
			libxml => $node,
			raw    => $node->data,
		});
	}
	elsif ($type == XML_DOCUMENT_NODE)
	{
		my %dtd;
		for my $bit (qw/ dtd_element dtd_system_id dtd_public_id /) {
			$dtd{$bit} = HTML::HTML5::Parser::->$bit($node);
		}

		# Rebuild the DOCTYPE declaration from the recorded identifiers.
		# Each format string has exactly one placeholder per argument; the
		# previous format, qq[\n], had none, so `raw` was a bare newline.
		if ($dtd{dtd_system_id} and $dtd{dtd_public_id}) {
			$dtd{raw} = sprintf(
				qq[<!DOCTYPE %s PUBLIC "%s" "%s">\n],
				uc($dtd{dtd_element} || 'HTML'),
				$dtd{dtd_public_id},
				$dtd{dtd_system_id},
			);
		}
		elsif ($dtd{dtd_system_id}) {
			$dtd{raw} = sprintf(
				qq[<!DOCTYPE %s SYSTEM "%s">\n],
				uc($dtd{dtd_element} || 'HTML'),
				$dtd{dtd_system_id},
			);
		}
		elsif ($dtd{dtd_public_id}) {
			$dtd{raw} = sprintf(
				qq[<!DOCTYPE %s PUBLIC "%s">\n],
				uc($dtd{dtd_element} || 'HTML'),
				$dtd{dtd_public_id},
			);
		}

		# NOTE(review): a DOCTYPE with neither identifier produces no
		# event here -- confirm that this is intended.
		$handler->({
			type => EVENT_DTD,
			%dtd,
		}) if $dtd{raw};

		$continuation->($self, $_, $handler, $continuation)
			for $node->childNodes;
	}
	elsif ($type == XML_DOCUMENT_FRAG_NODE)
	{
		$continuation->($self, $_, $handler, $continuation)
			for $node->childNodes;
	}
	else
	{
		# Every remaining node type lands here and is passed through
		# verbatim. NOTE(review): the warning fires once per such node.
		warn "OTHER: $type";
		$handler->({
			type   => EVENT_OTHER,
			libxml => $node,
			raw    => $node->toString,
		});
	}
}
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# Method wrapper around encode_entities: the invocant is ignored and the
# first argument is handed over directly (not copied), in the caller's
# context.
sub html_escape
{
	return encode_entities($_[1]);
}
186
|
|
|
|
|
|
|
# Method wrapper around decode_entities: the invocant is ignored and the
# first argument is handed over directly (not copied), in the caller's
# context.
sub html_unescape
{
	return decode_entities($_[1]);
}
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
1 |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
__END__ |