| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
|
2
|
|
|
|
|
|
|
# Petal::Parser::HTB - Fires Petal::Canonicalizer events |
|
3
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
|
4
|
|
|
|
|
|
|
# A Wrapper class for HTML::TreeBuilder which plugs into Petal |
|
5
|
|
|
|
|
|
|
# backend for complete parsing backwards compatibility with Petal |
|
6
|
|
|
|
|
|
|
# < 1.10. |
|
7
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
|
8
|
|
|
|
|
|
|
package Petal::Parser::HTB; |
|
9
|
48
|
|
|
48
|
|
6956370
|
use strict; |
|
|
48
|
|
|
|
|
124
|
|
|
|
48
|
|
|
|
|
2010
|
|
|
10
|
48
|
|
|
48
|
|
283
|
use warnings; |
|
|
48
|
|
|
|
|
101
|
|
|
|
48
|
|
|
|
|
1741
|
|
|
11
|
48
|
|
|
48
|
|
274
|
use Carp; |
|
|
48
|
|
|
|
|
156
|
|
|
|
48
|
|
|
|
|
4520
|
|
|
12
|
48
|
|
|
48
|
|
84137
|
use HTML::TreeBuilder; |
|
|
48
|
|
|
|
|
2739657
|
|
|
|
48
|
|
|
|
|
992
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
48
|
|
|
48
|
|
2876
|
use Petal; |
|
|
48
|
|
|
|
|
109
|
|
|
|
48
|
|
|
|
|
1456
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
48
|
|
|
|
|
63651
|
use vars qw /@NodeStack @MarkedData $Canonicalizer |
|
17
|
48
|
|
|
48
|
|
321
|
@NameSpaces @XI_NameSpaces/; |
|
|
48
|
|
|
|
|
99
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
$Petal::INPUTS->{HTML} = 'Petal::Parser::HTB'; |
|
20
|
|
|
|
|
|
|
$Petal::INPUTS->{XHTML} = 'Petal::Parser::HTB'; |
|
21
|
|
|
|
|
|
|
our $VERSION = '1.04'; |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# this avoid silly warnings |
|
24
|
|
|
|
|
|
|
sub sillyness |
|
25
|
|
|
|
|
|
|
{ |
|
26
|
0
|
|
|
0
|
0
|
0
|
$Petal::NS, |
|
27
|
|
|
|
|
|
|
$Petal::NS_URI; |
|
28
|
|
|
|
|
|
|
} |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub new |
|
32
|
|
|
|
|
|
|
{ |
|
33
|
131
|
|
|
131
|
0
|
3169068
|
my $class = shift; |
|
34
|
131
|
|
33
|
|
|
1183
|
$class = ref $class || $class; |
|
35
|
131
|
|
|
|
|
731
|
return bless { @_ }, $class; |
|
36
|
|
|
|
|
|
|
} |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub process |
|
40
|
|
|
|
|
|
|
{ |
|
41
|
131
|
|
|
131
|
0
|
18931
|
my $self = shift; |
|
42
|
131
|
|
|
|
|
317
|
local $Canonicalizer = shift; |
|
43
|
131
|
|
|
|
|
484
|
my $data_ref = shift; |
|
44
|
|
|
|
|
|
|
|
|
45
|
131
|
|
|
|
|
382
|
local @MarkedData = (); |
|
46
|
131
|
|
|
|
|
537
|
local @NodeStack = (); |
|
47
|
131
|
|
|
|
|
300
|
local @NameSpaces = (); |
|
48
|
131
|
50
|
|
|
|
707
|
$data_ref = (ref $data_ref) ? $data_ref : \$data_ref; |
|
49
|
|
|
|
|
|
|
|
|
50
|
131
|
|
|
|
|
1408
|
my $tree = HTML::TreeBuilder->new; |
|
51
|
131
|
|
|
|
|
52088
|
$tree->p_strict (0); |
|
52
|
131
|
|
|
|
|
1706
|
$tree->no_space_compacting (1); |
|
53
|
131
|
|
|
|
|
2324
|
$tree->ignore_unknown (0); |
|
54
|
131
|
|
|
|
|
1372
|
$tree->store_comments(1); |
|
55
|
131
|
|
|
|
|
1337
|
$tree->ignore_ignorable_whitespace(0); |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
eval |
|
58
|
131
|
|
|
|
|
875
|
{ |
|
59
|
131
|
|
|
|
|
3890
|
$tree->parse ($$data_ref); |
|
60
|
131
|
|
|
|
|
435383
|
my @nodes = $tree->guts(); |
|
61
|
131
|
|
|
|
|
4753
|
$tree->elementify(); |
|
62
|
131
|
|
|
|
|
19019
|
$self->generate_events ($_) for (@nodes); |
|
63
|
|
|
|
|
|
|
}; |
|
64
|
|
|
|
|
|
|
|
|
65
|
131
|
|
|
|
|
368
|
@MarkedData = (); |
|
66
|
131
|
|
|
|
|
249
|
@NodeStack = (); |
|
67
|
131
|
|
|
|
|
688
|
$tree->delete; |
|
68
|
131
|
50
|
33
|
|
|
23142
|
carp $@ if (defined $@ and $@); |
|
69
|
|
|
|
|
|
|
} |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# generate_events(); |
|
73
|
|
|
|
|
|
|
# ------------------ |
|
74
|
|
|
|
|
|
|
# Once the HTML::TreeBuilder object is built and elementified, it is |
|
75
|
|
|
|
|
|
|
# passed to that subroutine which will traverse it and will trigger |
|
76
|
|
|
|
|
|
|
# proper subroutines which will generate the XML events which are used |
|
77
|
|
|
|
|
|
|
# by the Petal::Canonicalizer module |
|
78
|
|
|
|
|
|
|
sub generate_events |
|
79
|
|
|
|
|
|
|
{ |
|
80
|
2784
|
|
|
2784
|
0
|
5579
|
my $self = shift; |
|
81
|
2784
|
|
|
|
|
4070
|
my $tree = shift; |
|
82
|
|
|
|
|
|
|
|
|
83
|
2784
|
100
|
|
|
|
6282
|
if (ref $tree) |
|
84
|
|
|
|
|
|
|
{ |
|
85
|
1164
|
|
|
|
|
3390
|
my $tag = $tree->tag; |
|
86
|
1164
|
|
|
|
|
11113
|
my $attr = { $tree->all_external_attr() }; |
|
87
|
|
|
|
|
|
|
|
|
88
|
1164
|
100
|
|
|
|
25466
|
if ($tag eq '~comment') |
|
89
|
|
|
|
|
|
|
{ |
|
90
|
10
|
|
|
|
|
48
|
generate_events_comment ($tree->attr ('text')); |
|
91
|
|
|
|
|
|
|
} |
|
92
|
|
|
|
|
|
|
else |
|
93
|
|
|
|
|
|
|
{ |
|
94
|
1154
|
|
|
|
|
2487
|
push @NodeStack, $tree; |
|
95
|
1154
|
|
|
|
|
2990
|
generate_events_start ($tag, $attr); |
|
96
|
|
|
|
|
|
|
|
|
97
|
1154
|
|
|
|
|
3164761
|
foreach my $content ($tree->content_list()) |
|
98
|
|
|
|
|
|
|
{ |
|
99
|
2606
|
|
|
|
|
65788
|
$self->generate_events ($content); |
|
100
|
|
|
|
|
|
|
} |
|
101
|
|
|
|
|
|
|
|
|
102
|
1154
|
|
|
|
|
32012
|
generate_events_end ($tag); |
|
103
|
1154
|
|
|
|
|
76866
|
pop (@NodeStack); |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
else |
|
107
|
|
|
|
|
|
|
{ |
|
108
|
1620
|
|
|
|
|
3681
|
generate_events_text ($tree); |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub generate_events_start |
|
114
|
|
|
|
|
|
|
{ |
|
115
|
1154
|
|
|
1154
|
0
|
1766
|
$_ = shift; |
|
116
|
1154
|
|
|
|
|
2831
|
$_ = "<$_>"; |
|
117
|
1154
|
|
|
|
|
1545
|
%_ = %{shift()}; |
|
|
1154
|
|
|
|
|
6581
|
|
|
118
|
1154
|
|
|
|
|
2662
|
delete $_{'/'}; |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# process the Petal namespace... |
|
121
|
1154
|
100
|
|
|
|
2814
|
my $ns = (scalar @NameSpaces) ? $NameSpaces[$#NameSpaces] : $Petal::NS; |
|
122
|
1154
|
|
|
|
|
3199
|
foreach my $key (keys %_) |
|
123
|
|
|
|
|
|
|
{ |
|
124
|
2116
|
|
|
|
|
4585
|
my $value = $_{$key}; |
|
125
|
2116
|
100
|
|
|
|
5517
|
if ($value eq $Petal::NS_URI) |
|
126
|
|
|
|
|
|
|
{ |
|
127
|
48
|
50
|
|
|
|
279
|
next unless ($key =~ /^xmlns\:/); |
|
128
|
48
|
|
|
|
|
117
|
delete $_{$key}; |
|
129
|
48
|
|
|
|
|
75
|
$ns = $key; |
|
130
|
48
|
|
|
|
|
286
|
$ns =~ s/^xmlns\://; |
|
131
|
|
|
|
|
|
|
} |
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
|
|
134
|
1154
|
|
|
|
|
3058
|
push @NameSpaces, $ns; |
|
135
|
1154
|
|
|
|
|
1748
|
local ($Petal::NS) = $ns; |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# process the XInclude namespace |
|
138
|
1154
|
100
|
|
|
|
2988
|
my $xi_ns = (scalar @XI_NameSpaces) ? $XI_NameSpaces[$#XI_NameSpaces] : $Petal::XI_NS; |
|
139
|
1154
|
|
|
|
|
3119
|
foreach my $key (keys %_) |
|
140
|
|
|
|
|
|
|
{ |
|
141
|
2068
|
|
|
|
|
3381
|
my $value = $_{$key}; |
|
142
|
2068
|
100
|
|
|
|
5145
|
if ($value eq $Petal::XI_NS_URI) |
|
143
|
|
|
|
|
|
|
{ |
|
144
|
37
|
50
|
|
|
|
729
|
next unless ($key =~ /^xmlns\:/); |
|
145
|
37
|
|
|
|
|
85
|
delete $_{$key}; |
|
146
|
37
|
|
|
|
|
47
|
$xi_ns = $key; |
|
147
|
37
|
|
|
|
|
206
|
$xi_ns =~ s/^xmlns\://; |
|
148
|
|
|
|
|
|
|
} |
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
|
|
151
|
1154
|
|
|
|
|
2229
|
push @XI_NameSpaces, $xi_ns; |
|
152
|
1154
|
|
|
|
|
11960
|
local ($Petal::XI_NS) = $xi_ns; |
|
153
|
|
|
|
|
|
|
|
|
154
|
1154
|
|
|
|
|
4611
|
$Canonicalizer->StartTag(); |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
sub generate_events_end |
|
159
|
|
|
|
|
|
|
{ |
|
160
|
1154
|
|
|
1154
|
0
|
1744
|
$_ = shift; |
|
161
|
1154
|
|
|
|
|
3115
|
$_ = "$_>"; |
|
162
|
1154
|
|
|
|
|
2343
|
local ($Petal::NS) = pop (@NameSpaces); |
|
163
|
1154
|
|
|
|
|
1840
|
local ($Petal::XI_NS) = pop (@XI_NameSpaces); |
|
164
|
1154
|
|
|
|
|
3808
|
$Canonicalizer->EndTag(); |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
sub generate_events_text |
|
169
|
|
|
|
|
|
|
{ |
|
170
|
1620
|
|
|
1620
|
0
|
2233
|
my $data = shift; |
|
171
|
1620
|
|
|
|
|
2997
|
$data =~ s/\&/&/g; |
|
172
|
1620
|
|
|
|
|
2026
|
$data =~ s/\</g; |
|
173
|
1620
|
|
|
|
|
2609
|
$_ = $data; |
|
174
|
|
|
|
|
|
|
|
|
175
|
1620
|
|
|
|
|
3649
|
local ($Petal::NS) = $NameSpaces[$#NameSpaces]; |
|
176
|
1620
|
|
|
|
|
3040
|
local ($Petal::XI_NS) = $XI_NameSpaces[$#XI_NameSpaces]; |
|
177
|
1620
|
|
|
|
|
5828
|
$Canonicalizer->Text(); |
|
178
|
|
|
|
|
|
|
} |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub generate_events_comment |
|
182
|
|
|
|
|
|
|
{ |
|
183
|
10
|
|
|
10
|
0
|
145
|
my $data = shift; |
|
184
|
10
|
|
|
|
|
29
|
$data =~ s/\&/&/g; |
|
185
|
10
|
|
|
|
|
29
|
$data =~ s/\</g; |
|
186
|
10
|
|
|
|
|
38
|
$_ = ''; |
|
187
|
10
|
|
|
|
|
41
|
$Canonicalizer->Text(); |
|
188
|
|
|
|
|
|
|
} |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
1; |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
__END__ |