line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
2
|
|
|
|
|
|
|
# Petal::Parser::HTB - Fires Petal::Canonicalizer events |
3
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
4
|
|
|
|
|
|
|
# A Wrapper class for HTML::TreeBuilder which plugs into Petal |
5
|
|
|
|
|
|
|
# backend for complete parsing backwards compatibility with Petal |
6
|
|
|
|
|
|
|
# < 1.10. |
7
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
8
|
|
|
|
|
|
|
package Petal::Parser::HTB; |
9
|
48
|
|
|
48
|
|
6956370
|
use strict; |
|
48
|
|
|
|
|
124
|
|
|
48
|
|
|
|
|
2010
|
|
10
|
48
|
|
|
48
|
|
283
|
use warnings; |
|
48
|
|
|
|
|
101
|
|
|
48
|
|
|
|
|
1741
|
|
11
|
48
|
|
|
48
|
|
274
|
use Carp; |
|
48
|
|
|
|
|
156
|
|
|
48
|
|
|
|
|
4520
|
|
12
|
48
|
|
|
48
|
|
84137
|
use HTML::TreeBuilder; |
|
48
|
|
|
|
|
2739657
|
|
|
48
|
|
|
|
|
992
|
|
13
|
|
|
|
|
|
|
|
14
|
48
|
|
|
48
|
|
2876
|
use Petal; |
|
48
|
|
|
|
|
109
|
|
|
48
|
|
|
|
|
1456
|
|
15
|
|
|
|
|
|
|
|
16
|
48
|
|
|
|
|
63651
|
use vars qw /@NodeStack @MarkedData $Canonicalizer |
17
|
48
|
|
|
48
|
|
321
|
@NameSpaces @XI_NameSpaces/; |
|
48
|
|
|
|
|
99
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
$Petal::INPUTS->{HTML} = 'Petal::Parser::HTB'; |
20
|
|
|
|
|
|
|
$Petal::INPUTS->{XHTML} = 'Petal::Parser::HTB'; |
21
|
|
|
|
|
|
|
our $VERSION = '1.04'; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# this avoid silly warnings |
24
|
|
|
|
|
|
|
sub sillyness |
25
|
|
|
|
|
|
|
{ |
26
|
0
|
|
|
0
|
0
|
0
|
$Petal::NS, |
27
|
|
|
|
|
|
|
$Petal::NS_URI; |
28
|
|
|
|
|
|
|
} |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub new |
32
|
|
|
|
|
|
|
{ |
33
|
131
|
|
|
131
|
0
|
3169068
|
my $class = shift; |
34
|
131
|
|
33
|
|
|
1183
|
$class = ref $class || $class; |
35
|
131
|
|
|
|
|
731
|
return bless { @_ }, $class; |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub process |
40
|
|
|
|
|
|
|
{ |
41
|
131
|
|
|
131
|
0
|
18931
|
my $self = shift; |
42
|
131
|
|
|
|
|
317
|
local $Canonicalizer = shift; |
43
|
131
|
|
|
|
|
484
|
my $data_ref = shift; |
44
|
|
|
|
|
|
|
|
45
|
131
|
|
|
|
|
382
|
local @MarkedData = (); |
46
|
131
|
|
|
|
|
537
|
local @NodeStack = (); |
47
|
131
|
|
|
|
|
300
|
local @NameSpaces = (); |
48
|
131
|
50
|
|
|
|
707
|
$data_ref = (ref $data_ref) ? $data_ref : \$data_ref; |
49
|
|
|
|
|
|
|
|
50
|
131
|
|
|
|
|
1408
|
my $tree = HTML::TreeBuilder->new; |
51
|
131
|
|
|
|
|
52088
|
$tree->p_strict (0); |
52
|
131
|
|
|
|
|
1706
|
$tree->no_space_compacting (1); |
53
|
131
|
|
|
|
|
2324
|
$tree->ignore_unknown (0); |
54
|
131
|
|
|
|
|
1372
|
$tree->store_comments(1); |
55
|
131
|
|
|
|
|
1337
|
$tree->ignore_ignorable_whitespace(0); |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
eval |
58
|
131
|
|
|
|
|
875
|
{ |
59
|
131
|
|
|
|
|
3890
|
$tree->parse ($$data_ref); |
60
|
131
|
|
|
|
|
435383
|
my @nodes = $tree->guts(); |
61
|
131
|
|
|
|
|
4753
|
$tree->elementify(); |
62
|
131
|
|
|
|
|
19019
|
$self->generate_events ($_) for (@nodes); |
63
|
|
|
|
|
|
|
}; |
64
|
|
|
|
|
|
|
|
65
|
131
|
|
|
|
|
368
|
@MarkedData = (); |
66
|
131
|
|
|
|
|
249
|
@NodeStack = (); |
67
|
131
|
|
|
|
|
688
|
$tree->delete; |
68
|
131
|
50
|
33
|
|
|
23142
|
carp $@ if (defined $@ and $@); |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# generate_events(); |
73
|
|
|
|
|
|
|
# ------------------ |
74
|
|
|
|
|
|
|
# Once the HTML::TreeBuilder object is built and elementified, it is |
75
|
|
|
|
|
|
|
# passed to that subroutine which will traverse it and will trigger |
76
|
|
|
|
|
|
|
# proper subroutines which will generate the XML events which are used |
77
|
|
|
|
|
|
|
# by the Petal::Canonicalizer module |
78
|
|
|
|
|
|
|
sub generate_events |
79
|
|
|
|
|
|
|
{ |
80
|
2784
|
|
|
2784
|
0
|
5579
|
my $self = shift; |
81
|
2784
|
|
|
|
|
4070
|
my $tree = shift; |
82
|
|
|
|
|
|
|
|
83
|
2784
|
100
|
|
|
|
6282
|
if (ref $tree) |
84
|
|
|
|
|
|
|
{ |
85
|
1164
|
|
|
|
|
3390
|
my $tag = $tree->tag; |
86
|
1164
|
|
|
|
|
11113
|
my $attr = { $tree->all_external_attr() }; |
87
|
|
|
|
|
|
|
|
88
|
1164
|
100
|
|
|
|
25466
|
if ($tag eq '~comment') |
89
|
|
|
|
|
|
|
{ |
90
|
10
|
|
|
|
|
48
|
generate_events_comment ($tree->attr ('text')); |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
else |
93
|
|
|
|
|
|
|
{ |
94
|
1154
|
|
|
|
|
2487
|
push @NodeStack, $tree; |
95
|
1154
|
|
|
|
|
2990
|
generate_events_start ($tag, $attr); |
96
|
|
|
|
|
|
|
|
97
|
1154
|
|
|
|
|
3164761
|
foreach my $content ($tree->content_list()) |
98
|
|
|
|
|
|
|
{ |
99
|
2606
|
|
|
|
|
65788
|
$self->generate_events ($content); |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
1154
|
|
|
|
|
32012
|
generate_events_end ($tag); |
103
|
1154
|
|
|
|
|
76866
|
pop (@NodeStack); |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
else |
107
|
|
|
|
|
|
|
{ |
108
|
1620
|
|
|
|
|
3681
|
generate_events_text ($tree); |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub generate_events_start |
114
|
|
|
|
|
|
|
{ |
115
|
1154
|
|
|
1154
|
0
|
1766
|
$_ = shift; |
116
|
1154
|
|
|
|
|
2831
|
$_ = "<$_>"; |
117
|
1154
|
|
|
|
|
1545
|
%_ = %{shift()}; |
|
1154
|
|
|
|
|
6581
|
|
118
|
1154
|
|
|
|
|
2662
|
delete $_{'/'}; |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# process the Petal namespace... |
121
|
1154
|
100
|
|
|
|
2814
|
my $ns = (scalar @NameSpaces) ? $NameSpaces[$#NameSpaces] : $Petal::NS; |
122
|
1154
|
|
|
|
|
3199
|
foreach my $key (keys %_) |
123
|
|
|
|
|
|
|
{ |
124
|
2116
|
|
|
|
|
4585
|
my $value = $_{$key}; |
125
|
2116
|
100
|
|
|
|
5517
|
if ($value eq $Petal::NS_URI) |
126
|
|
|
|
|
|
|
{ |
127
|
48
|
50
|
|
|
|
279
|
next unless ($key =~ /^xmlns\:/); |
128
|
48
|
|
|
|
|
117
|
delete $_{$key}; |
129
|
48
|
|
|
|
|
75
|
$ns = $key; |
130
|
48
|
|
|
|
|
286
|
$ns =~ s/^xmlns\://; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
1154
|
|
|
|
|
3058
|
push @NameSpaces, $ns; |
135
|
1154
|
|
|
|
|
1748
|
local ($Petal::NS) = $ns; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# process the XInclude namespace |
138
|
1154
|
100
|
|
|
|
2988
|
my $xi_ns = (scalar @XI_NameSpaces) ? $XI_NameSpaces[$#XI_NameSpaces] : $Petal::XI_NS; |
139
|
1154
|
|
|
|
|
3119
|
foreach my $key (keys %_) |
140
|
|
|
|
|
|
|
{ |
141
|
2068
|
|
|
|
|
3381
|
my $value = $_{$key}; |
142
|
2068
|
100
|
|
|
|
5145
|
if ($value eq $Petal::XI_NS_URI) |
143
|
|
|
|
|
|
|
{ |
144
|
37
|
50
|
|
|
|
729
|
next unless ($key =~ /^xmlns\:/); |
145
|
37
|
|
|
|
|
85
|
delete $_{$key}; |
146
|
37
|
|
|
|
|
47
|
$xi_ns = $key; |
147
|
37
|
|
|
|
|
206
|
$xi_ns =~ s/^xmlns\://; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
1154
|
|
|
|
|
2229
|
push @XI_NameSpaces, $xi_ns; |
152
|
1154
|
|
|
|
|
11960
|
local ($Petal::XI_NS) = $xi_ns; |
153
|
|
|
|
|
|
|
|
154
|
1154
|
|
|
|
|
4611
|
$Canonicalizer->StartTag(); |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
sub generate_events_end |
159
|
|
|
|
|
|
|
{ |
160
|
1154
|
|
|
1154
|
0
|
1744
|
$_ = shift; |
161
|
1154
|
|
|
|
|
3115
|
$_ = "$_>"; |
162
|
1154
|
|
|
|
|
2343
|
local ($Petal::NS) = pop (@NameSpaces); |
163
|
1154
|
|
|
|
|
1840
|
local ($Petal::XI_NS) = pop (@XI_NameSpaces); |
164
|
1154
|
|
|
|
|
3808
|
$Canonicalizer->EndTag(); |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
sub generate_events_text |
169
|
|
|
|
|
|
|
{ |
170
|
1620
|
|
|
1620
|
0
|
2233
|
my $data = shift; |
171
|
1620
|
|
|
|
|
2997
|
$data =~ s/\&/&/g; |
172
|
1620
|
|
|
|
|
2026
|
$data =~ s/\</g; |
173
|
1620
|
|
|
|
|
2609
|
$_ = $data; |
174
|
|
|
|
|
|
|
|
175
|
1620
|
|
|
|
|
3649
|
local ($Petal::NS) = $NameSpaces[$#NameSpaces]; |
176
|
1620
|
|
|
|
|
3040
|
local ($Petal::XI_NS) = $XI_NameSpaces[$#XI_NameSpaces]; |
177
|
1620
|
|
|
|
|
5828
|
$Canonicalizer->Text(); |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub generate_events_comment |
182
|
|
|
|
|
|
|
{ |
183
|
10
|
|
|
10
|
0
|
145
|
my $data = shift; |
184
|
10
|
|
|
|
|
29
|
$data =~ s/\&/&/g; |
185
|
10
|
|
|
|
|
29
|
$data =~ s/\</g; |
186
|
10
|
|
|
|
|
38
|
$_ = ''; |
187
|
10
|
|
|
|
|
41
|
$Canonicalizer->Text(); |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
1; |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
__END__ |