line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
2
|
|
|
|
|
|
|
# Petal::Canonicalizer::XHTML - Builds an XHTML canonical Petal file |
3
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
4
|
|
|
|
|
|
|
# Author: Jean-Michel Hiver |
5
|
|
|
|
|
|
|
# Description: This modules mainly implements the XML::Parser |
6
|
|
|
|
|
|
|
# 'Stream' interface. It receives XML events and builds Petal |
7
|
|
|
|
|
|
|
# canonical data, i.e. |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# Hello |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# Might be canonicalized to something like |
12
|
|
|
|
|
|
|
# |
13
|
|
|
|
|
|
|
# |
14
|
|
|
|
|
|
|
# Hello |
15
|
|
|
|
|
|
|
# |
16
|
|
|
|
|
|
|
# |
17
|
|
|
|
|
|
|
# On top of that, Petal::Canonicalizer::XHTML will self close certain |
18
|
|
|
|
|
|
|
# XHTML specific tags, like or |
19
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
20
|
|
|
|
|
|
|
package Petal::Canonicalizer::XHTML; |
21
|
77
|
|
|
77
|
|
269
|
use strict; |
|
77
|
|
|
|
|
82
|
|
|
77
|
|
|
|
|
1866
|
|
22
|
77
|
|
|
77
|
|
253
|
use warnings; |
|
77
|
|
|
|
|
81
|
|
|
77
|
|
|
|
|
1750
|
|
23
|
77
|
|
|
77
|
|
247
|
use base qw /Petal::Canonicalizer::XML/; |
|
77
|
|
|
|
|
79
|
|
|
77
|
|
|
|
|
76507
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# http://lists.webarch.co.uk/pipermail/petal/2002-August/000074.html |
27
|
|
|
|
|
|
|
# here's a list[1] of empty elements (which need careful handling): |
28
|
|
|
|
|
|
|
# |
29
|
|
|
|
|
|
|
# ---8<--- |
30
|
|
|
|
|
|
|
# area |
31
|
|
|
|
|
|
|
# base |
32
|
|
|
|
|
|
|
# basefont |
33
|
|
|
|
|
|
|
# br |
34
|
|
|
|
|
|
|
# col |
35
|
|
|
|
|
|
|
# frame |
36
|
|
|
|
|
|
|
# hr |
37
|
|
|
|
|
|
|
# img |
38
|
|
|
|
|
|
|
# input |
39
|
|
|
|
|
|
|
# isindex |
40
|
|
|
|
|
|
|
# link |
41
|
|
|
|
|
|
|
# meta |
42
|
|
|
|
|
|
|
# param |
43
|
|
|
|
|
|
|
# --->8--- |
44
|
|
|
|
|
|
|
# |
45
|
|
|
|
|
|
|
# [1] http://www.w3.org/TR/html401/index/elements.html |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# $class->StartTag(); |
49
|
|
|
|
|
|
|
# ------------------- |
50
|
|
|
|
|
|
|
# Called for every start tag with a second parameter of the element type. |
51
|
|
|
|
|
|
|
# It will check for special PETAL attributes like petal:if, petal:loop, etc... |
52
|
|
|
|
|
|
|
# and rewrite the start tag into @Petal::Canonicalizer::XML::Result accordingly. |
53
|
|
|
|
|
|
|
# |
54
|
|
|
|
|
|
|
# For example |
55
|
|
|
|
|
|
|
# |
56
|
|
|
|
|
|
|
# |
57
|
|
|
|
|
|
|
# Is rewritten |
58
|
|
|
|
|
|
|
# ... |
59
|
|
|
|
|
|
|
sub StartTag |
60
|
|
|
|
|
|
|
{ |
61
|
116
|
|
|
116
|
0
|
236
|
Petal::load_code_generator(); # we will use it later |
62
|
|
|
|
|
|
|
|
63
|
116
|
|
|
|
|
115
|
my $class = shift; |
64
|
116
|
|
|
|
|
150
|
push @Petal::Canonicalizer::XML::NodeStack, {}; |
65
|
116
|
100
|
|
|
|
331
|
return if ($class->_is_inside_content_or_replace()); |
66
|
|
|
|
|
|
|
|
67
|
112
|
|
|
|
|
109
|
my $tag = $_; |
68
|
112
|
|
|
|
|
478
|
($tag) = $tag =~ /^<\s*((?:\w|\:|\-)*)/; |
69
|
112
|
|
|
|
|
240
|
my $att = { %_ }; |
70
|
|
|
|
|
|
|
|
71
|
112
|
|
|
|
|
275
|
$class->_use_macro ($tag, $att); |
72
|
112
|
|
|
|
|
221
|
$class->_on_error ($tag, $att); |
73
|
112
|
|
|
|
|
246
|
$class->_define ($tag, $att); |
74
|
112
|
|
|
|
|
225
|
$class->_define_slot ($tag, $att); |
75
|
112
|
|
|
|
|
246
|
$class->_condition ($tag, $att); |
76
|
112
|
|
|
|
|
249
|
$class->_repeat ($tag, $att); |
77
|
112
|
100
|
66
|
|
|
253
|
$class->_is_xinclude ($tag) and $class->_xinclude ($tag, $att) and return; |
78
|
111
|
|
|
|
|
255
|
$class->_replace ($tag, $att); |
79
|
|
|
|
|
|
|
|
80
|
111
|
|
|
|
|
129
|
my $petal = quotemeta ($Petal::NS); |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# if a petal:replace attribute was set, then at this point _is_inside_content_or_replace() |
83
|
|
|
|
|
|
|
# should return TRUE and this code should not be executed |
84
|
111
|
100
|
|
|
|
200
|
unless ($class->_is_inside_content_or_replace()) |
85
|
|
|
|
|
|
|
{ |
86
|
|
|
|
|
|
|
# for every attribute which is not a petal: attribute, |
87
|
|
|
|
|
|
|
# we need to convert $variable into |
88
|
102
|
|
|
|
|
86
|
foreach my $key (keys %{$att}) |
|
102
|
|
|
|
|
270
|
|
89
|
|
|
|
|
|
|
{ |
90
|
53
|
100
|
|
|
|
264
|
next if ($key =~ /^$petal:/); |
91
|
30
|
|
|
|
|
40
|
my $text = $att->{$key}; |
92
|
30
|
|
|
|
|
26
|
my $token_re = $Petal::Hash::String::TOKEN_RE; |
93
|
30
|
|
|
|
|
250
|
my @vars = $text =~ /$token_re/gsm; |
94
|
30
|
|
|
|
|
49
|
my %vars = map { $_ => 1 } @vars; |
|
0
|
|
|
|
|
0
|
|
95
|
30
|
|
|
|
|
145
|
@vars = sort { length ($b) <=> length ($a) } keys %vars; |
|
0
|
|
|
|
|
0
|
|
96
|
30
|
|
|
|
|
47
|
foreach my $var (@vars) |
97
|
|
|
|
|
|
|
{ |
98
|
0
|
|
|
|
|
0
|
my $command = $var; |
99
|
0
|
|
|
|
|
0
|
$command =~ s/^\$//; |
100
|
0
|
|
|
|
|
0
|
$command =~ s/^\{//; |
101
|
0
|
|
|
|
|
0
|
$command =~ s/\}$//; |
102
|
0
|
|
|
|
|
0
|
$command = $class->_encode_backslash_semicolon ($command); |
103
|
0
|
|
|
|
|
0
|
$command = ""; |
104
|
0
|
|
|
|
|
0
|
$text =~ s/\Q$var\E/$command/g; |
105
|
|
|
|
|
|
|
} |
106
|
30
|
|
|
|
|
59
|
$att->{$key} = $text; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
102
|
|
|
|
|
264
|
$class->_attributes ($tag, $att); |
110
|
|
|
|
|
|
|
|
111
|
102
|
|
|
|
|
135
|
my @att_str = (); |
112
|
102
|
|
|
|
|
85
|
foreach my $key (keys %{$att}) |
|
102
|
|
|
|
|
176
|
|
113
|
|
|
|
|
|
|
{ |
114
|
46
|
100
|
|
|
|
187
|
next if ($key =~ /^$petal:/); |
115
|
31
|
|
|
|
|
42
|
my $value = $att->{$key}; |
116
|
31
|
100
|
|
|
|
73
|
if ($value =~ /^<\?attr/) |
117
|
|
|
|
|
|
|
{ |
118
|
8
|
|
|
|
|
46
|
push @att_str, $value; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
else |
121
|
|
|
|
|
|
|
{ |
122
|
23
|
|
|
|
|
64
|
my $tokens = Petal::CodeGenerator->_tokenize (\$value); |
123
|
|
|
|
|
|
|
my @res = map { |
124
|
|
|
|
|
|
|
($_ =~ /$Petal::CodeGenerator::PI_RE/s) ? |
125
|
|
|
|
|
|
|
$_ : |
126
|
23
|
50
|
|
|
|
222
|
do { |
127
|
23
|
|
|
|
|
34
|
$_ =~ s/\&/&/g; |
128
|
23
|
|
|
|
|
25
|
$_ =~ s/\</g; |
129
|
23
|
|
|
|
|
23
|
$_ =~ s/\>/>/g; |
130
|
23
|
|
|
|
|
23
|
$_ =~ s/\"/"/g; |
131
|
23
|
|
|
|
|
49
|
$_; |
132
|
|
|
|
|
|
|
}; |
133
|
23
|
|
|
|
|
24
|
} @{$tokens}; |
|
23
|
|
|
|
|
26
|
|
134
|
23
|
|
|
|
|
93
|
push @att_str, $key . '="' . (join '', @res) . '"'; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
} |
137
|
102
|
|
|
|
|
164
|
my $att_str = join " ", @att_str; |
138
|
|
|
|
|
|
|
|
139
|
102
|
100
|
33
|
|
|
2302
|
if ( (uc ($tag) eq 'AREA') or |
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
140
|
|
|
|
|
|
|
(uc ($tag) eq 'BASE') or |
141
|
|
|
|
|
|
|
(uc ($tag) eq 'BASEFONT') or |
142
|
|
|
|
|
|
|
(uc ($tag) eq 'BR') or |
143
|
|
|
|
|
|
|
(uc ($tag) eq 'COL') or |
144
|
|
|
|
|
|
|
(uc ($tag) eq 'FRAME') or |
145
|
|
|
|
|
|
|
(uc ($tag) eq 'HR') or |
146
|
|
|
|
|
|
|
(uc ($tag) eq 'IMG') or |
147
|
|
|
|
|
|
|
(uc ($tag) eq 'INPUT') or |
148
|
|
|
|
|
|
|
(uc ($tag) eq 'ISINDEX') or |
149
|
|
|
|
|
|
|
(uc ($tag) eq 'LINK') or |
150
|
|
|
|
|
|
|
(uc ($tag) eq 'META') or |
151
|
|
|
|
|
|
|
(uc ($tag) eq 'PARAM') ) |
152
|
|
|
|
|
|
|
{ |
153
|
7
|
50
|
|
|
|
21
|
if (defined $att->{"$petal:omit-tag"}) |
154
|
|
|
|
|
|
|
{ |
155
|
0
|
|
0
|
|
|
0
|
my $expression = $att->{"$petal:omit-tag"} || 'string:1'; |
156
|
0
|
|
|
|
|
0
|
$Petal::Canonicalizer::XML::NodeStack[$#Petal::Canonicalizer::XML::NodeStack]->{'omit-tag'} = $expression; |
157
|
0
|
0
|
0
|
|
|
0
|
push @Petal::Canonicalizer::XML::Result, (defined $att_str and $att_str) ? |
158
|
|
|
|
|
|
|
"<$tag $att_str />" : |
159
|
|
|
|
|
|
|
"<$tag />"; |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
else |
162
|
|
|
|
|
|
|
{ |
163
|
7
|
100
|
66
|
|
|
39
|
push @Petal::Canonicalizer::XML::Result, (defined $att_str and $att_str) ? "<$tag $att_str />" : "<$tag />"; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
else |
167
|
|
|
|
|
|
|
{ |
168
|
95
|
100
|
|
|
|
200
|
if (defined $att->{"$petal:omit-tag"}) |
169
|
|
|
|
|
|
|
{ |
170
|
3
|
|
100
|
|
|
9
|
my $expression = $att->{"$petal:omit-tag"} || 'string:1'; |
171
|
3
|
|
|
|
|
5
|
$Petal::Canonicalizer::XML::NodeStack[$#Petal::Canonicalizer::XML::NodeStack]->{'omit-tag'} = $expression; |
172
|
3
|
50
|
33
|
|
|
18
|
push @Petal::Canonicalizer::XML::Result, (defined $att_str and $att_str) ? |
173
|
|
|
|
|
|
|
"<$tag $att_str>" : |
174
|
|
|
|
|
|
|
"<$tag>"; |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
else |
177
|
|
|
|
|
|
|
{ |
178
|
92
|
100
|
66
|
|
|
428
|
push @Petal::Canonicalizer::XML::Result, (defined $att_str and $att_str) ? "<$tag $att_str>" : "<$tag>"; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
102
|
|
|
|
|
289
|
$class->_content ($tag, $att); |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# $class->EndTag(); |
188
|
|
|
|
|
|
|
# ----------------- |
189
|
|
|
|
|
|
|
# Called for every end tag with a second parameter of the element type. |
190
|
|
|
|
|
|
|
# It will check in the @Petal::Canonicalizer::XML::NodeStack to see if this end-tag also needs to close |
191
|
|
|
|
|
|
|
# some 'condition' or 'repeat' statements, i.e. |
192
|
|
|
|
|
|
|
# |
193
|
|
|
|
|
|
|
# |
194
|
|
|
|
|
|
|
# |
195
|
|
|
|
|
|
|
# Could be rewritten |
196
|
|
|
|
|
|
|
# |
197
|
|
|
|
|
|
|
# |
198
|
|
|
|
|
|
|
# |
199
|
|
|
|
|
|
|
# If the starting LI used a loop, i.e. |
200
|
|
|
|
|
|
|
sub EndTag |
201
|
|
|
|
|
|
|
{ |
202
|
116
|
|
|
116
|
0
|
109
|
my $class = shift; |
203
|
116
|
100
|
|
|
|
191
|
return if ($class->_is_inside_content_or_replace ( 'endtag' )); |
204
|
108
|
|
|
|
|
474
|
my ($tag) = $_ =~ /^<\/\s*((?:\w|\:|\-)*)/; |
205
|
108
|
|
|
|
|
131
|
my $node = pop (@Petal::Canonicalizer::XML::NodeStack); |
206
|
|
|
|
|
|
|
|
207
|
108
|
100
|
|
|
|
207
|
return if ($class->_is_xinclude ($tag)); |
208
|
|
|
|
|
|
|
|
209
|
107
|
50
|
66
|
|
|
2513
|
if ( (not (defined $node->{replace} and $node->{replace})) and |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
210
|
|
|
|
|
|
|
(uc ($tag) ne 'AREA') and |
211
|
|
|
|
|
|
|
(uc ($tag) ne 'BASE') and |
212
|
|
|
|
|
|
|
(uc ($tag) ne 'BASEFONT') and |
213
|
|
|
|
|
|
|
(uc ($tag) ne 'BR') and |
214
|
|
|
|
|
|
|
(uc ($tag) ne 'COL') and |
215
|
|
|
|
|
|
|
(uc ($tag) ne 'FRAME') and |
216
|
|
|
|
|
|
|
(uc ($tag) ne 'HR') and |
217
|
|
|
|
|
|
|
(uc ($tag) ne 'IMG') and |
218
|
|
|
|
|
|
|
(uc ($tag) ne 'INPUT') and |
219
|
|
|
|
|
|
|
(uc ($tag) ne 'ISINDEX') and |
220
|
|
|
|
|
|
|
(uc ($tag) ne 'LINK') and |
221
|
|
|
|
|
|
|
(uc ($tag) ne 'META') and |
222
|
|
|
|
|
|
|
(uc ($tag) ne 'PARAM') ) |
223
|
|
|
|
|
|
|
{ |
224
|
95
|
100
|
|
|
|
157
|
if (defined $node->{'omit-tag'}) |
225
|
|
|
|
|
|
|
{ |
226
|
3
|
|
|
|
|
4
|
my $expression = $node->{'omit-tag'}; |
227
|
3
|
|
|
|
|
10
|
push @Petal::Canonicalizer::XML::Result, "$tag>"; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
else |
230
|
|
|
|
|
|
|
{ |
231
|
92
|
|
|
|
|
205
|
push @Petal::Canonicalizer::XML::Result, "$tag>"; |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
107
|
|
100
|
|
|
320
|
my $repeat = $node->{repeat} || '0'; |
236
|
107
|
|
100
|
|
|
261
|
my $condition = $node->{condition} || '0'; |
237
|
107
|
|
50
|
|
|
269
|
my $define_slot = $node->{define_slot} || '0'; |
238
|
107
|
|
|
|
|
258
|
push @Petal::Canonicalizer::XML::Result, map { '' } 1 .. ($repeat+$condition+$define_slot); |
|
21
|
|
|
|
|
30
|
|
239
|
|
|
|
|
|
|
|
240
|
107
|
50
|
|
|
|
286
|
if (exists $node->{'on-error'}) |
241
|
|
|
|
|
|
|
{ |
242
|
0
|
|
|
|
|
|
my $expression = $node->{'on-error'}; |
243
|
0
|
|
|
|
|
|
push @Petal::Canonicalizer::XML::Result, ""; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
1; |