line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MojoMojo::Declaw; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
MojoMojo::Declaw - Cleans HTML as well as CSS of scripting and other executable contents, and neutralises XSS attacks. |
6
|
|
|
|
|
|
|
Derived from HTML::Defang version 1.01. |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 SYNOPSIS |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
my $InputHtml = "<html><body></body></html>"; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my $Defang = MojoMojo::Declaw->new( |
13
|
|
|
|
|
|
|
context => $Self, |
14
|
|
|
|
|
|
|
fix_mismatched_tags => 1, |
15
|
|
|
|
|
|
|
tags_to_callback => [ br embed img ], |
16
|
|
|
|
|
|
|
tags_callback => \&DefangTagsCallback, |
17
|
|
|
|
|
|
|
url_callback => \&DefangUrlCallback, |
18
|
|
|
|
|
|
|
css_callback => \&DefangCssCallback, |
19
|
|
|
|
|
|
|
attribs_to_callback => [ qw(border src) ], |
20
|
|
|
|
|
|
|
attribs_callback => \&DefangAttribsCallback |
21
|
|
|
|
|
|
|
); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
my $SanitizedHtml = $Defang->defang($InputHtml); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Callback for custom handling specific HTML tags |
26
|
|
|
|
|
|
|
sub DefangTagsCallback { |
27
|
|
|
|
|
|
|
my ($Self, $Defang, $OpenAngle, $lcTag, $IsEndTag, $AttributeHash, $CloseAngle, $HtmlR, $OutR) = @_; |
28
|
|
|
|
|
|
|
return 1 if $lcTag eq 'br'; # Explicitly defang this tag, eventhough safe |
29
|
|
|
|
|
|
|
return 0 if $lcTag eq 'embed'; # Explicitly whitelist this tag, eventhough unsafe |
30
|
|
|
|
|
|
|
return 2 if $lcTag eq 'img'; # I am not sure what to do with this tag, so process as HTML::Defang normally would |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Callback for custom handling URLs in HTML attributes as well as style tag/attribute declarations |
34
|
|
|
|
|
|
|
sub DefangUrlCallback { |
35
|
|
|
|
|
|
|
my ($Self, $Defang, $lcTag, $lcAttrKey, $AttrValR, $AttributeHash, $HtmlR) = @_; |
36
|
|
|
|
|
|
|
return 0 if $$AttrValR =~ /safesite.com/i; # Explicitly allow this URL in tag attributes or stylesheets |
37
|
|
|
|
|
|
|
return 1 if $$AttrValR =~ /evilsite.com/i; # Explicitly defang this URL in tag attributes or stylesheets |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Callback for custom handling style tags/attributes |
41
|
|
|
|
|
|
|
sub DefangCssCallback { |
42
|
|
|
|
|
|
|
my ($Self, $Defang, $Selectors, $SelectorRules, $Tag, $IsAttr) = @_; |
43
|
|
|
|
|
|
|
my $i = 0; |
44
|
|
|
|
|
|
|
foreach (@$Selectors) { |
45
|
|
|
|
|
|
|
my $SelectorRule = $$SelectorRules[$i]; |
46
|
|
|
|
|
|
|
foreach my $KeyValueRules (@$SelectorRule) { |
47
|
|
|
|
|
|
|
foreach my $KeyValueRule (@$KeyValueRules) { |
48
|
|
|
|
|
|
|
my ($Key, $Value) = @$KeyValueRule; |
49
|
|
|
|
|
|
|
$$KeyValueRule[2] = 1 if $Value =~ '!important'; # Comment out any '!important' directive |
50
|
|
|
|
|
|
|
$$KeyValueRule[2] = 1 if $Key =~ 'position' && $Value =~ 'fixed'; # Comment out any 'position=fixed;' declaration |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
$i++; |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
} |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# Callback for custom handling HTML tag attributes |
58
|
|
|
|
|
|
|
sub DefangAttribsCallback { |
59
|
|
|
|
|
|
|
my ($Self, $Defang, $lcTag, $lcAttrKey, $AttrValR, $HtmlR) = @_; |
60
|
|
|
|
|
|
|
$$AttrValR = '0' if $lcAttrKey eq 'border'; # Change all 'border' attribute values to zero. |
61
|
|
|
|
|
|
|
return 1 if $lcAttrKey eq 'src'; # Defang all 'src' attributes |
62
|
|
|
|
|
|
|
return 0; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head1 DESCRIPTION |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
This module accepts an input HTML and/or CSS string and removes any executable code including scripting, embedded objects, applets, etc., and neutralises any XSS attacks. A whitelist based approach is used which means only HTML known to be safe is allowed through. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
HTML::Defang uses a custom html tag parser. The parser has been designed and tested to work with nasty real world html and to try and emulate as close as possible what browsers actually do with strange looking constructs. The test suite has been built based on examples from a range of sources such as http://ha.ckers.org/xss.html and http://imfo.ru/csstest/css_hacks/import.php to ensure that as many as possible XSS attack scenarios have been dealt with. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
HTML::Defang can make callbacks to client code when it encounters the following: |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=over 4 |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=item * |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
When a specified tag is parsed |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=item * |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
When a specified attribute is parsed |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=item * |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
When a URL is parsed as part of an HTML attribute, or CSS property value. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=item * |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
When style data is parsed, as part of an HTML style attribute, or as part of an HTML <style> tag. |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=back |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
The callbacks include details about the current tag/attribute that is being parsed, and also gives a scalar reference to the input HTML. Querying pos() on the input HTML should indicate where the module is with parsing. This gives the client code flexibility in working with HTML::Declaw. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
HTML::Declaw can defang whole tags, any attribute in a tag, any URL that appear as an attribute or style property, or any CSS declaration in a declaration block in a style rule. This helps one to precisely block the most specific unwanted elements in the contents(for example, block just an offending attribute instead of the whole tag), while retaining any safe HTML/CSS. |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=cut |
98
|
|
|
|
|
|
|
|
99
|
27
|
|
|
27
|
|
14487
|
use Exporter; |
|
27
|
|
|
|
|
65
|
|
|
27
|
|
|
|
|
1966
|
|
100
|
|
|
|
|
|
|
our @ISA = ('Exporter'); |
101
|
|
|
|
|
|
|
%EXPORT_TAGS = ( all => [qw(@FormTags)] ); |
102
|
|
|
|
|
|
|
Exporter::export_ok_tags('all'); |
103
|
|
|
|
|
|
|
|
104
|
27
|
|
|
27
|
|
163
|
use strict; |
|
27
|
|
|
|
|
60
|
|
|
27
|
|
|
|
|
475
|
|
105
|
27
|
|
|
27
|
|
122
|
use warnings; |
|
27
|
|
|
|
|
63
|
|
|
27
|
|
|
|
|
937
|
|
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
our $VERSION = 1.01; |
108
|
|
|
|
|
|
|
|
109
|
27
|
|
|
27
|
|
991
|
use Encode; |
|
27
|
|
|
|
|
14654
|
|
|
27
|
|
|
|
|
2162
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
my $HasScalarReadonly = 0; |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
BEGIN { |
114
|
27
|
50
|
|
27
|
|
1508
|
eval "use Scalar::Readonly qw(readonly_on);" && ( $HasScalarReadonly = 1 ); |
|
27
|
|
|
27
|
|
63002
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
our @FormTags = |
118
|
|
|
|
|
|
|
qw(form input textarea select option button fieldset label legend multicol nextid optgroup); |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# Some regexps for matching HTML tags + key=value attributes |
121
|
|
|
|
|
|
|
my $AttrKeyStartLineRE = qr/[^=<>\s\/\\]{1,}/; |
122
|
|
|
|
|
|
|
my $AttrKeyRE = qr/(?<=[\s'"\/])$AttrKeyStartLineRE/; |
123
|
|
|
|
|
|
|
my $AttrValRE = |
124
|
|
|
|
|
|
|
qr/[^>\s'"`][^>\s]*|'[^']{0,2000}?'|"[^"]{0,2000}?"|`[^`]{0,2000}?`/; |
125
|
|
|
|
|
|
|
my $AttributesRE = qr/(?:(?:$AttrKeyRE\s*)?(?:=\s*$AttrValRE\s*)?)*/; |
126
|
|
|
|
|
|
|
my $TagNameRE = qr/[A-Za-z][A-Za-z0-9\#\&\;\:\!_-]*/; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
my $Selectors = qr/[^{]*?/; |
129
|
|
|
|
|
|
|
my $StyleKey = qr/[^:}]+?/; |
130
|
|
|
|
|
|
|
my $StyleValue = qr/[^;}]+|.*$/; |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
my $Fonts = qr/"?([A-Za-z0-9\s-]+)"?/; |
133
|
|
|
|
|
|
|
my $Alignments = |
134
|
|
|
|
|
|
|
qr/(absbottom|absmiddle|all|autocentre|baseline|bottom|center|justify|left|middle|none|right|texttop|top)/; |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
my $Executables = |
137
|
|
|
|
|
|
|
'([^@]\.com|' |
138
|
|
|
|
|
|
|
. '.*\.(exe|cmd|bat|pif|scr|sys|sct|lnk|dll' |
139
|
|
|
|
|
|
|
. '|vbs?|vbe|hta|shb|shs|hlp|chm|eml|wsf|wsh|js' |
140
|
|
|
|
|
|
|
. '|asx|wm.|mdb|mht|msi|msp|cpl|lib|reg))'; |
141
|
|
|
|
|
|
|
my $SrcBanStd = |
142
|
|
|
|
|
|
|
qr/^([A-Za-z]*script|.*\&\{|mocha|about|opera|mailto:|hcp:|\/(dev|proc)|\\|file|smb|cid:${Executables}(@|\?|$))/i; |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
my %Rules = ( |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# Disallow unknown tags by default |
147
|
|
|
|
|
|
|
"_unknown" => qr/.*/, |
148
|
|
|
|
|
|
|
"align" => qr/^${Alignments}$/i, |
149
|
|
|
|
|
|
|
"alnum" => qr/^[A-Za-z0-9_.-]+$/, |
150
|
|
|
|
|
|
|
"boolean" => qr/^(0|1|true|yes|no|false)$/, |
151
|
|
|
|
|
|
|
"charset" => qr/^[A-Za-z0-9_][A-Za-z0-9_.-]*$/, |
152
|
|
|
|
|
|
|
"class" => qr/^[A-Za-z0-9_.:\s-]*$/, |
153
|
|
|
|
|
|
|
"color" => qr/^#?[0-9A-Z]+$/i, |
154
|
|
|
|
|
|
|
"coords" => qr/^(\d+,)+\d+$/i, |
155
|
|
|
|
|
|
|
"datetime" => qr/^\d\d\d\d-\d\d-\d\d.{0,5}\d\d:\d\d:\d\d.{0,5}$/, |
156
|
|
|
|
|
|
|
"dir" => qr/^(ltr|rtl)$/i, |
157
|
|
|
|
|
|
|
"eudora" => qr/^(autourl)$/i, |
158
|
|
|
|
|
|
|
"font-face" => qr/^((${Fonts})[,\s]*)+$/i, |
159
|
|
|
|
|
|
|
"form-enctype" => |
160
|
|
|
|
|
|
|
qr/^(application\/x-www-form-urlencoded|multipart\/form-data)$/i, |
161
|
|
|
|
|
|
|
"form-method" => qr/^(get|post)$/i, |
162
|
|
|
|
|
|
|
"frame" => qr/^(void|above|below|hsides|vsides|lhs|rhs|box|border)$/i, |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# href: Not javascript, vbs or vbscript |
165
|
|
|
|
|
|
|
"href" => qr/^([A-Za-z]*script|.*\&\{|mocha|hcp|opera|about|smb|\/dev\/)/i, |
166
|
|
|
|
|
|
|
"usemap-href" => qr/^#[A-Za-z0-9_.-]+$/, # this is not really a href at all! |
167
|
|
|
|
|
|
|
"input-size" => |
168
|
|
|
|
|
|
|
qr/^(\d{1,4})$/, # some browsers freak out with very large widgets |
169
|
|
|
|
|
|
|
"input-type" => |
170
|
|
|
|
|
|
|
qr/^(button|checkbox|file|hidden|image|password|radio|readonly|reset|submit|text)$/i, |
171
|
|
|
|
|
|
|
"integer" => qr/^(-|\+)?\d+$/, |
172
|
|
|
|
|
|
|
"number" => qr/^(-|\+)?[\d.,]+$/, |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# language: Not javascript, vbs or vbscript |
175
|
|
|
|
|
|
|
"language" => qr/^(XML)$/i, |
176
|
|
|
|
|
|
|
"media" => qr/^((screen|print|projection|braille|speech|all)[,\s]*)+$/i, |
177
|
|
|
|
|
|
|
"meta:name" => |
178
|
|
|
|
|
|
|
qr/^(author|progid|originator|generator|keywords|description|content-type|pragma|expires)$/i, |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# mime-type: Not javascript |
181
|
|
|
|
|
|
|
"mime-type" => qr/^(cite|text\/(plain|css|html|xml))$/i, |
182
|
|
|
|
|
|
|
"list-type" => |
183
|
|
|
|
|
|
|
qr/^(none,a,i,upper-alpha,lower-alpha,upper-roman,lower-roman,decimal,disc,square,circle,round)$/i, |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# "rel" => qr/^((copyright|author|stylesheet)\s*)+$/i, |
186
|
|
|
|
|
|
|
"rel" => qr/^((copyright|author)\s*)+$/i |
187
|
|
|
|
|
|
|
, # XXX external stylesheets can contain scripting, so kill them |
188
|
|
|
|
|
|
|
"rules" => qr/^(none|groups|rows|cols|all)$/i, |
189
|
|
|
|
|
|
|
"scope" => qr/^(row|col|rowgroup|colgroup)$/i, |
190
|
|
|
|
|
|
|
"shape" => qr/^(rect|rectangle|circ|circle|poly|polygon)$/i, |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# The following two are for URLs we expect to be auto-loaded by the browser, |
193
|
|
|
|
|
|
|
# because they are within a frame, image or something like that. |
194
|
|
|
|
|
|
|
# "src" => qr/^([a-z]+):|^[\w\.\/\%]+$/i, |
195
|
|
|
|
|
|
|
"src" => qr/^https?:\/\/|^[\w.\/%]+$/i, |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# "style" => qr/^([A-Za-z0-9_-]+\\s*:\\s*(yes|no)|text-align\\s*:\\s*$alignments|((background|(background-|font-)?color)\\s*:\\s*(\\#?[A-Z0-9]+)?|((margin|padding|border)-(right|left)|tab-interval|height|width)\\s*:\\s*[\\d\\.]+(pt|px)|font(-family|-size|-weight|)\\s*:(\\s*[\\d\\.]+(pt|px)|\\s*$fonts)+)[;\\s]*)+\$/i, |
198
|
|
|
|
|
|
|
# "style" => qr/expression|eval|script:|mocha:|\&{|\@import|(?<!background-)position:|background-image/i, # XXX there are probably a million more ways to cause trouble with css! |
199
|
|
|
|
|
|
|
"style" => qr/^.*$/s, |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
#kc In addition to this, we could strip all 'javascript:|expression|' etc. from all attributes(in attribute_cleanup()) |
202
|
|
|
|
|
|
|
"stylesheet" => qr/expression|eval|script:|mocha:|\&\{|\@import/i |
203
|
|
|
|
|
|
|
, # stylesheets are forbidden if Embedded => 1. css positioning can be allowed in an iframe. |
204
|
|
|
|
|
|
|
# NB see also `process_stylesheet' below |
205
|
|
|
|
|
|
|
"style-type" => qr/script|mocha/i, |
206
|
|
|
|
|
|
|
"size" => qr/^[\d.]+(px|%)?$/i, |
207
|
|
|
|
|
|
|
"target" => qr/^[A-Za-z0-9_][A-Za-z0-9_.-]*$/, |
208
|
|
|
|
|
|
|
"base-href" => qr/^https?:\/\/[\w.\/]+$/, |
209
|
|
|
|
|
|
|
"anything" => qr/^.*$/, #[ 0, 0 ], |
210
|
|
|
|
|
|
|
"meta:content" => [ 0, 0 ], |
211
|
|
|
|
|
|
|
); |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
my %CommonAttributes = ( |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# Core attributes |
216
|
|
|
|
|
|
|
"class" => "class", |
217
|
|
|
|
|
|
|
"id" => "alnum", |
218
|
|
|
|
|
|
|
"name" => "alnum", |
219
|
|
|
|
|
|
|
"style" => "style", |
220
|
|
|
|
|
|
|
"accesskey" => "alnum", |
221
|
|
|
|
|
|
|
"tabindex" => "integer", |
222
|
|
|
|
|
|
|
"title" => "anything", |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# Language attributes |
225
|
|
|
|
|
|
|
"dir" => "dir", |
226
|
|
|
|
|
|
|
"lang" => "alnum", |
227
|
|
|
|
|
|
|
"language" => "language", |
228
|
|
|
|
|
|
|
"longdesc" => "anything", |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# Height, width, alignment, etc. |
231
|
|
|
|
|
|
|
#-mxy allow more flexible values for align |
232
|
|
|
|
|
|
|
# "align" => "align", |
233
|
|
|
|
|
|
|
"align" => "alnum", |
234
|
|
|
|
|
|
|
"bgcolor" => "color", |
235
|
|
|
|
|
|
|
"bottommargin" => "size", |
236
|
|
|
|
|
|
|
"clear" => "align", |
237
|
|
|
|
|
|
|
"color" => "color", |
238
|
|
|
|
|
|
|
"height" => "size", |
239
|
|
|
|
|
|
|
"leftmargin" => "size", |
240
|
|
|
|
|
|
|
"marginheight" => "size", |
241
|
|
|
|
|
|
|
"marginwidth" => "size", |
242
|
|
|
|
|
|
|
"nowrap" => "anything", |
243
|
|
|
|
|
|
|
"rightmargin" => "size", |
244
|
|
|
|
|
|
|
"scroll" => "boolean", |
245
|
|
|
|
|
|
|
"scrolling" => "boolean", |
246
|
|
|
|
|
|
|
"topmargin" => "size", |
247
|
|
|
|
|
|
|
"valign" => "align", |
248
|
|
|
|
|
|
|
"width" => "size", |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
# youtube embedded objects |
251
|
|
|
|
|
|
|
"value" => "anything", |
252
|
|
|
|
|
|
|
"type" => "anything", |
253
|
|
|
|
|
|
|
"allowscriptaccess" => 'alnum', |
254
|
|
|
|
|
|
|
"allowfullscreen" => "boolean", |
255
|
|
|
|
|
|
|
"src" => "src", |
256
|
|
|
|
|
|
|
); |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
my %ListAttributes = ( |
259
|
|
|
|
|
|
|
"compact" => "anything", |
260
|
|
|
|
|
|
|
"start" => "integer", |
261
|
|
|
|
|
|
|
"type" => "list-type", |
262
|
|
|
|
|
|
|
); |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
my %TableAttributes = ( |
265
|
|
|
|
|
|
|
"axis" => "alnum", |
266
|
|
|
|
|
|
|
"background" => "src", |
267
|
|
|
|
|
|
|
"border" => "number", |
268
|
|
|
|
|
|
|
"bordercolor" => "color", |
269
|
|
|
|
|
|
|
"bordercolordark" => "color", |
270
|
|
|
|
|
|
|
"bordercolorlight" => "color", |
271
|
|
|
|
|
|
|
"padding" => "integer", |
272
|
|
|
|
|
|
|
"spacing" => "integer", |
273
|
|
|
|
|
|
|
"cellpadding" => "integer", |
274
|
|
|
|
|
|
|
"cellspacing" => "integer", |
275
|
|
|
|
|
|
|
"cols" => "anything", |
276
|
|
|
|
|
|
|
"colspan" => "integer", |
277
|
|
|
|
|
|
|
"char" => "alnum", |
278
|
|
|
|
|
|
|
"charoff" => "integer", |
279
|
|
|
|
|
|
|
"datapagesize" => "integer", |
280
|
|
|
|
|
|
|
"frame" => "frame", |
281
|
|
|
|
|
|
|
"frameborder" => "boolean", |
282
|
|
|
|
|
|
|
"framespacing" => "integer", |
283
|
|
|
|
|
|
|
"headers" => "anything", |
284
|
|
|
|
|
|
|
"rows" => "anything", |
285
|
|
|
|
|
|
|
"rowspan" => "size", |
286
|
|
|
|
|
|
|
"rules" => "rules", |
287
|
|
|
|
|
|
|
"scope" => "scope", |
288
|
|
|
|
|
|
|
"span" => "integer", |
289
|
|
|
|
|
|
|
"summary" => "anything" |
290
|
|
|
|
|
|
|
); |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
my %UrlRules = ( |
293
|
|
|
|
|
|
|
"src" => 1, |
294
|
|
|
|
|
|
|
"href" => 1, |
295
|
|
|
|
|
|
|
"base-href" => 1, |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# cite => 1, |
298
|
|
|
|
|
|
|
# action => 1, |
299
|
|
|
|
|
|
|
); |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
my %Tags = ( |
302
|
|
|
|
|
|
|
script => \&defang_script, |
303
|
|
|
|
|
|
|
style => \&defang_style, |
304
|
|
|
|
|
|
|
"html" => 100, |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
# |
307
|
|
|
|
|
|
|
# Safe elements commonly found in the <head> block follow. |
308
|
|
|
|
|
|
|
# |
309
|
|
|
|
|
|
|
"head" => 2, |
310
|
|
|
|
|
|
|
"base" => { |
311
|
|
|
|
|
|
|
"href" => "base-href", |
312
|
|
|
|
|
|
|
"target" => "target", |
313
|
|
|
|
|
|
|
}, |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# TODO: Deal with link below later |
316
|
|
|
|
|
|
|
#"link" => \$r_link, |
317
|
|
|
|
|
|
|
# { |
318
|
|
|
|
|
|
|
# "rel" => "rel", |
319
|
|
|
|
|
|
|
# "rev" => "rel", |
320
|
|
|
|
|
|
|
# "src" => "src", |
321
|
|
|
|
|
|
|
# "href" => "src", # Might be auto-loaded by the browser!! |
322
|
|
|
|
|
|
|
# "charset" => "charset", |
323
|
|
|
|
|
|
|
# "media" => "media", |
324
|
|
|
|
|
|
|
# "target" => "target", |
325
|
|
|
|
|
|
|
# "type" => "mime-type", |
326
|
|
|
|
|
|
|
# }, |
327
|
|
|
|
|
|
|
"meta" => { |
328
|
|
|
|
|
|
|
"_score" => 2, |
329
|
|
|
|
|
|
|
"content" => "meta:content", |
330
|
|
|
|
|
|
|
"http-equiv" => "meta:name", |
331
|
|
|
|
|
|
|
"name" => "meta:name", |
332
|
|
|
|
|
|
|
"charset" => "charset", |
333
|
|
|
|
|
|
|
}, |
334
|
|
|
|
|
|
|
"title" => 2, |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
# |
337
|
|
|
|
|
|
|
# Safe elements commonly found in the <body> block follow. |
338
|
|
|
|
|
|
|
# |
339
|
|
|
|
|
|
|
"body" => { |
340
|
|
|
|
|
|
|
"_score" => 2, |
341
|
|
|
|
|
|
|
"link" => "color", |
342
|
|
|
|
|
|
|
"alink" => "color", |
343
|
|
|
|
|
|
|
"vlink" => "color", |
344
|
|
|
|
|
|
|
"background" => "src", |
345
|
|
|
|
|
|
|
"nowrap" => "boolean", |
346
|
|
|
|
|
|
|
"text" => "color", |
347
|
|
|
|
|
|
|
"vlink" => "color", |
348
|
|
|
|
|
|
|
}, |
349
|
|
|
|
|
|
|
"a" => { |
350
|
|
|
|
|
|
|
"charset" => "charset", |
351
|
|
|
|
|
|
|
"coords" => "coords", |
352
|
|
|
|
|
|
|
"href" => "href", |
353
|
|
|
|
|
|
|
"shape" => "shape", |
354
|
|
|
|
|
|
|
"target" => "target", |
355
|
|
|
|
|
|
|
"type" => "mime-type", |
356
|
|
|
|
|
|
|
"eudora" => "eudora", |
357
|
|
|
|
|
|
|
"notrack" => "anything", |
358
|
|
|
|
|
|
|
}, |
359
|
|
|
|
|
|
|
"address" => 1, |
360
|
|
|
|
|
|
|
"area" => { |
361
|
|
|
|
|
|
|
"alt" => "anything", |
362
|
|
|
|
|
|
|
"coords" => "coords", |
363
|
|
|
|
|
|
|
"href" => "href", |
364
|
|
|
|
|
|
|
"nohref" => "anything", |
365
|
|
|
|
|
|
|
"shape" => "shape", |
366
|
|
|
|
|
|
|
"target" => "target", |
367
|
|
|
|
|
|
|
}, |
368
|
|
|
|
|
|
|
"applet" => 0, |
369
|
|
|
|
|
|
|
"basefont" => { |
370
|
|
|
|
|
|
|
"face" => "font-face", |
371
|
|
|
|
|
|
|
"family" => "font-face", |
372
|
|
|
|
|
|
|
"back" => "color", |
373
|
|
|
|
|
|
|
"size" => "number", |
374
|
|
|
|
|
|
|
"ptsize" => "number", |
375
|
|
|
|
|
|
|
}, |
376
|
|
|
|
|
|
|
"bdo" => 1, |
377
|
|
|
|
|
|
|
"bgsound" => { |
378
|
|
|
|
|
|
|
"balance" => "integer", |
379
|
|
|
|
|
|
|
"delay" => "integer", |
380
|
|
|
|
|
|
|
"loop" => "alnum", |
381
|
|
|
|
|
|
|
"src" => "src", |
382
|
|
|
|
|
|
|
"volume" => "integer", |
383
|
|
|
|
|
|
|
}, |
384
|
|
|
|
|
|
|
"blockquote" => { |
385
|
|
|
|
|
|
|
"cite" => "href", |
386
|
|
|
|
|
|
|
"type" => "mime-type", |
387
|
|
|
|
|
|
|
}, |
388
|
|
|
|
|
|
|
"br" => 1, |
389
|
|
|
|
|
|
|
"button" => # FORM |
390
|
|
|
|
|
|
|
{ |
391
|
|
|
|
|
|
|
"type" => "input-type", |
392
|
|
|
|
|
|
|
"disabled" => "anything", |
393
|
|
|
|
|
|
|
"value" => "anything", |
394
|
|
|
|
|
|
|
"tabindex" => "number", |
395
|
|
|
|
|
|
|
}, |
396
|
|
|
|
|
|
|
"caption" => 1, |
397
|
|
|
|
|
|
|
"center" => 1, |
398
|
|
|
|
|
|
|
"col" => \%TableAttributes, |
399
|
|
|
|
|
|
|
"colgroup" => \%TableAttributes, |
400
|
|
|
|
|
|
|
"comment" => 1, |
401
|
|
|
|
|
|
|
"dd" => 1, |
402
|
|
|
|
|
|
|
"del" => { |
403
|
|
|
|
|
|
|
"cite" => "href", |
404
|
|
|
|
|
|
|
"datetime" => "datetime", |
405
|
|
|
|
|
|
|
}, |
406
|
|
|
|
|
|
|
"dir" => \%ListAttributes, |
407
|
|
|
|
|
|
|
"div" => 1, |
408
|
|
|
|
|
|
|
"dl" => \%ListAttributes, |
409
|
|
|
|
|
|
|
"dt" => 1, |
410
|
|
|
|
|
|
|
"embed" => 0, |
411
|
|
|
|
|
|
|
"fieldset" => 1, # FORM |
412
|
|
|
|
|
|
|
"font" => { |
413
|
|
|
|
|
|
|
"face" => "font-face", |
414
|
|
|
|
|
|
|
"family" => "font-face", |
415
|
|
|
|
|
|
|
"back" => "color", |
416
|
|
|
|
|
|
|
"size" => "number", |
417
|
|
|
|
|
|
|
"ptsize" => "number", |
418
|
|
|
|
|
|
|
}, |
419
|
|
|
|
|
|
|
"form" => # FORM |
420
|
|
|
|
|
|
|
{ |
421
|
|
|
|
|
|
|
"method" => "form-method", |
422
|
|
|
|
|
|
|
"action" => "href", |
423
|
|
|
|
|
|
|
"enctype" => "form-enctype", |
424
|
|
|
|
|
|
|
"accept" => "anything", |
425
|
|
|
|
|
|
|
"accept-charset" => "anything", |
426
|
|
|
|
|
|
|
}, |
427
|
|
|
|
|
|
|
"hr" => { |
428
|
|
|
|
|
|
|
"size" => "number", |
429
|
|
|
|
|
|
|
"noshade" => "anything", |
430
|
|
|
|
|
|
|
}, |
431
|
|
|
|
|
|
|
"h1" => 1, |
432
|
|
|
|
|
|
|
"h2" => 1, |
433
|
|
|
|
|
|
|
"h3" => 1, |
434
|
|
|
|
|
|
|
"h4" => 1, |
435
|
|
|
|
|
|
|
"h5" => 1, |
436
|
|
|
|
|
|
|
"h6" => 1, |
437
|
|
|
|
|
|
|
"iframe" => 0, |
438
|
|
|
|
|
|
|
"ilayer" => 0, |
439
|
|
|
|
|
|
|
"img" => { |
440
|
|
|
|
|
|
|
"alt" => "anything", |
441
|
|
|
|
|
|
|
"border" => "size", |
442
|
|
|
|
|
|
|
"dynsrc" => "src", |
443
|
|
|
|
|
|
|
"hspace" => "size", |
444
|
|
|
|
|
|
|
"ismap" => "anything", |
445
|
|
|
|
|
|
|
"loop" => "alnum", |
446
|
|
|
|
|
|
|
"lowsrc" => "src", |
447
|
|
|
|
|
|
|
"nosend" => "alnum", |
448
|
|
|
|
|
|
|
"src" => "src", |
449
|
|
|
|
|
|
|
"start" => "alnum", |
450
|
|
|
|
|
|
|
"usemap" => "usemap-href", |
451
|
|
|
|
|
|
|
"vspace" => "size", |
452
|
|
|
|
|
|
|
}, |
453
|
|
|
|
|
|
|
"inlineinput" => 0, |
454
|
|
|
|
|
|
|
"input" => # FORM |
455
|
|
|
|
|
|
|
{ |
456
|
|
|
|
|
|
|
"type" => "input-type", |
457
|
|
|
|
|
|
|
"disabled" => "anything", |
458
|
|
|
|
|
|
|
"value" => "anything", |
459
|
|
|
|
|
|
|
"maxlength" => "input-size", |
460
|
|
|
|
|
|
|
"size" => "input-size", |
461
|
|
|
|
|
|
|
"readonly" => "anything", |
462
|
|
|
|
|
|
|
"tabindex" => "number", |
463
|
|
|
|
|
|
|
"checked" => "anything", |
464
|
|
|
|
|
|
|
"accept" => "anything", |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
# for type "image": |
467
|
|
|
|
|
|
|
"alt" => "anything", |
468
|
|
|
|
|
|
|
"border" => "size", |
469
|
|
|
|
|
|
|
"dynsrc" => "src", |
470
|
|
|
|
|
|
|
"hspace" => "size", |
471
|
|
|
|
|
|
|
"ismap" => "anything", |
472
|
|
|
|
|
|
|
"loop" => "alnum", |
473
|
|
|
|
|
|
|
"lowsrc" => "src", |
474
|
|
|
|
|
|
|
"nosend" => "alnum", |
475
|
|
|
|
|
|
|
"src" => "src", |
476
|
|
|
|
|
|
|
"start" => "alnum", |
477
|
|
|
|
|
|
|
"usemap" => "usemap-href", |
478
|
|
|
|
|
|
|
"vspace" => "size", |
479
|
|
|
|
|
|
|
}, |
480
|
|
|
|
|
|
|
"ins" => { |
481
|
|
|
|
|
|
|
"cite" => "href", |
482
|
|
|
|
|
|
|
"datetime" => "datetime", |
483
|
|
|
|
|
|
|
}, |
484
|
|
|
|
|
|
|
"isindex" => 0, |
485
|
|
|
|
|
|
|
"keygen" => 0, |
486
|
|
|
|
|
|
|
"label" => # FORM |
487
|
|
|
|
|
|
|
{ "for" => "alnum", }, |
488
|
|
|
|
|
|
|
"layer" => 0, |
489
|
|
|
|
|
|
|
"legend" => 1, # FORM |
490
|
|
|
|
|
|
|
"li" => { "value" => "integer", }, |
491
|
|
|
|
|
|
|
"listing" => 0, |
492
|
|
|
|
|
|
|
"map" => 1, |
493
|
|
|
|
|
|
|
"marquee" => 0, |
494
|
|
|
|
|
|
|
"menu" => \%ListAttributes, |
495
|
|
|
|
|
|
|
"multicol" => 0, |
496
|
|
|
|
|
|
|
"nextid" => 0, |
497
|
|
|
|
|
|
|
"nobr" => 0, |
498
|
|
|
|
|
|
|
"noembed" => 1, |
499
|
|
|
|
|
|
|
"nolayer" => 1, |
500
|
|
|
|
|
|
|
"noscript" => 1, |
501
|
|
|
|
|
|
|
"noembed" => 1, |
502
|
|
|
|
|
|
|
"object" => 0, |
503
|
|
|
|
|
|
|
"ol" => \%ListAttributes, |
504
|
|
|
|
|
|
|
"optgroup" => # FORM |
505
|
|
|
|
|
|
|
{ |
506
|
|
|
|
|
|
|
"disabled" => "anything", |
507
|
|
|
|
|
|
|
"label" => "anything", |
508
|
|
|
|
|
|
|
}, |
509
|
|
|
|
|
|
|
"option" => # FORM |
510
|
|
|
|
|
|
|
{ |
511
|
|
|
|
|
|
|
"disabled" => "anything", |
512
|
|
|
|
|
|
|
"label" => "anything", |
513
|
|
|
|
|
|
|
"selected" => "anything", |
514
|
|
|
|
|
|
|
"value" => "anything", |
515
|
|
|
|
|
|
|
}, |
516
|
|
|
|
|
|
|
"o:p" => 1, |
517
|
|
|
|
|
|
|
"p" => 1, |
518
|
|
|
|
|
|
|
"param" => 0, |
519
|
|
|
|
|
|
|
"plaintext" => 0, |
520
|
|
|
|
|
|
|
"pre" => 1, |
521
|
|
|
|
|
|
|
"rt" => 0, |
522
|
|
|
|
|
|
|
"ruby" => 0, |
523
|
|
|
|
|
|
|
"select" => # FORM |
524
|
|
|
|
|
|
|
{ |
525
|
|
|
|
|
|
|
"disabled" => "anything", |
526
|
|
|
|
|
|
|
"multiple" => "anything", |
527
|
|
|
|
|
|
|
"size" => "input-size", |
528
|
|
|
|
|
|
|
"tabindex" => "number", |
529
|
|
|
|
|
|
|
}, |
530
|
|
|
|
|
|
|
"spacer" => 0, |
531
|
|
|
|
|
|
|
"span" => 1, |
532
|
|
|
|
|
|
|
"spell" => 0, |
533
|
|
|
|
|
|
|
"sound" => { |
534
|
|
|
|
|
|
|
"delay" => "number", |
535
|
|
|
|
|
|
|
"loop" => "integer", |
536
|
|
|
|
|
|
|
"src" => "src", |
537
|
|
|
|
|
|
|
}, |
538
|
|
|
|
|
|
|
"table" => \%TableAttributes, |
539
|
|
|
|
|
|
|
"tbody" => \%TableAttributes, |
540
|
|
|
|
|
|
|
"textarea" => # FORM |
541
|
|
|
|
|
|
|
{ |
542
|
|
|
|
|
|
|
"cols" => "input-size", |
543
|
|
|
|
|
|
|
"rows" => "input-size", |
544
|
|
|
|
|
|
|
"disabled" => "anything", |
545
|
|
|
|
|
|
|
"readonly" => "anything", |
546
|
|
|
|
|
|
|
"tabindex" => "number", |
547
|
|
|
|
|
|
|
"wrap" => "anything", |
548
|
|
|
|
|
|
|
}, |
549
|
|
|
|
|
|
|
"td" => \%TableAttributes, |
550
|
|
|
|
|
|
|
"tfoot" => \%TableAttributes, |
551
|
|
|
|
|
|
|
"th" => \%TableAttributes, |
552
|
|
|
|
|
|
|
"thead" => \%TableAttributes, |
553
|
|
|
|
|
|
|
"tr" => \%TableAttributes, |
554
|
|
|
|
|
|
|
"ul" => \%ListAttributes, |
555
|
|
|
|
|
|
|
"wbr" => 1, |
556
|
|
|
|
|
|
|
"xml" => 0, |
557
|
|
|
|
|
|
|
"xmp" => 0, |
558
|
|
|
|
|
|
|
"x-html" => 0, |
559
|
|
|
|
|
|
|
"x-tab" => 1, |
560
|
|
|
|
|
|
|
"x-sigsep" => 1, |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
# Character formatting |
563
|
|
|
|
|
|
|
"abbr" => 1, |
564
|
|
|
|
|
|
|
"acronym" => 1, |
565
|
|
|
|
|
|
|
"big" => 1, |
566
|
|
|
|
|
|
|
"blink" => 0, |
567
|
|
|
|
|
|
|
"b" => 1, |
568
|
|
|
|
|
|
|
"cite" => 1, |
569
|
|
|
|
|
|
|
"code" => 1, |
570
|
|
|
|
|
|
|
"dfn" => 1, |
571
|
|
|
|
|
|
|
"em" => 1, |
572
|
|
|
|
|
|
|
"i" => 1, |
573
|
|
|
|
|
|
|
"kbd" => 1, |
574
|
|
|
|
|
|
|
"q" => 1, |
575
|
|
|
|
|
|
|
"s" => 1, |
576
|
|
|
|
|
|
|
"samp" => 1, |
577
|
|
|
|
|
|
|
"small" => 1, |
578
|
|
|
|
|
|
|
"strike" => 1, |
579
|
|
|
|
|
|
|
"strong" => 1, |
580
|
|
|
|
|
|
|
"sub" => 1, |
581
|
|
|
|
|
|
|
"sup" => 1, |
582
|
|
|
|
|
|
|
"tt" => 1, |
583
|
|
|
|
|
|
|
"u" => 1, |
584
|
|
|
|
|
|
|
"var" => 1, |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
# |
587
|
|
|
|
|
|
|
# Safe elements commonly found in the <frameset> block follow. |
588
|
|
|
|
|
|
|
# |
589
|
|
|
|
|
|
|
"frameset" => 0, |
590
|
|
|
|
|
|
|
"frame" => 0, |
591
|
|
|
|
|
|
|
"noframes" => 1, |
592
|
|
|
|
|
|
|
); |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
# Some entity conversions for attributes |
595
|
|
|
|
|
|
|
my %EntityToChar = |
596
|
|
|
|
|
|
|
( quot => '"', apos => "'", amp => '&', 'lt' => '<', 'gt' => '>' ); |
597
|
|
|
|
|
|
|
my %CharToEntity = reverse %EntityToChar; |
598
|
|
|
|
|
|
|
my %QuoteRe = ( '"' => qr/(["&<>])/, "'" => qr/(['&<>])/, "" => qr/(["&<>])/ ); |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
# Default list of mismatched tags to track |
601
|
|
|
|
|
|
|
my %MismatchedTags = |
602
|
|
|
|
|
|
|
map { $_ => 1 } qw(table tbody thead tr td th font div span pre center); |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
# When fixing mismatched tags, sometimes a close tag |
605
|
|
|
|
|
|
|
# shouldn't close all the way out |
606
|
|
|
|
|
|
|
# For example, consider: |
607
|
|
|
|
|
|
|
# <table><tr><td><table><tr></td> |
608
|
|
|
|
|
|
|
# A naive version would see the ending </td>, and thus |
609
|
|
|
|
|
|
|
# try to fix the mismatched tags by doing: |
610
|
|
|
|
|
|
|
# <table><tr><td><table><tr></tr></table></td> |
611
|
|
|
|
|
|
|
# This is not what a browser does. So given a tag, we |
612
|
|
|
|
|
|
|
# give a list of closing tags which cause us to stop |
613
|
|
|
|
|
|
|
# and not close any more |
614
|
|
|
|
|
|
|
my %MismatchedTagNest = ( |
615
|
|
|
|
|
|
|
table => [qw(tbody thead tfoot tr th td colgroup)], |
616
|
|
|
|
|
|
|
tbody => [qw(tr th td)], |
617
|
|
|
|
|
|
|
tr => [qw(th td)], |
618
|
|
|
|
|
|
|
font => [''], |
619
|
|
|
|
|
|
|
); |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
# Convert to hash of hashes |
622
|
|
|
|
|
|
|
$_ = { map { $_ => 1 } @$_ } for values %MismatchedTagNest; |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
# If we see a table, we should expect to see a tbody |
625
|
|
|
|
|
|
|
# next. If not, we need to add it because the browser |
626
|
|
|
|
|
|
|
# will implicitly open it! |
627
|
|
|
|
|
|
|
my %ImplicitOpenTags = ( |
628
|
|
|
|
|
|
|
table => [qw(tbody tr thead tfoot caption colgroup col)], |
629
|
|
|
|
|
|
|
thead => [qw(tr)], |
630
|
|
|
|
|
|
|
tbody => [qw(tr)], |
631
|
|
|
|
|
|
|
tr => [qw(td th)], |
632
|
|
|
|
|
|
|
); |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
# Convert to hash of hashes |
635
|
|
|
|
|
|
|
$_ = { default => $_->[0], map { $_ => 1 } @$_ } for values %ImplicitOpenTags; |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=over 4 |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=cut |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
=item I<MojoMojo::Declaw-E<gt>new(%Options)> |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
Constructs a new HTML::Declaw object. The following options are supported: |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
=over 4 |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
=item B<Options> |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
=over 4 |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
=item B<tags_to_callback> |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
Array reference of tags for which a call back should be made. If a tag in this array is parsed, the subroutine tags_callback() is invoked. |
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
=item B<attribs_to_callback> |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
Array reference of tag attributes for which a call back should be made. If an attribute in this array is parsed, the subroutine attribs_callback() is invoked. |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
=item B<tags_callback> |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
Subroutine reference to be invoked when a tag listed in @$tags_to_callback is parsed. |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
=item B<attribs_callback> |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
Subroutine reference to be invoked when an attribute listed in @$attribs_to_callback is parsed. |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=item B<url_callback> |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
Subroutine reference to be invoked when a URL is detected in an HTML tag attribute or a CSS property. |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=item B<css_callback> |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
Subroutine reference to be invoked when CSS data is found either as the contents of a 'style' attribute in an HTML tag, or as the contents of a <style> HTML tag. |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=item B<fix_mismatched_tags> |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
This property, if set, fixes mismatched tags in the HTML input. By default, tags present in the default %mismatched_tags_to_fix hash are fixed. This set of tags can be overridden by passing in an array reference $mismatched_tags_to_fix to the constructor. Any opened tags in the set are automatically closed if no corresponding closing tag is found. If an unbalanced closing tag is found, that is commented out. |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
=item B<mismatched_tags_to_fix> |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
Array reference of tags for which the code would check for matching opening and closing tags. See the property $fix_mismatched_tags. |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
=item B<context> |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
You can pass an arbitrary scalar as a 'context' value that's then passed as the first parameter to all callback functions. Most commonly this is something like '$Self' |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
=item B<Debug> |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
If set, prints debugging output. |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
=back |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
=back |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=back |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
=cut |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
sub new { |
702
|
141
|
|
|
141
|
1
|
387
|
my $Proto = shift; |
703
|
141
|
|
33
|
|
|
904
|
my $Class = ref($Proto) || $Proto; |
704
|
|
|
|
|
|
|
|
705
|
141
|
|
|
|
|
1129
|
my %Opts = @_; |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
# my $Context = shift; |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
my ( $tags_to_callback, $attribs_to_callback ) = |
710
|
141
|
|
|
|
|
529
|
( $Opts{"tags_to_callback"}, $Opts{"attribs_to_callback"} ); |
711
|
141
|
50
|
|
|
|
626
|
my %tags_to_callback = map { $_ => 1 } @$tags_to_callback |
|
705
|
|
|
|
|
2067
|
|
712
|
|
|
|
|
|
|
if $tags_to_callback; |
713
|
141
|
50
|
|
|
|
653
|
my %attribs_to_callback = map { $_ => 1 } @$attribs_to_callback |
|
423
|
|
|
|
|
1140
|
|
714
|
|
|
|
|
|
|
if $attribs_to_callback; |
715
|
141
|
|
|
|
|
1678
|
my %mismatched_tags_to_fix = %MismatchedTags; |
716
|
|
|
|
|
|
|
%mismatched_tags_to_fix = |
717
|
0
|
|
|
|
|
0
|
map { $_ => 1 } @{ $Opts{'mismatched_tags_to_fix'} } |
|
0
|
|
|
|
|
0
|
|
718
|
141
|
50
|
|
|
|
603
|
if $Opts{'mismatched_tags_to_fix'}; |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
my $Self = { |
721
|
|
|
|
|
|
|
DefangString => 'defang_', |
722
|
|
|
|
|
|
|
tags_to_callback => \%tags_to_callback, |
723
|
|
|
|
|
|
|
tags_callback => $Opts{tags_callback}, |
724
|
|
|
|
|
|
|
attribs_to_callback => \%attribs_to_callback, |
725
|
|
|
|
|
|
|
attribs_callback => $Opts{attribs_callback}, |
726
|
|
|
|
|
|
|
url_callback => $Opts{url_callback}, |
727
|
|
|
|
|
|
|
css_callback => $Opts{css_callback}, |
728
|
|
|
|
|
|
|
mismatched_tags_to_fix => \%mismatched_tags_to_fix, |
729
|
|
|
|
|
|
|
fix_mismatched_tags => $Opts{fix_mismatched_tags}, |
730
|
|
|
|
|
|
|
context => $Opts{context}, |
731
|
|
|
|
|
|
|
OpenedTags => [], |
732
|
|
|
|
|
|
|
OpenedTagsCount => {}, |
733
|
|
|
|
|
|
|
ImplicitTags => [], |
734
|
|
|
|
|
|
|
Debug => $Opts{Debug}, |
735
|
141
|
|
|
|
|
1637
|
}; |
736
|
|
|
|
|
|
|
|
737
|
141
|
|
|
|
|
452
|
bless( $Self, $Class ); |
738
|
141
|
|
|
|
|
646
|
return $Self; |
739
|
|
|
|
|
|
|
} |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=head1 CALLBACK METHODS |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
=over 4 |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
=cut |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
=item B<COMMON PARAMETERS> |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
A number of the callbacks share the same parameters. These common parameters are documented here. Certain variables may have specific meanings in certain callbacks, so be sure to check the documentation for that method first before referring this section. |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
=over 4 |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
=item I<$context> |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
You can pass an arbitrary scalar as a 'context' value that's then passed as the first parameter to all callback functions. Most commonly this is something like '$Self' |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
=item I<$Defang> |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
Current HTML::Declaw instance |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
=item I<$OpenAngle> |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
Opening angle(<) sign of the current tag. |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
=item I<$lcTag> |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
Lower case version of the HTML tag that is currently being parsed. |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
=item I<$IsEndTag> |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
Has the value '/' if the current tag is a closing tag. |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
=item I<$AttributeHash> |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
A reference to a hash containing the attributes of the current tag and |
776
|
|
|
|
|
|
|
their values. Each value is a scalar reference to the value, rather |
777
|
|
|
|
|
|
|
than just a scalar value. You can add attributes (remember to make it a |
778
|
|
|
|
|
|
|
scalar ref, eg $AttributeHash{"newattr"} = \"newval"), delete attributes, |
779
|
|
|
|
|
|
|
or modify attribute values in this hash, and any changes you make will |
780
|
|
|
|
|
|
|
be incorporated into the output HTML stream. |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
The attribute values will have any entity references decoded before |
783
|
|
|
|
|
|
|
being passed to you, and any unsafe values we be re-encoded back into |
784
|
|
|
|
|
|
|
the HTML stream. |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
So for instance, the tag: |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
<div title="<"Hi there <"> |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
Will have the attribute hash: |
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
{ title => \q[<"Hi there <] } |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
And will be turned back into the HTML on output: |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
<div title="<"Hi there <"> |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=item I<$CloseAngle> |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
Anything after the end of last attribute including the closing HTML angle(>) |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
=item I<$HtmlR> |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
A scalar reference to the input HTML. The input HTML is parsed using |
805
|
|
|
|
|
|
|
m/\G$SomeRegex/c constructs, so to continue from where HTML:Defang left, |
806
|
|
|
|
|
|
|
clients can use m/\G$SomeRegex/c for further processing on the input. This |
807
|
|
|
|
|
|
|
will resume parsing from where HTML::Declaw left. One can also use the |
808
|
|
|
|
|
|
|
pos() function to determine where HTML::Declaw left off. This combined |
809
|
|
|
|
|
|
|
with the add_to_output() method should give reasonable flexibility for |
810
|
|
|
|
|
|
|
the client to process the input. |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
=item I<$OutR> |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
A scalar reference to the processed output HTML so far. |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
=back |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
=item I<tags_callback($context, $Defang, $OpenAngle, $lcTag, $IsEndTag, $AttributeHash, $CloseAngle, $HtmlR, $OutR)> |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
If $Defang->{tags_callback} exists, and HTML::Declaw has parsed a tag preset in $Defang->{tags_to_callback}, the above callback is made to the client code. The return value of this method determines whether the tag is defanged or not. More details below. |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=over 4 |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
=item B<Return values> |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
=over 4 |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
=item I<0> |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
The current tag will not be defanged. |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=item I<1> |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
The current tag will be defanged. |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
=item I<2> |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
The current tag will be processed normally by HTML:Defang as if there was no callback method specified. |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
=back |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
=back |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
=item I<attribs_callback($context, $Defang, $lcTag, $lcAttrKey, $AttrVal, $HtmlR, $OutR)> |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
If $Defang->{attribs_callback} exists, and HTML::Declaw has parsed an attribute present in $Defang->{attribs_to_callback}, the above callback is made to the client code. The return value of this method determines whether the attribute is defanged or not. More details below. |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
=over 4 |
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
=item B<Method parameters> |
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
=over 4 |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
=item I<$lcAttrKey> |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
Lower case version of the HTML attribute that is currently being parsed. |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
=item I<$AttrVal> |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
Reference to the HTML attribute value that is currently being parsed. |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
See $AttributeHash for details of decoding. |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
=back |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
=item B<Return values> |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
=over 4 |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
=item I<0> |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
The current attribute will not be defanged. |
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
=item I<1> |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
The current attribute will be defanged. |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
=item I<2> |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
The current attribute will be processed normally by HTML:Defang as if there was no callback method specified. |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
=back |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
=back |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
=item I<url_callback($context, $Defang, $lcTag, $lcAttrKey, $AttrVal, $AttributeHash, $HtmlR, $OutR)> |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
If $Defang->{url_callback} exists, and HTML::Declaw has parsed a URL, the above callback is made to the client code. The return value of this method determines whether the attribute containing the URL is defanged or not. URL callbacks can be made from <style> tags as well style attributes, in which case the particular style declaration will be commented out. More details below. |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
=over 4 |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
=item B<Method parameters> |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
=over 4 |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
=item I<$lcAttrKey> |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
Lower case version of the HTML attribute that is currently being parsed. However if this callback is made as a result of parsing a URL in a style attribute, $lcAttrKey will be set to the string I<style>, or will be set to I<undef> if this callback is made as a result of parsing a URL inside a style tag. |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
=item I<$AttrVal> |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
Reference to the URL value that is currently being parsed. |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
=item I<$AttributeHash> |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
A reference to a hash containing the attributes of the current tag and their values. Each value is a scalar reference to the value, |
907
|
|
|
|
|
|
|
rather than just a scalar value. You can add attributes (remember to make it a scalar ref, eg $AttributeHash{"newattr"} = \"newval"), delete attributes, or modify attribute values in this hash, and any changes you make will be incorporated into the output HTML stream. Will be set to I<undef> if the callback is made due to URL in a <style> tag or attribute. |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
=back |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
=item B<Return values> |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
=over 4 |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
=item I<0> |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
The current URL will not be defanged. |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
=item I<1> |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
The current URL will be defanged. |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
=item I<2> |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
The current URL will be processed normally by HTML:Defang as if there was no callback method specified. |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
=back |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
=back |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
=item I<css_callback($context, $Defang, $Selectors, $SelectorRules, $lcTag, $IsAttr, $OutR)> |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
If $Defang->{css_callback} exists, and HTML::Declaw has parsed a <style> tag or style attribtue, the above callback is made to the client code. The return value of this method determines whether a particular declaration in the style rules is defanged or not. More details below. |
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
=over 4 |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
=item B<Method parameters> |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
=over 4 |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
=item I<$Selectors> |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
Reference to an array containing the selectors in a style tag or attribute. |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
=item I<$SelectorRules> |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
Reference to an array containing the style declaration blocks of all selectors in a style tag or attribute. Consider the below CSS: |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
a { b:c; d:e} |
950
|
|
|
|
|
|
|
j { k:l; m:n} |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
The declaration blocks will get parsed into the following data structure: |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
[ |
955
|
|
|
|
|
|
|
[ |
956
|
|
|
|
|
|
|
[ "b", "c", 2], |
957
|
|
|
|
|
|
|
[ "d", "e", 2] |
958
|
|
|
|
|
|
|
], |
959
|
|
|
|
|
|
|
[ |
960
|
|
|
|
|
|
|
[ "k", "l", 2], |
961
|
|
|
|
|
|
|
[ "m", "n", 2] |
962
|
|
|
|
|
|
|
] |
963
|
|
|
|
|
|
|
] |
964
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
So, generally each property:value pair in a declaration is parsed into an array of the form |
966
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
["property", "value", X] |
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
where X can be 0, 1 or 2, and 2 the default value. A client can manipulate this value to instruct HTML::Declaw to defang this property:value pair. |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
0 - Do not defang |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
1 - Defang the style:property value |
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
2 - Process this as if there is no callback specified |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
=item I<$IsAttr> |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
True if the currently processed item is a style attribute. False if the currently processed item is a style tag. |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
=back |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
=back |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
=back |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
=cut |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
=head1 METHODS |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
=over 4 |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
=item B<PUBLIC METHODS> |
994
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
=over 4 |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
=item I<defang($InputHtml)> |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
Cleans up $InputHtml of any executable code including scripting, embedded objects, applets, etc., and defang any XSS attacks. |
1000
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
=over 4 |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
=item B<Method parameters> |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
=over 4 |
1006
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
=item I<$InputHtml> |
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
The input HTML string that needs to be sanitized. |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
=back |
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
=back |
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
Returns the cleaned HTML. If fix_mismatched_tags is set, any tags that appear in @$mismatched_tags_to_fix that are unbalanced are automatically commented or closed. |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
=cut |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
sub defang { |
1020
|
141
|
|
|
141
|
1
|
324
|
my $Self = shift; |
1021
|
|
|
|
|
|
|
|
1022
|
141
|
|
|
|
|
308
|
my $I = shift; |
1023
|
|
|
|
|
|
|
|
1024
|
141
|
|
|
|
|
436
|
my $Debug = $Self->{Debug}; |
1025
|
|
|
|
|
|
|
|
1026
|
141
|
|
|
|
|
300
|
my $HeaderCharset = shift; |
1027
|
141
|
50
|
|
|
|
458
|
warn("defang HeaderCharset=$HeaderCharset") if $Debug; |
1028
|
141
|
|
|
|
|
283
|
my $FallbackCharset = shift; |
1029
|
141
|
50
|
|
|
|
485
|
warn("defang FallbackCharset=$FallbackCharset") if $Debug; |
1030
|
|
|
|
|
|
|
|
1031
|
141
|
|
|
|
|
399
|
$Self->{Reentrant}++; |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
# Get encoded characters |
1034
|
|
|
|
|
|
|
# $Self->{Charset} = $Self->get_applicable_charset($_, $HeaderCharset, $FallbackCharset); |
1035
|
|
|
|
|
|
|
# warn("defang Charset=$Self->{Charset}") if $Self->{Debug}; |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
# if ($Self->{Charset}) { |
1038
|
|
|
|
|
|
|
# $I =~ s/(.)/chr(ord($1) & 127)/ge if $Self->{Charset} eq "US-ASCII"; |
1039
|
|
|
|
|
|
|
# my $Encoder = Encode::Encoder->new($I, $Self->{Charset}); |
1040
|
|
|
|
|
|
|
# $I = $Encoder->bytes($Self->{Charset}); |
1041
|
|
|
|
|
|
|
# } |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
# We pass a ref to $I to each callback. It should |
1044
|
|
|
|
|
|
|
# never be modified because we use a m/\G.../gc loop |
1045
|
|
|
|
|
|
|
# on it. If possible, stop people modifying it |
1046
|
141
|
50
|
|
|
|
459
|
readonly_on($I) if $HasScalarReadonly; |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
# It seems regexp matching on perl unicode strings can be *way* |
1049
|
|
|
|
|
|
|
# slower than byte string (defang 1M email = 100 seconds unicode, |
1050
|
|
|
|
|
|
|
# 5 seconds bytes). |
1051
|
|
|
|
|
|
|
# So we're going to do a bit of a hack. Engaged "use bytes" to do |
1052
|
|
|
|
|
|
|
# byte matching everywhere, but since we know we'll be matching |
1053
|
|
|
|
|
|
|
# on correct boundaries to make up full code points in utf-8, we'll |
1054
|
|
|
|
|
|
|
# turn on the magic utf-8 flag again for those values |
1055
|
141
|
|
|
|
|
594
|
my $UTF8Input = Encode::is_utf8($I); |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
# Force byte matching everywhere (see above) |
1058
|
27
|
|
|
27
|
|
242
|
use bytes; |
|
27
|
|
|
|
|
81
|
|
|
27
|
|
|
|
|
280
|
|
1059
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
# Strip all NUL chars |
1061
|
141
|
|
|
|
|
450
|
$I =~ s/\0//g; |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
# Output buffer |
1064
|
141
|
|
|
|
|
356
|
my $O = ''; |
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
# This parser uses standard /\G.../gc matching, so have to be careful |
1067
|
|
|
|
|
|
|
# to not reset pos() on the string |
1068
|
|
|
|
|
|
|
# |
1069
|
|
|
|
|
|
|
# Previously we tried an "eating" parser (s/^.../, or /^.../ + substr), |
1070
|
|
|
|
|
|
|
# which in theory should be fast with perls internal string offset |
1071
|
|
|
|
|
|
|
# feature, but it seems offset doesn't work on unicode strings, |
1072
|
|
|
|
|
|
|
# so you end up with a slow parser because of string reallocations |
1073
|
|
|
|
|
|
|
|
1074
|
141
|
|
|
|
|
306
|
while (1) { |
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
# walk to next < (testing in 5.8.8 shows .*? is faster than [^<]* or [^<]*?) |
1077
|
1356
|
100
|
|
|
|
5330
|
if ( $I =~ m{\G(.*?)<}gcso ) { |
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
# Everything before tag goes into the output |
1080
|
1215
|
|
|
|
|
2968
|
$O .= $1; |
1081
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
# All tags default to open/close with </> |
1083
|
1215
|
|
|
|
|
2359
|
my ( $OpenAngle, $CloseAngle ) = ( '<', '>' ); |
1084
|
1215
|
100
|
|
|
|
3038
|
my $IsEndTag = $I =~ m{\G/}gcso ? '/' : ''; |
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
# It's a standard tag |
1087
|
1215
|
100
|
|
|
|
3840
|
if ( $I =~ m{\G($TagNameRE)}gcso ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
|
1089
|
1210
|
|
|
|
|
2252
|
my $Tag = $1; |
1090
|
1210
|
100
|
|
|
|
3092
|
my $TagTrail = $I =~ m{\G([\s/]+)}gcso ? $1 : ''; |
1091
|
|
|
|
|
|
|
|
1092
|
1210
|
50
|
|
|
|
2634
|
warn "defang IsEndTag=$IsEndTag Tag=$Tag" if $Debug; |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
# Skip attribute parsing if none |
1095
|
1210
|
|
|
|
|
1754
|
my @Attributes; |
1096
|
1210
|
100
|
|
|
|
3880
|
goto NoParseAttributes if $I =~ m{\G>}gcso; |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
# Pull off any trailing component after the tag |
1099
|
|
|
|
|
|
|
# Now match all key=value attributes |
1100
|
228
|
|
|
|
|
2706
|
while ( $I =~ |
1101
|
|
|
|
|
|
|
m{\G(?:($AttrKeyStartLineRE)(\s*))?(?:(=\s*)($AttrValRE)(\s*))?}gcso |
1102
|
|
|
|
|
|
|
) |
1103
|
|
|
|
|
|
|
{ |
1104
|
|
|
|
|
|
|
|
1105
|
573
|
50
|
66
|
|
|
1916
|
last if !defined($1) && !defined($4); |
1106
|
|
|
|
|
|
|
my ( |
1107
|
345
|
|
|
|
|
1338
|
$Attribute, $AttrTrail, $Equals, |
1108
|
|
|
|
|
|
|
$AttrVal, $AttrValTrail |
1109
|
|
|
|
|
|
|
) = ( $1, $2, $3, $4, $5 ); |
1110
|
345
|
|
|
|
|
712
|
my ( $AttrQuote, $AttrValWithoutQuote ) = ''; |
1111
|
345
|
50
|
33
|
|
|
2140
|
if ( defined($4) && $4 =~ /^([`"']?)(.*)\1$/s ) { |
1112
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
# IE supports `, but nothing else does, turn it into " |
1114
|
345
|
50
|
|
|
|
1033
|
$AttrQuote = $1 eq '`' ? '"' : $1; |
1115
|
345
|
|
|
|
|
709
|
$AttrValWithoutQuote = $2; |
1116
|
|
|
|
|
|
|
} |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
# Turn on utf-8 for things that might be |
1119
|
345
|
100
|
|
|
|
849
|
Encode::_utf8_on($Attribute) if $UTF8Input; |
1120
|
345
|
100
|
|
|
|
789
|
Encode::_utf8_on($AttrValWithoutQuote) if $UTF8Input; |
1121
|
|
|
|
|
|
|
|
1122
|
345
|
|
|
|
|
1185
|
push @Attributes, |
1123
|
|
|
|
|
|
|
[ |
1124
|
|
|
|
|
|
|
$Attribute, $AttrTrail, |
1125
|
|
|
|
|
|
|
$Equals, $AttrQuote, |
1126
|
|
|
|
|
|
|
$AttrValWithoutQuote, $AttrQuote, |
1127
|
|
|
|
|
|
|
$AttrValTrail |
1128
|
|
|
|
|
|
|
]; |
1129
|
345
|
50
|
|
|
|
1669
|
warn |
1130
|
|
|
|
|
|
|
"defang AttributeKey=$1 AttrQuote=$AttrQuote AttributeValue=$Attribute" |
1131
|
|
|
|
|
|
|
if $Debug; |
1132
|
|
|
|
|
|
|
} |
1133
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
# Better be at end of attributes, or attach our own ending tag |
1135
|
228
|
100
|
|
|
|
908
|
if ( $I =~ m{\G(?:(\s*[/\\]*\s*(?:--)?\s*)?>|([\s/-]*))}gcs ) { |
1136
|
225
|
50
|
|
|
|
872
|
$CloseAngle = $1 ? $1 . '>' : ( $2 ? $2 . '>' : '>' ); |
|
|
100
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
} |
1138
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
NoParseAttributes: |
1140
|
1210
|
|
|
|
|
1953
|
my $Defang = 1; |
1141
|
|
|
|
|
|
|
|
1142
|
1210
|
|
|
|
|
2564
|
my $TagOps = $Tags{ lc $Tag }; |
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
# Process this tag |
1145
|
1210
|
100
|
|
|
|
2596
|
if ( ref $TagOps eq "CODE" ) { |
1146
|
|
|
|
|
|
|
|
1147
|
2
|
50
|
|
|
|
6
|
warn "process_tag Found CODE reference" if $Debug; |
1148
|
2
|
|
|
|
|
7
|
$Defang = $Self ->${TagOps}( |
1149
|
|
|
|
|
|
|
\$O, \$I, $TagOps, |
1150
|
|
|
|
|
|
|
\$OpenAngle, $IsEndTag, $Tag, |
1151
|
|
|
|
|
|
|
$TagTrail, \@Attributes, \$CloseAngle |
1152
|
|
|
|
|
|
|
); |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
} |
1155
|
|
|
|
|
|
|
else { |
1156
|
|
|
|
|
|
|
|
1157
|
1208
|
50
|
|
|
|
2537
|
warn "process_tag Found regular tag" if $Debug; |
1158
|
1208
|
|
|
|
|
3571
|
$Defang = $Self->defang_attributes( |
1159
|
|
|
|
|
|
|
\$O, \$I, $TagOps, |
1160
|
|
|
|
|
|
|
\$OpenAngle, $IsEndTag, $Tag, |
1161
|
|
|
|
|
|
|
$TagTrail, \@Attributes, \$CloseAngle |
1162
|
|
|
|
|
|
|
); |
1163
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
} |
1165
|
1210
|
50
|
|
|
|
3067
|
die "Callback reset pos on Tag=$Tag IsEndTag=$IsEndTag" |
1166
|
|
|
|
|
|
|
if !defined pos($I); |
1167
|
1210
|
50
|
|
|
|
2559
|
warn "defang Defang=$Defang" if $Debug; |
1168
|
|
|
|
|
|
|
|
1169
|
|
|
|
|
|
|
# defang unknown tags |
1170
|
1210
|
100
|
|
|
|
2767
|
if ($Defang) { |
1171
|
3
|
50
|
|
|
|
7
|
warn "defang Defanging $Tag" if $Debug; |
1172
|
3
|
|
|
|
|
6
|
$Tag = $Self->{DefangString} . $Tag; |
1173
|
3
|
|
|
|
|
10
|
$OpenAngle =~ s/</<!--/; |
1174
|
3
|
|
|
|
|
7
|
$CloseAngle =~ s/>/-->/; |
1175
|
|
|
|
|
|
|
} |
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
# And put it all back together into the output string |
1178
|
|
|
|
|
|
|
$O .= |
1179
|
|
|
|
|
|
|
$OpenAngle |
1180
|
|
|
|
|
|
|
. $IsEndTag |
1181
|
|
|
|
|
|
|
. $Tag |
1182
|
|
|
|
|
|
|
. $TagTrail |
1183
|
1210
|
|
|
|
|
3335
|
. join( "", grep { defined } map { @$_ } @Attributes ) |
|
2759
|
|
|
|
|
5003
|
|
|
345
|
|
|
|
|
1277
|
|
1184
|
|
|
|
|
|
|
. $CloseAngle; |
1185
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
# It's a comment of some sort. We are looking for regular HTML comment, XML CDATA section and |
1187
|
|
|
|
|
|
|
# IE conditional comments |
1188
|
|
|
|
|
|
|
# Refer http://msdn.microsoft.com/en-us/library/ms537512.aspx for IE conditional comment information |
1189
|
|
|
|
|
|
|
} |
1190
|
|
|
|
|
|
|
elsif ( $I =~ m{\G(!)((?:\[CDATA\[|(?:--)?\[if|--)?)}gcis ) { |
1191
|
|
|
|
|
|
|
|
1192
|
4
|
|
|
|
|
15
|
my ( $Comment, $CommentDelim ) = ( $1, $2 ); |
1193
|
4
|
50
|
|
|
|
13
|
warn "defang Comment=$Comment CommentDelim=$CommentDelim" |
1194
|
|
|
|
|
|
|
if $Debug; |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
# Find the appropriate closing delimiter |
1197
|
4
|
|
|
|
|
11
|
my $IsCDATA = $CommentDelim eq "[CDATA["; |
1198
|
4
|
50
|
|
|
|
11
|
my $ClosingCommentDelim = $IsCDATA ? "]]" : $CommentDelim; |
1199
|
|
|
|
|
|
|
|
1200
|
4
|
|
|
|
|
8
|
my $EndRestartCommentsText = ''; |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
# Handle IE conditionals specially. We can have <![if ...]>, <!--[if ...]> and <!--[if ...]--> |
1203
|
|
|
|
|
|
|
# for the third case, we just want to immediately match the --> |
1204
|
4
|
50
|
|
|
|
13
|
if ( $CommentDelim =~ /((?:--)?)\[if/ ) { |
1205
|
0
|
|
|
|
|
0
|
my $ConditionalDelim = $1; |
1206
|
0
|
0
|
|
|
|
0
|
$EndRestartCommentsText = '--' if $ConditionalDelim eq ''; |
1207
|
0
|
|
|
|
|
0
|
$ClosingCommentDelim = $CommentDelim; |
1208
|
0
|
0
|
|
|
|
0
|
if ( $I !~ m{\G[^\]]*\]-->}gcis ) { |
1209
|
0
|
|
|
|
|
0
|
$ClosingCommentDelim = "<![endif]$ConditionalDelim"; |
1210
|
|
|
|
|
|
|
} |
1211
|
|
|
|
|
|
|
} |
1212
|
|
|
|
|
|
|
|
1213
|
4
|
50
|
|
|
|
12
|
warn "defang ClosingCommentDelim=$ClosingCommentDelim" |
1214
|
|
|
|
|
|
|
if $Debug; |
1215
|
|
|
|
|
|
|
|
1216
|
4
|
|
|
|
|
10
|
my ( $CommentStartText, $CommentEndText ) = |
1217
|
|
|
|
|
|
|
( "--/*SC*/", "/*EC*/--" ); |
1218
|
|
|
|
|
|
|
|
1219
|
|
|
|
|
|
|
# Convert to regular HTML comment |
1220
|
4
|
|
|
|
|
10
|
$O .= $OpenAngle . $Comment . $CommentStartText; |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
# Find closing comment |
1223
|
4
|
50
|
33
|
|
|
61
|
if ( $I =~ m{\G(.*?)(\Q${ClosingCommentDelim}\E!?\s*)(>)}gcis |
1224
|
|
|
|
|
|
|
|| $I =~ m{\G(.*?)(--)(>)}gcis ) |
1225
|
|
|
|
|
|
|
{ |
1226
|
|
|
|
|
|
|
|
1227
|
4
|
|
|
|
|
14
|
my ( $StartTag, $CommentData, $ClosingTag, $CloseAngle ) = |
1228
|
|
|
|
|
|
|
( $CommentDelim, $1, $2, $3 ); |
1229
|
|
|
|
|
|
|
|
1230
|
4
|
50
|
33
|
|
|
16
|
if ( $EndRestartCommentsText |
1231
|
|
|
|
|
|
|
&& $CommentData =~ s/^(.*?)(>.*)$/$2/s ) |
1232
|
|
|
|
|
|
|
{ |
1233
|
0
|
|
|
|
|
0
|
$StartTag .= $1; |
1234
|
|
|
|
|
|
|
} |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
# Strip all HTML comment markers |
1237
|
4
|
|
|
|
|
14
|
$StartTag =~ s/--//g; |
1238
|
4
|
|
|
|
|
10
|
$CommentData =~ s/--//g; |
1239
|
4
|
|
|
|
|
11
|
$ClosingTag =~ s/--//g; |
1240
|
|
|
|
|
|
|
|
1241
|
4
|
50
|
|
|
|
10
|
$StartTag .= $EndRestartCommentsText if $CommentData; |
1242
|
4
|
50
|
|
|
|
12
|
$ClosingTag =~ s{^(<!)}{$1$EndRestartCommentsText} |
1243
|
|
|
|
|
|
|
if $CommentData; |
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
# Put it all into the output |
1246
|
4
|
50
|
|
|
|
17
|
$O .= $StartTag |
1247
|
|
|
|
|
|
|
. ( |
1248
|
|
|
|
|
|
|
$EndRestartCommentsText |
1249
|
|
|
|
|
|
|
? $Self->defang($CommentData) |
1250
|
|
|
|
|
|
|
: $CommentData |
1251
|
|
|
|
|
|
|
) |
1252
|
|
|
|
|
|
|
. $ClosingTag |
1253
|
|
|
|
|
|
|
. $CommentEndText |
1254
|
|
|
|
|
|
|
. $CloseAngle; |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
# No closing comment, so we add that |
1257
|
|
|
|
|
|
|
} |
1258
|
|
|
|
|
|
|
else { |
1259
|
|
|
|
|
|
|
|
1260
|
0
|
0
|
|
|
|
0
|
$I =~ m/\G(.*)$/gcs || die "Remainder of line match failed"; |
1261
|
|
|
|
|
|
|
|
1262
|
0
|
|
|
|
|
0
|
my $Data = $1; |
1263
|
0
|
|
|
|
|
0
|
$Data =~ s/--//g; |
1264
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
# Output |
1266
|
0
|
|
|
|
|
0
|
$O .= $Data . $CommentEndText . ">"; |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
} |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
# XML processing instruction |
1271
|
|
|
|
|
|
|
} |
1272
|
|
|
|
|
|
|
elsif ( $I =~ m{\G(\?)}gcs ) { |
1273
|
0
|
|
|
|
|
0
|
my ($Processing) = ($1); |
1274
|
0
|
0
|
|
|
|
0
|
warn "defang XML processing instruction" if $Debug; |
1275
|
|
|
|
|
|
|
|
1276
|
0
|
|
|
|
|
0
|
my $Data; |
1277
|
0
|
0
|
|
|
|
0
|
if ( $I =~ m{\G(.*?\??)>}gcs ) { # || goto OutputRemainder; |
1278
|
0
|
|
|
|
|
0
|
$Data = $1; |
1279
|
|
|
|
|
|
|
} |
1280
|
|
|
|
|
|
|
else { |
1281
|
0
|
|
|
|
|
0
|
$I =~ m{\G(.*)$}gcs; |
1282
|
0
|
|
|
|
|
0
|
$Data = $1; |
1283
|
|
|
|
|
|
|
} |
1284
|
|
|
|
|
|
|
|
1285
|
0
|
|
|
|
|
0
|
$Data =~ s{--}{}g; |
1286
|
|
|
|
|
|
|
|
1287
|
0
|
|
|
|
|
0
|
$O .= $OpenAngle . '!--' . $Processing . $Data . '-->'; |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
} |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
# Some other thing starting with <, keep looking |
1292
|
|
|
|
|
|
|
|
1293
|
1215
|
100
|
|
|
|
3022
|
if ( exists $Self->{AppendOutput} ) { |
1294
|
1
|
|
|
|
|
2
|
$O .= delete $Self->{AppendOutput}; |
1295
|
|
|
|
|
|
|
} |
1296
|
1215
|
|
|
|
|
2135
|
next; |
1297
|
|
|
|
|
|
|
} |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
OutputRemainder: |
1300
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
# No tag found, just copy rest |
1302
|
141
|
50
|
|
|
|
450
|
warn "defang OutputRemainder" if $Debug; |
1303
|
141
|
|
|
|
|
539
|
$I =~ m/\G(.*)$/gcs; |
1304
|
|
|
|
|
|
|
|
1305
|
141
|
100
|
|
|
|
586
|
$O .= $1 if $1; |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
# Exit if we got here |
1308
|
141
|
|
|
|
|
286
|
last; |
1309
|
|
|
|
|
|
|
} |
1310
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
# If not a recursive call, close mismatched tags |
1312
|
141
|
50
|
|
|
|
568
|
if ( $Self->{Reentrant} <= 1 ) { |
1313
|
141
|
|
|
|
|
351
|
my $RemainingClosingTags = ''; |
1314
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
my ( $OpenedTags, $OpenedTagsCount ) = |
1316
|
141
|
|
|
|
|
423
|
@$Self{qw(OpenedTags OpenedTagsCount)}; |
1317
|
141
|
|
|
|
|
569
|
while ( my $PreviousOpenedTag = pop @$OpenedTags ) { |
1318
|
0
|
|
|
|
|
0
|
$RemainingClosingTags .= |
1319
|
|
|
|
|
|
|
"<!-- close mismatch --></$PreviousOpenedTag>"; |
1320
|
0
|
|
|
|
|
0
|
$OpenedTagsCount->{$PreviousOpenedTag}--; |
1321
|
|
|
|
|
|
|
} |
1322
|
141
|
|
|
|
|
316
|
$O .= $RemainingClosingTags; |
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
# Also clear implicit tags |
1325
|
141
|
|
|
|
|
373
|
$Self->{ImplicitTags} = []; |
1326
|
|
|
|
|
|
|
|
1327
|
141
|
50
|
|
|
|
496
|
if ($Debug) { |
1328
|
0
|
|
|
|
|
0
|
warn "Check all tags closed and counts zeroed"; |
1329
|
|
|
|
|
|
|
warn "Not all tags closed" |
1330
|
0
|
0
|
|
|
|
0
|
if grep { $_ > 0 } values %$OpenedTagsCount; |
|
0
|
|
|
|
|
0
|
|
1331
|
|
|
|
|
|
|
} |
1332
|
|
|
|
|
|
|
} |
1333
|
|
|
|
|
|
|
|
1334
|
141
|
|
|
|
|
348
|
$Self->{Reentrant}--; |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
# Turn on utf-8 flag again |
1337
|
141
|
100
|
|
|
|
488
|
Encode::_utf8_on($O) if $UTF8Input; |
1338
|
|
|
|
|
|
|
|
1339
|
141
|
|
|
|
|
646
|
return $O; |
1340
|
|
|
|
|
|
|
} |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
=item I<add_to_output($String)> |
1343
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
Appends $String to the output after the current parsed tag ends. Can be used by client code in callback methods to add HTML text to the processed output. If the HTML text needs to be defanged, client code can safely call HTML::Declaw->defang() recursively from within the callback. |
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
=over 4 |
1347
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
=item B<Method parameters> |
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
=over 4 |
1351
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
=item I<$String> |
1353
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
The string that is added after the current parsed tag ends. |
1355
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
=back |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
=back |
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
=back |
1361
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
=cut |
1363
|
|
|
|
|
|
|
|
1364
|
|
|
|
|
|
|
# Callbacks call this method |
1365
|
|
|
|
|
|
|
sub add_to_output { |
1366
|
0
|
|
|
0
|
1
|
0
|
my $Self = shift; |
1367
|
0
|
|
|
|
|
0
|
$Self->{AppendOutput} = shift; |
1368
|
|
|
|
|
|
|
} |
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
=item defang_and_add_to_output |
1371
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
defang and add result to output |
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
=cut |
1375
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
sub defang_and_add_to_output { |
1377
|
0
|
|
|
0
|
1
|
0
|
my $Self = shift; |
1378
|
0
|
|
|
|
|
0
|
$Self->add_to_output( $Self->defang(shift) ); |
1379
|
|
|
|
|
|
|
} |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
=item B<INTERNAL METHODS> |
1382
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
Generally these methods never need to be called by users of the class, because they'll be called internally as the appropriate tags are |
1384
|
|
|
|
|
|
|
encountered, but they may be useful for some users in some cases. |
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
=over 4 |
1387
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
=item I<defang_script($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle)> |
1389
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
This method is invoked when a <script> tag is parsed. Defangs the <script> opening tag, and any closing tag. Any scripting content is also commented out, so browsers don't display them. |
1391
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
Returns 1 to indicate that the <script> tag must be defanged. |
1393
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
=over 4 |
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
=item B<Method parameters> |
1397
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
=over 4 |
1399
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
=item I<$OutR> |
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
A reference to the processed output HTML before the tag that is currently being parsed. |
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
=item I<$HtmlR> |
1405
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
A scalar reference to the input HTML. |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
=item I<$TagOps> |
1409
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
Indicates what operation should be done on a tag. Can be undefined, integer or code reference. Undefined indicates an unknown tag to HTML::Declaw, 1 indicates a known safe tag, 0 indicates a known unsafe tag, and a code reference indicates a subroutine that should be called to parse the current tag. For example, <style> and <script> tags are parsed by dedicated subroutines. |
1411
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
=item I<$OpenAngle> |
1413
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
Opening angle(<) sign of the current tag. |
1415
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
=item I<$IsEndTag> |
1417
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
Has the value '/' if the current tag is a closing tag. |
1419
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
=item I<$Tag> |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
The HTML tag that is currently being parsed. |
1423
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
=item I<$TagTrail> |
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
Any space after the tag, but before attributes. |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
=item I<$Attributes> |
1429
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
A reference to an array of the attributes and their values, including any surrouding spaces. Each element of the array is added by 'push' calls like below. |
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
push @$Attributes, [ $AttributeName, $SpaceBeforeEquals, $EqualsAndSubsequentSpace, $QuoteChar, $AttributeValue, $QuoteChar, $SpaceAfterAtributeValue ]; |
1433
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
=item I<$CloseAngle> |
1435
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
Anything after the end of last attribute including the closing HTML angle(>) |
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
=back |
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
=back |
1441
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
=cut |
1443
|
|
|
|
|
|
|
|
1444
|
|
|
|
|
|
|
sub defang_script { |
1445
|
2
|
|
|
2
|
1
|
4
|
my $Self = shift; |
1446
|
|
|
|
|
|
|
my ( |
1447
|
2
|
|
|
|
|
5
|
$OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, |
1448
|
|
|
|
|
|
|
$Tag, $TagTrail, $Attributes, $CloseAngle |
1449
|
|
|
|
|
|
|
) = @_; |
1450
|
2
|
50
|
|
|
|
7
|
warn "defang_script Processing <script> tag" if $Self->{Debug}; |
1451
|
|
|
|
|
|
|
|
1452
|
2
|
100
|
|
|
|
5
|
if ( !$IsEndTag ) { |
1453
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
# If we just parsed a starting <script> tag, code better be commented. If |
1455
|
|
|
|
|
|
|
# not, we attach comments around the code. |
1456
|
1
|
50
|
|
|
|
6
|
if ( $$HtmlR =~ m{\G(.*?)(?=</script\b)}gcsi ) { |
1457
|
1
|
|
|
|
|
3
|
my $ScriptTagContents = $1; |
1458
|
|
|
|
|
|
|
warn "defang_script ScriptTagContents $ScriptTagContents" |
1459
|
1
|
50
|
|
|
|
3
|
if $Self->{Debug}; |
1460
|
1
|
|
|
|
|
8
|
$ScriptTagContents =~ |
1461
|
|
|
|
|
|
|
s/^(\s*)(<!--)?(.*?)(-->)?(\s*)$/$1<!-- $3 -->$5/s; |
1462
|
1
|
|
|
|
|
5
|
$Self->{AppendOutput} .= $ScriptTagContents; |
1463
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
} |
1465
|
|
|
|
|
|
|
} |
1466
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
# Also defang tag |
1468
|
2
|
|
|
|
|
3
|
return 1; |
1469
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
} |
1471
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
=item I<defang_style($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle, $IsAttr)> |
1473
|
|
|
|
|
|
|
|
1474
|
|
|
|
|
|
|
Builds a list of selectors and declarations from HTML style tags as well as style attributes in HTML tags and calls defang_stylerule() to do the actual defanging. |
1475
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
Returns 0 to indicate that style tags must not be defanged. |
1477
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
=over 4 |
1479
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
=item B<Method parameters> |
1481
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
=over 4 |
1483
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
=item I<$IsAttr> |
1485
|
|
|
|
|
|
|
|
1486
|
|
|
|
|
|
|
Whether we are currently parsing a style attribute or style tag. $IsAttr will be true if we are currently parsing a style attribute. |
1487
|
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
|
=back |
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
For a description of other parameters, see documentation of defang_script() method |
1491
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
=back |
1493
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
=cut |
1495
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
sub defang_style { |
1497
|
|
|
|
|
|
|
|
1498
|
|
|
|
|
|
|
my ( |
1499
|
4
|
|
|
4
|
1
|
19
|
$Self, $OutR, $HtmlR, $TagOps, |
1500
|
|
|
|
|
|
|
$OpenAngle, $IsEndTag, $Tag, $TagTrail, |
1501
|
|
|
|
|
|
|
$Attributes, $CloseAngle, $IsAttr |
1502
|
|
|
|
|
|
|
) = @_; |
1503
|
4
|
|
|
|
|
13
|
my $lcTag = lc $Tag; |
1504
|
|
|
|
|
|
|
|
1505
|
|
|
|
|
|
|
warn "defang_style Tag=$Tag IsEndTag=$IsEndTag IsAttr=$IsAttr" |
1506
|
4
|
50
|
|
|
|
21
|
if $Self->{Debug}; |
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
# Nothing to do if end tag |
1509
|
4
|
50
|
33
|
|
|
21
|
return 0 if !$IsAttr && $IsEndTag; |
1510
|
|
|
|
|
|
|
|
1511
|
4
|
|
|
|
|
10
|
my $Content = ''; |
1512
|
4
|
|
|
|
|
9
|
my $ClosingStyleTagPresent = 1; |
1513
|
|
|
|
|
|
|
|
1514
|
4
|
|
|
|
|
12
|
for ($$HtmlR) { |
1515
|
|
|
|
|
|
|
|
1516
|
4
|
50
|
|
|
|
15
|
if ( !$IsAttr ) { |
1517
|
0
|
0
|
|
|
|
0
|
if (m{\G(.*?)(?=</style\b)}gcis) { |
|
|
0
|
|
|
|
|
|
1518
|
0
|
|
|
|
|
0
|
$Content = $1; |
1519
|
|
|
|
|
|
|
|
1520
|
|
|
|
|
|
|
# No ending style tag |
1521
|
|
|
|
|
|
|
} |
1522
|
|
|
|
|
|
|
elsif (m{\G([^<]*)}gcis) { |
1523
|
0
|
|
|
|
|
0
|
$Content = $1; |
1524
|
0
|
|
|
|
|
0
|
$ClosingStyleTagPresent = 0; |
1525
|
|
|
|
|
|
|
} |
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
# Its a style attribute |
1528
|
|
|
|
|
|
|
} |
1529
|
|
|
|
|
|
|
else { |
1530
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
# Avoid undef warning for style tag with no value. eg <tag style> |
1532
|
4
|
50
|
|
|
|
28
|
$Content = defined($_) ? $_ : ''; |
1533
|
|
|
|
|
|
|
} |
1534
|
|
|
|
|
|
|
} |
1535
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
# Clean up all comments, expand character escapes and such |
1537
|
4
|
|
|
|
|
20
|
$Self->cleanup_style($Content); |
1538
|
|
|
|
|
|
|
|
1539
|
|
|
|
|
|
|
# Handle any wrapping HTML comments. If no comments, we add |
1540
|
4
|
|
|
|
|
12
|
my ( $OpeningHtmlComment, $ClosingHtmlComment ) = ( '', '' ); |
1541
|
4
|
50
|
|
|
|
14
|
if ( !$IsAttr ) { |
1542
|
0
|
|
|
|
|
0
|
$Content =~ s{^(\s*<!--)?(.*?)(-->\s*)?$}{$2}s; |
1543
|
0
|
|
0
|
|
|
0
|
( $OpeningHtmlComment, $ClosingHtmlComment ) = |
|
|
|
0
|
|
|
|
|
1544
|
|
|
|
|
|
|
( $1 || "<!--", $3 || "-->" ); |
1545
|
|
|
|
|
|
|
} |
1546
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
# Style attributes can optionally have selector type elements, so we check whether we |
1548
|
|
|
|
|
|
|
# have a '{' in $Content: if yes, its style data with selector type elements |
1549
|
4
|
|
|
|
|
13
|
my $Naked = $Content !~ m/\{/; |
1550
|
4
|
|
|
|
|
163
|
my $StyleRule = |
1551
|
|
|
|
|
|
|
qr/\s*$StyleKey\s*:\s*$StyleValue\s*(?:;\s*$StyleKey\s*:\s*$StyleValue\s*)*;?\s*/o; |
1552
|
4
|
50
|
|
|
|
21
|
warn "defang_style Naked=$Naked" if $Self->{Debug}; |
1553
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
# And suitably change the regex to match the data |
1555
|
4
|
50
|
|
|
|
131
|
my $SelectorRuleRE = |
1556
|
|
|
|
|
|
|
$Naked |
1557
|
|
|
|
|
|
|
? qr/(\s*)()()()($StyleRule)()(\s*)/o |
1558
|
|
|
|
|
|
|
: qr/(\s*)((?:$Selectors))(\s*)(\{)($StyleRule)(\})(\s*)/o; |
1559
|
|
|
|
|
|
|
|
1560
|
4
|
|
|
|
|
15
|
my ( @Selectors, @SelectorRules, %ExtraData ); |
1561
|
|
|
|
|
|
|
|
1562
|
|
|
|
|
|
|
# Now we parse the selectors and declarations |
1563
|
4
|
|
|
|
|
61
|
while ( $Content =~ s{$SelectorRuleRE}{} ) { |
1564
|
4
|
|
|
|
|
18
|
my ( $Selector, $SelectorRule ) = ( $2, $5 ); |
1565
|
4
|
|
|
|
|
14
|
push @Selectors, $Selector; |
1566
|
4
|
|
|
|
|
10
|
push @SelectorRules, $SelectorRule; |
1567
|
4
|
50
|
|
|
|
18
|
warn "defang_style Selector=$Selector" if $Self->{Debug}; |
1568
|
4
|
50
|
|
|
|
17
|
warn "defang_style SelectorRule=$SelectorRule" if $Self->{Debug}; |
1569
|
4
|
|
|
|
|
39
|
$ExtraData{$Selector} = [ $1, $3, $4, $6, $7 ]; |
1570
|
|
|
|
|
|
|
} |
1571
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
# Check declaration elements for defanging |
1573
|
4
|
|
|
|
|
25
|
$Self->defang_stylerule( \@Selectors, \@SelectorRules, $lcTag, $IsAttr, |
1574
|
|
|
|
|
|
|
$HtmlR, $OutR ); |
1575
|
|
|
|
|
|
|
|
1576
|
4
|
|
|
|
|
8
|
my $StyleOut = ""; |
1577
|
|
|
|
|
|
|
|
1578
|
|
|
|
|
|
|
# Re-create the style data |
1579
|
4
|
|
|
|
|
11
|
foreach my $Selector (@Selectors) { |
1580
|
|
|
|
|
|
|
|
1581
|
4
|
|
|
|
|
9
|
my $SelectorRule = shift @SelectorRules; |
1582
|
4
|
|
|
|
|
10
|
my $Spaces = $ExtraData{$Selector}; |
1583
|
|
|
|
|
|
|
my ( |
1584
|
4
|
50
|
|
|
|
21
|
$BeforeSelector, $AfterSelector, $OpenBrace, |
1585
|
|
|
|
|
|
|
$CloseBrace, $AfterRule |
1586
|
|
|
|
|
|
|
) |
1587
|
|
|
|
|
|
|
= @$Spaces |
1588
|
|
|
|
|
|
|
if $Spaces; |
1589
|
|
|
|
|
|
|
( $BeforeSelector, $AfterSelector, $AfterRule ) = ( "", " ", "\n" ) |
1590
|
4
|
50
|
|
|
|
14
|
unless $ExtraData{$Selector}; |
1591
|
4
|
0
|
33
|
|
|
15
|
( $OpenBrace, $CloseBrace ) = ( "{", "}" ) if !$Spaces && !$IsAttr; |
1592
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
# Put back the rule together |
1594
|
4
|
50
|
|
|
|
16
|
if ( defined($Selector) ) { |
1595
|
4
|
50
|
|
|
|
16
|
$StyleOut .= $BeforeSelector if defined($BeforeSelector); |
1596
|
4
|
|
|
|
|
9
|
$StyleOut .= $Selector; |
1597
|
4
|
50
|
|
|
|
15
|
$StyleOut .= $AfterSelector if defined($AfterSelector); |
1598
|
4
|
50
|
|
|
|
12
|
$StyleOut .= $OpenBrace if defined($OpenBrace); |
1599
|
4
|
50
|
|
|
|
13
|
$StyleOut .= $SelectorRule if defined($SelectorRule); |
1600
|
4
|
50
|
|
|
|
12
|
$StyleOut .= $CloseBrace if defined($CloseBrace); |
1601
|
4
|
50
|
|
|
|
16
|
$StyleOut .= $AfterRule if defined($AfterRule); |
1602
|
|
|
|
|
|
|
} |
1603
|
|
|
|
|
|
|
|
1604
|
|
|
|
|
|
|
} |
1605
|
|
|
|
|
|
|
|
1606
|
4
|
50
|
|
|
|
15
|
warn "defang_style StyleOut=$StyleOut" if $Self->{Debug}; |
1607
|
|
|
|
|
|
|
|
1608
|
4
|
50
|
|
|
|
15
|
if ($IsAttr) { |
1609
|
4
|
|
|
|
|
9
|
$$HtmlR = $StyleOut; |
1610
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
} |
1612
|
|
|
|
|
|
|
else { |
1613
|
|
|
|
|
|
|
$Self->{AppendOutput} .= |
1614
|
0
|
|
|
|
|
0
|
$OpeningHtmlComment . $StyleOut . $ClosingHtmlComment; |
1615
|
0
|
0
|
|
|
|
0
|
$Self->{AppendOutput} .= "</style>" if !$ClosingStyleTagPresent; |
1616
|
|
|
|
|
|
|
} |
1617
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
# We don't want <style> tags to be defanged |
1619
|
4
|
|
|
|
|
21
|
return 0; |
1620
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
} |
1622
|
|
|
|
|
|
|
|
1623
|
|
|
|
|
|
|
=item I<cleanup_style($StyleString)> |
1624
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
Helper function to clean up CSS data. This function directly operates on the input string without taking a copy. |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
=over 4 |
1628
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
=item B<Method parameters> |
1630
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
=over 4 |
1632
|
|
|
|
|
|
|
|
1633
|
|
|
|
|
|
|
=item I<$StyleString> |
1634
|
|
|
|
|
|
|
|
1635
|
|
|
|
|
|
|
The input style string that is cleaned. |
1636
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
=back |
1638
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
=back |
1640
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
=cut |
1642
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
sub cleanup_style { |
1644
|
4
|
|
|
4
|
1
|
11
|
my $Self = shift; |
1645
|
|
|
|
|
|
|
|
1646
|
4
|
|
|
|
|
13
|
for ( $_[0] ) { |
1647
|
|
|
|
|
|
|
|
1648
|
|
|
|
|
|
|
# Expand escapes |
1649
|
4
|
0
|
|
|
|
21
|
s/(?:&x|\\)(0?[\da-f]{1,6});?/defined($1) ? chr(hex($1)) : ""/egi; |
|
0
|
|
|
|
|
0
|
|
1650
|
4
|
0
|
|
|
|
15
|
s/(?:&#)([\d]{1,7});?/defined($1) ? chr($1) : ""/egi; |
|
0
|
|
|
|
|
0
|
|
1651
|
|
|
|
|
|
|
|
1652
|
|
|
|
|
|
|
# Remove all remaining invalid escapes TODO This probably is not correct. Backslashes are required to be left alone by the CSS syntax |
1653
|
4
|
|
|
|
|
11
|
s/\\//g; |
1654
|
|
|
|
|
|
|
|
1655
|
|
|
|
|
|
|
# Remove all CSS comments |
1656
|
4
|
|
|
|
|
12
|
s{/\*.*?\*/}{}sg; |
1657
|
|
|
|
|
|
|
|
1658
|
|
|
|
|
|
|
# Remove any CSS imports |
1659
|
4
|
|
|
|
|
14
|
s{(\@import[^;]+;?)}{}sg; |
1660
|
|
|
|
|
|
|
|
1661
|
4
|
50
|
|
|
|
17
|
warn "cleanup_style Content=$_" if $Self->{Debug}; |
1662
|
|
|
|
|
|
|
} |
1663
|
|
|
|
|
|
|
|
1664
|
|
|
|
|
|
|
} |
1665
|
|
|
|
|
|
|
|
1666
|
|
|
|
|
|
|
=item I<defang_stylerule($SelectorsIn, $StyleRules, $lcTag, $IsAttr, $HtmlR, $OutR)> |
1667
|
|
|
|
|
|
|
|
1668
|
|
|
|
|
|
|
Defangs style data. |
1669
|
|
|
|
|
|
|
|
1670
|
|
|
|
|
|
|
=over 4 |
1671
|
|
|
|
|
|
|
|
1672
|
|
|
|
|
|
|
=item B<Method parameters> |
1673
|
|
|
|
|
|
|
|
1674
|
|
|
|
|
|
|
=over 4 |
1675
|
|
|
|
|
|
|
|
1676
|
|
|
|
|
|
|
=item I<$SelectorsIn> |
1677
|
|
|
|
|
|
|
|
1678
|
|
|
|
|
|
|
An array reference to the selectors in the style tag/attribute contents. |
1679
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
=item I<$StyleRules> |
1681
|
|
|
|
|
|
|
|
1682
|
|
|
|
|
|
|
An array reference to the declaration blocks in the style tag/attribute contents. |
1683
|
|
|
|
|
|
|
|
1684
|
|
|
|
|
|
|
=item I<$lcTag> |
1685
|
|
|
|
|
|
|
|
1686
|
|
|
|
|
|
|
Lower case version of the HTML tag that is currently being parsed. |
1687
|
|
|
|
|
|
|
|
1688
|
|
|
|
|
|
|
=item I<$IsAttr> |
1689
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
Whether we are currently parsing a style attribute or style tag. $IsAttr will be true if we are currently parsing a style attribute. |
1691
|
|
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
=item I<$HtmlR> |
1693
|
|
|
|
|
|
|
|
1694
|
|
|
|
|
|
|
A scalar reference to the input HTML. |
1695
|
|
|
|
|
|
|
|
1696
|
|
|
|
|
|
|
=item I<$OutR> |
1697
|
|
|
|
|
|
|
|
1698
|
|
|
|
|
|
|
A scalar reference to the processed output so far. |
1699
|
|
|
|
|
|
|
|
1700
|
|
|
|
|
|
|
=back |
1701
|
|
|
|
|
|
|
|
1702
|
|
|
|
|
|
|
=back |
1703
|
|
|
|
|
|
|
|
1704
|
|
|
|
|
|
|
=cut |
1705
|
|
|
|
|
|
|
|
1706
|
|
|
|
|
|
|
sub defang_stylerule { |
1707
|
|
|
|
|
|
|
|
1708
|
4
|
|
|
4
|
1
|
15
|
my ( $Self, $SelectorsIn, $StyleRules, $lcTag, $IsAttr, $HtmlR, $OutR ) = |
1709
|
|
|
|
|
|
|
@_; |
1710
|
|
|
|
|
|
|
|
1711
|
4
|
|
|
|
|
17
|
my ( @SelectorStyleKeyValues, %SelectorStyleKeyExtraData ); |
1712
|
|
|
|
|
|
|
|
1713
|
4
|
|
|
|
|
0
|
my ( @Selectors, @SelectorRules ); |
1714
|
|
|
|
|
|
|
|
1715
|
4
|
|
|
|
|
13
|
foreach my $Selector (@$SelectorsIn) { |
1716
|
|
|
|
|
|
|
|
1717
|
4
|
50
|
|
|
|
14
|
warn "defang_stylerule Selector=$Selector" if $Self->{Debug}; |
1718
|
4
|
|
|
|
|
37
|
my $Rule = shift @$StyleRules; |
1719
|
4
|
|
|
|
|
11
|
my ( @SelectorRule, @KeyValueRules, %StyleKeyExtraData ); |
1720
|
|
|
|
|
|
|
|
1721
|
|
|
|
|
|
|
# Split style declaration to basic elements |
1722
|
4
|
|
|
|
|
38
|
while ( $Rule =~ |
1723
|
|
|
|
|
|
|
s{^(\{?\s*)([^:]+?)(\s*:\s*)((?:)?)([^;\}]+)()?(\s*;?)(\s*\}?)}{} ) |
1724
|
|
|
|
|
|
|
{ |
1725
|
|
|
|
|
|
|
my ( |
1726
|
6
|
|
|
|
|
35
|
$KeyPilot, $Key, $Separator, $QuoteStart, |
1727
|
|
|
|
|
|
|
$Value, $QuoteEnd, $ValueEnd, $ValueTrail |
1728
|
|
|
|
|
|
|
) = ( $1, $2, $3, $4, $5, $6, $7, $8 ); |
1729
|
|
|
|
|
|
|
|
1730
|
|
|
|
|
|
|
warn |
1731
|
|
|
|
|
|
|
"defang_stylerule Key=$Key Value=$Value Separator=$Separator ValueEnd=$ValueEnd" |
1732
|
6
|
50
|
|
|
|
21
|
if $Self->{Debug}; |
1733
|
|
|
|
|
|
|
|
1734
|
|
|
|
|
|
|
# Store everything except style property and value in a hash |
1735
|
6
|
|
|
|
|
35
|
$StyleKeyExtraData{ lc $Key } = [ |
1736
|
|
|
|
|
|
|
$KeyPilot, $Separator, $QuoteStart, |
1737
|
|
|
|
|
|
|
$QuoteEnd, $ValueEnd, $ValueTrail |
1738
|
|
|
|
|
|
|
]; |
1739
|
6
|
|
|
|
|
16
|
my $DefangStyleRule = 2; |
1740
|
|
|
|
|
|
|
|
1741
|
|
|
|
|
|
|
# If the style value has a URL in it and URL callback has been supplied, make a url_callback |
1742
|
6
|
50
|
33
|
|
|
45
|
if ( $Self->{url_callback} |
1743
|
|
|
|
|
|
|
&& $Value =~ m/\s*url\(\s*((?:['"])?)(.*?)\1\s*\)/i ) |
1744
|
|
|
|
|
|
|
{ |
1745
|
0
|
0
|
|
|
|
0
|
my ( $UrlOrig, $Url ) = ( $2, $2 ) if $2; |
1746
|
|
|
|
|
|
|
warn |
1747
|
|
|
|
|
|
|
"defang_stylerule Url found in style property value. Url=$Url" |
1748
|
0
|
0
|
|
|
|
0
|
if $Self->{Debug}; |
1749
|
0
|
0
|
|
|
|
0
|
my $lcAttrKey = $IsAttr ? "style" : undef; |
1750
|
|
|
|
|
|
|
$DefangStyleRule = $Self->{url_callback}->( |
1751
|
0
|
0
|
|
|
|
0
|
$Self->{context}, $Self, $lcTag, $lcAttrKey, \$Url, undef, |
1752
|
|
|
|
|
|
|
$HtmlR, $OutR |
1753
|
|
|
|
|
|
|
) if $Url; |
1754
|
|
|
|
|
|
|
|
1755
|
|
|
|
|
|
|
# Save back any changes |
1756
|
|
|
|
|
|
|
warn |
1757
|
|
|
|
|
|
|
"defang_stylerule After URL callback, Value=$Value DefangStyleRule=$DefangStyleRule" |
1758
|
0
|
0
|
|
|
|
0
|
if $Self->{Debug}; |
1759
|
0
|
0
|
|
|
|
0
|
$Value =~ s{\Q$UrlOrig\E}{$Url} if $UrlOrig; |
1760
|
|
|
|
|
|
|
} |
1761
|
|
|
|
|
|
|
|
1762
|
|
|
|
|
|
|
# Save the style property, value and defang flag |
1763
|
6
|
|
|
|
|
18
|
push @KeyValueRules, [ $Key, $Value, $DefangStyleRule ]; |
1764
|
|
|
|
|
|
|
warn |
1765
|
|
|
|
|
|
|
"defang_stylerule Key=$Key Value=$Value DefangStyleRule=$DefangStyleRule" |
1766
|
6
|
50
|
|
|
|
44
|
if $Self->{Debug}; |
1767
|
|
|
|
|
|
|
|
1768
|
|
|
|
|
|
|
} |
1769
|
|
|
|
|
|
|
|
1770
|
4
|
|
|
|
|
14
|
push( @SelectorRule, \@KeyValueRules ); |
1771
|
4
|
|
|
|
|
234
|
push( @Selectors, $Selector ); |
1772
|
4
|
|
|
|
|
10
|
push( @SelectorRules, \@SelectorRule ); |
1773
|
4
|
|
|
|
|
18
|
$SelectorStyleKeyExtraData{$Selector} = \%StyleKeyExtraData; |
1774
|
|
|
|
|
|
|
|
1775
|
|
|
|
|
|
|
} |
1776
|
|
|
|
|
|
|
|
1777
|
|
|
|
|
|
|
# If a CSS callback is supplied, we call that |
1778
|
|
|
|
|
|
|
$Self->{css_callback}->( |
1779
|
|
|
|
|
|
|
$Self->{context}, $Self, \@Selectors, \@SelectorRules, $lcTag, $IsAttr, |
1780
|
|
|
|
|
|
|
$OutR |
1781
|
4
|
50
|
|
|
|
33
|
) if $Self->{css_callback}; |
1782
|
|
|
|
|
|
|
|
1783
|
|
|
|
|
|
|
warn |
1784
|
|
|
|
|
|
|
"defang_stylerule More selectors($#Selectors) than selector rules($#SelectorRules)" |
1785
|
|
|
|
|
|
|
if $Self->{Debug} |
1786
|
4
|
50
|
33
|
|
|
19
|
&& $#Selectors > $#SelectorRules; |
1787
|
|
|
|
|
|
|
|
1788
|
4
|
|
|
|
|
11
|
my $Counter = 0; |
1789
|
4
|
|
|
|
|
12
|
foreach my $Selector (@Selectors) { |
1790
|
|
|
|
|
|
|
|
1791
|
4
|
|
|
|
|
13
|
my $SelectorRule = $SelectorRules[$Counter]; |
1792
|
4
|
|
|
|
|
9
|
my $ExtraData = $SelectorStyleKeyExtraData{$Selector}; |
1793
|
4
|
|
|
|
|
10
|
my $Rule; |
1794
|
|
|
|
|
|
|
|
1795
|
4
|
|
|
|
|
18
|
for ( my $j = 0 ; $j <= $#$SelectorRule ; $j++ ) { |
1796
|
4
|
|
|
|
|
11
|
my $KeyValueRules = $$SelectorRule[$j]; |
1797
|
|
|
|
|
|
|
|
1798
|
4
|
|
|
|
|
17
|
for ( my $k = 0 ; $k <= $#$KeyValueRules ; $k++ ) { |
1799
|
6
|
|
|
|
|
18
|
my $KeyValueRule = $$KeyValueRules[$k]; |
1800
|
6
|
|
|
|
|
16
|
my ( $Key, $Value, $Defang ) = @$KeyValueRule; |
1801
|
6
|
|
|
|
|
18
|
my @v = $$ExtraData{ lc $Key }; |
1802
|
|
|
|
|
|
|
my ( |
1803
|
|
|
|
|
|
|
$KeyPilot, $Separator, $QuoteStart, |
1804
|
|
|
|
|
|
|
$QuoteEnd, $ValueEnd, $ValueTrail |
1805
|
|
|
|
|
|
|
) |
1806
|
6
|
|
|
|
|
21
|
= @{ $v[0] } |
1807
|
6
|
50
|
|
|
|
22
|
if $$ExtraData{ lc $Key }; |
1808
|
|
|
|
|
|
|
|
1809
|
|
|
|
|
|
|
# If an intermediate style property-value pair doesn't have a terminating semi-colon, add it |
1810
|
6
|
50
|
66
|
|
|
48
|
if ( $k > 0 && !$$ExtraData{ lc $Key } ) { |
1811
|
0
|
|
|
|
|
0
|
my $PreviousRule = $KeyValueRules->[ $k - 1 ]; |
1812
|
0
|
|
|
|
|
0
|
my $PreviousKey = $PreviousRule->[0]; |
1813
|
0
|
|
|
|
|
0
|
my @PrevExtra = $ExtraData->{ lc $PreviousKey }; |
1814
|
0
|
0
|
0
|
|
|
0
|
$ExtraData->{ lc $PreviousKey }->[4] .= ";" |
1815
|
|
|
|
|
|
|
if defined( $PrevExtra[0]->[4] ) |
1816
|
|
|
|
|
|
|
&& $PrevExtra[0]->[4] !~ m/;/; |
1817
|
0
|
|
|
|
|
0
|
$ExtraData->{ lc $Key }->[1] = ":"; |
1818
|
|
|
|
|
|
|
} |
1819
|
|
|
|
|
|
|
|
1820
|
|
|
|
|
|
|
} |
1821
|
|
|
|
|
|
|
|
1822
|
|
|
|
|
|
|
} |
1823
|
|
|
|
|
|
|
|
1824
|
4
|
|
|
|
|
12
|
$Counter++; |
1825
|
|
|
|
|
|
|
} |
1826
|
|
|
|
|
|
|
|
1827
|
4
|
|
|
|
|
7
|
$Counter = 0; |
1828
|
4
|
|
|
|
|
11
|
foreach my $Selector (@Selectors) { |
1829
|
|
|
|
|
|
|
|
1830
|
4
|
50
|
33
|
|
|
153
|
$SelectorsIn->[$Counter] = $Selector |
1831
|
|
|
|
|
|
|
if $SelectorsIn->[$Counter] && !$IsAttr; |
1832
|
4
|
|
|
|
|
12
|
my $SelectorRule = $SelectorRules[$Counter]; |
1833
|
4
|
|
|
|
|
9
|
my $ExtraData = $SelectorStyleKeyExtraData{$Selector}; |
1834
|
4
|
|
|
|
|
10
|
my $Rule; |
1835
|
|
|
|
|
|
|
|
1836
|
4
|
|
|
|
|
11
|
foreach my $KeyRules (@$SelectorRule) { |
1837
|
|
|
|
|
|
|
|
1838
|
4
|
|
|
|
|
9
|
foreach my $KeyValueRule (@$KeyRules) { |
1839
|
|
|
|
|
|
|
|
1840
|
6
|
|
|
|
|
17
|
my ( $Key, $Value, $Defang ) = @$KeyValueRule; |
1841
|
6
|
|
|
|
|
18
|
my @v = $$ExtraData{ lc $Key }; |
1842
|
|
|
|
|
|
|
my ( |
1843
|
|
|
|
|
|
|
$KeyPilot, $Separator, $QuoteStart, |
1844
|
|
|
|
|
|
|
$QuoteEnd, $ValueEnd, $ValueTrail |
1845
|
|
|
|
|
|
|
) |
1846
|
6
|
|
|
|
|
19
|
= @{ $v[0] } |
1847
|
6
|
50
|
|
|
|
22
|
if $ExtraData->{ lc $Key }; |
1848
|
|
|
|
|
|
|
( $Separator, $ValueEnd, $ValueTrail ) = ( ":", ";", " " ) |
1849
|
6
|
50
|
|
|
|
21
|
unless $ExtraData->{ lc $Key }; |
1850
|
|
|
|
|
|
|
|
1851
|
|
|
|
|
|
|
# Flag to defang if a url, expression or unallowed character found |
1852
|
6
|
50
|
|
|
|
18
|
if ( $Defang == 2 ) { |
1853
|
6
|
50
|
|
|
|
34
|
$Defang = |
1854
|
|
|
|
|
|
|
$Value =~ m{^\s*[a-z0-9%!"'`:()#\s.,\/+-]+\s*;?\s*$}i |
1855
|
|
|
|
|
|
|
? 0 |
1856
|
|
|
|
|
|
|
: 1; |
1857
|
6
|
50
|
|
|
|
21
|
$Defang = $Value =~ m{^\s*url\s*\(}i ? 1 : $Defang; |
1858
|
6
|
50
|
|
|
|
19
|
$Defang = $Value =~ m{^\s*expression\s*\(}i ? 1 : $Defang; |
1859
|
|
|
|
|
|
|
} |
1860
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
# Comment out the style property-value pair if $Defang |
1862
|
6
|
50
|
|
|
|
20
|
$Key = $Defang ? "/*" . $Key : $Key; |
1863
|
6
|
50
|
|
|
|
17
|
$ValueEnd = $Defang ? $ValueEnd . "*/" : $ValueEnd; |
1864
|
|
|
|
|
|
|
|
1865
|
|
|
|
|
|
|
# Put the rule together back |
1866
|
6
|
50
|
|
|
|
18
|
if ( defined($Key) ) { |
1867
|
6
|
50
|
|
|
|
43
|
$Rule .= $KeyPilot if defined($KeyPilot); |
1868
|
6
|
|
|
|
|
12
|
$Rule .= $Key; |
1869
|
6
|
50
|
|
|
|
18
|
$Rule .= $Separator if defined($Separator); |
1870
|
6
|
50
|
|
|
|
17
|
$Rule .= $QuoteStart if defined($QuoteStart); |
1871
|
6
|
50
|
|
|
|
21
|
$Rule .= $Value if defined($Value); |
1872
|
6
|
50
|
|
|
|
13
|
$Rule .= $QuoteEnd if defined($QuoteEnd); |
1873
|
6
|
50
|
|
|
|
17
|
$Rule .= $ValueEnd if defined($ValueEnd); |
1874
|
6
|
50
|
|
|
|
17
|
$Rule .= $ValueTrail if defined($ValueTrail); |
1875
|
|
|
|
|
|
|
} |
1876
|
|
|
|
|
|
|
|
1877
|
6
|
50
|
|
|
|
27
|
warn "defang_stylerule Rule=$Rule" if $Self->{Debug}; |
1878
|
|
|
|
|
|
|
|
1879
|
|
|
|
|
|
|
} |
1880
|
|
|
|
|
|
|
|
1881
|
|
|
|
|
|
|
} |
1882
|
|
|
|
|
|
|
|
1883
|
|
|
|
|
|
|
# Modify the original array |
1884
|
4
|
|
|
|
|
17
|
$StyleRules->[$Counter] = $Rule; |
1885
|
4
|
|
|
|
|
24
|
$Counter++; |
1886
|
|
|
|
|
|
|
} |
1887
|
|
|
|
|
|
|
|
1888
|
|
|
|
|
|
|
} |
1889
|
|
|
|
|
|
|
|
1890
|
|
|
|
|
|
|
=item I<defang_attributes($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle)> |
1891
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
Defangs attributes, defangs tags, does tag, attrib, css and url callbacks. |
1893
|
|
|
|
|
|
|
|
1894
|
|
|
|
|
|
|
=over 4 |
1895
|
|
|
|
|
|
|
|
1896
|
|
|
|
|
|
|
=item B<Method parameters> |
1897
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
For a description of the method parameters, see documentation of defang_script() method |
1899
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
=back |
1901
|
|
|
|
|
|
|
|
1902
|
|
|
|
|
|
|
=cut |
1903
|
|
|
|
|
|
|
|
1904
|
|
|
|
|
|
|
sub defang_attributes { |
1905
|
|
|
|
|
|
|
my ( |
1906
|
1208
|
|
|
1208
|
1
|
2965
|
$Self, $OutR, $HtmlR, $TagOps, $OpenAngle, |
1907
|
|
|
|
|
|
|
$IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle |
1908
|
|
|
|
|
|
|
) = @_; |
1909
|
1208
|
|
|
|
|
2014
|
my $lcTag = lc $Tag; |
1910
|
|
|
|
|
|
|
|
1911
|
1208
|
|
|
|
|
2040
|
my $Debug = $Self->{Debug}; |
1912
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
# Create a key -> \value mapping of all attributes up front |
1914
|
|
|
|
|
|
|
# so we have a complete hash for each callback |
1915
|
1208
|
|
|
|
|
2455
|
my %AttributeHash = map { lc( $_->[0] ) => \$_->[4] } @$Attributes; |
|
344
|
|
|
|
|
1337
|
|
1916
|
|
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
# Now process each attribute |
1918
|
1208
|
|
|
|
|
2452
|
foreach my $Attr (@$Attributes) { |
1919
|
|
|
|
|
|
|
|
1920
|
|
|
|
|
|
|
# We get the key and value of the attribute |
1921
|
344
|
|
|
|
|
815
|
my ( $AttrKey, $AttrValR ) = ( $Attr->[0], \$Attr->[4] ); |
1922
|
344
|
|
|
|
|
666
|
my $lcAttrKey = lc $AttrKey; |
1923
|
344
|
50
|
|
|
|
803
|
warn "defang_attributes Tag=$Tag AttrKey=$AttrKey AttrVal=$$AttrValR" |
1924
|
|
|
|
|
|
|
if $Debug; |
1925
|
|
|
|
|
|
|
|
1926
|
|
|
|
|
|
|
# Get the attribute value cleaned up |
1927
|
344
|
|
|
|
|
959
|
( $$AttrValR, my $AttrValStripped ) = |
1928
|
|
|
|
|
|
|
$Self->cleanup_attribute( $Attr, $AttrKey, $$AttrValR ); |
1929
|
344
|
50
|
|
|
|
891
|
warn "defang_attributes AttrValStripped=$AttrValStripped" if $Debug; |
1930
|
|
|
|
|
|
|
|
1931
|
344
|
|
|
|
|
592
|
my $AttribRule = ""; |
1932
|
344
|
100
|
|
|
|
912
|
if ( ref( $Tags{$lcTag} ) ) { |
1933
|
237
|
|
|
|
|
540
|
$AttribRule = $Tags{$lcTag}{$lcAttrKey}; |
1934
|
|
|
|
|
|
|
} |
1935
|
|
|
|
|
|
|
|
1936
|
344
|
|
|
|
|
533
|
my $DefangAttrib = 2; |
1937
|
|
|
|
|
|
|
|
1938
|
344
|
100
|
|
|
|
956
|
$AttribRule = $CommonAttributes{$lcAttrKey} unless $AttribRule; |
1939
|
344
|
50
|
|
|
|
747
|
warn "defang_attributes AttribRule=$AttribRule" if $Debug; |
1940
|
|
|
|
|
|
|
|
1941
|
|
|
|
|
|
|
# If this is a URL type $AttrKey and URL callback method is supplied, make a url_callback |
1942
|
344
|
100
|
33
|
|
|
1985
|
if ( $Self->{url_callback} |
|
|
|
33
|
|
|
|
|
1943
|
|
|
|
|
|
|
&& $AttribRule |
1944
|
|
|
|
|
|
|
&& exists( $UrlRules{$AttribRule} ) ) |
1945
|
|
|
|
|
|
|
{ |
1946
|
124
|
50
|
|
|
|
403
|
warn "defang_attributes Making URL callback" if $Debug; |
1947
|
|
|
|
|
|
|
$DefangAttrib = $Self->{url_callback}->( |
1948
|
124
|
|
|
|
|
574
|
$Self->{context}, $Self, $lcTag, $lcAttrKey, $AttrValR, |
1949
|
|
|
|
|
|
|
\%AttributeHash, $HtmlR, $OutR |
1950
|
|
|
|
|
|
|
); |
1951
|
124
|
50
|
|
|
|
437
|
die "url_callback reset" if !defined pos($$HtmlR); |
1952
|
|
|
|
|
|
|
} |
1953
|
|
|
|
|
|
|
|
1954
|
|
|
|
|
|
|
# We have a style attribute, so we call defang_style |
1955
|
344
|
100
|
|
|
|
917
|
if ( $lcAttrKey eq "style" ) { |
1956
|
4
|
50
|
|
|
|
16
|
warn "defang_attributes Found style attribute, calling defang_style" |
1957
|
|
|
|
|
|
|
if $Debug; |
1958
|
4
|
|
|
|
|
23
|
$Self->defang_style( |
1959
|
|
|
|
|
|
|
$OutR, $AttrValR, $TagOps, $OpenAngle, |
1960
|
|
|
|
|
|
|
$IsEndTag, $lcTag, $TagTrail, $Attributes, |
1961
|
|
|
|
|
|
|
$CloseAngle, 1 |
1962
|
|
|
|
|
|
|
); |
1963
|
|
|
|
|
|
|
} |
1964
|
|
|
|
|
|
|
|
1965
|
|
|
|
|
|
|
# If a attribute callback is supplied and its interested in this attribute, we make a attribs_callback |
1966
|
344
|
100
|
33
|
|
|
1373
|
if ( $Self->{attribs_callback} |
1967
|
|
|
|
|
|
|
&& exists( $Self->{attribs_to_callback}->{$lcAttrKey} ) ) |
1968
|
|
|
|
|
|
|
{ |
1969
|
61
|
50
|
|
|
|
226
|
warn |
1970
|
|
|
|
|
|
|
"defang_attributes Making attribute callback for Tag=$Tag AttrKey=$AttrKey" |
1971
|
|
|
|
|
|
|
if $Debug; |
1972
|
|
|
|
|
|
|
$DefangAttrib = $Self->{attribs_callback}->( |
1973
|
61
|
|
|
|
|
292
|
$Self->{context}, $Self, $lcTag, $lcAttrKey, $AttrValR, $HtmlR, |
1974
|
|
|
|
|
|
|
$OutR |
1975
|
|
|
|
|
|
|
); |
1976
|
|
|
|
|
|
|
} |
1977
|
|
|
|
|
|
|
|
1978
|
344
|
100
|
66
|
|
|
1499
|
if ( ( $DefangAttrib == 2 ) && $AttribRule ) { |
|
|
50
|
|
|
|
|
|
1979
|
175
|
|
|
|
|
395
|
my $Rule = $Rules{$AttribRule}; |
1980
|
175
|
50
|
|
|
|
438
|
warn "defang_attributes AttribRule=$AttribRule Rule=$Rule" |
1981
|
|
|
|
|
|
|
if $Debug; |
1982
|
|
|
|
|
|
|
|
1983
|
|
|
|
|
|
|
# We whitelist the attribute if the value matches the rule |
1984
|
175
|
100
|
66
|
|
|
1505
|
if ( ref($Rule) eq "Regexp" && $AttrValStripped =~ $Rule ) { |
1985
|
171
|
|
|
|
|
366
|
$DefangAttrib = 0; |
1986
|
|
|
|
|
|
|
} |
1987
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
# Defang all scripts in attributes |
1989
|
|
|
|
|
|
|
$DefangAttrib = |
1990
|
175
|
50
|
|
|
|
612
|
$AttrValStripped =~ /^(javascript:|livescript:|mocha:|vbscript:)/i |
1991
|
|
|
|
|
|
|
? 1 |
1992
|
|
|
|
|
|
|
: $DefangAttrib; |
1993
|
|
|
|
|
|
|
|
1994
|
|
|
|
|
|
|
} |
1995
|
|
|
|
|
|
|
elsif ( !$AttribRule ) { |
1996
|
0
|
|
|
|
|
0
|
$DefangAttrib = 1; |
1997
|
|
|
|
|
|
|
} |
1998
|
|
|
|
|
|
|
|
1999
|
344
|
50
|
|
|
|
790
|
warn "defang_attributes DefangAttrib=$DefangAttrib" if $Debug; |
2000
|
|
|
|
|
|
|
|
2001
|
|
|
|
|
|
|
# Store the attribute defang flag |
2002
|
344
|
100
|
|
|
|
984
|
push @$Attr, $DefangAttrib if $DefangAttrib; |
2003
|
|
|
|
|
|
|
|
2004
|
|
|
|
|
|
|
} |
2005
|
|
|
|
|
|
|
|
2006
|
1208
|
|
|
|
|
1821
|
my $DefangTag = 2; |
2007
|
|
|
|
|
|
|
|
2008
|
|
|
|
|
|
|
# Callback if the tag is in @$tags_to_callback |
2009
|
1208
|
100
|
|
|
|
2909
|
if ( exists( $Self->{tags_to_callback}->{$lcTag} ) ) { |
2010
|
30
|
50
|
|
|
|
79
|
warn "defang_attributes Calling tags_callback for $Tag" if $Debug; |
2011
|
|
|
|
|
|
|
$DefangTag = $Self->{tags_callback}->( |
2012
|
30
|
|
|
|
|
128
|
$Self->{context}, $Self, $OpenAngle, |
2013
|
|
|
|
|
|
|
$lcTag, $IsEndTag, \%AttributeHash, |
2014
|
|
|
|
|
|
|
$CloseAngle, $HtmlR, $OutR |
2015
|
|
|
|
|
|
|
); |
2016
|
|
|
|
|
|
|
} |
2017
|
|
|
|
|
|
|
|
2018
|
1208
|
|
|
|
|
1818
|
my @OutputAttributes; |
2019
|
|
|
|
|
|
|
|
2020
|
1208
|
|
|
|
|
2058
|
foreach my $Attr (@$Attributes) { |
2021
|
|
|
|
|
|
|
|
2022
|
344
|
|
|
|
|
703
|
my $lcAttr = lc $Attr->[0]; |
2023
|
|
|
|
|
|
|
|
2024
|
|
|
|
|
|
|
# If the attribute is deleted don't output it |
2025
|
344
|
50
|
|
|
|
867
|
unless ( $AttributeHash{$lcAttr} ) { |
2026
|
0
|
0
|
|
|
|
0
|
warn "defang_attributes Marking attribute $Attr->[0] for deletion" |
2027
|
|
|
|
|
|
|
if $Debug; |
2028
|
0
|
|
|
|
|
0
|
next; |
2029
|
|
|
|
|
|
|
} |
2030
|
|
|
|
|
|
|
|
2031
|
|
|
|
|
|
|
# And we attach the defang string here, if the attribute should be defanged |
2032
|
|
|
|
|
|
|
# (attribute could be undef for buggy html, eg <ahref=blah>) |
2033
|
344
|
100
|
50
|
|
|
836
|
$Attr->[0] = $Self->{DefangString} . ( $Attr->[0] || '' ) if $Attr->[7]; |
2034
|
|
|
|
|
|
|
|
2035
|
|
|
|
|
|
|
# Set this to undef, or this value will appear in the output |
2036
|
344
|
|
|
|
|
701
|
$Attr->[7] = undef; |
2037
|
|
|
|
|
|
|
|
2038
|
|
|
|
|
|
|
# Requote specials in attribute value |
2039
|
344
|
|
33
|
|
|
1022
|
my $QuoteRe = $QuoteRe{ $Attr->[3] } || $QuoteRe{""}; |
2040
|
344
|
50
|
|
|
|
1491
|
$Attr->[4] =~ s/$QuoteRe/'&'.$CharToEntity{$1}.';'/eg |
|
4
|
|
|
|
|
24
|
|
2041
|
|
|
|
|
|
|
if defined( $Attr->[4] ); |
2042
|
|
|
|
|
|
|
|
2043
|
|
|
|
|
|
|
# Add to attributes to output |
2044
|
344
|
|
|
|
|
669
|
push @OutputAttributes, $Attr; |
2045
|
|
|
|
|
|
|
|
2046
|
|
|
|
|
|
|
# Remove all processed attributes in the hash, so we can track ones that we added |
2047
|
344
|
|
|
|
|
817
|
delete $AttributeHash{$lcAttr}; |
2048
|
|
|
|
|
|
|
} |
2049
|
|
|
|
|
|
|
|
2050
|
|
|
|
|
|
|
# Append all remaining attribute keys (which must have been newly added attributes by |
2051
|
|
|
|
|
|
|
# the callback)and values in no particular order |
2052
|
1208
|
|
|
|
|
3435
|
while ( my ( $Key, $Value ) = each %AttributeHash ) { |
2053
|
0
|
|
|
|
|
0
|
my $Attr = [ " " . $Key, "", "=", '"', $$Value, '"', "" ]; |
2054
|
0
|
0
|
|
|
|
0
|
if ( defined $Attr->[4] ) { |
2055
|
0
|
|
|
|
|
0
|
$Attr->[4] =~ s/(['"<>&])/$CharToEntity{$1}/eg; |
|
0
|
|
|
|
|
0
|
|
2056
|
|
|
|
|
|
|
} |
2057
|
|
|
|
|
|
|
else { |
2058
|
0
|
|
|
|
|
0
|
@$Attr[ 2 .. 6 ] = (undef) x 5; |
2059
|
|
|
|
|
|
|
} |
2060
|
0
|
|
|
|
|
0
|
push @OutputAttributes, $Attr; |
2061
|
|
|
|
|
|
|
} |
2062
|
|
|
|
|
|
|
|
2063
|
|
|
|
|
|
|
# Replace attributes array with just the ones we want to output |
2064
|
1208
|
|
|
|
|
2524
|
@$Attributes = @OutputAttributes; |
2065
|
|
|
|
|
|
|
|
2066
|
|
|
|
|
|
|
# If its a known tag, we whitelist it |
2067
|
1208
|
100
|
100
|
|
|
5203
|
if ( $DefangTag == 2 && ( my $TagOps = $Tags{$lcTag} ) ) { |
2068
|
1177
|
|
|
|
|
1958
|
$DefangTag = 0; |
2069
|
|
|
|
|
|
|
} |
2070
|
|
|
|
|
|
|
|
2071
|
1208
|
50
|
66
|
|
|
5962
|
if ( $Self->{fix_mismatched_tags} |
|
|
|
33
|
|
|
|
|
2072
|
|
|
|
|
|
|
&& ( $DefangTag == 2 || $DefangTag == 0 ) ) |
2073
|
|
|
|
|
|
|
{ |
2074
|
|
|
|
|
|
|
my ( $OpenedTags, $OpenedTagsCount ) = |
2075
|
1208
|
|
|
|
|
2485
|
@$Self{qw(OpenedTags OpenedTagsCount)}; |
2076
|
|
|
|
|
|
|
|
2077
|
|
|
|
|
|
|
# Check for correctly nest closing tags |
2078
|
1208
|
100
|
66
|
|
|
3418
|
if ( $IsEndTag && $Self->{mismatched_tags_to_fix}->{$lcTag} ) { |
2079
|
148
|
|
|
|
|
317
|
my ( $Found, $ClosingTags ) = ( 0, '' ); |
2080
|
|
|
|
|
|
|
|
2081
|
|
|
|
|
|
|
# Tag not even open, just defang it |
2082
|
148
|
50
|
|
|
|
421
|
return 1 if !$OpenedTagsCount->{$lcTag}; |
2083
|
|
|
|
|
|
|
|
2084
|
|
|
|
|
|
|
# Check tag stack up to find mismatches |
2085
|
148
|
|
|
|
|
354
|
for my $PreviousOpenedTag ( reverse @$OpenedTags ) { |
2086
|
|
|
|
|
|
|
|
2087
|
148
|
50
|
33
|
|
|
767
|
if ( $PreviousOpenedTag eq $lcTag && !$ClosingTags ) { |
2088
|
|
|
|
|
|
|
|
2089
|
|
|
|
|
|
|
# Common case for correctly matched tags should pop tag |
2090
|
148
|
|
|
|
|
299
|
pop @$OpenedTags; |
2091
|
148
|
|
|
|
|
288
|
$OpenedTagsCount->{$lcTag}--; |
2092
|
148
|
|
|
|
|
264
|
$Found = 1; |
2093
|
148
|
|
|
|
|
345
|
last; |
2094
|
|
|
|
|
|
|
} |
2095
|
|
|
|
|
|
|
|
2096
|
|
|
|
|
|
|
# Check for tags that don't break out further |
2097
|
0
|
0
|
|
|
|
0
|
if ( my $NestList = $MismatchedTagNest{$PreviousOpenedTag} ) { |
2098
|
0
|
0
|
0
|
|
|
0
|
last if $NestList->{""} || $NestList->{$lcTag}; |
2099
|
|
|
|
|
|
|
} |
2100
|
|
|
|
|
|
|
|
2101
|
|
|
|
|
|
|
$ClosingTags .= |
2102
|
0
|
|
|
|
|
0
|
"<!-- close mismatched tag --></$PreviousOpenedTag>"; |
2103
|
|
|
|
|
|
|
} |
2104
|
|
|
|
|
|
|
|
2105
|
|
|
|
|
|
|
# Attach closing tags to the processed output (but call defang on them) |
2106
|
148
|
50
|
|
|
|
368
|
$$OutR .= $Self->defang($ClosingTags) if $ClosingTags; |
2107
|
|
|
|
|
|
|
|
2108
|
|
|
|
|
|
|
# If we had $ClosingTags, defanging should have popped stack correctly |
2109
|
148
|
0
|
33
|
|
|
469
|
if ( $ClosingTags |
|
|
|
33
|
|
|
|
|
2110
|
|
|
|
|
|
|
&& @$OpenedTags |
2111
|
|
|
|
|
|
|
&& $lcTag eq $OpenedTags->[ @$OpenedTags - 1 ] ) |
2112
|
|
|
|
|
|
|
{ |
2113
|
0
|
|
|
|
|
0
|
pop @$OpenedTags; |
2114
|
0
|
|
|
|
|
0
|
$OpenedTagsCount->{$lcTag}--; |
2115
|
|
|
|
|
|
|
} |
2116
|
|
|
|
|
|
|
|
2117
|
148
|
50
|
|
|
|
409
|
return 1 if !$Found; |
2118
|
|
|
|
|
|
|
} |
2119
|
|
|
|
|
|
|
|
2120
|
|
|
|
|
|
|
# Track browser implicitly opened tags |
2121
|
1208
|
100
|
100
|
|
|
4035
|
if ( !$IsEndTag && @$OpenedTags ) { |
2122
|
|
|
|
|
|
|
|
2123
|
|
|
|
|
|
|
# Are we expecting a particular tag based on last open tag? |
2124
|
164
|
100
|
|
|
|
625
|
if ( my $ImplicitTags = |
2125
|
|
|
|
|
|
|
$ImplicitOpenTags{ $OpenedTags->[ @$OpenedTags - 1 ] } ) |
2126
|
|
|
|
|
|
|
{ |
2127
|
|
|
|
|
|
|
|
2128
|
|
|
|
|
|
|
# We didn't get a tag we were expecting (eg <table><div> rather |
2129
|
|
|
|
|
|
|
# than <table><tbody><tr><td><div>), so insert opening tags recursively |
2130
|
22
|
|
|
|
|
29
|
my $lastTag = $lcTag; |
2131
|
22
|
|
33
|
|
|
92
|
while ( $ImplicitTags && !$ImplicitTags->{$lastTag} ) { |
2132
|
0
|
|
|
|
|
0
|
my $Tag = $ImplicitTags->{default}; |
2133
|
0
|
|
|
|
|
0
|
$$OutR .= |
2134
|
|
|
|
|
|
|
"<!-- $Tag implicit open due to $lastTag --><$Tag>"; |
2135
|
0
|
0
|
|
|
|
0
|
if ( $Self->{mismatched_tags_to_fix}->{$Tag} ) { |
2136
|
0
|
|
|
|
|
0
|
push @$OpenedTags, $Tag; |
2137
|
0
|
|
|
|
|
0
|
$OpenedTagsCount->{$Tag}++; |
2138
|
|
|
|
|
|
|
} |
2139
|
0
|
|
|
|
|
0
|
$ImplicitTags = $ImplicitOpenTags{$Tag}; |
2140
|
0
|
|
|
|
|
0
|
$lastTag = $Tag; |
2141
|
|
|
|
|
|
|
} |
2142
|
|
|
|
|
|
|
} |
2143
|
|
|
|
|
|
|
} |
2144
|
|
|
|
|
|
|
|
2145
|
|
|
|
|
|
|
# Track this tag that was opened |
2146
|
1208
|
100
|
66
|
|
|
3523
|
if ( !$IsEndTag && $Self->{mismatched_tags_to_fix}->{$lcTag} ) { |
2147
|
148
|
|
|
|
|
347
|
push @$OpenedTags, $lcTag; |
2148
|
148
|
|
|
|
|
378
|
$OpenedTagsCount->{$lcTag}++; |
2149
|
|
|
|
|
|
|
} |
2150
|
|
|
|
|
|
|
|
2151
|
|
|
|
|
|
|
} |
2152
|
|
|
|
|
|
|
|
2153
|
1208
|
|
|
|
|
2877
|
return $DefangTag; |
2154
|
|
|
|
|
|
|
|
2155
|
|
|
|
|
|
|
} |
2156
|
|
|
|
|
|
|
|
2157
|
|
|
|
|
|
|
=item I<cleanup_attribute($AttributeString)> |
2158
|
|
|
|
|
|
|
|
2159
|
|
|
|
|
|
|
Helper function to cleanup attributes |
2160
|
|
|
|
|
|
|
|
2161
|
|
|
|
|
|
|
=over 4 |
2162
|
|
|
|
|
|
|
|
2163
|
|
|
|
|
|
|
=item B<Method parameters> |
2164
|
|
|
|
|
|
|
|
2165
|
|
|
|
|
|
|
=over 4 |
2166
|
|
|
|
|
|
|
|
2167
|
|
|
|
|
|
|
=item I<$AttributeString> |
2168
|
|
|
|
|
|
|
|
2169
|
|
|
|
|
|
|
The value of the attribute. |
2170
|
|
|
|
|
|
|
|
2171
|
|
|
|
|
|
|
=back |
2172
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
=back |
2174
|
|
|
|
|
|
|
|
2175
|
|
|
|
|
|
|
=back |
2176
|
|
|
|
|
|
|
|
2177
|
|
|
|
|
|
|
=back |
2178
|
|
|
|
|
|
|
|
2179
|
|
|
|
|
|
|
=cut |
2180
|
|
|
|
|
|
|
|
2181
|
|
|
|
|
|
|
sub cleanup_attribute { |
2182
|
344
|
|
|
344
|
1
|
786
|
my ( $Self, $Attr, $AttrKey, $AttrVal ) = @_; |
2183
|
|
|
|
|
|
|
|
2184
|
344
|
50
|
|
|
|
840
|
return ( undef, '' ) unless defined($AttrVal); |
2185
|
|
|
|
|
|
|
|
2186
|
|
|
|
|
|
|
# Create a "stripped" attribute value which removes all embedded whitespace and control characters |
2187
|
|
|
|
|
|
|
|
2188
|
|
|
|
|
|
|
# Substitute character entities with actual characters |
2189
|
|
|
|
|
|
|
# (avoid invalid chars + surrogate pairs) |
2190
|
344
|
|
|
|
|
1397
|
$AttrVal =~ |
2191
|
11
|
100
|
66
|
|
|
183
|
s/(?:&#x|\\[xu]|%)(0?[\da-f]{1,6});?/defined($1) && hex($1) < 1_114_111 && hex($1) != 65535 && !(hex($1) > 55295 && hex($1) < 57344) ? chr(hex($1)) : ""/egi; |
2192
|
344
|
|
|
|
|
704
|
$AttrVal =~ |
2193
|
0
|
0
|
0
|
|
|
0
|
s/(?:&#)([\d]{1,7});?/defined($1) && $1 < 1_114_111 && $1 != 65535 && !($1 > 55295 && $1 < 57344)? chr($1) : ""/egi; |
2194
|
344
|
|
|
|
|
592
|
$AttrVal =~ |
2195
|
3
|
50
|
|
|
|
20
|
s/(?:&)(quot|apos|amp|lt|gt);?/$EntityToChar{lc($1)} || warn "no entity for: $1"/egi; |
2196
|
|
|
|
|
|
|
|
2197
|
344
|
|
|
|
|
588
|
my $AttrValStripped = $AttrVal; |
2198
|
344
|
|
|
|
|
3205
|
$AttrValStripped =~ s/[\x00-\x19]*//g; |
2199
|
344
|
|
|
|
|
1150
|
$AttrValStripped =~ |
2200
|
|
|
|
|
|
|
s/^\x20*//g; # http://ha.ckers.org/xss.html#XSS_Spaces_meta_chars |
2201
|
|
|
|
|
|
|
|
2202
|
344
|
50
|
|
|
|
957
|
warn "cleanup_attribute AttrValStripped=$AttrVal" if $Self->{Debug}; |
2203
|
344
|
|
|
|
|
957
|
return ( $AttrVal, $AttrValStripped ); |
2204
|
|
|
|
|
|
|
} |
2205
|
|
|
|
|
|
|
|
2206
|
|
|
|
|
|
|
=head2 get_applicable_charset |
2207
|
|
|
|
|
|
|
|
2208
|
|
|
|
|
|
|
Get the charset from the content meta attribute? |
2209
|
|
|
|
|
|
|
|
2210
|
|
|
|
|
|
|
=cut |
2211
|
|
|
|
|
|
|
|
2212
|
|
|
|
|
|
|
sub get_applicable_charset { |
2213
|
|
|
|
|
|
|
|
2214
|
0
|
|
|
0
|
1
|
|
my $Self = shift; |
2215
|
0
|
|
|
|
|
|
local $_ = shift; |
2216
|
0
|
|
|
|
|
|
my $Charset = shift; |
2217
|
|
|
|
|
|
|
|
2218
|
0
|
0
|
|
|
|
|
if ( !$Charset ) { |
2219
|
|
|
|
|
|
|
|
2220
|
|
|
|
|
|
|
# Look for <meta> tags |
2221
|
0
|
|
|
|
|
|
my @MetaAttrs = /<meta[\s\/]+(${AttributesRE})/gi; |
2222
|
|
|
|
|
|
|
|
2223
|
0
|
|
|
|
|
|
for (@MetaAttrs) { |
2224
|
0
|
|
|
|
|
|
my %Attrs; |
2225
|
|
|
|
|
|
|
|
2226
|
|
|
|
|
|
|
# Get attributes and their values |
2227
|
0
|
|
|
|
|
|
while ( |
2228
|
|
|
|
|
|
|
s/(?:($AttrKeyStartLineRE)(\s*))?(?:(=\s*)($AttrValRE)(\s*))?//so |
2229
|
|
|
|
|
|
|
) |
2230
|
|
|
|
|
|
|
{ |
2231
|
0
|
0
|
0
|
|
|
|
last if !defined($1) && !defined($4); |
2232
|
0
|
|
|
|
|
|
$Attrs{ lc $1 } = $4; |
2233
|
|
|
|
|
|
|
} |
2234
|
|
|
|
|
|
|
|
2235
|
|
|
|
|
|
|
# Look for charset information |
2236
|
0
|
0
|
|
|
|
|
if ( $Attrs{"content"} ) { |
2237
|
|
|
|
|
|
|
$Charset = |
2238
|
0
|
0
|
|
|
|
|
$Attrs{"content"} =~ m/charset\s*=\s*([^\s;'"`]+)[\s;'"`]*/i |
2239
|
|
|
|
|
|
|
? $1 |
2240
|
|
|
|
|
|
|
: $Charset; |
2241
|
|
|
|
|
|
|
} |
2242
|
|
|
|
|
|
|
} |
2243
|
|
|
|
|
|
|
} |
2244
|
|
|
|
|
|
|
|
2245
|
|
|
|
|
|
|
# Return fallback charset if no header or meta charset found |
2246
|
0
|
0
|
|
|
|
|
return $Charset ? $Charset : shift; |
2247
|
|
|
|
|
|
|
|
2248
|
|
|
|
|
|
|
} |
2249
|
|
|
|
|
|
|
|
2250
|
|
|
|
|
|
|
=head1 SEE ALSO |
2251
|
|
|
|
|
|
|
|
2252
|
|
|
|
|
|
|
I<HTML::Defang>, L<http://mailtools.anomy.net/>, L<http://htmlcleaner.sourceforge.net/>, I<HTML::StripScripts>, I<HTML::Detoxifier>, I<HTML::Sanitizer>, I<HTML::Scrubber> |
2253
|
|
|
|
|
|
|
|
2254
|
|
|
|
|
|
|
=cut |
2255
|
|
|
|
|
|
|
|
2256
|
|
|
|
|
|
|
=head1 AUTHOR |
2257
|
|
|
|
|
|
|
|
2258
|
|
|
|
|
|
|
Kurian Jose Aerthail E<lt>cpan@kurianja.fastmail.fmE<gt>. Thanks to Rob Mueller E<lt>cpan@robm.fastmail.fmE<gt> for initial code, guidance and support and bug fixes. |
2259
|
|
|
|
|
|
|
|
2260
|
|
|
|
|
|
|
=cut |
2261
|
|
|
|
|
|
|
|
2262
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
2263
|
|
|
|
|
|
|
|
2264
|
|
|
|
|
|
|
HTML::Declaw is a modifed version of HTML::Defang which has the following license: |
2265
|
|
|
|
|
|
|
|
2266
|
|
|
|
|
|
|
Copyright (C) 2003-2009 by The FastMail Partnership |
2267
|
|
|
|
|
|
|
|
2268
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
2269
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
2270
|
|
|
|
|
|
|
|
2271
|
|
|
|
|
|
|
=cut |
2272
|
|
|
|
|
|
|
|
2273
|
|
|
|
|
|
|
1; |