line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::Defang; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
HTML::Defang - Cleans HTML as well as CSS of scripting and other executable contents, and neutralises XSS attacks. |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
my $InputHtml = ""; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
my $Defang = HTML::Defang->new( |
12
|
|
|
|
|
|
|
context => $Self, |
13
|
|
|
|
|
|
|
fix_mismatched_tags => 1, |
14
|
|
|
|
|
|
|
tags_to_callback => [ br embed img ], |
15
|
|
|
|
|
|
|
tags_callback => \&DefangTagsCallback, |
16
|
|
|
|
|
|
|
url_callback => \&DefangUrlCallback, |
17
|
|
|
|
|
|
|
css_callback => \&DefangCssCallback, |
18
|
|
|
|
|
|
|
attribs_to_callback => [ qw(border src) ], |
19
|
|
|
|
|
|
|
attribs_callback => \&DefangAttribsCallback, |
20
|
|
|
|
|
|
|
content_callback => \&ContentCallback, |
21
|
|
|
|
|
|
|
); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
my $SanitizedHtml = $Defang->defang($InputHtml); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Callback for custom handling specific HTML tags |
26
|
|
|
|
|
|
|
sub DefangTagsCallback { |
27
|
|
|
|
|
|
|
my ($Self, $Defang, $OpenAngle, $lcTag, $IsEndTag, $AttributeHash, $CloseAngle, $HtmlR, $OutR) = @_; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Explicitly defang this tag, eventhough safe |
30
|
|
|
|
|
|
|
return DEFANG_ALWAYS if $lcTag eq 'br'; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# Explicitly whitelist this tag, eventhough unsafe |
33
|
|
|
|
|
|
|
return DEFANG_NONE if $lcTag eq 'embed'; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# I am not sure what to do with this tag, so process as HTML::Defang normally would |
36
|
|
|
|
|
|
|
return DEFANG_DEFAULT if $lcTag eq 'img'; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Callback for custom handling URLs in HTML attributes as well as style tag/attribute declarations |
40
|
|
|
|
|
|
|
sub DefangUrlCallback { |
41
|
|
|
|
|
|
|
my ($Self, $Defang, $lcTag, $lcAttrKey, $AttrValR, $AttributeHash, $HtmlR) = @_; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Explicitly allow this URL in tag attributes or stylesheets |
44
|
|
|
|
|
|
|
return DEFANG_NONE if $$AttrValR =~ /safesite.com/i; |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Explicitly defang this URL in tag attributes or stylesheets |
47
|
|
|
|
|
|
|
return DEFANG_ALWAYS if $$AttrValR =~ /evilsite.com/i; |
48
|
|
|
|
|
|
|
} |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# Callback for custom handling style tags/attributes |
51
|
|
|
|
|
|
|
sub DefangCssCallback { |
52
|
|
|
|
|
|
|
my ($Self, $Defang, $Selectors, $SelectorRules, $Tag, $IsAttr) = @_; |
53
|
|
|
|
|
|
|
my $i = 0; |
54
|
|
|
|
|
|
|
foreach (@$Selectors) { |
55
|
|
|
|
|
|
|
my $SelectorRule = $$SelectorRules[$i]; |
56
|
|
|
|
|
|
|
foreach my $KeyValueRules (@$SelectorRule) { |
57
|
|
|
|
|
|
|
foreach my $KeyValueRule (@$KeyValueRules) { |
58
|
|
|
|
|
|
|
my ($Key, $Value) = @$KeyValueRule; |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Comment out any '!important' directive |
61
|
|
|
|
|
|
|
$$KeyValueRule[2] = DEFANG_ALWAYS if $Value =~ '!important'; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# Comment out any 'position=fixed;' declaration |
64
|
|
|
|
|
|
|
$$KeyValueRule[2] = DEFANG_ALWAYS if $Key =~ 'position' && $Value =~ 'fixed'; |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
$i++; |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Callback for custom handling HTML tag attributes |
72
|
|
|
|
|
|
|
sub DefangAttribsCallback { |
73
|
|
|
|
|
|
|
my ($Self, $Defang, $lcTag, $lcAttrKey, $AttrValR, $HtmlR) = @_; |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# Change all 'border' attribute values to zero. |
76
|
|
|
|
|
|
|
$$AttrValR = '0' if $lcAttrKey eq 'border'; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# Defang all 'src' attributes |
79
|
|
|
|
|
|
|
return DEFANG_ALWAYS if $lcAttrKey eq 'src'; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
return DEFANG_NONE; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# Callback for all content between tags (except
|
|
|
50
|
|
|
|
|
|
1586
|
110
|
|
|
|
|
413
|
$Content = $1; |
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
# No ending style tag |
1589
|
|
|
|
|
|
|
} elsif (m{\G([^<]*)}gcis) { |
1590
|
2
|
|
|
|
|
9
|
$Content = $1; |
1591
|
2
|
|
|
|
|
7
|
$ClosingStyleTagPresent = 0; |
1592
|
|
|
|
|
|
|
} |
1593
|
|
|
|
|
|
|
} |
1594
|
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
# Handle any wrapping HTML comments. If no comments, we add |
1596
|
112
|
|
|
|
|
262
|
my ($OpeningHtmlComment, $ClosingHtmlComment) = ('', ''); |
1597
|
112
|
100
|
|
|
|
339
|
$OpeningHtmlComment = $Content =~ s{^(\s*\s*)$}{} ? " " . $1 : " -->"; |
1599
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
# Check for large bogus style data with mostly HTML tags and blat it |
1601
|
112
|
50
|
|
|
|
259
|
if (length $Content > 16384) { |
1602
|
0
|
|
|
|
|
0
|
my $TagCount = 0; |
1603
|
0
|
|
|
|
|
0
|
$TagCount++ while $Content =~ m{?\w+\b[^>]*>}g; |
1604
|
0
|
0
|
|
|
|
0
|
if ($TagCount > length($Content)/256) { |
1605
|
0
|
|
|
|
|
0
|
$Content = ''; |
1606
|
|
|
|
|
|
|
} |
1607
|
|
|
|
|
|
|
} |
1608
|
|
|
|
|
|
|
|
1609
|
112
|
|
|
|
|
312
|
my $StyleOut = $Self->defang_style_text($Content, $lcTag, 0, undef, $HtmlR, $OutR); |
1610
|
|
|
|
|
|
|
|
1611
|
112
|
|
|
|
|
471
|
$Self->add_to_output($OpeningHtmlComment . $StyleOut . $ClosingHtmlComment); |
1612
|
112
|
100
|
|
|
|
283
|
$Self->add_to_output("") if !$ClosingStyleTagPresent; |
1613
|
|
|
|
|
|
|
|
1614
|
112
|
|
|
|
|
297
|
return $Defang; |
1615
|
|
|
|
|
|
|
} |
1616
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
=item I |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
Defang some raw css data and return the defanged content |
1620
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
=over 4 |
1622
|
|
|
|
|
|
|
|
1623
|
|
|
|
|
|
|
=item B |
1624
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
=over 4 |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
=item I<$Content> |
1628
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
The input style string that is defanged. |
1630
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
=item I<$IsAttr> |
1632
|
|
|
|
|
|
|
|
1633
|
|
|
|
|
|
|
True if $Content is from an attribute, otherwise from a |