line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package HTML::Defang; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
HTML::Defang - Cleans HTML as well as CSS of scripting and other executable contents, and neutralises XSS attacks. |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
my $InputHtml = ""; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
my $Defang = HTML::Defang->new( |
14
|
|
|
|
|
|
|
context => $Self, |
15
|
|
|
|
|
|
|
fix_mismatched_tags => 1, |
16
|
|
|
|
|
|
|
tags_to_callback => [ br embed img ], |
17
|
|
|
|
|
|
|
tags_callback => \&DefangTagsCallback, |
18
|
|
|
|
|
|
|
url_callback => \&DefangUrlCallback, |
19
|
|
|
|
|
|
|
css_callback => \&DefangCssCallback, |
20
|
|
|
|
|
|
|
attribs_to_callback => [ qw(border src) ], |
21
|
|
|
|
|
|
|
attribs_callback => \&DefangAttribsCallback |
22
|
|
|
|
|
|
|
); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
my $SanitizedHtml = $Defang->defang($InputHtml); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Callback for custom handling specific HTML tags |
27
|
|
|
|
|
|
|
sub DefangTagsCallback { |
28
|
|
|
|
|
|
|
my ($Self, $Defang, $OpenAngle, $lcTag, $IsEndTag, $AttributeHash, $CloseAngle, $HtmlR, $OutR) = @_; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Explicitly defang this tag, eventhough safe |
31
|
|
|
|
|
|
|
return DEFANG_ALWAYS if $lcTag eq 'br'; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Explicitly whitelist this tag, eventhough unsafe |
34
|
|
|
|
|
|
|
return DEFANG_NONE if $lcTag eq 'embed'; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# I am not sure what to do with this tag, so process as HTML::Defang normally would |
37
|
|
|
|
|
|
|
return DEFANG_DEFAULT if $lcTag eq 'img'; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Callback for custom handling URLs in HTML attributes as well as style tag/attribute declarations |
41
|
|
|
|
|
|
|
sub DefangUrlCallback { |
42
|
|
|
|
|
|
|
my ($Self, $Defang, $lcTag, $lcAttrKey, $AttrValR, $AttributeHash, $HtmlR) = @_; |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# Explicitly allow this URL in tag attributes or stylesheets |
45
|
|
|
|
|
|
|
return DEFANG_NONE if $$AttrValR =~ /safesite.com/i; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# Explicitly defang this URL in tag attributes or stylesheets |
48
|
|
|
|
|
|
|
return DEFANG_ALWAYS if $$AttrValR =~ /evilsite.com/i; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Callback for custom handling style tags/attributes |
52
|
|
|
|
|
|
|
sub DefangCssCallback { |
53
|
|
|
|
|
|
|
my ($Self, $Defang, $Selectors, $SelectorRules, $Tag, $IsAttr) = @_; |
54
|
|
|
|
|
|
|
my $i = 0; |
55
|
|
|
|
|
|
|
foreach (@$Selectors) { |
56
|
|
|
|
|
|
|
my $SelectorRule = $$SelectorRules[$i]; |
57
|
|
|
|
|
|
|
foreach my $KeyValueRules (@$SelectorRule) { |
58
|
|
|
|
|
|
|
foreach my $KeyValueRule (@$KeyValueRules) { |
59
|
|
|
|
|
|
|
my ($Key, $Value) = @$KeyValueRule; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Comment out any '!important' directive |
62
|
|
|
|
|
|
|
$$KeyValueRule[2] = DEFANG_ALWAYS if $Value =~ '!important'; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# Comment out any 'position=fixed;' declaration |
65
|
|
|
|
|
|
|
$$KeyValueRule[2] = DEFANG_ALWAYS if $Key =~ 'position' && $Value =~ 'fixed'; |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
$i++; |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Callback for custom handling HTML tag attributes |
73
|
|
|
|
|
|
|
sub DefangAttribsCallback { |
74
|
|
|
|
|
|
|
my ($Self, $Defang, $lcTag, $lcAttrKey, $AttrValR, $HtmlR) = @_; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# Change all 'border' attribute values to zero. |
77
|
|
|
|
|
|
|
$$AttrValR = '0' if $lcAttrKey eq 'border'; |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# Defang all 'src' attributes |
80
|
|
|
|
|
|
|
return DEFANG_ALWAYS if $lcAttrKey eq 'src'; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
return DEFANG_NONE; |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head1 DESCRIPTION |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
This module accepts an input HTML and/or CSS string and removes any executable code including scripting, embedded objects, applets, etc., and neutralises any XSS attacks. A whitelist based approach is used which means only HTML known to be safe is allowed through. |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
HTML::Defang uses a custom html tag parser. The parser has been designed and tested to work with nasty real world html and to try and emulate as close as possible what browsers actually do with strange looking constructs. The test suite has been built based on examples from a range of sources such as http://ha.ckers.org/xss.html and http://imfo.ru/csstest/css_hacks/import.php to ensure that as many as possible XSS attack scenarios have been dealt with. |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
HTML::Defang can make callbacks to client code when it encounters the following: |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=over 4 |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=item * |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
When a specified tag is parsed |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=item * |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
When a specified attribute is parsed |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=item * |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
When a URL is parsed as part of an HTML attribute, or CSS property value. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=item * |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
When style data is parsed, as part of an HTML style attribute, or as part of an HTML
|
|
|
50
|
|
|
|
|
|
1506
|
107
|
|
|
|
|
335
|
$Content = $1; |
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
# No ending style tag |
1509
|
|
|
|
|
|
|
} elsif (m{\G([^<]*)}gcis) { |
1510
|
1
|
|
|
|
|
2
|
$Content = $1; |
1511
|
1
|
|
|
|
|
3
|
$ClosingStyleTagPresent = 0; |
1512
|
|
|
|
|
|
|
} |
1513
|
|
|
|
|
|
|
# Its a style attribute |
1514
|
|
|
|
|
|
|
} else { |
1515
|
|
|
|
|
|
|
# Avoid undef warning for style attr with no value. eg |
1516
|
202
|
50
|
|
|
|
516
|
$Content = defined($_) ? $_ : ''; |
1517
|
|
|
|
|
|
|
} |
1518
|
|
|
|
|
|
|
} |
1519
|
|
|
|
|
|
|
|
1520
|
|
|
|
|
|
|
# Handle any wrapping HTML comments. If no comments, we add |
1521
|
310
|
|
|
|
|
470
|
my ($OpeningHtmlComment, $ClosingHtmlComment) = ('', ''); |
1522
|
310
|
100
|
|
|
|
550
|
if (!$IsAttr) { |
1523
|
108
|
100
|
|
|
|
261
|
$OpeningHtmlComment = $Content =~ s{^(\s*\s*)$}{} ? $1 : "-->"; |
1525
|
|
|
|
|
|
|
} |
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
# Clean up all comments, expand character escapes and such |
1528
|
310
|
|
|
|
|
791
|
$Self->cleanup_style($Content, $IsAttr); |
1529
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
# Style attributes can optionally have selector type elements, so we check whether we |
1531
|
|
|
|
|
|
|
# have a '{' in $Content: if yes, its style data with selector type elements |
1532
|
310
|
|
|
|
|
794
|
my $Naked = $Content !~ m/\{/; |
1533
|
310
|
50
|
|
|
|
591
|
warn "defang_style Naked=$Naked" if $Self->{Debug}; |
1534
|
|
|
|
|
|
|
|
1535
|
|
|
|
|
|
|
# And suitably change the regex to match the data |
1536
|
310
|
100
|
|
|
|
2197
|
my $SelectorRuleRE = $Naked ? qr/(\s*)()()()($StyleRules)()(\s*)/o : |
1537
|
|
|
|
|
|
|
qr/(\s*)((?:$StyleSelectors)?)(\s*)(\{)($StyleRules)(\})(\s*)/o; |
1538
|
|
|
|
|
|
|
|
1539
|
310
|
|
|
|
|
390
|
my (@Selectors, @SelectorRules, %ExtraData ); |
1540
|
|
|
|
|
|
|
|
1541
|
|
|
|
|
|
|
# Now we parse the selectors and declarations |
1542
|
310
|
|
|
|
|
7872
|
while ($Content =~ m{\G.*?$SelectorRuleRE}sgc) { |
1543
|
513
|
|
|
|
|
1383
|
my ($Selector, $SelectorRule) = ($2, $5); |
1544
|
513
|
100
|
100
|
|
|
1995
|
last if $Selector eq '' && $SelectorRule eq ''; |
1545
|
379
|
|
|
|
|
548
|
push @Selectors, $Selector; |
1546
|
379
|
|
|
|
|
510
|
push @SelectorRules, $SelectorRule; |
1547
|
379
|
50
|
|
|
|
750
|
warn "defang_style Selector=$Selector" if $Self->{Debug}; |
1548
|
379
|
50
|
|
|
|
635
|
warn "defang_style SelectorRule=$SelectorRule" if $Self->{Debug}; |
1549
|
379
|
|
|
|
|
3976
|
$ExtraData{$Selector} = [ $1, $3, $4, $6, $7]; |
1550
|
|
|
|
|
|
|
} |
1551
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
# Check declaration elements for defanging |
1553
|
310
|
|
|
|
|
829
|
$Self->defang_stylerule(\@Selectors, \@SelectorRules, $lcTag, $IsAttr, $HtmlR, $OutR); |
1554
|
|
|
|
|
|
|
|
1555
|
310
|
|
|
|
|
505
|
my $StyleOut = ""; |
1556
|
|
|
|
|
|
|
|
1557
|
|
|
|
|
|
|
# Re-create the style data |
1558
|
310
|
|
|
|
|
416
|
foreach my $Selector (@Selectors) { |
1559
|
|
|
|
|
|
|
|
1560
|
379
|
|
|
|
|
498
|
my $SelectorRule = shift @SelectorRules; |
1561
|
379
|
|
|
|
|
538
|
my $Spaces = $ExtraData{$Selector}; |
1562
|
379
|
50
|
|
|
|
909
|
my ($BeforeSelector, $AfterSelector, $OpenBrace, $CloseBrace, $AfterRule) = @$Spaces if $Spaces; |
1563
|
379
|
50
|
|
|
|
683
|
($BeforeSelector, $AfterSelector, $AfterRule) = ("", " ", "\n") unless $ExtraData{$Selector}; |
1564
|
379
|
0
|
33
|
|
|
615
|
($OpenBrace, $CloseBrace) = ("{", "}") if !$Spaces && !$IsAttr; |
1565
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
# Put back the rule together |
1567
|
379
|
50
|
|
|
|
597
|
if (defined($Selector)) { |
1568
|
379
|
50
|
|
|
|
641
|
$StyleOut .= $BeforeSelector if defined($BeforeSelector); |
1569
|
379
|
|
|
|
|
428
|
$StyleOut .= $Selector; |
1570
|
379
|
50
|
|
|
|
606
|
$StyleOut .= $AfterSelector if defined($AfterSelector); |
1571
|
379
|
50
|
|
|
|
594
|
$StyleOut .= $OpenBrace if defined($OpenBrace); |
1572
|
379
|
100
|
|
|
|
674
|
$StyleOut .= $SelectorRule if defined($SelectorRule); |
1573
|
379
|
50
|
|
|
|
583
|
$StyleOut .= $CloseBrace if defined($CloseBrace); |
1574
|
379
|
50
|
|
|
|
1007
|
$StyleOut .= $AfterRule if defined($AfterRule); |
1575
|
|
|
|
|
|
|
} |
1576
|
|
|
|
|
|
|
|
1577
|
|
|
|
|
|
|
} |
1578
|
|
|
|
|
|
|
|
1579
|
310
|
50
|
|
|
|
641
|
warn "defang_style StyleOut=$StyleOut" if $Self->{Debug}; |
1580
|
|
|
|
|
|
|
|
1581
|
310
|
100
|
|
|
|
469
|
if ($IsAttr) { |
1582
|
202
|
|
|
|
|
300
|
$$HtmlR = $StyleOut; |
1583
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
} else { |
1585
|
108
|
|
|
|
|
364
|
$Self->add_to_output($OpeningHtmlComment . $StyleOut . $ClosingHtmlComment); |
1586
|
108
|
100
|
|
|
|
221
|
$Self->add_to_output("") if !$ClosingStyleTagPresent; |
1587
|
|
|
|
|
|
|
} |
1588
|
|
|
|
|
|
|
|
1589
|
|
|
|
|
|
|
# We don't want |