line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package CSS::Inliner::TreeBuilder; |
2
|
26
|
|
|
26
|
|
230
|
use strict; |
|
26
|
|
|
|
|
60
|
|
|
26
|
|
|
|
|
783
|
|
3
|
26
|
|
|
26
|
|
155
|
use warnings; |
|
26
|
|
|
|
|
81
|
|
|
26
|
|
|
|
|
907
|
|
4
|
|
|
|
|
|
|
|
5
|
26
|
|
|
26
|
|
158
|
use Storable qw(dclone); |
|
26
|
|
|
|
|
72
|
|
|
26
|
|
|
|
|
1105
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
26
|
|
|
BEGIN { |
8
|
|
|
|
|
|
|
# $HTML::TreeBuilder::DEBUG = 1; |
9
|
|
|
|
|
|
|
} |
10
|
|
|
|
|
|
|
|
11
|
26
|
|
|
26
|
|
154
|
use base qw(HTML::TreeBuilder); |
|
26
|
|
|
|
|
49
|
|
|
26
|
|
|
|
|
20541
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=pod |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
CSS::Inliner::TreeBuilder - Parser that builds an HTML syntax tree |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use CSS::Inliner::TreeBuilder; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
foreach my $file_name (@ARGV) { |
24
|
|
|
|
|
|
|
my $tree = CSS::Inliner::TreeBuilder->new(); |
25
|
|
|
|
|
|
|
$tree->parse_file($file_name); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
print "Hey, here's a dump of the parse tree of $file_name:\n"; |
28
|
|
|
|
|
|
|
$tree->dump(); # a method we inherit from HTML::Element |
29
|
|
|
|
|
|
|
print "And here it is, bizarrely rerendered as HTML:\n", $tree->as_HTML, "\n"; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
$tree = $tree->delete(); |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 DESCRIPTION |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Class to handle parsing of generic HTML |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
This sub-module is derived from HTML::TreeBuilder. The aforementioned module is almost completely incapable |
39
|
|
|
|
|
|
|
of handling non-standard HTML4 documents commonly seen in the wild, let alone HTML5 documents. This module |
40
|
|
|
|
|
|
|
basically performs some minor adjustments to the way parsing and printing occur such that an acceptable result |
41
|
|
|
|
|
|
|
can be reached when handling real world documents. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=cut |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# Render the tree as HTML, tidying the result when running in relaxed mode.
#
# Every argument is forwarded to the parent class's as_HTML. When
# implicit_tags() is 0 the rendered text is post-processed: surrounding
# whitespace is removed and one leading space is dropped from each line.
# In any other mode the parent's output is returned untouched.
#
# Returns the HTML as a single string.
sub as_HTML {
    my ($self, @args) = @_;

    my $relaxed  = $self->implicit_tags() == 0;
    my $rendered = $self->SUPER::as_HTML(@args);

    # Strict mode: nothing to clean up.
    return $rendered unless $relaxed;

    # Relaxed mode may inadvertently add whitespace before and after the
    # document, so trim both ends.
    $rendered =~ s/^\s+|\s+$//g;

    # The indentation comes out one space too deep; remove a single leading
    # space from every line. Indentation may still be imperfect, but this is
    # the best that can be done without a dedicated indentation library.
    my @lines = split /\n/, $rendered;
    s/^ // for @lines;

    return join("\n", @lines);
}
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Parse markup into this tree, shielding "<!...>" declarations when running
# in relaxed mode.
#
# Arguments are those accepted by the parent class's parse_content; the first
# one is the markup to parse. Always returns an empty list (undef in scalar
# context).
#
# When implicit_tags() is 0:
#   1. doctype declarations are rewritten into placeholder <decl ~pi="1">
#      elements before parsing, because the parser is too strict about them;
#   2. after parsing, the root element is turned into an empty '~literal'
#      node;
#   3. each placeholder is replaced by a '~literal' node whose text is the
#      re-assembled "<!...>" declaration.
# In any other mode the call is delegated to the parent class unchanged.
sub parse_content {
    my $self = shift;

    if ($self->implicit_tags() == 0) {
        # Work on a private copy of the markup. @_ aliases the caller's
        # variables, so substituting on $_[0] directly would rewrite the
        # caller's string and die with "Modification of a read-only value
        # attempted" when a string literal is passed in.
        my ($content, @rest) = @_;

        # Only plain strings are rewritten; undefined or reference arguments
        # are handed to the parent as they are.
        if (defined $content && !ref $content) {
            # protect doctype declarations... parser is too strict here
            # NOTE(review): the pattern was damaged in the copy this was
            # recovered from; the first capture group and the \s+ between the
            # groups are reconstructed -- confirm against the upstream module.
            $content =~ s/\<!(doctype)\s+([^\>]+)\>/\<decl ~pi="1"\>$1 $2<\/decl\>/gi;
        }

        $self->SUPER::parse_content($content, @rest);

        # Turn the root into a literal pseudo-element with no text of its own.
        $self->{_tag} = '~literal';
        $self->{text} = '';

        # Swap every placeholder for a literal node carrying the original
        # declaration text.
        for my $placeholder ($self->look_down('_tag', 'decl', '~pi', '1')) {
            my $declaration = '<!' . $placeholder->as_text() . '>';
            my $literal = HTML::Element->new('~literal', 'text' => $declaration);

            $placeholder->replace_with($literal);
        }
    }
    else {
        $self->SUPER::parse_content(@_);
    }

    return;
}
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
1; |