File Coverage

lib/CSS/Inliner/TreeBuilder.pm
Criterion Covered Total %
statement 36 36 100.0
branch 4 4 100.0
condition n/a
subroutine 7 7 100.0
pod 2 2 100.0
total 49 49 100.0


line stmt bran cond sub pod time code
1             package CSS::Inliner::TreeBuilder;
2 26     26   162 use strict;
  26         54  
  26         687  
3 26     26   129 use warnings;
  26         50  
  26         683  
4              
5 26     26   413 use Storable qw(dclone);
  26         48  
  26         987  
6              
7       26     BEGIN {
              # Debug hook: this block does nothing unless the assignment below is uncommented.
              # It is placed ahead of the use base line for HTML::TreeBuilder; presumably the
              # flag has to be set before that module is loaded -- TODO confirm.
8             # $HTML::TreeBuilder::DEBUG = 1;
9             }
10              
11 26     26   135 use base qw(HTML::TreeBuilder);
  26         45  
  26         17020  
12              
13             =pod
14              
15             =head1 NAME
16              
17             CSS::Inliner::TreeBuilder - Parser that builds an HTML syntax tree
18              
19             =head1 SYNOPSIS
20              
21             use CSS::Inliner::TreeBuilder;
22              
23             foreach my $file_name (@ARGV) {
24             my $tree = CSS::Inliner::TreeBuilder->new();
25             $tree->parse_file($file_name);
26              
27             print "Hey, here's a dump of the parse tree of $file_name:\n";
28             $tree->dump(); # a method we inherit from HTML::Element
29             print "And here it is, bizarrely rerendered as HTML:\n", $tree->as_HTML, "\n";
30              
31             $tree = $tree->delete();
32             }
33              
34             =head1 DESCRIPTION
35              
36             Class to handle parsing of generic HTML
37              
38             This sub-module is derived from HTML::TreeBuilder. The aforementioned module is almost completely incapable
39             of handling non-standard HTML4 documents commonly seen in the wild, let alone HTML5 documents. This module
40             basically performs some minor adjustments to the way parsing and printing occur such that an acceptable result
41             can be reached when handling real world documents.
42              
43             =cut
44              
45             sub as_HTML {
               # Serialize the tree to an HTML string by delegating to the parent class as_HTML.
               # All arguments after the invocant are forwarded to the parent unchanged.
               # When implicit_tags() is 0 (the relaxed mode mentioned below) the parent output
               # is post-processed: surrounding whitespace is trimmed and one leading space is
               # removed from every line. Otherwise the parent output is returned untouched.
               # Returns the resulting HTML string.
46 27     27 1 69 my $self = shift;
47              
48 27         59 my $html;
49 27 100       133 if ($self->implicit_tags() == 0) {
50 1         30 $html = $self->SUPER::as_HTML(@_);
51              
52             #strip leading and trailing whitespace that our relaxed mode may have
53             #inadvertently added
54 1         3324 $html =~ s/^\s+|\s+$//g;
55              
56             # our indentation is off by 1 space: drop a single leading space from each line
57 1         10 my @lines = split /\n/, $html;
58 1         5 for (my $count = 0; $count < scalar @lines; $count++) {
59 9         30 $lines[$count] =~ s/^ //;
60             }
61              
62             # put the html back together after whitespace processing; indentation problems
63             # probably remain, but this is the best we can do without some sort of indentation library
64 1         5 $html = join("\n",@lines);
65             }
66             else {
               # implicit tags enabled: hand back the parent output as-is
67 26         560 $html = $self->SUPER::as_HTML(@_);
68             }
69              
70 27         44589 return $html;
71             }
72              
73             sub parse_content {
               # Parse the supplied content into this tree by delegating to the parent class
               # parse_content. All arguments after the invocant are forwarded to the parent.
               # When implicit_tags() is 0 the first argument is rewritten in place beforehand
               # (the substitution targets $_[0], which aliases the caller value), and the tree
               # is adjusted afterwards. Always returns an empty list.
74 55     55 1 115 my $self = shift;
75              
76 55 100       229 if ($self->implicit_tags() == 0) {
77             # protect doctype declarations... parser is too strict here
               # NOTE(review): the substitution below does not parse as shown (unbalanced closing
               # parenthesis, and $2 is used with no second capture group visible). Text that
               # resembles markup appears to have been dropped from this listing; recover the
               # statement from the original module before relying on it.
78 29         466 $_[0] =~ s/\]+)\>/\$1 $2<\/decl\>/gi;
79              
80 29         160 $self->SUPER::parse_content(@_);
81              
               # retag the root as a ~literal node with empty text; presumably so the root itself
               # contributes no markup when serialized -- TODO confirm against HTML::Element
82 29         146759 $self->{_tag} = '~literal';
83 29         106 $self->{text} = '';
84              
               # find every decl element whose ~pi attribute is 1 and swap it for a ~literal
               # element carrying the rebuilt text
85 29         201 my @decls = $self->look_down('_tag','decl','~pi','1');
               # NOTE(review): the assignment to $text inside this loop has unbalanced quotes and
               # no receiver for as_text(); it looks truncated in the same way as the substitution
               # above -- verify against the original module.
86 29         5712 foreach my $decl (@decls) {
87 6         37 my $text = 'as_text() . '>';
88 6         245 my $literal = HTML::Element->new('~literal', 'text' => $text );
89              
90 6         231 $decl->replace_with($literal);
91             }
92             }
93             else {
94 26         400 $self->SUPER::parse_content(@_);
95             }
96              
97 55         140436 return();
98             }
99              
100             1;