| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Test::HTML::Content; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | require 5.005_62; | 
| 4 | 18 |  |  | 18 |  | 218545 | use strict; | 
|  | 18 |  |  |  |  | 59 |  | 
|  | 18 |  |  |  |  | 481 |  | 
| 5 | 18 |  |  | 18 |  | 79 | use File::Spec; | 
|  | 18 |  |  |  |  | 29 |  | 
|  | 18 |  |  |  |  | 430 |  | 
| 6 | 18 |  |  | 18 |  | 94 | use Carp qw(carp croak); | 
|  | 18 |  |  |  |  | 31 |  | 
|  | 18 |  |  |  |  | 927 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 18 |  |  | 18 |  | 7537 | use HTML::TokeParser; | 
|  | 18 |  |  |  |  | 146088 |  | 
|  | 18 |  |  |  |  | 737 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | # we want to stay compatible to 5.5 and use warnings if | 
| 11 |  |  |  |  |  |  | # we can | 
| 12 | 18 |  |  | 18 |  | 86 | eval 'use warnings' if $] >= 5.006; | 
|  | 18 |  |  |  |  | 32 |  | 
|  | 18 |  |  |  |  | 262 |  | 
| 13 | 18 |  |  | 18 |  | 109 | use Test::Builder; | 
|  | 18 |  |  |  |  | 35 |  | 
|  | 18 |  |  |  |  | 600 |  | 
| 14 |  |  |  |  |  |  | require Exporter; | 
| 15 |  |  |  |  |  |  |  | 
| 16 | 18 |  |  | 18 |  | 87 | use vars qw/@ISA @EXPORT_OK @EXPORT $VERSION $can_xpath/; | 
|  | 18 |  |  |  |  | 30 |  | 
|  | 18 |  |  |  |  | 1115 |  | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | @ISA = qw(Exporter); | 
| 19 |  |  |  |  |  |  |  | 
| 20 | 18 |  |  | 18 |  | 89 | use vars qw( $tidy ); | 
|  | 18 |  |  |  |  | 59 |  | 
|  | 18 |  |  |  |  | 1020 |  | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | # DONE: | 
| 23 |  |  |  |  |  |  | # * use Test::Builder; | 
| 24 |  |  |  |  |  |  | # * Add comment_ok() method | 
| 25 |  |  |  |  |  |  | # * Allow RE instead of plain strings in the functions (for tag attributes and comments) | 
| 26 |  |  |  |  |  |  | # * Create a function to check the DOCTYPE and other directives | 
| 27 |  |  |  |  |  |  | # * Have a better way to diagnose ignored candidates in tag_ok(), tag_count | 
| 28 |  |  |  |  |  |  | #   and no_tag() in case a test fails | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | @EXPORT = qw( | 
| 31 |  |  |  |  |  |  | link_ok no_link link_count | 
| 32 |  |  |  |  |  |  | tag_ok no_tag tag_count | 
| 33 |  |  |  |  |  |  | comment_ok no_comment comment_count | 
| 34 |  |  |  |  |  |  | has_declaration no_declaration | 
| 35 |  |  |  |  |  |  | text_ok no_text text_count | 
| 36 |  |  |  |  |  |  | title_ok no_title | 
| 37 |  |  |  |  |  |  | xpath_ok no_xpath xpath_count | 
| 38 |  |  |  |  |  |  | ); | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | $VERSION = '0.11'; | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | my $Test = Test::Builder->new; | 
| 43 |  |  |  |  |  |  |  | 
| 44 | 18 |  |  | 18 |  | 87 | use vars qw($HTML_PARSER_StripsTags $parsing_method); | 
|  | 18 |  |  |  |  | 35 |  | 
|  | 18 |  |  |  |  | 64534 |  | 
| 45 |  |  |  |  |  |  | $parsing_method = 'parse_html_string'; | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | # Cribbed from the Test::Builder synopsis | 
| 48 |  |  |  |  |  |  | sub import { | 
| 49 | 20 |  |  | 20 |  | 595 | my($self) = shift; | 
| 50 | 20 |  |  |  |  | 43 | my $pack = caller; | 
| 51 | 20 |  |  |  |  | 89 | $Test->exported_to($pack); | 
| 52 | 20 |  |  |  |  | 214 | $Test->plan(@_); | 
| 53 | 20 |  |  |  |  | 3239 | $self->export_to_level(1, $self, @EXPORT); | 
| 54 |  |  |  |  |  |  | } | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | sub __dwim_compare { | 
| 57 |  |  |  |  |  |  | # Do the Right Thing (Perl 6 style) with the RHS being a Regex or a string | 
| 58 | 167 |  |  | 167 |  | 5358 | my ($target,$template) = @_; | 
| 59 | 167 | 100 |  |  |  | 266 | if (ref $template) { # supposedly a Regexp, but possibly blessed, so no eq comparision | 
| 60 | 77 |  |  |  |  | 582 | return ($target =~ $template ) | 
| 61 |  |  |  |  |  |  | } else { | 
| 62 | 90 |  |  |  |  | 329 | return $target eq $template; | 
| 63 |  |  |  |  |  |  | }; | 
| 64 |  |  |  |  |  |  | }; | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | sub __node_content { | 
| 67 | 0 |  |  | 0 |  | 0 | my $node = shift; | 
| 68 | 0 | 0 |  |  |  | 0 | if ($can_xpath eq 'XML::XPath') { return XML::XPath::XMLParser::as_string($node) }; | 
|  | 0 |  |  |  |  | 0 |  | 
| 69 | 0 | 0 |  |  |  | 0 | if ($can_xpath eq 'XML::LibXML') { return $node->toString }; | 
|  | 0 |  |  |  |  | 0 |  | 
| 70 |  |  |  |  |  |  | }; | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | sub __text_content { | 
| 73 | 0 |  |  | 0 |  | 0 | my $node = shift; | 
| 74 | 0 | 0 |  |  |  | 0 | if ($can_xpath eq 'XML::XPath') { return $node->string_value }; | 
|  | 0 |  |  |  |  | 0 |  | 
| 75 | 0 | 0 |  |  |  | 0 | if ($can_xpath eq 'XML::LibXML') { return $node->textContent }; | 
|  | 0 |  |  |  |  | 0 |  | 
| 76 |  |  |  |  |  |  | } | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | sub __match_comment { | 
| 79 |  |  |  |  |  |  | my ($text,$template) = @_; | 
| 80 |  |  |  |  |  |  | $text =~ s/^$/$1/sm unless $HTML_PARSER_StripsTags; | 
| 81 |  |  |  |  |  |  | unless (ref $template eq "Regexp") { | 
| 82 |  |  |  |  |  |  | $text =~ s/^\s*(.*?)\s*$/$1/; | 
| 83 |  |  |  |  |  |  | $template =~ s/^\s*(.*?)\s*$/$1/; | 
| 84 |  |  |  |  |  |  | }; | 
| 85 |  |  |  |  |  |  | return __dwim_compare($text, $template); | 
| 86 |  |  |  |  |  |  | }; | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | sub __count_comments { | 
| 89 |  |  |  |  |  |  | my ($HTML,$comment) = @_; | 
| 90 |  |  |  |  |  |  | my $tree; | 
| 91 |  |  |  |  |  |  | $tree = __get_node_tree($HTML,'//comment()'); | 
| 92 |  |  |  |  |  |  | return (undef,undef) unless ($tree); | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | my $result = 0; | 
| 95 |  |  |  |  |  |  | my @seen; | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | foreach my $node ($tree->get_nodelist) { | 
| 98 |  |  |  |  |  |  | my $content = __node_content($node); | 
| 99 |  |  |  |  |  |  | $content =~ s/\A\Z/$1/gsm; | 
| 100 |  |  |  |  |  |  | push @seen, $content; | 
| 101 |  |  |  |  |  |  | $result++ if __match_comment($content,$comment); | 
| 102 |  |  |  |  |  |  | }; | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | $_ = "" for @seen; | 
| 105 |  |  |  |  |  |  | return ($result, \@seen); | 
| 106 |  |  |  |  |  |  | }; | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  | sub __output_diag { | 
| 109 | 37 |  |  | 37 |  | 97 | my ($cond,$match,$descr,$kind,$name,$seen) = @_; | 
| 110 |  |  |  |  |  |  |  | 
| 111 | 37 |  |  |  |  | 55 | local $Test::Builder::Level = $Test::Builder::Level + 2; | 
| 112 |  |  |  |  |  |  |  | 
| 113 | 37 | 100 |  |  |  | 99 | unless ($Test->ok($cond,$name)) { | 
| 114 | 7 | 100 |  |  |  | 5777 | if (@$seen) { | 
| 115 | 4 |  |  |  |  | 16 | $Test->diag( "Saw '$_'" ) for @$seen; | 
| 116 |  |  |  |  |  |  | } else { | 
| 117 | 3 |  |  |  |  | 10 | $Test->diag( "No $kind found at all" ); | 
| 118 |  |  |  |  |  |  | }; | 
| 119 | 7 |  |  |  |  | 2311 | $Test->diag( "Expected $descr like '$match'" ); | 
| 120 |  |  |  |  |  |  | }; | 
| 121 |  |  |  |  |  |  | }; | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | sub __invalid_html { | 
| 124 | 0 |  |  | 0 |  | 0 | my ($HTML,$name) = @_; | 
| 125 | 0 | 0 |  |  |  | 0 | carp "No test name given" unless $name; | 
| 126 | 0 |  |  |  |  | 0 | $Test->ok(0,$name); | 
| 127 | 0 |  |  |  |  | 0 | $Test->diag( "Invalid HTML:"); | 
| 128 | 0 |  |  |  |  | 0 | $Test->diag($HTML); | 
| 129 |  |  |  |  |  |  | }; | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | sub __output_comment { | 
| 132 | 20 |  |  | 20 |  | 40 | my ($check,$expectation,$HTML,$comment,$name) = @_; | 
| 133 | 20 |  |  |  |  | 47 | my ($result,$seen) = __count_comments($HTML,$comment); | 
| 134 |  |  |  |  |  |  |  | 
| 135 | 20 | 50 |  |  |  | 39 | if (defined $result) { | 
| 136 | 20 |  |  |  |  | 38 | $result = $check->($result); | 
| 137 | 20 |  |  |  |  | 34 | __output_diag($result,$comment,$expectation,"comment",$name,$seen); | 
| 138 |  |  |  |  |  |  | } else { | 
| 139 | 0 |  |  |  |  | 0 | local $Test::Builder::Level = $Test::Builder::Level +2; | 
| 140 | 0 |  |  |  |  | 0 | __invalid_html($HTML,$name); | 
| 141 |  |  |  |  |  |  | }; | 
| 142 |  |  |  |  |  |  |  | 
| 143 | 20 |  |  |  |  | 4482 | $result; | 
| 144 |  |  |  |  |  |  | }; | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  | sub comment_ok { | 
| 147 | 10 |  |  | 10 | 0 | 3925 | my ($HTML,$comment,$name) = @_; | 
| 148 | 10 |  |  | 10 |  | 47 | __output_comment(sub{shift},"at least one comment",$HTML,$comment,$name); | 
|  | 10 |  |  |  |  | 13 |  | 
| 149 |  |  |  |  |  |  | }; | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | sub no_comment { | 
| 152 | 5 |  |  | 5 | 0 | 2188 | my ($HTML,$comment,$name) = @_; | 
| 153 | 5 |  |  | 5 |  | 21 | __output_comment(sub{shift == 0},"no comment",$HTML,$comment,$name); | 
|  | 5 |  |  |  |  | 11 |  | 
| 154 |  |  |  |  |  |  | }; | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | sub comment_count { | 
| 157 | 5 |  |  | 5 | 0 | 4219 | my ($HTML,$comment,$count,$name) = @_; | 
| 158 | 5 |  |  | 5 |  | 31 | __output_comment(sub{shift == $count},"exactly $count comments",$HTML,$comment,$name); | 
|  | 5 |  |  |  |  | 10 |  | 
| 159 |  |  |  |  |  |  | }; | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | sub __match_text { | 
| 162 |  |  |  |  |  |  | my ($text,$template) = @_; | 
| 163 |  |  |  |  |  |  | unless (ref $template eq "Regexp") { | 
| 164 |  |  |  |  |  |  | $text =~ s/^\s*(.*?)\s*$/$1/; | 
| 165 |  |  |  |  |  |  | $template =~ s/^\s*(.*?)\s*$/$1/; | 
| 166 |  |  |  |  |  |  | }; | 
| 167 |  |  |  |  |  |  | return __dwim_compare($text, $template); | 
| 168 |  |  |  |  |  |  | }; | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | sub __count_text { | 
| 171 |  |  |  |  |  |  | my ($HTML,$text) = @_; | 
| 172 |  |  |  |  |  |  | my $tree = __get_node_tree($HTML,'//text()'); | 
| 173 |  |  |  |  |  |  | return (undef,undef) unless $tree; | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | my $result = 0; | 
| 176 |  |  |  |  |  |  | my @seen; | 
| 177 |  |  |  |  |  |  |  | 
| 178 |  |  |  |  |  |  | foreach my $node ($tree->get_nodelist) { | 
| 179 |  |  |  |  |  |  | my $content = __node_content($node); | 
| 180 |  |  |  |  |  |  | push @seen, $content | 
| 181 |  |  |  |  |  |  | unless $content =~ /\A\r?\n?\Z/sm; | 
| 182 |  |  |  |  |  |  | $result++ if __match_text($content,$text); | 
| 183 |  |  |  |  |  |  | }; | 
| 184 |  |  |  |  |  |  |  | 
| 185 |  |  |  |  |  |  | return ($result, \@seen); | 
| 186 |  |  |  |  |  |  | }; | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | sub __output_text { | 
| 189 | 13 |  |  | 13 |  | 23 | my ($check,$expectation,$HTML,$text,$name) = @_; | 
| 190 | 13 |  |  |  |  | 27 | my ($result,$seen) = __count_text($HTML,$text); | 
| 191 |  |  |  |  |  |  |  | 
| 192 | 13 | 50 |  |  |  | 26 | if (defined $result) { | 
| 193 | 13 |  |  |  |  | 18 | local $Test::Builder::Level = $Test::Builder::Level; | 
| 194 | 13 |  |  |  |  | 21 | $result = $check->($result); | 
| 195 | 13 |  |  |  |  | 25 | __output_diag($result,$text,$expectation,"text",$name,$seen); | 
| 196 |  |  |  |  |  |  | } else { | 
| 197 | 0 |  |  |  |  | 0 | local $Test::Builder::Level = $Test::Builder::Level +2; | 
| 198 | 0 |  |  |  |  | 0 | __invalid_html($HTML,$name); | 
| 199 |  |  |  |  |  |  | }; | 
| 200 |  |  |  |  |  |  |  | 
| 201 | 13 |  |  |  |  | 2901 | $result; | 
| 202 |  |  |  |  |  |  | }; | 
| 203 |  |  |  |  |  |  |  | 
| 204 |  |  |  |  |  |  | sub text_ok { | 
| 205 | 6 |  |  | 6 | 0 | 2715 | my ($HTML,$text,$name) = @_; | 
| 206 | 6 |  |  | 6 |  | 22 | __output_text(sub{shift > 0}, "at least one text element",$HTML,$text,$name); | 
|  | 6 |  |  |  |  | 11 |  | 
| 207 |  |  |  |  |  |  | }; | 
| 208 |  |  |  |  |  |  |  | 
| 209 |  |  |  |  |  |  | sub no_text { | 
| 210 | 3 |  |  | 3 | 0 | 21 | my ($HTML,$text,$name) = @_; | 
| 211 | 3 |  |  | 3 |  | 11 | __output_text(sub{shift == 0}, "no text elements",$HTML,$text,$name); | 
|  | 3 |  |  |  |  | 6 |  | 
| 212 |  |  |  |  |  |  | }; | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  | sub text_count { | 
| 215 | 4 |  |  | 4 | 0 | 32 | my ($HTML,$text,$count,$name) = @_; | 
| 216 | 4 |  |  | 4 |  | 18 | __output_text(sub{shift == $count}, "exactly $count elements",$HTML,$text,$name); | 
|  | 4 |  |  |  |  | 6 |  | 
| 217 |  |  |  |  |  |  | }; | 
| 218 |  |  |  |  |  |  |  | 
| 219 |  |  |  |  |  |  | sub __match { | 
| 220 |  |  |  |  |  |  | my ($attrs,$currattr,$key) = @_; | 
| 221 |  |  |  |  |  |  | my $result = 1; | 
| 222 |  |  |  |  |  |  |  | 
| 223 |  |  |  |  |  |  | if (exists $currattr->{$key}) { | 
| 224 |  |  |  |  |  |  | if (! defined $attrs->{$key}) { | 
| 225 |  |  |  |  |  |  | $result = 0; # We don't want to see this attribute here | 
| 226 |  |  |  |  |  |  | } else { | 
| 227 |  |  |  |  |  |  | $result = 0 unless __dwim_compare($currattr->{$key}, $attrs->{$key}); | 
| 228 |  |  |  |  |  |  | }; | 
| 229 |  |  |  |  |  |  | } else { | 
| 230 |  |  |  |  |  |  | if (! defined $attrs->{$key}) { | 
| 231 |  |  |  |  |  |  | $result = 0 if (exists $currattr->{$key}); | 
| 232 |  |  |  |  |  |  | } else { | 
| 233 |  |  |  |  |  |  | $result = 0; | 
| 234 |  |  |  |  |  |  | }; | 
| 235 |  |  |  |  |  |  | }; | 
| 236 |  |  |  |  |  |  | return $result; | 
| 237 |  |  |  |  |  |  | }; | 
| 238 |  |  |  |  |  |  |  | 
| 239 |  |  |  |  |  |  | sub __get_node_tree { | 
| 240 | 0 |  |  | 0 |  | 0 | my ($HTML,$query) = @_; | 
| 241 |  |  |  |  |  |  |  | 
| 242 | 0 | 0 |  |  |  | 0 | croak "No HTML given" unless defined $HTML; | 
| 243 | 0 | 0 |  |  |  | 0 | croak "No query given" unless defined $query; | 
| 244 |  |  |  |  |  |  |  | 
| 245 | 0 |  |  |  |  | 0 | my ($tree,$find,$result); | 
| 246 | 0 | 0 |  |  |  | 0 | if ($HTML !~ m!\A\s*\Z!ms) { | 
| 247 | 0 |  |  |  |  | 0 | eval { | 
| 248 | 0 |  |  |  |  | 0 | require XML::LibXML; XML::LibXML->import; | 
|  | 0 |  |  |  |  | 0 |  | 
| 249 | 0 |  |  |  |  | 0 | my $parser = XML::LibXML->new(); | 
| 250 | 0 |  |  |  |  | 0 | $parser->recover(1); | 
| 251 | 0 |  |  |  |  | 0 | $tree = $parser->$parsing_method($HTML); | 
| 252 | 0 |  |  |  |  | 0 | $find = 'findnodes'; | 
| 253 | 0 |  |  |  |  | 0 | $HTML_PARSER_StripsTags = 1; | 
| 254 |  |  |  |  |  |  | }; | 
| 255 | 0 | 0 |  |  |  | 0 | unless ($tree) { | 
| 256 | 0 |  |  |  |  | 0 | eval { | 
| 257 | 0 |  |  |  |  | 0 | require XML::XPath; XML::XPath->import; | 
|  | 0 |  |  |  |  | 0 |  | 
| 258 | 0 |  |  |  |  | 0 | require XML::Parser; | 
| 259 |  |  |  |  |  |  |  | 
| 260 | 0 |  |  |  |  | 0 | my $p = XML::Parser->new( ErrorContext => 2, ParseParamEnt => 0, NoLWP => 1 ); | 
| 261 | 0 |  |  |  |  | 0 | $tree = XML::XPath->new( parser => $p, xml => $HTML ); | 
| 262 | 0 |  |  |  |  | 0 | $find = 'find'; | 
| 263 |  |  |  |  |  |  | }; | 
| 264 |  |  |  |  |  |  | }; | 
| 265 | 0 | 0 |  |  |  | 0 | undef $tree if $@; | 
| 266 |  |  |  |  |  |  |  | 
| 267 | 0 | 0 |  |  |  | 0 | if ($tree) { | 
| 268 | 0 |  |  |  |  | 0 | eval { | 
| 269 | 0 |  |  |  |  | 0 | $result = $tree->$find($query); | 
| 270 | 0 | 0 |  |  |  | 0 | unless ($result) { | 
| 271 | 0 |  |  |  |  | 0 | $result = {}; | 
| 272 | 0 |  |  |  |  | 0 | bless $result, 'Test::HTML::Content::EmptyXPathResult'; | 
| 273 |  |  |  |  |  |  | }; | 
| 274 |  |  |  |  |  |  | }; | 
| 275 | 0 | 0 |  |  |  | 0 | warn $@ if $@; | 
| 276 |  |  |  |  |  |  | }; | 
| 277 |  |  |  |  |  |  | } else { }; | 
| 278 | 0 |  |  |  |  | 0 | return $result; | 
| 279 |  |  |  |  |  |  | }; | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  | sub __get_node_content { | 
| 282 | 0 |  |  | 0 |  | 0 | my ($node,$name) = @_; | 
| 283 |  |  |  |  |  |  |  | 
| 284 | 0 | 0 |  |  |  | 0 | if ($name eq '_content') { | 
| 285 | 0 |  |  |  |  | 0 | return __text_content( $node ) | 
| 286 |  |  |  |  |  |  | #    return $node->textContent() | 
| 287 |  |  |  |  |  |  | } else { | 
| 288 | 0 |  |  |  |  | 0 | return $node->getAttribute($name) | 
| 289 |  |  |  |  |  |  | }; | 
| 290 |  |  |  |  |  |  | }; | 
| 291 |  |  |  |  |  |  |  | 
| 292 |  |  |  |  |  |  | sub __build_xpath_query { | 
| 293 | 4 |  |  | 4 |  | 1407 | my ($query,$attrref) = @_; | 
| 294 | 4 |  |  |  |  | 6 | my @postvalidation; | 
| 295 | 4 | 50 |  |  |  | 9 | if ($attrref) { | 
| 296 | 4 |  |  |  |  | 4 | my @query; | 
| 297 | 4 |  |  |  |  | 19 | for (sort keys %$attrref) { | 
| 298 | 8 |  |  |  |  | 11 | my $name = $_; | 
| 299 | 8 |  |  |  |  | 13 | my $value = $attrref->{$name}; | 
| 300 | 8 |  |  |  |  | 9 | my $xpath_name = '@' . $name; | 
| 301 | 8 | 50 |  |  |  | 14 | if ($name eq '_content') { $xpath_name = "text()" }; | 
|  | 0 |  |  |  |  | 0 |  | 
| 302 | 8 | 100 |  |  |  | 18 | if (! defined $value) { | 
|  |  | 100 |  |  |  |  |  | 
| 303 | 2 |  |  |  |  | 5 | push @query, "not($xpath_name)" | 
| 304 |  |  |  |  |  |  | } elsif ((ref $value) ne 'Regexp') { | 
| 305 | 3 |  |  |  |  | 8 | push @query, "$xpath_name = \"$value\""; | 
| 306 |  |  |  |  |  |  | push @postvalidation, sub { | 
| 307 | 0 |  |  | 0 |  | 0 | return __get_node_content( shift,$name ) eq $value | 
| 308 | 3 |  |  |  |  | 10 | }; | 
| 309 |  |  |  |  |  |  | } else { | 
| 310 | 3 |  |  |  |  | 5 | push @query, "$xpath_name"; | 
| 311 |  |  |  |  |  |  | push @postvalidation, sub { | 
| 312 | 0 |  |  | 0 |  | 0 | return __get_node_content( shift,$name ) =~ $value | 
| 313 | 3 |  |  |  |  | 9 | }; | 
| 314 |  |  |  |  |  |  | }; | 
| 315 |  |  |  |  |  |  | }; | 
| 316 | 4 | 50 |  |  |  | 12 | $query .= "[" . join( " and ", map {"$_"} @query ) . "]" | 
|  | 8 |  |  |  |  | 22 |  | 
| 317 |  |  |  |  |  |  | if @query; | 
| 318 |  |  |  |  |  |  | }; | 
| 319 |  |  |  |  |  |  | my $postvalidation = sub { | 
| 320 | 0 |  |  | 0 |  | 0 | my $node = shift; | 
| 321 | 0 |  |  |  |  | 0 | my $test; | 
| 322 | 0 |  |  |  |  | 0 | for $test (@postvalidation) { | 
| 323 | 0 | 0 |  |  |  | 0 | return () unless $test->($node); | 
| 324 |  |  |  |  |  |  | }; | 
| 325 | 0 |  |  |  |  | 0 | return 1; | 
| 326 | 4 |  |  |  |  | 9 | }; | 
| 327 | 4 |  |  |  |  | 13 | ($query,$postvalidation); | 
| 328 |  |  |  |  |  |  | }; | 
| 329 |  |  |  |  |  |  |  | 
| 330 |  |  |  |  |  |  | sub __count_tags { | 
| 331 |  |  |  |  |  |  | my ($HTML,$tag,$attrref) = @_; | 
| 332 |  |  |  |  |  |  | $attrref = {} unless defined $attrref; | 
| 333 |  |  |  |  |  |  |  | 
| 334 |  |  |  |  |  |  | my $fallback = lc "//$tag"; | 
| 335 |  |  |  |  |  |  | my ($query,$valid) = __build_xpath_query( lc "//$tag", $attrref ); | 
| 336 |  |  |  |  |  |  | my $tree = __get_node_tree($HTML,$query); | 
| 337 |  |  |  |  |  |  | return (undef,undef) unless $tree; | 
| 338 |  |  |  |  |  |  |  | 
| 339 |  |  |  |  |  |  | my @found = grep { $valid->($_) } ($tree->get_nodelist); | 
| 340 |  |  |  |  |  |  |  | 
| 341 |  |  |  |  |  |  | # Collect the nodes we did see for later reference : | 
| 342 |  |  |  |  |  |  | my @seen; | 
| 343 |  |  |  |  |  |  | foreach my $node (__get_node_tree($HTML,$fallback)->get_nodelist) { | 
| 344 |  |  |  |  |  |  | push @seen, __node_content($node); | 
| 345 |  |  |  |  |  |  | }; | 
| 346 |  |  |  |  |  |  | return scalar(@found),\@seen; | 
| 347 |  |  |  |  |  |  | }; | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | sub __tag_diag { | 
| 350 | 5 |  |  | 5 |  | 11 | my ($tag,$num,$attrs,$found) = @_; | 
| 351 | 5 |  |  |  |  | 13 | my $phrase = "Expected to find $num <$tag> tag(s)"; | 
| 352 | 5 | 50 |  |  |  | 17 | $phrase .= " matching" if (scalar keys %$attrs > 0); | 
| 353 | 5 |  |  |  |  | 14 | $Test->diag($phrase); | 
| 354 |  |  |  |  |  |  | $Test->diag("  $_ = " . (defined $attrs->{$_} ? $attrs->{$_} : '')) | 
| 355 | 5 | 50 |  |  |  | 1026 | for sort keys %$attrs; | 
| 356 | 5 | 100 |  |  |  | 979 | if (@$found) { | 
| 357 | 4 |  |  |  |  | 16 | $Test->diag("Got"); | 
| 358 | 4 |  |  |  |  | 757 | $Test->diag("  " . $_) for @$found; | 
| 359 |  |  |  |  |  |  | } else { | 
| 360 | 1 |  |  |  |  | 4 | $Test->diag("Got none"); | 
| 361 |  |  |  |  |  |  | }; | 
| 362 |  |  |  |  |  |  | }; | 
| 363 |  |  |  |  |  |  |  | 
| 364 |  |  |  |  |  |  | sub __output_tag { | 
| 365 | 48 |  |  | 48 |  | 104 | my ($check,$expectation,$HTML,$tag,$attrref,$name) = @_; | 
| 366 | 48 | 100 |  |  |  | 111 | ($attrref,$name) = ({},$attrref) | 
| 367 |  |  |  |  |  |  | unless defined $name; | 
| 368 | 48 | 100 |  |  |  | 81 | $attrref = {} | 
| 369 |  |  |  |  |  |  | unless defined $attrref; | 
| 370 | 48 | 50 |  |  |  | 112 | croak "$attrref dosen't look like a hash reference for the attributes" | 
| 371 |  |  |  |  |  |  | unless ref $attrref eq 'HASH'; | 
| 372 | 48 |  |  |  |  | 122 | my ($currcount,$seen) = __count_tags($HTML,$tag,$attrref); | 
| 373 | 48 |  |  |  |  | 62 | my $result; | 
| 374 | 48 | 50 |  |  |  | 92 | if (defined $currcount) { | 
| 375 | 48 | 100 |  |  |  | 74 | if ($currcount eq 'skip') { | 
| 376 | 6 |  |  |  |  | 17 | $Test->skip($seen); | 
| 377 |  |  |  |  |  |  | } else { | 
| 378 | 42 |  |  |  |  | 79 | local $Test::Builder::Level = $Test::Builder::Level +1; | 
| 379 | 42 |  |  |  |  | 64 | $result = $check->($currcount); | 
| 380 | 42 | 100 |  |  |  | 111 | unless ($Test->ok($result, $name)) { | 
| 381 | 5 |  |  |  |  | 4226 | __tag_diag($tag,$expectation,$attrref,$seen) ; | 
| 382 |  |  |  |  |  |  | }; | 
| 383 |  |  |  |  |  |  | }; | 
| 384 |  |  |  |  |  |  | } else { | 
| 385 | 0 |  |  |  |  | 0 | local $Test::Builder::Level = $Test::Builder::Level +2; | 
| 386 | 0 |  |  |  |  | 0 | __invalid_html($HTML,$name); | 
| 387 |  |  |  |  |  |  | }; | 
| 388 |  |  |  |  |  |  |  | 
| 389 | 48 |  |  |  |  | 13495 | $result; | 
| 390 |  |  |  |  |  |  | }; | 
| 391 |  |  |  |  |  |  |  | 
| 392 |  |  |  |  |  |  | sub tag_count { | 
| 393 | 14 |  |  | 14 | 0 | 94 | my ($HTML,$tag,$attrref,$count,$name) = @_; | 
| 394 | 14 |  |  | 14 |  | 64 | __output_tag(sub { shift == $count }, "exactly $count",$HTML,$tag,$attrref,$name); | 
|  | 14 |  |  |  |  | 47 |  | 
| 395 |  |  |  |  |  |  | }; | 
| 396 |  |  |  |  |  |  |  | 
| 397 |  |  |  |  |  |  | sub tag_ok { | 
| 398 | 22 |  |  | 22 | 0 | 148 | my ($HTML,$tag,$attrref,$name) = @_; | 
| 399 | 22 |  |  | 18 |  | 82 | __output_tag(sub { shift > 0 }, "at least one",$HTML,$tag,$attrref,$name); | 
|  | 18 |  |  |  |  | 33 |  | 
| 400 |  |  |  |  |  |  | }; | 
| 401 |  |  |  |  |  |  |  | 
| 402 |  |  |  |  |  |  | sub no_tag { | 
| 403 | 12 |  |  | 12 | 0 | 67 | my ($HTML,$tag,$attrref,$name) = @_; | 
| 404 | 12 |  |  | 10 |  | 47 | __output_tag(sub { shift == 0 }, "no",$HTML,$tag,$attrref,$name); | 
|  | 10 |  |  |  |  | 20 |  | 
| 405 |  |  |  |  |  |  | }; | 
| 406 |  |  |  |  |  |  |  | 
| 407 |  |  |  |  |  |  | sub link_count { | 
| 408 | 3 |  |  | 3 | 0 | 4415 | my ($HTML,$link,$count,$name) = @_; | 
| 409 | 3 |  |  |  |  | 5 | local $Test::Builder::Level = 2; | 
| 410 | 3 |  |  |  |  | 11 | return tag_count($HTML,"a",{href => $link},$count,$name); | 
| 411 |  |  |  |  |  |  | }; | 
| 412 |  |  |  |  |  |  |  | 
| 413 |  |  |  |  |  |  | sub link_ok { | 
| 414 | 4 |  |  | 4 | 0 | 3794 | my ($HTML,$link,$name) = (@_); | 
| 415 | 4 |  |  |  |  | 8 | local $Test::Builder::Level = 2; | 
| 416 | 4 |  |  |  |  | 15 | return tag_ok($HTML,'a',{ href => $link },$name); | 
| 417 |  |  |  |  |  |  | }; | 
| 418 |  |  |  |  |  |  |  | 
| 419 |  |  |  |  |  |  | sub no_link { | 
| 420 | 3 |  |  | 3 | 0 | 2231 | my ($HTML,$link,$name) = (@_); | 
| 421 | 3 |  |  |  |  | 5 | local $Test::Builder::Level = 2; | 
| 422 | 3 |  |  |  |  | 11 | return no_tag($HTML,'a',{ href => $link },$name); | 
| 423 |  |  |  |  |  |  | }; | 
| 424 |  |  |  |  |  |  |  | 
| 425 |  |  |  |  |  |  | sub title_ok { | 
| 426 | 4 |  |  | 4 | 0 | 1833 | my ($HTML,$title,$name) = @_; | 
| 427 | 4 |  |  |  |  | 7 | local $Test::Builder::Level = 2; | 
| 428 | 4 |  |  |  |  | 11 | return tag_ok($HTML,"title",{_content => $title},$name); | 
| 429 |  |  |  |  |  |  | }; | 
| 430 |  |  |  |  |  |  |  | 
| 431 |  |  |  |  |  |  | sub no_title { | 
| 432 | 2 |  |  | 2 | 0 | 2940 | my ($HTML,$title,$name) = (@_); | 
| 433 | 2 |  |  |  |  | 5 | local $Test::Builder::Level = 2; | 
| 434 | 2 |  |  |  |  | 10 | return no_tag($HTML,'title',{ _content => $title },$name); | 
| 435 |  |  |  |  |  |  | }; | 
| 436 |  |  |  |  |  |  |  | 
| 437 |  |  |  |  |  |  | sub __match_declaration { | 
| 438 |  |  |  |  |  |  | my ($text,$template) = @_; | 
| 439 |  |  |  |  |  |  | $text =~ s/^$/$1/ unless $HTML_PARSER_StripsTags; | 
| 440 |  |  |  |  |  |  | unless (ref $template eq "Regexp") { | 
| 441 |  |  |  |  |  |  | $text =~ s/^\s*(.*?)\s*$/$1/; | 
| 442 |  |  |  |  |  |  | $template =~ s/^\s*(.*?)\s*$/$1/; | 
| 443 |  |  |  |  |  |  | }; | 
| 444 |  |  |  |  |  |  | return __dwim_compare($text, $template); | 
| 445 |  |  |  |  |  |  | }; | 
| 446 |  |  |  |  |  |  |  | 
| 447 |  |  |  |  |  |  | sub __count_declarations { | 
| 448 |  |  |  |  |  |  | my ($HTML,$doctype) = @_; | 
| 449 |  |  |  |  |  |  | my $result = 0; | 
| 450 |  |  |  |  |  |  | my $seen = []; | 
| 451 |  |  |  |  |  |  |  | 
| 452 |  |  |  |  |  |  | my $p = HTML::TokeParser->new(\$HTML); | 
| 453 |  |  |  |  |  |  | my $token; | 
| 454 |  |  |  |  |  |  | while ($token = $p->get_token) { | 
| 455 |  |  |  |  |  |  | my ($type,$text) = @$token; | 
| 456 |  |  |  |  |  |  | if ($type eq "D") { | 
| 457 |  |  |  |  |  |  | push @$seen, $text; | 
| 458 |  |  |  |  |  |  | $result++ if __match_declaration($text,$doctype); | 
| 459 |  |  |  |  |  |  | }; | 
| 460 |  |  |  |  |  |  | }; | 
| 461 |  |  |  |  |  |  |  | 
| 462 |  |  |  |  |  |  | return $result, $seen; | 
| 463 |  |  |  |  |  |  | }; | 
| 464 |  |  |  |  |  |  |  | 
| 465 |  |  |  |  |  |  | sub has_declaration { | 
| 466 | 3 |  |  | 3 | 0 | 278 | my ($HTML,$declaration,$name) = @_; | 
| 467 | 3 |  |  |  |  | 7 | my ($result,$seen) = __count_declarations($HTML,$declaration); | 
| 468 |  |  |  |  |  |  |  | 
| 469 | 3 | 50 |  |  |  | 7 | if (defined $result) { | 
| 470 | 3 |  |  |  |  | 7 | __output_diag($result == 1,$declaration,"exactly one declaration","declaration",$name,$seen); | 
| 471 |  |  |  |  |  |  | } else { | 
| 472 | 0 |  |  |  |  | 0 | local $Test::Builder::Level = $Test::Builder::Level +1; | 
| 473 | 0 |  |  |  |  | 0 | __invalid_html($HTML,$name); | 
| 474 |  |  |  |  |  |  | }; | 
| 475 |  |  |  |  |  |  |  | 
| 476 | 3 |  |  |  |  | 942 | $result; | 
| 477 |  |  |  |  |  |  | }; | 
| 478 |  |  |  |  |  |  |  | 
| 479 |  |  |  |  |  |  | sub no_declaration { | 
| 480 | 1 |  |  | 1 | 0 | 7 | my ($HTML,$declaration,$name) = @_; | 
| 481 | 1 |  |  |  |  | 4 | my ($result,$seen) = __count_declarations($HTML,$declaration); | 
| 482 |  |  |  |  |  |  |  | 
| 483 | 1 | 50 |  |  |  | 13 | if (defined $result) { | 
| 484 | 1 |  |  |  |  | 3 | __output_diag($result == 0,$declaration,"no declaration","declaration",$name,$seen); | 
| 485 |  |  |  |  |  |  | } else { | 
| 486 | 0 |  |  |  |  | 0 | local $Test::Builder::Level = $Test::Builder::Level +1; | 
| 487 | 0 |  |  |  |  | 0 | __invalid_html($HTML,$name); | 
| 488 |  |  |  |  |  |  | }; | 
| 489 |  |  |  |  |  |  |  | 
| 490 | 1 |  |  |  |  | 373 | $result; | 
| 491 |  |  |  |  |  |  | }; | 
| 492 |  |  |  |  |  |  |  | 
| 493 |  |  |  |  |  |  | sub __count_xpath { | 
| 494 | 0 |  |  | 0 |  | 0 | my ($HTML,$query,$fallback) = @_; | 
| 495 |  |  |  |  |  |  |  | 
| 496 | 0 | 0 |  |  |  | 0 | $fallback = $query unless defined $fallback; | 
| 497 | 0 |  |  |  |  | 0 | my $tree = __get_node_tree($HTML,$query); | 
| 498 | 0 | 0 |  |  |  | 0 | return (undef,undef) unless $tree; | 
| 499 |  |  |  |  |  |  |  | 
| 500 | 0 |  |  |  |  | 0 | my @found = ($tree->get_nodelist); | 
| 501 |  |  |  |  |  |  |  | 
| 502 |  |  |  |  |  |  | # Collect the nodes we did see for later reference : | 
| 503 | 0 |  |  |  |  | 0 | my @seen; | 
| 504 | 0 |  |  |  |  | 0 | foreach my $node (__get_node_tree($HTML,$fallback)->get_nodelist) { | 
| 505 | 0 |  |  |  |  | 0 | push @seen, __node_content($node); | 
| 506 |  |  |  |  |  |  | }; | 
| 507 | 0 |  |  |  |  | 0 | return scalar(@found),\@seen; | 
| 508 |  |  |  |  |  |  | }; | 
| 509 |  |  |  |  |  |  |  | 
| 510 |  |  |  |  |  |  | sub __xpath_diag { | 
| 511 | 0 |  |  | 0 |  | 0 | my ($query,$num,$found) = @_; | 
| 512 | 0 |  |  |  |  | 0 | my $phrase = "Expected to find $num nodes matching on '$query'"; | 
| 513 | 0 | 0 |  |  |  | 0 | if (@$found) { | 
| 514 | 0 |  |  |  |  | 0 | $Test->diag("Got"); | 
| 515 | 0 |  |  |  |  | 0 | $Test->diag("  $_") for @$found; | 
| 516 |  |  |  |  |  |  | } else { | 
| 517 | 0 |  |  |  |  | 0 | $Test->diag("Got none"); | 
| 518 |  |  |  |  |  |  | }; | 
| 519 |  |  |  |  |  |  | }; | 
| 520 |  |  |  |  |  |  |  | 
| 521 |  |  |  |  |  |  | sub __output_xpath { | 
| 522 | 0 |  |  | 0 |  | 0 | my ($check,$expectation,$HTML,$query,$fallback,$name) = @_; | 
| 523 | 0 | 0 |  |  |  | 0 | ($fallback,$name) = ($query,$fallback) unless $name; | 
| 524 | 0 |  |  |  |  | 0 | my ($currcount,$seen) = __count_xpath($HTML,$query,$fallback); | 
| 525 | 0 |  |  |  |  | 0 | my $result; | 
| 526 | 0 | 0 |  |  |  | 0 | if (defined $currcount) { | 
| 527 | 0 | 0 |  |  |  | 0 | if ($currcount eq 'skip') { | 
| 528 | 0 |  |  |  |  | 0 | $Test->skip($seen); | 
| 529 |  |  |  |  |  |  | } else { | 
| 530 | 0 |  |  |  |  | 0 | local $Test::Builder::Level = $Test::Builder::Level +1; | 
| 531 | 0 |  |  |  |  | 0 | $result = $check->($currcount); | 
| 532 | 0 | 0 |  |  |  | 0 | unless ($Test->ok($result, $name)) { | 
| 533 | 0 |  |  |  |  | 0 | __xpath_diag($query,$expectation,$seen) ; | 
| 534 |  |  |  |  |  |  | }; | 
| 535 |  |  |  |  |  |  | }; | 
| 536 |  |  |  |  |  |  | } else { | 
| 537 | 0 |  |  |  |  | 0 | local $Test::Builder::Level = $Test::Builder::Level +1; | 
| 538 | 0 |  |  |  |  | 0 | __invalid_html($HTML,$name); | 
| 539 |  |  |  |  |  |  | }; | 
| 540 |  |  |  |  |  |  |  | 
| 541 | 0 |  |  |  |  | 0 | $result; | 
| 542 |  |  |  |  |  |  | }; | 
| 543 |  |  |  |  |  |  |  | 
| 544 |  |  |  |  |  |  | sub xpath_count { | 
| 545 | 0 |  |  | 0 | 0 | 0 | my ($HTML,$query,$count,$fallback,$name) = @_; | 
| 546 | 0 |  |  | 0 |  | 0 | __output_xpath( sub {shift == $count},"exactly $count",$HTML,$query,$fallback,$name); | 
|  | 0 |  |  |  |  | 0 |  | 
| 547 |  |  |  |  |  |  | }; | 
| 548 |  |  |  |  |  |  |  | 
| 549 |  |  |  |  |  |  | sub xpath_ok { | 
| 550 | 0 |  |  | 0 | 0 | 0 | my ($HTML,$query,$fallback,$name) = @_; | 
| 551 | 0 |  |  | 0 |  | 0 | __output_xpath( sub{shift > 0},"at least one",$HTML,$query,$fallback,$name); | 
|  | 0 |  |  |  |  | 0 |  | 
| 552 |  |  |  |  |  |  | }; | 
| 553 |  |  |  |  |  |  |  | 
| 554 |  |  |  |  |  |  | sub no_xpath { | 
| 555 | 0 |  |  | 0 | 0 | 0 | my ($HTML,$query,$fallback,$name) = @_; | 
| 556 | 0 |  |  | 0 |  | 0 | __output_xpath( sub{shift == 0},"no",$HTML,$query,$fallback,$name); | 
|  | 0 |  |  |  |  | 0 |  | 
| 557 |  |  |  |  |  |  | }; | 
| 558 |  |  |  |  |  |  |  | 
| 559 |  |  |  |  |  |  | sub install_xpath { | 
| 560 | 18 |  |  | 18 | 0 | 1981 | require XML::XPath; | 
| 561 | 0 |  |  |  |  | 0 | XML::XPath->import(); | 
| 562 | 0 | 0 |  |  |  | 0 | die "Need XML::XPath 1.13 or higher" | 
| 563 |  |  |  |  |  |  | unless $XML::XPath::VERSION >= 1.13; | 
| 564 | 0 |  |  |  |  | 0 | $can_xpath = 'XML::XPath'; | 
| 565 |  |  |  |  |  |  | }; | 
| 566 |  |  |  |  |  |  |  | 
| 567 |  |  |  |  |  |  | sub install_libxml { | 
| 568 | 18 |  |  | 18 | 0 | 165 | local $^W; | 
| 569 | 18 |  |  |  |  | 2342 | require XML::LibXML; | 
| 570 | 0 |  |  |  |  | 0 | XML::LibXML->import(); | 
| 571 | 0 |  |  |  |  | 0 | $can_xpath = 'XML::LibXML'; | 
| 572 |  |  |  |  |  |  | }; | 
| 573 |  |  |  |  |  |  |  | 
| 574 |  |  |  |  |  |  | # And install our plain handlers if we have to : | 
| 575 |  |  |  |  |  |  | sub install_pureperl { | 
| 576 | 29 |  |  | 29 | 0 | 12026 | require Test::HTML::Content::NoXPath; | 
| 577 | 29 |  |  |  |  | 119 | Test::HTML::Content::NoXPath->import; | 
| 578 |  |  |  |  |  |  | }; | 
| 579 |  |  |  |  |  |  |  | 
| 580 |  |  |  |  |  |  | BEGIN { | 
| 581 |  |  |  |  |  |  | # Load the XML-variant if our prerequisites are there : | 
| 582 | 18 |  |  |  |  | 67 | eval { install_libxml } | 
| 583 | 18 | 50 | 33 | 18 |  | 81 | or eval { install_xpath } | 
|  | 18 |  |  |  |  | 75 |  | 
| 584 |  |  |  |  |  |  | or install_pureperl; | 
| 585 |  |  |  |  |  |  | }; | 
| 586 |  |  |  |  |  |  |  | 
| 587 |  |  |  |  |  |  | { | 
| 588 |  |  |  |  |  |  | package Test::HTML::Content::EmptyXPathResult; | 
| 589 | 0 |  |  | 0 |  |  | sub size { 0 }; | 
| 590 | 0 |  |  | 0 |  |  | sub get_nodelist { () }; | 
| 591 |  |  |  |  |  |  | }; | 
| 592 |  |  |  |  |  |  |  | 
| 593 |  |  |  |  |  |  | 1; | 
| 594 |  |  |  |  |  |  |  | 
| 595 |  |  |  |  |  |  | __END__ |