| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package HTML::ListScraper::Interactive; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 678 | use warnings; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 28 |  | 
| 4 | 1 |  |  | 1 |  | 5 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 20 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 | 1 |  |  | 1 |  | 5 | use HTML::Entities; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 73 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | require Exporter; | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 1 |  |  | 1 |  | 5 | use vars qw(@ISA @EXPORT_OK); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 68 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | @ISA = qw(Exporter); | 
| 13 |  |  |  |  |  |  | @EXPORT_OK = qw(format_tags canonicalize_tags); | 
| 14 |  |  |  |  |  |  |  | 
| 15 | 1 |  |  | 1 |  | 6 | use Class::Generate qw(class); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 986 |  | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | class 'HTML::ListScraper::FormTag' => { | 
| 18 |  |  |  |  |  |  | name => { type => '$', required => 1 }, | 
| 19 |  |  |  |  |  |  | index => { type => '$', required => 1, readonly => 1 }, | 
| 20 |  |  |  |  |  |  | link => { type => '$', required => 1, readonly => 1 }, | 
| 21 |  |  |  |  |  |  | text => { type => '$', required => 1, readonly => 1 }, | 
| 22 |  |  |  |  |  |  | '&close_name' => q{ $name .= '/'; } | 
| 23 |  |  |  |  |  |  | }; | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | sub is_opening { | 
| 26 | 11 |  |  | 11 | 0 | 20 | my $tag = shift; | 
| 27 |  |  |  |  |  |  |  | 
| 28 | 11 |  |  |  |  | 31 | return $tag !~ m~\/~; | 
| 29 |  |  |  |  |  |  | } | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | sub is_closing { | 
| 32 | 11 |  |  | 11 | 0 | 15 | my $tag = shift; | 
| 33 |  |  |  |  |  |  |  | 
| 34 | 11 |  |  |  |  | 29 | return $tag =~ m~^\/~; | 
| 35 |  |  |  |  |  |  | } | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | sub format_tags { | 
| 38 | 3 |  |  | 3 | 1 | 1848 | my ($scraper, $tags, $incl) = @_; | 
| 39 |  |  |  |  |  |  |  | 
| 40 | 3 |  |  |  |  | 5 | my $incl_attr; | 
| 41 |  |  |  |  |  |  | my $incl_text; | 
| 42 | 0 |  |  |  |  | 0 | my $incl_index; | 
| 43 | 3 | 50 |  |  |  | 10 | if (ref($incl)) { | 
| 44 | 0 |  |  |  |  | 0 | $incl_attr = $incl->{attr}; | 
| 45 | 0 |  |  |  |  | 0 | $incl_text = $incl->{text}; | 
| 46 | 0 |  |  |  |  | 0 | $incl_index = $incl->{index}; | 
| 47 |  |  |  |  |  |  | } | 
| 48 |  |  |  |  |  |  |  | 
| 49 | 3 |  |  |  |  | 4 | my @buffer; | 
| 50 |  |  |  |  |  |  | my @stack; | 
| 51 | 3 |  |  |  |  | 8 | foreach my $td (@$tags) { | 
| 52 | 14 |  |  |  |  | 931 | my $name = $td->name; | 
| 53 | 14 |  |  |  |  | 100 | my $tag = $name; | 
| 54 | 14 |  |  |  |  | 34 | $tag =~ s~^\/~~; | 
| 55 |  |  |  |  |  |  |  | 
| 56 | 14 |  | 100 |  |  | 313 | my $text = $td->text || ''; | 
| 57 | 14 |  |  |  |  | 131 | $text =~ s/[\s[:cntrl:]]+/ /g; | 
| 58 |  |  |  |  |  |  |  | 
| 59 | 14 |  | 50 |  |  | 306 | my $link = $td->link || ''; | 
| 60 | 14 |  |  |  |  | 127 | $link =~ s/[\s[:cntrl:]]+//g; | 
| 61 |  |  |  |  |  |  |  | 
| 62 | 14 | 100 |  |  |  | 32 | if ($name eq $tag) { | 
| 63 | 8 |  |  |  |  | 22 | push @stack, [ $tag, scalar(@buffer) ]; | 
| 64 | 8 |  |  |  |  | 182 | push @buffer, HTML::ListScraper::FormTag->new(name => $name, | 
| 65 |  |  |  |  |  |  | index => $td->index, link => $link, text => $text); | 
| 66 |  |  |  |  |  |  | } else { | 
| 67 | 6 |  | 33 |  |  | 45 | while (scalar(@stack) && | 
| 68 |  |  |  |  |  |  | ($stack[scalar(@stack) - 1]->[0] ne $tag)) { | 
| 69 | 0 | 0 |  |  |  | 0 | if ($scraper->is_unclosed_tag( | 
| 70 |  |  |  |  |  |  | $stack[scalar(@stack) - 1]->[0])) { | 
| 71 | 0 |  |  |  |  | 0 | my $pair = pop @stack; | 
| 72 |  |  |  |  |  |  |  | 
| 73 | 0 |  |  |  |  | 0 | $buffer[$pair->[1]]->close_name(); | 
| 74 |  |  |  |  |  |  | } else { | 
| 75 | 0 |  |  |  |  | 0 | last; | 
| 76 |  |  |  |  |  |  | } | 
| 77 |  |  |  |  |  |  | } | 
| 78 |  |  |  |  |  |  |  | 
| 79 | 6 | 50 |  |  |  | 18 | if (scalar(@stack)) { | 
| 80 | 6 |  |  |  |  | 9 | pop @stack; | 
| 81 |  |  |  |  |  |  | } | 
| 82 |  |  |  |  |  |  |  | 
| 83 | 6 |  |  |  |  | 136 | push @buffer, HTML::ListScraper::FormTag->new(name => $name, | 
| 84 |  |  |  |  |  |  | index => $td->index, link => $link, text => $text); | 
| 85 |  |  |  |  |  |  | } | 
| 86 |  |  |  |  |  |  | } | 
| 87 |  |  |  |  |  |  |  | 
| 88 | 3 |  |  |  |  | 171 | while (scalar(@stack)) { | 
| 89 | 2 |  |  |  |  | 10 | my $pair = pop @stack; | 
| 90 | 2 |  |  |  |  | 50 | $buffer[$pair->[1]]->close_name(); | 
| 91 |  |  |  |  |  |  | } | 
| 92 |  |  |  |  |  |  |  | 
| 93 | 3 |  |  |  |  | 11 | my @out; | 
| 94 |  |  |  |  |  |  | my $prev; | 
| 95 | 0 |  |  |  |  | 0 | my $prev_index; | 
| 96 | 3 |  |  |  |  | 6 | my $depth = 0; | 
| 97 | 3 |  |  |  |  | 7 | foreach my $ft (@buffer) { | 
| 98 | 14 |  |  |  |  | 367 | my $name = $ft->name; | 
| 99 | 14 | 100 |  |  |  | 115 | if (defined($prev)) { | 
| 100 | 11 |  |  |  |  | 24 | my $op = is_opening($prev); | 
| 101 | 11 |  |  |  |  | 27 | my $cl = is_closing($name); | 
| 102 | 11 | 100 | 100 |  |  | 85 | if ($op && !$cl) { | 
|  |  | 100 | 100 |  |  |  |  | 
| 103 | 2 |  |  |  |  | 3 | ++$depth; | 
| 104 |  |  |  |  |  |  | } elsif (!$op && $cl) { | 
| 105 | 2 | 50 |  |  |  | 7 | if ($depth > 0) { | 
| 106 | 2 |  |  |  |  | 4 | --$depth; | 
| 107 |  |  |  |  |  |  | } | 
| 108 |  |  |  |  |  |  | } | 
| 109 |  |  |  |  |  |  | } | 
| 110 |  |  |  |  |  |  |  | 
| 111 | 14 |  |  |  |  | 29 | my $indent = ' ' x (2 * $depth); | 
| 112 |  |  |  |  |  |  |  | 
| 113 | 14 |  |  |  |  | 21 | my $attr = ''; | 
| 114 | 14 | 50 | 33 |  |  | 37 | if ($incl_attr && $ft->link) { | 
| 115 | 0 |  |  |  |  | 0 | $attr = ' href="' . encode_entities($ft->link, '"') . '"'; | 
| 116 |  |  |  |  |  |  | } | 
| 117 |  |  |  |  |  |  |  | 
| 118 | 14 |  |  |  |  | 22 | my $lncol = ''; | 
| 119 | 14 | 50 |  |  |  | 32 | if ($incl_index) { | 
| 120 | 0 |  |  |  |  | 0 | $lncol = $ft->index . "\t"; | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  |  | 
| 123 | 14 | 50 | 66 |  |  | 263 | if (defined($prev_index) && (($ft->index - $prev_index) != 1)) { | 
| 124 | 0 |  |  |  |  | 0 | push @out, "\n"; | 
| 125 |  |  |  |  |  |  | } | 
| 126 |  |  |  |  |  |  |  | 
| 127 | 14 |  |  |  |  | 126 | push @out, "$lncol$indent<$name$attr>\n"; | 
| 128 |  |  |  |  |  |  |  | 
| 129 | 14 | 50 | 33 |  |  | 37 | if ($incl_text && ($ft->text !~ /^[\s\r\n]*$/)) { | 
| 130 | 0 | 0 |  |  |  | 0 | $lncol = $incl_index ? "\t" : ""; | 
| 131 | 0 |  |  |  |  | 0 | push @out, $lncol . $indent . encode_entities($ft->text, "<>&") . "\n"; | 
| 132 |  |  |  |  |  |  | } | 
| 133 |  |  |  |  |  |  |  | 
| 134 | 14 |  |  |  |  | 19 | $prev = $name; | 
| 135 | 14 |  |  |  |  | 308 | $prev_index = $ft->index; | 
| 136 |  |  |  |  |  |  | } | 
| 137 |  |  |  |  |  |  |  | 
| 138 | 3 | 50 |  |  |  | 57 | return wantarray ? @out : \@out; | 
| 139 |  |  |  |  |  |  | } | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | sub canonicalize_tags { | 
| 142 | 1 |  |  | 1 | 1 | 860 | my @out; | 
| 143 | 1 |  |  |  |  | 4 | foreach (@_) { | 
| 144 | 6 |  |  |  |  | 12 | my $ln = lc $_; | 
| 145 | 6 |  |  |  |  | 21 | $ln =~ s/^\s*/; | 
| 146 | 6 |  |  |  |  | 19 | $ln =~ s/\/?>[\s\r\n]*$//; | 
| 147 |  |  |  |  |  |  |  | 
| 148 | 6 | 50 |  |  |  | 17 | if ($ln) { | 
| 149 | 6 |  |  |  |  | 17 | push @out, $ln; | 
| 150 |  |  |  |  |  |  | } | 
| 151 |  |  |  |  |  |  | } | 
| 152 |  |  |  |  |  |  |  | 
| 153 | 1 | 50 |  |  |  | 8 | return wantarray ? @out : \@out; | 
| 154 |  |  |  |  |  |  | } | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | 1; | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | __END__ |