| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package HTML::Scrubber; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | # ABSTRACT: Perl extension for scrubbing/sanitizing HTML | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 | 16 |  |  | 16 |  | 611079 | use 5.008;    # enforce minimum perl version of 5.8 | 
|  | 16 |  |  |  |  | 134 |  | 
| 7 | 16 |  |  | 16 |  | 81 | use strict; | 
|  | 16 |  |  |  |  | 74 |  | 
|  | 16 |  |  |  |  | 364 |  | 
| 8 | 16 |  |  | 16 |  | 73 | use warnings; | 
|  | 16 |  |  |  |  | 33 |  | 
|  | 16 |  |  |  |  | 518 |  | 
| 9 | 16 |  |  | 16 |  | 7832 | use HTML::Parser 3.47 (); | 
|  | 16 |  |  |  |  | 84274 |  | 
|  | 16 |  |  |  |  | 410 |  | 
| 10 | 16 |  |  | 16 |  | 98 | use HTML::Entities; | 
|  | 16 |  |  |  |  | 31 |  | 
|  | 16 |  |  |  |  | 831 |  | 
| 11 | 16 |  |  | 16 |  | 85 | use Scalar::Util ('weaken'); | 
|  | 16 |  |  |  |  | 22 |  | 
|  | 16 |  |  |  |  | 796 |  | 
| 12 | 16 |  |  | 16 |  | 84 | use List::Util 1.33 qw(any); | 
|  | 16 |  |  |  |  | 229 |  | 
|  | 16 |  |  |  |  | 34499 |  | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | our ( @_scrub, @_scrub_fh ); | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | our $VERSION = '0.19'; # VERSION | 
| 17 |  |  |  |  |  |  | our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | # my my my my, these here to prevent foolishness like | 
| 20 |  |  |  |  |  |  | # http://perlmonks.org/index.pl?node_id=251127#Stealing+Lexicals | 
| 21 |  |  |  |  |  |  | (@_scrub)    = ( \&_scrub,    "self, event, tagname, attr, attrseq, text" ); | 
| 22 |  |  |  |  |  |  | (@_scrub_fh) = ( \&_scrub_fh, "self, event, tagname, attr, attrseq, text" ); | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | sub new { | 
| 26 | 23 |  |  | 23 | 1 | 10674 | my $package = shift; | 
| 27 | 23 |  |  |  |  | 169 | my $p       = HTML::Parser->new( | 
| 28 |  |  |  |  |  |  | api_version             => 3, | 
| 29 |  |  |  |  |  |  | default_h               => \@_scrub, | 
| 30 |  |  |  |  |  |  | marked_sections         => 0, | 
| 31 |  |  |  |  |  |  | strict_comment          => 0, | 
| 32 |  |  |  |  |  |  | unbroken_text           => 1, | 
| 33 |  |  |  |  |  |  | case_sensitive          => 0, | 
| 34 |  |  |  |  |  |  | boolean_attribute_value => undef, | 
| 35 |  |  |  |  |  |  | empty_element_tags      => 1, | 
| 36 |  |  |  |  |  |  | ); | 
| 37 |  |  |  |  |  |  |  | 
| 38 | 23 |  |  |  |  | 1777 | my $self = { | 
| 39 |  |  |  |  |  |  | _p        => $p, | 
| 40 |  |  |  |  |  |  | _rules    => { '*' => 0, }, | 
| 41 |  |  |  |  |  |  | _comment  => 0, | 
| 42 |  |  |  |  |  |  | _process  => 0, | 
| 43 |  |  |  |  |  |  | _r        => "", | 
| 44 |  |  |  |  |  |  | _optimize => 1, | 
| 45 |  |  |  |  |  |  | _script   => 0, | 
| 46 |  |  |  |  |  |  | _style    => 0, | 
| 47 |  |  |  |  |  |  | }; | 
| 48 |  |  |  |  |  |  |  | 
| 49 | 23 |  |  |  |  | 116 | $p->{"\0_s"} = bless $self, $package; | 
| 50 | 23 |  |  |  |  | 86 | weaken( $p->{"\0_s"} ); | 
| 51 |  |  |  |  |  |  |  | 
| 52 | 23 | 100 |  |  |  | 85 | return $self unless @_; | 
| 53 |  |  |  |  |  |  |  | 
| 54 | 10 |  |  |  |  | 25 | my (%args) = @_; | 
| 55 |  |  |  |  |  |  |  | 
| 56 | 10 |  |  |  |  | 22 | for my $f (qw[ default allow deny rules process comment ]) { | 
| 57 | 60 | 100 |  |  |  | 146 | next unless exists $args{$f}; | 
| 58 | 12 | 100 |  |  |  | 29 | if ( ref $args{$f} ) { | 
| 59 | 10 |  |  |  |  | 38 | $self->$f( @{ $args{$f} } ); | 
|  | 10 |  |  |  |  | 54 |  | 
| 60 |  |  |  |  |  |  | } | 
| 61 |  |  |  |  |  |  | else { | 
| 62 | 2 |  |  |  |  | 5 | $self->$f( $args{$f} ); | 
| 63 |  |  |  |  |  |  | } | 
| 64 |  |  |  |  |  |  | } | 
| 65 |  |  |  |  |  |  |  | 
| 66 | 10 |  |  |  |  | 34 | return $self; | 
| 67 |  |  |  |  |  |  | } | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | sub comment { | 
| 71 |  |  |  |  |  |  | return $_[0]->{_comment} | 
| 72 | 16 | 100 |  | 16 | 1 | 372 | if @_ == 1; | 
| 73 | 7 |  |  |  |  | 13 | $_[0]->{_comment} = $_[1]; | 
| 74 | 7 |  |  |  |  | 14 | return; | 
| 75 |  |  |  |  |  |  | } | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | sub process { | 
| 79 |  |  |  |  |  |  | return $_[0]->{_process} | 
| 80 | 10 | 100 |  | 10 | 1 | 60 | if @_ == 1; | 
| 81 | 1 |  |  |  |  | 2 | $_[0]->{_process} = $_[1]; | 
| 82 | 1 |  |  |  |  | 2 | return; | 
| 83 |  |  |  |  |  |  | } | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | sub script { | 
| 87 |  |  |  |  |  |  | return $_[0]->{_script} | 
| 88 | 3 | 100 |  | 3 | 1 | 342 | if @_ == 1; | 
| 89 | 1 |  |  |  |  | 2 | $_[0]->{_script} = $_[1]; | 
| 90 | 1 |  |  |  |  | 2 | return; | 
| 91 |  |  |  |  |  |  | } | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | sub style { | 
| 95 |  |  |  |  |  |  | return $_[0]->{_style} | 
| 96 | 3 | 100 |  | 3 | 1 | 12 | if @_ == 1; | 
| 97 | 1 |  |  |  |  | 3 | $_[0]->{_style} = $_[1]; | 
| 98 | 1 |  |  |  |  | 2 | return; | 
| 99 |  |  |  |  |  |  | } | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | sub allow { | 
| 103 | 10 |  |  | 10 | 1 | 418 | my $self = shift; | 
| 104 | 10 |  |  |  |  | 21 | for my $k (@_) { | 
| 105 | 43 |  |  |  |  | 90 | $self->{_rules}{ lc $k } = 1; | 
| 106 |  |  |  |  |  |  | } | 
| 107 | 10 |  |  |  |  | 16 | $self->{_optimize} = 1;    # each time a rule changes, reoptimize when parse | 
| 108 |  |  |  |  |  |  |  | 
| 109 | 10 |  |  |  |  | 40 | return; | 
| 110 |  |  |  |  |  |  | } | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | sub deny { | 
| 114 | 2 |  |  | 2 | 1 | 4 | my $self = shift; | 
| 115 |  |  |  |  |  |  |  | 
| 116 | 2 |  |  |  |  | 4 | for my $k (@_) { | 
| 117 | 7 |  |  |  |  | 12 | $self->{_rules}{ lc $k } = 0; | 
| 118 |  |  |  |  |  |  | } | 
| 119 |  |  |  |  |  |  |  | 
| 120 | 2 |  |  |  |  | 6 | $self->{_optimize} = 1;    # each time a rule changes, reoptimize when parse | 
| 121 |  |  |  |  |  |  |  | 
| 122 | 2 |  |  |  |  | 2 | return; | 
| 123 |  |  |  |  |  |  | } | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | sub rules { | 
| 127 | 2 |  |  | 2 | 1 | 340 | my $self = shift; | 
| 128 | 2 |  |  |  |  | 6 | my (%rules) = @_; | 
| 129 | 2 |  |  |  |  | 6 | for my $k ( keys %rules ) { | 
| 130 | 2 |  |  |  |  | 9 | $self->{_rules}{ lc $k } = $rules{$k}; | 
| 131 |  |  |  |  |  |  | } | 
| 132 |  |  |  |  |  |  |  | 
| 133 | 2 |  |  |  |  | 17 | $self->{_optimize} = 1;    # each time a rule changes, reoptimize when parse | 
| 134 |  |  |  |  |  |  |  | 
| 135 | 2 |  |  |  |  | 5 | return; | 
| 136 |  |  |  |  |  |  | } | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | sub default { | 
| 140 | 19 | 100 |  | 19 | 1 | 369 | return $_[0]->{_rules}{'*'} | 
| 141 |  |  |  |  |  |  | if @_ == 1; | 
| 142 |  |  |  |  |  |  |  | 
| 143 | 12 | 100 |  |  |  | 40 | $_[0]->{_rules}{'*'} = $_[1] if defined $_[1]; | 
| 144 | 12 | 100 | 66 |  |  | 52 | $_[0]->{_rules}{'_'} = $_[2] if defined $_[2] and ref $_[2]; | 
| 145 | 12 |  |  |  |  | 22 | $_[0]->{_optimize} = 1;    # each time a rule changes, reoptimize when parse | 
| 146 |  |  |  |  |  |  |  | 
| 147 | 12 |  |  |  |  | 49 | return; | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | sub scrub_file { | 
| 152 | 2 | 50 |  | 2 | 1 | 3325 | if ( @_ > 2 ) { | 
| 153 | 2 | 50 |  |  |  | 6 | return unless defined $_[0]->_out( $_[2] ); | 
| 154 |  |  |  |  |  |  | } | 
| 155 |  |  |  |  |  |  | else { | 
| 156 | 0 |  |  |  |  | 0 | $_[0]->{_p}->handler( default => @_scrub ); | 
| 157 |  |  |  |  |  |  | } | 
| 158 |  |  |  |  |  |  |  | 
| 159 | 2 |  |  |  |  | 6 | $_[0]->_optimize();    #if $_[0]->{_optimize}; | 
| 160 |  |  |  |  |  |  |  | 
| 161 | 2 |  |  |  |  | 9 | $_[0]->{_p}->parse_file( $_[1] ); | 
| 162 |  |  |  |  |  |  |  | 
| 163 | 2 | 50 |  |  |  | 5 | return delete $_[0]->{_r} unless exists $_[0]->{_out}; | 
| 164 | 2 | 50 |  |  |  | 4 | print { $_[0]->{_out} } $_[0]->{_r} if length $_[0]->{_r}; | 
|  | 0 |  |  |  |  | 0 |  | 
| 165 | 2 |  |  |  |  | 148 | delete $_[0]->{_out}; | 
| 166 | 2 |  |  |  |  | 9 | return 1; | 
| 167 |  |  |  |  |  |  | } | 
| 168 |  |  |  |  |  |  |  | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | sub scrub { | 
| 171 | 63 | 100 |  | 63 | 1 | 32859 | if ( @_ > 2 ) { | 
| 172 | 2 | 50 |  |  |  | 8 | return unless defined $_[0]->_out( $_[2] ); | 
| 173 |  |  |  |  |  |  | } | 
| 174 |  |  |  |  |  |  | else { | 
| 175 | 61 |  |  |  |  | 360 | $_[0]->{_p}->handler( default => @_scrub ); | 
| 176 |  |  |  |  |  |  | } | 
| 177 |  |  |  |  |  |  |  | 
| 178 | 63 |  |  |  |  | 172 | $_[0]->_optimize();    # if $_[0]->{_optimize}; | 
| 179 |  |  |  |  |  |  |  | 
| 180 | 63 | 100 |  |  |  | 565 | $_[0]->{_p}->parse( $_[1] ) if defined( $_[1] ); | 
| 181 | 63 |  |  |  |  | 275 | $_[0]->{_p}->eof(); | 
| 182 |  |  |  |  |  |  |  | 
| 183 | 63 | 100 |  |  |  | 289 | return delete $_[0]->{_r} unless exists $_[0]->{_out}; | 
| 184 | 2 |  |  |  |  | 80 | delete $_[0]->{_out}; | 
| 185 | 2 |  |  |  |  | 9 | return 1; | 
| 186 |  |  |  |  |  |  | } | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  |  | 
| 189 |  |  |  |  |  |  | sub _out { | 
| 190 | 4 |  |  | 4 |  | 11 | my ( $self, $o ) = @_; | 
| 191 |  |  |  |  |  |  |  | 
| 192 | 4 | 100 | 66 |  |  | 28 | unless ( ref $o and ref \$o ne 'GLOB' ) { | 
| 193 | 2 | 50 |  |  |  | 94 | open my $F, '>', $o or return; | 
| 194 | 2 |  |  |  |  | 10 | binmode $F; | 
| 195 | 2 |  |  |  |  | 10 | $self->{_out} = $F; | 
| 196 |  |  |  |  |  |  | } | 
| 197 |  |  |  |  |  |  | else { | 
| 198 | 2 |  |  |  |  | 5 | $self->{_out} = $o; | 
| 199 |  |  |  |  |  |  | } | 
| 200 |  |  |  |  |  |  |  | 
| 201 | 4 |  |  |  |  | 36 | $self->{_p}->handler( default => @_scrub_fh ); | 
| 202 |  |  |  |  |  |  |  | 
| 203 | 4 |  |  |  |  | 14 | return 1; | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  |  | 
| 206 |  |  |  |  |  |  |  | 
| 207 |  |  |  |  |  |  | sub _validate { | 
| 208 | 91 |  |  | 91 |  | 149 | my ( $s, $t, $r, $a, $as ) = @_; | 
| 209 | 91 | 100 |  |  |  | 196 | return "<$t>" unless %$a; | 
| 210 |  |  |  |  |  |  |  | 
| 211 | 41 |  |  |  |  | 62 | $r = $s->{_rules}->{$r}; | 
| 212 | 41 |  |  |  |  | 50 | my %f; | 
| 213 |  |  |  |  |  |  |  | 
| 214 | 41 |  |  |  |  | 102 | for my $k ( keys %$a ) { | 
| 215 | 61 | 100 |  |  |  | 159 | my $check = exists $r->{$k} ? $r->{$k} : exists $r->{'*'} ? $r->{'*'} : next; | 
|  |  | 100 |  |  |  |  |  | 
| 216 |  |  |  |  |  |  |  | 
| 217 | 53 | 100 | 33 |  |  | 183 | if ( ref $check eq 'CODE' ) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 218 | 4 |  |  |  |  | 10 | my @v = $check->( $s, $t, $k, $a->{$k}, $a, \%f ); | 
| 219 | 4 | 100 |  |  |  | 45 | next unless @v; | 
| 220 | 3 |  |  |  |  | 10 | $f{$k} = shift @v; | 
| 221 |  |  |  |  |  |  | } | 
| 222 |  |  |  |  |  |  | elsif ( ref $check || length($check) > 1 ) { | 
| 223 | 0 | 0 |  |  |  | 0 | $f{$k} = $a->{$k} if $a->{$k} =~ m{$check}; | 
| 224 |  |  |  |  |  |  | } | 
| 225 |  |  |  |  |  |  | elsif ($check) { | 
| 226 | 47 |  |  |  |  | 94 | $f{$k} = $a->{$k}; | 
| 227 |  |  |  |  |  |  | } | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  |  | 
| 230 | 41 | 100 |  |  |  | 80 | if (%f) { | 
| 231 | 31 |  |  |  |  | 34 | my %seen; | 
| 232 |  |  |  |  |  |  | return "<$t $r>" | 
| 233 |  |  |  |  |  |  | if $r = join ' ', map { | 
| 234 |  |  |  |  |  |  | defined $f{$_} | 
| 235 | 50 | 100 |  |  |  | 358 | ? qq[$_="] . encode_entities( $f{$_} ) . q["] | 
| 236 |  |  |  |  |  |  | : $_;    # boolean attribute (TODO?) | 
| 237 | 31 | 100 |  |  |  | 42 | } grep { exists $f{$_} and !$seen{$_}++; } @$as; | 
|  | 55 | 50 |  |  |  | 195 |  | 
| 238 |  |  |  |  |  |  | } | 
| 239 |  |  |  |  |  |  |  | 
| 240 | 10 |  |  |  |  | 28 | return "<$t>"; | 
| 241 |  |  |  |  |  |  | } | 
| 242 |  |  |  |  |  |  |  | 
| 243 |  |  |  |  |  |  |  | 
| 244 |  |  |  |  |  |  | sub _scrub_str { | 
| 245 | 504 |  |  | 504 |  | 844 | my ( $p, $e, $t, $a, $as, $text ) = @_; | 
| 246 |  |  |  |  |  |  |  | 
| 247 | 504 |  |  |  |  | 584 | my $s      = $p->{"\0_s"}; | 
| 248 | 504 |  |  |  |  | 542 | my $outstr = ''; | 
| 249 |  |  |  |  |  |  |  | 
| 250 | 504 | 100 | 66 |  |  | 1454 | if ( $e eq 'start' ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 251 | 109 | 100 |  |  |  | 278 | if ( exists $s->{_rules}->{$t} )    # is there a specific rule | 
|  |  | 100 |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | { | 
| 253 | 61 | 100 |  |  |  | 142 | if ( ref $s->{_rules}->{$t} )    # is it complicated?(not simple;) | 
|  |  | 50 |  |  |  |  |  | 
| 254 |  |  |  |  |  |  | { | 
| 255 | 5 |  |  |  |  | 9 | $outstr .= $s->_validate( $t, $t, $a, $as ); | 
| 256 |  |  |  |  |  |  | } | 
| 257 |  |  |  |  |  |  | elsif ( $s->{_rules}->{$t} )     # validate using default attribute rule | 
| 258 |  |  |  |  |  |  | { | 
| 259 | 56 |  |  |  |  | 103 | $outstr .= $s->_validate( $t, '_', $a, $as ); | 
| 260 |  |  |  |  |  |  | } | 
| 261 |  |  |  |  |  |  | } | 
| 262 |  |  |  |  |  |  | elsif ( $s->{_rules}->{'*'} )        # default allow tags | 
| 263 |  |  |  |  |  |  | { | 
| 264 | 30 |  |  |  |  | 48 | $outstr .= $s->_validate( $t, '_', $a, $as ); | 
| 265 |  |  |  |  |  |  | } | 
| 266 |  |  |  |  |  |  | } | 
| 267 |  |  |  |  |  |  | elsif ( $e eq 'end' ) { | 
| 268 |  |  |  |  |  |  |  | 
| 269 |  |  |  |  |  |  | # empty tags list taken from | 
| 270 |  |  |  |  |  |  | # https://developer.mozilla.org/en/docs/Glossary/empty_element | 
| 271 | 79 |  |  |  |  | 211 | my @empty_tags = qw(area base br col embed hr img input link meta param source track wbr); | 
| 272 | 79 | 100 | 100 | 940 |  | 345 | return "" if $text ne '' && any { $t eq $_ } @empty_tags;    # skip false closing empty tags | 
|  | 940 |  |  |  |  | 1333 |  | 
| 273 |  |  |  |  |  |  |  | 
| 274 | 63 |  |  |  |  | 134 | my $place = 0; | 
| 275 | 63 | 100 |  |  |  | 146 | if ( exists $s->{_rules}->{$t} ) { | 
|  |  | 100 |  |  |  |  |  | 
| 276 | 27 | 50 |  |  |  | 53 | $place = 1 if $s->{_rules}->{$t}; | 
| 277 |  |  |  |  |  |  | } | 
| 278 |  |  |  |  |  |  | elsif ( $s->{_rules}->{'*'} ) { | 
| 279 | 23 |  |  |  |  | 26 | $place = 1; | 
| 280 |  |  |  |  |  |  | } | 
| 281 | 63 | 100 |  |  |  | 109 | if ($place) { | 
| 282 | 50 | 100 |  |  |  | 73 | if ( length $text ) { | 
| 283 | 46 |  |  |  |  | 98 | $outstr .= "$t>"; | 
| 284 |  |  |  |  |  |  | } | 
| 285 |  |  |  |  |  |  | else { | 
| 286 | 4 |  |  |  |  | 12 | substr $s->{_r}, -1, 0, ' /'; | 
| 287 |  |  |  |  |  |  | } | 
| 288 |  |  |  |  |  |  | } | 
| 289 |  |  |  |  |  |  | } | 
| 290 |  |  |  |  |  |  | elsif ( $e eq 'comment' ) { | 
| 291 | 14 | 100 |  |  |  | 25 | if ( $s->{_comment} ) { | 
| 292 |  |  |  |  |  |  |  | 
| 293 |  |  |  |  |  |  | # only copy comments through if they are well formed... | 
| 294 | 6 | 100 |  |  |  | 34 | $outstr .= $text if ( $text =~ m|^$|ms ); | 
| 295 |  |  |  |  |  |  | } | 
| 296 |  |  |  |  |  |  | } | 
| 297 |  |  |  |  |  |  | elsif ( $e eq 'process' ) { | 
| 298 | 10 | 100 |  |  |  | 19 | $outstr .= $text if $s->{_process}; | 
| 299 |  |  |  |  |  |  | } | 
| 300 |  |  |  |  |  |  | elsif ( $e eq 'text' or $e eq 'default' ) { | 
| 301 | 162 |  |  |  |  | 266 | $text =~ s/</g;    #https://rt.cpan.org/Public/Ticket/Attachment/83958/10332/scrubber.patch | 
| 302 | 162 |  |  |  |  | 179 | $text =~ s/>/>/g; | 
| 303 |  |  |  |  |  |  |  | 
| 304 | 162 |  |  |  |  | 222 | $outstr .= $text; | 
| 305 |  |  |  |  |  |  | } | 
| 306 |  |  |  |  |  |  | elsif ( $e eq 'start_document' ) { | 
| 307 | 65 |  |  |  |  | 139 | $outstr = ""; | 
| 308 |  |  |  |  |  |  | } | 
| 309 |  |  |  |  |  |  |  | 
| 310 | 488 |  |  |  |  | 2351 | return $outstr; | 
| 311 |  |  |  |  |  |  | } | 
| 312 |  |  |  |  |  |  |  | 
| 313 |  |  |  |  |  |  |  | 
| 314 |  |  |  |  |  |  | sub _scrub_fh { | 
| 315 | 38 |  |  | 38 |  | 212 | my $self = $_[0]->{"\0_s"}; | 
| 316 | 38 | 100 |  |  |  | 56 | print { $self->{_out} } $self->{'_r'} if length $self->{_r}; | 
|  | 12 |  |  |  |  | 37 |  | 
| 317 | 38 |  |  |  |  | 66 | $self->{'_r'} = _scrub_str(@_); | 
| 318 |  |  |  |  |  |  | } | 
| 319 |  |  |  |  |  |  |  | 
| 320 |  |  |  |  |  |  |  | 
| 321 |  |  |  |  |  |  | sub _scrub { | 
| 322 |  |  |  |  |  |  |  | 
| 323 | 466 |  |  | 466 |  | 930 | $_[0]->{"\0_s"}->{_r} .= _scrub_str(@_); | 
| 324 |  |  |  |  |  |  | } | 
| 325 |  |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  | sub _optimize { | 
| 327 | 65 |  |  | 65 |  | 108 | my ($self) = @_; | 
| 328 |  |  |  |  |  |  |  | 
| 329 | 65 |  |  |  |  | 125 | my (@ignore_elements) = grep { not $self->{"_$_"} } qw(script style); | 
|  | 130 |  |  |  |  | 374 |  | 
| 330 | 65 |  |  |  |  | 226 | $self->{_p}->ignore_elements(@ignore_elements);    # if @ is empty, we reset ;) | 
| 331 |  |  |  |  |  |  |  | 
| 332 | 65 | 100 |  |  |  | 149 | return unless $self->{_optimize}; | 
| 333 |  |  |  |  |  |  |  | 
| 334 |  |  |  |  |  |  | #sub allow | 
| 335 |  |  |  |  |  |  | #    return unless $self->{_optimize}; # till I figure it out (huh) | 
| 336 |  |  |  |  |  |  |  | 
| 337 | 26 | 100 |  |  |  | 60 | if ( $self->{_rules}{'*'} ) {    # default allow | 
| 338 | 8 |  |  |  |  | 19 | $self->{_p}->report_tags();    # so clear it | 
| 339 |  |  |  |  |  |  | } | 
| 340 |  |  |  |  |  |  | else { | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  | my (@reports) = | 
| 343 |  |  |  |  |  |  | grep {                     # report only tags we want | 
| 344 | 76 |  |  |  |  | 111 | $self->{_rules}{$_} | 
| 345 | 18 |  |  |  |  | 27 | } keys %{ $self->{_rules} }; | 
|  | 18 |  |  |  |  | 55 |  | 
| 346 |  |  |  |  |  |  |  | 
| 347 |  |  |  |  |  |  | $self->{_p}->report_tags(      # default deny, so optimize | 
| 348 |  |  |  |  |  |  | @reports | 
| 349 | 18 | 100 |  |  |  | 77 | ) if @reports; | 
| 350 |  |  |  |  |  |  | } | 
| 351 |  |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  | # sub deny | 
| 353 |  |  |  |  |  |  | #    return unless $self->{_optimize}; # till I figure it out (huh) | 
| 354 |  |  |  |  |  |  | my (@ignores) = | 
| 355 | 26 |  |  |  |  | 44 | grep { not $self->{_rules}{$_} } grep { $_ ne '*' } keys %{ $self->{_rules} }; | 
|  | 62 |  |  |  |  | 96 |  | 
|  | 88 |  |  |  |  | 144 |  | 
|  | 26 |  |  |  |  | 323 |  | 
| 356 |  |  |  |  |  |  |  | 
| 357 |  |  |  |  |  |  | $self->{_p}->ignore_tags(    # always ignore stuff we don't want | 
| 358 |  |  |  |  |  |  | @ignores | 
| 359 | 26 | 100 |  |  |  | 91 | ) if @ignores; | 
| 360 |  |  |  |  |  |  |  | 
| 361 | 26 |  |  |  |  | 46 | $self->{_optimize} = 0; | 
| 362 | 26 |  |  |  |  | 43 | return; | 
| 363 |  |  |  |  |  |  | } | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  | 1; | 
| 366 |  |  |  |  |  |  |  | 
| 367 |  |  |  |  |  |  | #print sprintf q[ '%-12s => %s,], "$_'", $h{$_} for sort keys %h;# perl! | 
| 368 |  |  |  |  |  |  | #perl -ne"chomp;print $_;print qq'\t\t# test ', ++$a if /ok\(/;print $/" test.pl >test2.pl | 
| 369 |  |  |  |  |  |  | #perl -ne"chomp;print $_;if( /ok\(/ ){s/\#test \d+$//;print qq'\t\t# test ', ++$a }print $/" test.pl >test2.pl | 
| 370 |  |  |  |  |  |  | #perl -ne"chomp;if(/ok\(/){s/# test .*$//;print$_,qq'\t\t# test ',++$a}else{print$_}print$/" test.pl >test2.pl | 
| 371 |  |  |  |  |  |  |  | 
| 372 |  |  |  |  |  |  | __END__ |