line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::AsText::Fix; |
2
|
|
|
|
|
|
|
# ABSTRACT: extends HTML::Element::as_text() to render text properly |
3
|
|
|
|
|
|
|
|
4
|
3
|
|
|
3
|
|
124478
|
use strict; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
136
|
|
5
|
3
|
|
|
3
|
|
17
|
use warnings; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
97
|
|
6
|
|
|
|
|
|
|
|
7
|
3
|
|
|
3
|
|
1107
|
use HTML::Tree; |
|
3
|
|
|
|
|
38087
|
|
|
3
|
|
|
|
|
78
|
|
8
|
3
|
|
|
3
|
|
2974
|
use Monkey::Patch qw(:all); |
|
3
|
|
|
|
|
37862
|
|
|
3
|
|
|
|
|
2191
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our $VERSION = '0.003'; # VERSION |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Lookup table of tag names that as_text() treats as block-level: after the
# content of any of these elements it appends the line-feed string instead
# of the inline separator.  Only key existence is tested; every value is 1.
my $block_tags = do {
    my @names = qw(
        p
        h1 h2 h3 h4 h5 h6
        dl dt dd
        ol ul li
        dir
        address
        blockquote
        center
        del
        div
        hr
        ins
        noscript script
        pre
    );

    my %is_block;
    @is_block{@names} = (1) x @names;
    \%is_block;
};

# Shared empty array reference, used by as_text() as the content list of
# elements that have no '_content' slot.
my $nillio = [];
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# as_text( $element, %options )
#
# Iteratively walks the tree rooted at $element and returns its text content
# as one string.  Nodes are read directly through their '_tag' and
# '_content' hash slots; non-reference content items are taken as text and
# undefined items are skipped.
#
# A separator is appended after the content of every traversed element:
#   * block-level tags (keys of $block_tags) and 'br' -> line-feed string
#   * any other tag                                   -> zero-width-space string
# The content of 'style', 'script' and (with skip_dels) 'del' elements is
# dropped together with its separator.
#
# Options:
#   skip_dels   - true to omit the content of 'del' elements (default 0)
#   lf_char     - line-feed string; defaults to $/, or "\n" when $/ is
#                 undefined or a reference
#   zwsp_char   - separator for non-block elements; defaults to U+200B
#   trim        - true to strip leading/trailing whitespace and to replace
#                 each NBSP / extra character with a plain space
#   extra_chars - text interpolated verbatim into the trim character classes
#
# Returns the accumulated text.
sub as_text {
    my ( $this, %options ) = @_;

    my $skip_dels = $options{'skip_dels'} || 0;

    # $/ is only usable as a separator when it is a plain defined string.
    # In slurp mode it is undef and in record mode it is a reference; either
    # would be pushed onto the traversal pile below and be mistaken for a
    # missing node or for an element, so fall back to "\n" in those cases.
    my $lf = $options{'lf_char'};
    if ( !defined $lf ) {
        $lf = ( defined $/ && !ref $/ ) ? $/ : "\n";
    }

    my $zwsp = defined( $options{'zwsp_char'} )
        ? $options{'zwsp_char'}
        : "\x{200b}";    # zero-width space (ZWSP)

    # Pending items in document order: element refs, text, or undef.
    my @pile = ($this);
    my $text = '';

    while (@pile) {
        if ( !defined( $pile[0] ) ) {

            # Discard the undefined entry.  Leaving it at the head of the
            # pile would make this loop spin forever.
            shift @pile;
        }
        elsif ( !ref( $pile[0] ) ) {

            # Plain text: append it to the result.
            $text .= shift @pile;
        }
        else {

            # An element: queue its children followed by its separator.
            my $node = shift @pile;
            my $tag  = $node->{'_tag'};
            my @rest = @{ $node->{'_content'} || $nillio };

            if ( exists $block_tags->{$tag} or $tag eq 'br' ) {
                push @rest, $lf;
            }
            else {
                push @rest, $zwsp;
            }

            # Skipped elements contribute neither content nor separator.
            unshift @pile, @rest
                unless $tag eq 'style'
                    or $tag eq 'script'
                    or ( $skip_dels and $tag eq 'del' );
        }
    }

    if ( $options{'trim'} ) {

        # extra_chars goes into the character classes unescaped, so it is
        # parsed as regex character-class syntax rather than literal text.
        my $extra_chars = $options{'extra_chars'} || '';
        $text =~ s/[\n\r\f\t\x{a0}${extra_chars}\x{20}]+$//sx;
        $text =~ s/^[\n\r\f\t\x{a0}${extra_chars}\x{20}]+//sx;

        # Each remaining NBSP / extra character / space becomes one space.
        $text =~ s/[\x{a0}${extra_chars}\x{20}]/ /gx;
    }

    return $text;
}
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# global( %options )
#
# Patches 'as_text' in package HTML::Element (via patch_package) so that
# calls are routed to this module's as_text().  The %options given here are
# appended after the arguments of each call, so for a key supplied in both
# places the value given to global() is the one as_text() sees.
#
# Returns whatever patch_package returns.
# NOTE(review): presumably a handle whose lifetime controls the patch --
# confirm against the Monkey::Patch documentation.
sub global {
    my %fixed_options = @_;

    my $wrapper = sub {

        # The first argument is the original method; it is not used.
        my ( $original, @call_args ) = @_;
        return as_text( @call_args, %fixed_options );
    };

    return patch_package( 'HTML::Element', as_text => $wrapper );
}
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# object( $obj, %options )
#
# Patches 'as_text' on the single object $obj (via patch_object) so that
# calls are routed to this module's as_text().  The %options given here are
# appended after the arguments of each call, so for a key supplied in both
# places the value given to object() is the one as_text() sees.
#
# Returns whatever patch_object returns.
# NOTE(review): presumably a handle whose lifetime controls the patch --
# confirm against the Monkey::Patch documentation.
sub object {
    my ( $target, %fixed_options ) = @_;

    my $wrapper = sub {

        # The first argument is the original method; it is not used.
        my ( $original, $self, @call_args ) = @_;
        return as_text( $self, @call_args, %fixed_options );
    };

    return patch_object( $target, as_text => $wrapper );
}
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
1; |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
__END__ |