line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::Split; |
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
1527
|
use strict; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
126
|
|
4
|
4
|
|
|
4
|
|
20
|
use warnings; |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
116
|
|
5
|
4
|
|
|
4
|
|
98
|
use 5.008001; |
|
4
|
|
|
|
|
19
|
|
|
4
|
|
|
|
|
205
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = '0.04'; |
8
|
|
|
|
|
|
|
|
9
|
4
|
|
|
4
|
|
3968
|
use Encode; |
|
4
|
|
|
|
|
64642
|
|
|
4
|
|
|
|
|
387
|
|
10
|
4
|
|
|
4
|
|
2134158
|
use HTML::Parser; |
|
4
|
|
|
|
|
40837
|
|
|
4
|
|
|
|
|
3982
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my %_is_empty_tag = map { $_ => 1 } qw( br hr img br/ hr/ ); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub split { |
15
|
16
|
|
|
16
|
1
|
118
|
my $class = shift; |
16
|
16
|
|
|
|
|
94
|
my %param = @_; |
17
|
|
|
|
|
|
|
|
18
|
16
|
50
|
|
|
|
64
|
my $html = $param{html} or return; |
19
|
16
|
50
|
|
|
|
70
|
my $max_length = $param{length} or return ($html); |
20
|
16
|
|
100
|
|
|
76
|
my $extend_tags = $param{extend_tags} || []; |
21
|
|
|
|
|
|
|
|
22
|
16
|
|
|
|
|
72
|
my $is_utf8 = Encode::is_utf8($html); |
23
|
|
|
|
|
|
|
|
24
|
16
|
50
|
|
|
|
73
|
Encode::_utf8_on($html) unless $is_utf8; |
25
|
16
|
100
|
|
|
|
82
|
return ( $param{html} ) if length $html <= $max_length; |
26
|
|
|
|
|
|
|
|
27
|
15
|
|
|
|
|
20
|
my (@pages, @tags, $last_tag, $forwarded_tags); |
28
|
15
|
|
|
|
|
27
|
my $page = ''; |
29
|
15
|
|
|
|
|
23
|
my $find_end_tag = ''; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
## page generator |
32
|
|
|
|
|
|
|
my $create_page = sub { |
33
|
|
|
|
|
|
|
# append unclosed tags forwarded from previous page to beginning of page. |
34
|
43
|
100
|
|
43
|
|
96
|
$page = $forwarded_tags . $page if $forwarded_tags; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# append unclosed tags to the end of page. |
37
|
43
|
|
|
|
|
84
|
$page .= join '', map { ''.$_->{tagname}.'>' } reverse @tags; |
|
20
|
|
|
|
|
81
|
|
38
|
|
|
|
|
|
|
|
39
|
43
|
100
|
|
|
|
86
|
return unless $page; |
40
|
40
|
|
|
|
|
619
|
push @pages, $page; |
41
|
40
|
|
|
|
|
56
|
$forwarded_tags = join '', map { $_->{text} } @tags; |
|
20
|
|
|
|
|
46
|
|
42
|
40
|
|
|
|
|
95
|
$page = ''; |
43
|
15
|
|
|
|
|
79
|
}; |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
my $start_tag_handler = sub { |
46
|
30
|
|
|
30
|
|
61
|
my ($p, $tagname, $text) = @_; |
47
|
30
|
100
|
|
|
|
259
|
if ($find_end_tag) { |
48
|
3
|
100
|
|
|
|
10
|
unless ($_is_empty_tag{$tagname}) { |
49
|
1
|
|
|
|
|
4
|
push @tags, $last_tag = { tagname => $tagname, text => $text }; |
50
|
|
|
|
|
|
|
} |
51
|
3
|
|
|
|
|
7
|
$page .= $text; |
52
|
3
|
|
|
|
|
12
|
return; |
53
|
|
|
|
|
|
|
} |
54
|
27
|
100
|
|
|
|
89
|
$page .= $text if $_is_empty_tag{$tagname}; |
55
|
27
|
100
|
66
|
|
|
132
|
if (length $page.$text > $max_length && !$find_end_tag) { |
56
|
8
|
|
|
|
|
29
|
$create_page->(); |
57
|
|
|
|
|
|
|
} |
58
|
27
|
100
|
|
|
|
74
|
unless ($_is_empty_tag{$tagname}) { |
59
|
23
|
|
|
|
|
82
|
push @tags, $last_tag = { tagname => $tagname, text => $text }; |
60
|
23
|
|
|
|
|
47
|
$page .= $text; |
61
|
|
|
|
|
|
|
} |
62
|
27
|
100
|
|
|
|
143
|
$find_end_tag = $tagname if $tagname eq 'a'; |
63
|
15
|
|
|
|
|
90
|
}; |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
my $end_tag_handler = sub { |
66
|
23
|
|
|
23
|
|
48
|
my ($p, $tagname, $text) = @_; |
67
|
23
|
50
|
33
|
|
|
123
|
return unless $last_tag && $last_tag->{tagname} eq $tagname; |
68
|
23
|
|
|
|
|
30
|
pop @tags; |
69
|
23
|
|
|
|
|
187
|
$last_tag = $tags[-1]; |
70
|
23
|
|
|
|
|
52
|
$page .= $text; |
71
|
23
|
100
|
|
|
|
49
|
$find_end_tag = '' if $find_end_tag eq $tagname; |
72
|
23
|
100
|
100
|
|
|
107
|
if (length $page > $max_length && !$find_end_tag) { |
73
|
9
|
|
|
|
|
14
|
$create_page->(); |
74
|
|
|
|
|
|
|
} |
75
|
15
|
|
|
|
|
117
|
}; |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
my $default_handler = sub { |
78
|
61
|
|
|
61
|
|
91
|
my ($p, $text) = @_; |
79
|
61
|
|
|
|
|
100
|
my $src = $page . $text; |
80
|
61
|
100
|
|
|
|
120
|
if ($find_end_tag) { |
81
|
4
|
|
|
|
|
6
|
$page = $src; |
82
|
4
|
|
|
|
|
16
|
return; |
83
|
|
|
|
|
|
|
} |
84
|
57
|
|
|
|
|
151
|
while (length $src > $max_length) { |
85
|
11
|
|
|
|
|
25
|
$page = substr $src, 0, $max_length; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
## find indivisible extend tag |
88
|
11
|
|
|
|
|
15
|
my $over = 0; |
89
|
11
|
|
|
|
|
26
|
for my $tag (@$extend_tags) { |
90
|
3
|
50
|
|
|
|
10
|
my $full_re = $tag->{full} or next; |
91
|
3
|
50
|
|
|
|
8
|
my $begin_re = $tag->{begin} or next; |
92
|
3
|
50
|
|
|
|
9
|
my $end_re = $tag->{end} or next; |
93
|
3
|
50
|
|
|
|
68
|
if (my ($first) = $page =~ /($begin_re)$/) { |
94
|
3
|
|
|
|
|
7
|
my $next = substr $src, $max_length; |
95
|
3
|
50
|
|
|
|
45
|
if (my ($second) = $next =~ /^($end_re)/) { |
96
|
3
|
|
|
|
|
5
|
my $may_have_tag = $first.$second; |
97
|
3
|
100
|
|
|
|
91
|
if ($may_have_tag =~ /^$full_re$/) { |
98
|
2
|
|
|
|
|
6
|
$page .= $second; |
99
|
2
|
|
|
|
|
127
|
$over = length $second; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
11
|
|
|
|
|
29
|
$create_page->(); |
106
|
11
|
|
|
|
|
44
|
$src = substr $src, $max_length + $over; |
107
|
|
|
|
|
|
|
} |
108
|
57
|
|
|
|
|
263
|
$page = $src; |
109
|
15
|
|
|
|
|
110
|
}; |
110
|
|
|
|
|
|
|
|
111
|
15
|
|
|
|
|
128
|
my $p = HTML::Parser->new( |
112
|
|
|
|
|
|
|
api_version => 3, |
113
|
|
|
|
|
|
|
start_h => [ $start_tag_handler, "self,tagname,text", ], |
114
|
|
|
|
|
|
|
end_h => [ $end_tag_handler, "self,tagname,text", ], |
115
|
|
|
|
|
|
|
default_h => [ $default_handler, "self,text", ], |
116
|
|
|
|
|
|
|
); |
117
|
15
|
|
|
|
|
1176
|
$p->parse($html); |
118
|
15
|
|
|
|
|
77
|
$p->eof; |
119
|
15
|
|
|
|
|
30
|
$create_page->(); |
120
|
|
|
|
|
|
|
|
121
|
15
|
50
|
|
|
|
32
|
unless ($is_utf8) { |
122
|
15
|
|
|
|
|
93
|
Encode::_utf8_off($_) for @pages; |
123
|
|
|
|
|
|
|
} |
124
|
15
|
|
|
|
|
1730
|
return @pages; |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
sub new { |
128
|
0
|
|
|
0
|
1
|
|
my $class = shift; |
129
|
0
|
|
|
|
|
|
my %param = @_; |
130
|
|
|
|
|
|
|
|
131
|
0
|
|
|
|
|
|
warn "This method will be depricated. Please use HTML::Split::Pager->new instead."; |
132
|
0
|
|
|
|
|
|
require HTML::Split::Pager; |
133
|
0
|
|
|
|
|
|
return HTML::Split::Pager->new(%param); |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
1; |
137
|
|
|
|
|
|
|
__END__ |