line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package CCCP::HTML::Truncate; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
30102
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
47
|
|
4
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
60
|
|
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
441
|
use XML::LibXML; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use Encode qw(); |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.04'; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
$CCCP::HTML::Truncate::enc = 'utf-8'; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# ------------------------ EXTEND XML::LibXML::Element ----------------- |
14
|
|
|
|
|
|
|
# Serialize the node and return it as bytes in the configured output encoding.
#
# The encoding name is taken from $CCCP::HTML::Truncate::enc; when that is
# empty, the owner document's encoding() / actualEncoding() is consulted.
# If no usable encoder can be resolved (unknown name, or a node that has no
# owner document) UTF-8 is used, so the call does not die on an undefined
# encoder object.  Characters the target encoding cannot represent are
# handled with Encode::FB_XMLCREF (XML numeric character references).
#
# Arguments:
#   $node           - the element being serialized (invocant)
#   $actualEncoding - accepted for call compatibility; not used
#
# Returns the encoded markup of $node->toString.
sub XML::LibXML::Element::html {
    my ($node, $actualEncoding) = @_;

    my $enc_name = $CCCP::HTML::Truncate::enc;
    if (!$enc_name) {
        # Only ask the document when no module-level encoding is configured.
        my $doc = $node->ownerDocument;
        $enc_name = $doc->encoding() || $doc->actualEncoding() if $doc;
    }

    # find_encoding() returns undef for names it does not know.
    my $codec = ($enc_name && Encode::find_encoding($enc_name))
        || Encode::find_encoding('UTF-8');

    # Encode from a named scalar: with a CHECK argument Encode is allowed to
    # modify its source string in place.
    my $markup = $node->toString;
    return $codec->encode($markup, Encode::FB_XMLCREF);
}
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# ---------------------------------------- MAIN -------------------------------------------- |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Shared XML::LibXML parser instance, created on first use.
my $lx;

# Build the shared parser once and switch it to silent recovery mode.
# Later calls find the parser already in place and return nothing.
sub _init_parser {
    unless ($lx) {
        $lx = XML::LibXML->new();
        return $lx->recover_silently(1);
    }
    return;
}
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Truncate an HTML fragment to at most $length characters of text while
# keeping the surrounding markup.
#
# Arguments (class method):
#   $html_str - HTML fragment as bytes in $CCCP::HTML::Truncate::enc
#   $length   - maximum number of text characters to keep; the first run of
#               digits in the value is used, a value without digits counts
#               as 0
#   $elips    - marker appended where text is cut off (default "...")
#
# Returns:
#   nothing (undef / empty list) when $html_str is false;
#   ''  when the effective length is 0 or the parsed document has no <body>;
#   the input (with bare ampersands escaped) when its raw length is already
#   below $length;
#   otherwise the truncated markup, encoded by XML::LibXML::Element::html,
#   with the <body> wrapper removed.
sub truncate {
    my ($class, $html_str, $length, $elips) = @_;

    return unless $html_str;

    $elips ||= "...";

    # Take the capture from the match itself: reading $1 after a failed
    # match would pick up a stale value left by an earlier successful match.
    my ($digits) = ($length || 0) =~ /(\d+)/;
    $length = $digits ? $digits + 0 : 0;
    return '' unless $length;

    # Escape ampersands that do not start a named or numeric character
    # reference; existing references are left alone.
    $html_str =~ s/&(?![A-Za-z][A-Za-z0-9]*;|#[0-9]+;|#[xX][0-9A-Fa-f]+;)/&amp;/g;

    # Cheap shortcut: markup shorter than the limit cannot contain more
    # text than the limit.
    return $html_str if length($html_str) < $length;

    # find_encoding() returns undef for unknown names; fall back to UTF-8
    # instead of calling a method on undef.
    my $codec = ($CCCP::HTML::Truncate::enc
            && Encode::find_encoding($CCCP::HTML::Truncate::enc))
        || Encode::find_encoding('UTF-8');
    $html_str = $codec->decode($html_str);
    $elips    = $codec->decode($elips);

    $class->_init_parser();
    my $root   = $lx->parse_html_string($html_str);
    my $doc_el = $root ? $root->documentElement() : undef;
    return '' unless $doc_el;
    my ($body) = $doc_el->findnodes('//body');
    return '' unless $body;

    # Only text inside <body> counts towards the limit.
    my @text_nodes = $body->findnodes('.//text()');

    for my $idx (0 .. $#text_nodes) {
        my $text = $text_nodes[$idx];

        if ($length > 0) {
            my $str  = $text->to_literal;
            my $kept = substr($str, 0, $length);
            $length -= length $str;

            if ($length < 1) {
                # The limit is reached in this node.  Mark the cut only if
                # something is really dropped: either this node was
                # shortened, or non-blank text follows it.
                my $dropped = length($kept) < length($str)
                    ? 1
                    : scalar grep { $_->to_literal =~ /\S/ }
                          @text_nodes[ $idx + 1 .. $#text_nodes ];
                $kept .= $elips if $dropped;
            }

            $text->setData($kept);
            next;
        }

        # Past the limit: drop the text, and drop its parent element as
        # well when this is the only text the parent contains.
        my $parent = $text->parentNode;
        if ($parent->isSameNode($body)) {
            $text->unbindNode();
        }
        else {
            my @parent_texts = $parent->findnodes('.//text()');
            if (@parent_texts > 1) {
                $text->unbindNode();
            }
            else {
                $parent->unbindNode();
            }
        }
    }

    my $ret = $body->html();

    # Remove the <body> wrapper on both sides; a <p> directly inside it is
    # stripped together with its closing tag, mirroring the closing side.
    $ret =~ s/^<body[^>]*>(?:<p>)?|(?:<\/p>)?<\/body>$//igm;
    return $ret;
}
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
1; |
102
|
|
|
|
|
|
|
__END__ |