line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
use utf8; |
2
|
1
|
|
|
1
|
|
7
|
use Moo; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
5
|
|
3
|
1
|
|
|
1
|
|
26
|
extends 'NewsExtractor::GenericExtractor'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
4
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
use Importer 'NewsExtractor::TextUtil' => 'normalize_whitespace', 'u'; |
6
|
1
|
|
|
1
|
|
269
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
11
|
|
7
|
|
|
|
|
|
|
# Method modifier: prune non-article markup from the DOM just before
# content_text() is invoked.
before 'content_text', sub {
    my $self = shift;

    # Remove every <figure> element carrying the "fbyt-block" class.
    $self->dom->find('figure.fbyt-block')->map('remove');

    # Look at the last <p> that is a direct child of #penci-post-entry-inner;
    # nothing more to do when there is no such paragraph.
    my $last_paragraph = $self->dom->at('#penci-post-entry-inner > p:last-of-type')
        or return;

    # Drop that paragraph when its inner markup begins with "看更多<br>".
    $last_paragraph->remove()
        if $last_paragraph->content() =~ /\A看更多<br>/;
};
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my $self = $_[0]; |
18
|
|
|
|
|
|
|
my $ret; |
19
|
0
|
|
|
0
|
0
|
|
if (my $el = $self->dom->at('#penci-post-entry-inner > p:nth-child(1)')) { |
20
|
0
|
|
|
|
|
|
if ($el->content() =~ /文字撰稿:(?<name> \p{Letter}+ )<br>/x) { |
21
|
0
|
0
|
|
|
|
|
($ret) = $+{"name"}; |
22
|
0
|
0
|
|
|
|
|
} |
23
|
1
|
|
|
1
|
|
678
|
|
|
1
|
|
|
|
|
370
|
|
|
1
|
|
|
|
|
66
|
|
|
0
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
} |
25
|
|
|
|
|
|
|
return $ret; |
26
|
|
|
|
|
|
|
} |
27
|
0
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
1; |