| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
use strict; |
|
2
|
6
|
|
|
6
|
|
717448
|
use warnings; |
|
|
6
|
|
|
|
|
41
|
|
|
|
6
|
|
|
|
|
141
|
|
|
3
|
6
|
|
|
6
|
|
24
|
use Encode qw(is_utf8 decode_utf8); |
|
|
6
|
|
|
|
|
11
|
|
|
|
6
|
|
|
|
|
174
|
|
|
4
|
6
|
|
|
6
|
|
455
|
use Mojo::DOM; |
|
|
6
|
|
|
|
|
7568
|
|
|
|
6
|
|
|
|
|
272
|
|
|
5
|
6
|
|
|
6
|
|
2136
|
|
|
|
6
|
|
|
|
|
749961
|
|
|
|
6
|
|
|
|
|
1216
|
|
|
6
|
|
|
|
|
|
|
our @EXPORT = ( |
|
7
|
|
|
|
|
|
|
'u', |
|
8
|
|
|
|
|
|
|
'normalize_whitespace', |
|
9
|
|
|
|
|
|
|
'html2text', |
|
10
|
|
|
|
|
|
|
'is_empty', |
|
11
|
|
|
|
|
|
|
'parse_dateline_ymdhms', |
|
12
|
|
|
|
|
|
|
'reformat_dateline', |
|
13
|
|
|
|
|
|
|
'remove_control_characters', |
|
14
|
|
|
|
|
|
|
); |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
defined($_[0]) or return undef; |
|
17
|
|
|
|
|
|
|
|
|
18
|
43
|
50
|
|
43
|
0
|
16736
|
my $v = "".$_[0]; |
|
19
|
|
|
|
|
|
|
return is_utf8($v) ? $v : decode_utf8($v); |
|
20
|
43
|
|
|
|
|
86
|
} |
|
21
|
43
|
100
|
|
|
|
200
|
|
|
22
|
|
|
|
|
|
|
(! defined($_[0])) || $_[0] eq ''; |
|
23
|
|
|
|
|
|
|
} |
|
24
|
|
|
|
|
|
|
|
|
25
|
0
|
0
|
|
0
|
0
|
0
|
local $_ = $_[0]; |
|
26
|
|
|
|
|
|
|
s/\h+/ /g; |
|
27
|
|
|
|
|
|
|
s/\r\n/\n/g; |
|
28
|
|
|
|
|
|
|
s/\A\s+//; |
|
29
|
2
|
|
|
2
|
0
|
4660
|
s/\s+\z//; |
|
30
|
2
|
|
|
|
|
13
|
return $_; |
|
31
|
2
|
|
|
|
|
4
|
} |
|
32
|
2
|
|
|
|
|
5
|
|
|
33
|
2
|
|
|
|
|
7
|
local $_ = $_[0]; |
|
34
|
2
|
|
|
|
|
6
|
s/\p{PosixCntrl}//g; |
|
35
|
|
|
|
|
|
|
return $_; |
|
36
|
|
|
|
|
|
|
} |
|
37
|
|
|
|
|
|
|
|
|
38
|
0
|
|
|
0
|
0
|
0
|
my $html = $_[0]; |
|
39
|
0
|
|
|
|
|
0
|
|
|
40
|
0
|
|
|
|
|
0
|
my $content_dom = Mojo::DOM->new('<body>' . $html . '</body>'); |
|
41
|
|
|
|
|
|
|
$content_dom->find('br')->map(replace => "\n"); |
|
42
|
|
|
|
|
|
|
$content_dom->find('div,p')->map(append => "\n\n"); |
|
43
|
|
|
|
|
|
|
|
|
44
|
0
|
|
|
0
|
0
|
0
|
my @paragraphs = grep { $_ ne '' } map { remove_control_characters($_) } map { normalize_whitespace($_) } split /\n\n+/, $content_dom->all_text; |
|
45
|
|
|
|
|
|
|
|
|
46
|
0
|
|
|
|
|
0
|
return join "\n\n", @paragraphs; |
|
47
|
0
|
|
|
|
|
0
|
} |
|
48
|
0
|
|
|
|
|
0
|
|
|
49
|
|
|
|
|
|
|
my ($text, $offset) = @_; |
|
50
|
0
|
|
|
|
|
0
|
$offset //= ''; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
51
|
|
|
|
|
|
|
|
|
52
|
0
|
|
|
|
|
0
|
my @t = $text =~ m/([0-9]+)/g; |
|
53
|
|
|
|
|
|
|
return undef unless 3 <= @t; |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
my $format_date = '%04d-%02d-%02d'; |
|
56
|
7
|
|
|
7
|
0
|
6722
|
my $format_time = '%02d:%02d:%02d'; |
|
57
|
7
|
|
50
|
|
|
16
|
|
|
58
|
|
|
|
|
|
|
if (@t == 3) { |
|
59
|
7
|
|
|
|
|
47
|
my $format = $format_date . '%s'; |
|
60
|
7
|
50
|
|
|
|
16
|
return u(sprintf($format, $t[0], $t[1], $t[2], $offset)); |
|
61
|
|
|
|
|
|
|
} |
|
62
|
7
|
|
|
|
|
11
|
|
|
63
|
7
|
|
|
|
|
9
|
$t[5] //= 0; |
|
64
|
|
|
|
|
|
|
$t[6] //= 0; |
|
65
|
7
|
100
|
|
|
|
14
|
|
|
66
|
2
|
|
|
|
|
5
|
my $format = $format_date . 'T' . $format_time . '%s'; |
|
67
|
2
|
|
|
|
|
14
|
return u(sprintf($format, $t[0], $t[1], $t[2], $t[3], $t[4], $t[5], $offset)); |
|
68
|
|
|
|
|
|
|
} |
|
69
|
|
|
|
|
|
|
|
|
70
|
5
|
|
50
|
|
|
22
|
my ($text, $offset) = @_; |
|
71
|
5
|
|
50
|
|
|
16
|
|
|
72
|
|
|
|
|
|
|
$offset //= ''; |
|
73
|
5
|
|
|
|
|
10
|
|
|
74
|
5
|
|
|
|
|
40
|
my @t = $text =~ m/([0-9]+)/g; |
|
75
|
|
|
|
|
|
|
$t[3] //= 23; |
|
76
|
|
|
|
|
|
|
$t[4] //= 59; |
|
77
|
|
|
|
|
|
|
$t[5] //= 59; |
|
78
|
4
|
|
|
4
|
0
|
6994
|
|
|
79
|
|
|
|
|
|
|
return u( |
|
80
|
4
|
|
50
|
|
|
14
|
sprintf( |
|
81
|
|
|
|
|
|
|
'%04d-%02d-%02dT%02d:%02d:%02d%s', |
|
82
|
4
|
|
|
|
|
33
|
$t[0], $t[1], $t[2], $t[3], $t[4], $t[5], $offset |
|
83
|
4
|
|
50
|
|
|
11
|
) |
|
84
|
4
|
|
50
|
|
|
11
|
); |
|
85
|
4
|
|
50
|
|
|
21
|
} |
|
86
|
|
|
|
|
|
|
|
|
87
|
4
|
|
|
|
|
61
|
1; |