line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
use strict; |
2
|
6
|
|
|
6
|
|
717448
|
use warnings; |
|
6
|
|
|
|
|
41
|
|
|
6
|
|
|
|
|
141
|
|
3
|
6
|
|
|
6
|
|
24
|
use Encode qw(is_utf8 decode_utf8); |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
174
|
|
4
|
6
|
|
|
6
|
|
455
|
use Mojo::DOM; |
|
6
|
|
|
|
|
7568
|
|
|
6
|
|
|
|
|
272
|
|
5
|
6
|
|
|
6
|
|
2136
|
|
|
6
|
|
|
|
|
749961
|
|
|
6
|
|
|
|
|
1216
|
|
6
|
|
|
|
|
|
|
our @EXPORT = ( |
7
|
|
|
|
|
|
|
'u', |
8
|
|
|
|
|
|
|
'normalize_whitespace', |
9
|
|
|
|
|
|
|
'html2text', |
10
|
|
|
|
|
|
|
'is_empty', |
11
|
|
|
|
|
|
|
'parse_dateline_ymdhms', |
12
|
|
|
|
|
|
|
'reformat_dateline', |
13
|
|
|
|
|
|
|
'remove_control_characters', |
14
|
|
|
|
|
|
|
); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
defined($_[0]) or return undef; |
17
|
|
|
|
|
|
|
|
18
|
43
|
50
|
|
43
|
0
|
16736
|
my $v = "".$_[0]; |
19
|
|
|
|
|
|
|
return is_utf8($v) ? $v : decode_utf8($v); |
20
|
43
|
|
|
|
|
86
|
} |
21
|
43
|
100
|
|
|
|
200
|
|
22
|
|
|
|
|
|
|
(! defined($_[0])) || $_[0] eq ''; |
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
|
25
|
0
|
0
|
|
0
|
0
|
0
|
local $_ = $_[0]; |
26
|
|
|
|
|
|
|
s/\h+/ /g; |
27
|
|
|
|
|
|
|
s/\r\n/\n/g; |
28
|
|
|
|
|
|
|
s/\A\s+//; |
29
|
2
|
|
|
2
|
0
|
4660
|
s/\s+\z//; |
30
|
2
|
|
|
|
|
13
|
return $_; |
31
|
2
|
|
|
|
|
4
|
} |
32
|
2
|
|
|
|
|
5
|
|
33
|
2
|
|
|
|
|
7
|
local $_ = $_[0]; |
34
|
2
|
|
|
|
|
6
|
s/\p{PosixCntrl}//g; |
35
|
|
|
|
|
|
|
return $_; |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
|
38
|
0
|
|
|
0
|
0
|
0
|
my $html = $_[0]; |
39
|
0
|
|
|
|
|
0
|
|
40
|
0
|
|
|
|
|
0
|
my $content_dom = Mojo::DOM->new('<body>' . $html . '</body>'); |
41
|
|
|
|
|
|
|
$content_dom->find('br')->map(replace => "\n"); |
42
|
|
|
|
|
|
|
$content_dom->find('div,p')->map(append => "\n\n"); |
43
|
|
|
|
|
|
|
|
44
|
0
|
|
|
0
|
0
|
0
|
my @paragraphs = grep { $_ ne '' } map { remove_control_characters($_) } map { normalize_whitespace($_) } split /\n\n+/, $content_dom->all_text; |
45
|
|
|
|
|
|
|
|
46
|
0
|
|
|
|
|
0
|
return join "\n\n", @paragraphs; |
47
|
0
|
|
|
|
|
0
|
} |
48
|
0
|
|
|
|
|
0
|
|
49
|
|
|
|
|
|
|
my ($text, $offset) = @_; |
50
|
0
|
|
|
|
|
0
|
$offset //= ''; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
51
|
|
|
|
|
|
|
|
52
|
0
|
|
|
|
|
0
|
my @t = $text =~ m/([0-9]+)/g; |
53
|
|
|
|
|
|
|
return undef unless 3 <= @t; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
my $format_date = '%04d-%02d-%02d'; |
56
|
7
|
|
|
7
|
0
|
6722
|
my $format_time = '%02d:%02d:%02d'; |
57
|
7
|
|
50
|
|
|
16
|
|
58
|
|
|
|
|
|
|
if (@t == 3) { |
59
|
7
|
|
|
|
|
47
|
my $format = $format_date . '%s'; |
60
|
7
|
50
|
|
|
|
16
|
return u(sprintf($format, $t[0], $t[1], $t[2], $offset)); |
61
|
|
|
|
|
|
|
} |
62
|
7
|
|
|
|
|
11
|
|
63
|
7
|
|
|
|
|
9
|
$t[5] //= 0; |
64
|
|
|
|
|
|
|
$t[6] //= 0; |
65
|
7
|
100
|
|
|
|
14
|
|
66
|
2
|
|
|
|
|
5
|
my $format = $format_date . 'T' . $format_time . '%s'; |
67
|
2
|
|
|
|
|
14
|
return u(sprintf($format, $t[0], $t[1], $t[2], $t[3], $t[4], $t[5], $offset)); |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
|
70
|
5
|
|
50
|
|
|
22
|
my ($text, $offset) = @_; |
71
|
5
|
|
50
|
|
|
16
|
|
72
|
|
|
|
|
|
|
$offset //= ''; |
73
|
5
|
|
|
|
|
10
|
|
74
|
5
|
|
|
|
|
40
|
my @t = $text =~ m/([0-9]+)/g; |
75
|
|
|
|
|
|
|
$t[3] //= 23; |
76
|
|
|
|
|
|
|
$t[4] //= 59; |
77
|
|
|
|
|
|
|
$t[5] //= 59; |
78
|
4
|
|
|
4
|
0
|
6994
|
|
79
|
|
|
|
|
|
|
return u( |
80
|
4
|
|
50
|
|
|
14
|
sprintf( |
81
|
|
|
|
|
|
|
'%04d-%02d-%02dT%02d:%02d:%02d%s', |
82
|
4
|
|
|
|
|
33
|
$t[0], $t[1], $t[2], $t[3], $t[4], $t[5], $offset |
83
|
4
|
|
50
|
|
|
11
|
) |
84
|
4
|
|
50
|
|
|
11
|
); |
85
|
4
|
|
50
|
|
|
21
|
} |
86
|
|
|
|
|
|
|
|
87
|
4
|
|
|
|
|
61
|
1; |