line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Sisimai::String; |
2
|
83
|
|
|
83
|
|
56477
|
use feature ':5.10'; |
|
83
|
|
|
|
|
132
|
|
|
83
|
|
|
|
|
6141
|
|
3
|
83
|
|
|
83
|
|
465
|
use strict; |
|
83
|
|
|
|
|
166
|
|
|
83
|
|
|
|
|
1423
|
|
4
|
83
|
|
|
83
|
|
324
|
use warnings; |
|
83
|
|
|
|
|
154
|
|
|
83
|
|
|
|
|
1890
|
|
5
|
83
|
|
|
83
|
|
40236
|
use Encode; |
|
83
|
|
|
|
|
723381
|
|
|
83
|
|
|
|
|
5515
|
|
6
|
83
|
|
|
83
|
|
35753
|
use Digest::SHA; |
|
83
|
|
|
|
|
217998
|
|
|
83
|
|
|
|
|
6214
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
my $EncodingsC = [qw/big5-eten gb2312/]; |
9
|
|
|
|
|
|
|
my $EncodingsE = [qw/iso-8859-1/]; |
10
|
|
|
|
|
|
|
my $EncodingsJ = [qw/7bit-jis iso-2022-jp euc-jp shiftjis/]; |
11
|
83
|
|
|
83
|
|
36177
|
use Encode::Guess; Encode::Guess->add_suspects(@$EncodingsC, @$EncodingsE, @$EncodingsJ); |
|
83
|
|
|
|
|
284631
|
|
|
83
|
|
|
|
|
283
|
|
12
|
17
|
|
|
17
|
0
|
188
|
sub encodenames { return [@$EncodingsC, @$EncodingsE, @$EncodingsJ] }; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub token { |
15
|
|
|
|
|
|
|
# Create the message token from an addresser and a recipient |
16
|
|
|
|
|
|
|
# @param [String] addr1 A sender's email address |
17
|
|
|
|
|
|
|
# @param [String] addr2 A recipient's email address |
18
|
|
|
|
|
|
|
# @param [Integer] epoch Machine time of the email bounce |
19
|
|
|
|
|
|
|
# @return [String] Message token(SHA-1 hex digest) or empty string |
20
|
|
|
|
|
|
|
# if any argument is missing |
21
|
|
|
|
|
|
|
# @see http://en.wikipedia.org/wiki/ASCII |
22
|
|
|
|
|
|
|
# @see https://metacpan.org/pod/Digest::SHA |
23
|
2938
|
|
50
|
2938
|
1
|
66043
|
my $class = shift || return ''; |
24
|
2938
|
|
100
|
|
|
5224
|
my $addr1 = shift || return ''; |
25
|
2937
|
|
100
|
|
|
5363
|
my $addr2 = shift || return ''; |
26
|
2936
|
|
100
|
|
|
5680
|
my $epoch = shift // return ''; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Format: STX(0x02) Sender-Address RS(0x1e) Recipient-Address RS(0x1e) Epoch ETX(0x03) |
29
|
2935
|
|
|
|
|
38970
|
return Digest::SHA::sha1_hex(sprintf("\x02%s\x1e%s\x1e%d\x03", lc $addr1, lc $addr2, $epoch)); |
30
|
|
|
|
|
|
|
} |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
sub is_8bit { |
33
|
|
|
|
|
|
|
# The argument is 8-bit text or not |
34
|
|
|
|
|
|
|
# @param [Scalar] argv1 Any string to be checked(reference to string) |
35
|
|
|
|
|
|
|
# @return [Integer] 0: ASCII Characters only |
36
|
|
|
|
|
|
|
# 1: Including 8-bit character |
37
|
2685
|
|
|
2685
|
1
|
3936
|
my $class = shift; |
38
|
2685
|
|
50
|
|
|
5043
|
my $argv1 = shift // return undef; |
39
|
|
|
|
|
|
|
|
40
|
2685
|
50
|
|
|
|
6858
|
return undef unless ref $argv1 eq 'SCALAR'; |
41
|
2685
|
100
|
|
|
|
9643
|
return 1 unless $$argv1 =~ /\A[\x00-\x7f]+\z/; |
42
|
2645
|
|
|
|
|
7206
|
return 0; |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub sweep { |
46
|
|
|
|
|
|
|
# Clean the string out |
47
|
|
|
|
|
|
|
# @param [String] argv1 String to be cleaned |
48
|
|
|
|
|
|
|
# @return [Scalar] Cleaned out string |
49
|
|
|
|
|
|
|
# @example Clean up text |
50
|
|
|
|
|
|
|
# sweep(' neko ') #=> 'neko' |
51
|
3085
|
|
|
3085
|
1
|
6012
|
my $class = shift; |
52
|
3085
|
|
100
|
|
|
5908
|
my $argv1 = shift // return undef; |
53
|
|
|
|
|
|
|
|
54
|
3084
|
|
|
|
|
4699
|
chomp $argv1; |
55
|
3084
|
|
|
|
|
7188
|
$argv1 =~ y/ //s; |
56
|
3084
|
|
|
|
|
5587
|
$argv1 =~ y/\t//d; |
57
|
3084
|
100
|
|
|
|
9181
|
$argv1 =~ s/\A //g if index($argv1, ' ') == 0; |
58
|
3084
|
100
|
|
|
|
8530
|
$argv1 =~ s/ \z//g if substr($argv1, -1, 1) eq ' '; |
59
|
3084
|
|
|
|
|
6188
|
$argv1 =~ s/ [-]{2,}[^ \t].+\z//; |
60
|
3084
|
|
|
|
|
6946
|
return $argv1; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub to_plain { |
64
|
|
|
|
|
|
|
# Convert given HTML text to plain text |
65
|
|
|
|
|
|
|
# @param [Scalar] argv1 HTML text(reference to string) |
66
|
|
|
|
|
|
|
# @param [Integer] loose Loose check flag |
67
|
|
|
|
|
|
|
# @return [Scalar] Plain text(reference to string) |
68
|
24
|
|
|
24
|
1
|
5493
|
my $class = shift; |
69
|
24
|
|
50
|
|
|
71
|
my $argv1 = shift // return \''; |
70
|
24
|
|
100
|
|
|
95
|
my $loose = shift // 0; |
71
|
24
|
50
|
|
|
|
91
|
return \'' unless ref $argv1 eq 'SCALAR'; |
72
|
|
|
|
|
|
|
|
73
|
24
|
|
|
|
|
43
|
my $plain = $$argv1; |
74
|
24
|
|
|
|
|
60
|
state $match = { |
75
|
|
|
|
|
|
|
'html' => qr|].+?|sim, |
76
|
|
|
|
|
|
|
'body' => qr|.+.*].+|sim, |
77
|
|
|
|
|
|
|
}; |
78
|
|
|
|
|
|
|
|
79
|
24
|
100
|
100
|
|
|
1620
|
if( $loose || $plain =~ $match->{'html'} || $plain =~ $match->{'body'} ) { |
|
|
|
66
|
|
|
|
|
80
|
|
|
|
|
|
|
# ... |
81
|
|
|
|
|
|
|
# 1. Remove ... |
82
|
|
|
|
|
|
|
# 2. Remove |
83
|
|
|
|
|
|
|
# 3. ... to " http://... " |
84
|
|
|
|
|
|
|
# 4. ... to " Value " |
85
|
23
|
|
|
|
|
8336
|
$plain =~ s|.+||gsim; |
86
|
23
|
|
|
|
|
7859
|
$plain =~ s|.+||gsim; |
87
|
23
|
|
|
|
|
5118
|
$plain =~ s|(.*?)| [$2]($1) |gsim; |
88
|
23
|
|
|
|
|
2825
|
$plain =~ s|(.*?)| [$2](mailto:$1) |gsim; |
89
|
|
|
|
|
|
|
|
90
|
23
|
|
|
|
|
9806
|
$plain =~ s/<[^<@>]+?>\s*/ /g; # Delete HTML tags except angle-bracketed text containing "@" (e-mail addresses) |
91
|
23
|
|
|
|
|
76
|
$plain =~ s/&lt;/</g; # Convert to left angle brackets |
92
|
23
|
|
|
|
|
53
|
$plain =~ s/&gt;/>/g; # Convert to right angle brackets |
93
|
23
|
|
|
|
|
57
|
$plain =~ s/&amp;/&/g; # Convert to "&" |
94
|
23
|
|
|
|
|
74
|
$plain =~ s/&quot;/"/g; # Convert to '"' |
95
|
23
|
|
|
|
|
43
|
$plain =~ s/&apos;/'/g; # Convert to "'" |
96
|
23
|
|
|
|
|
154
|
$plain =~ s/&nbsp;/ /g; # Convert to ' ' |
97
|
|
|
|
|
|
|
|
98
|
23
|
50
|
|
|
|
106
|
if( length($$argv1) > length($plain) ) { |
99
|
23
|
|
|
|
|
186
|
$plain =~ y/ //s; |
100
|
23
|
|
|
|
|
46
|
$plain .= "\n" |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
} |
103
|
24
|
|
|
|
|
136
|
return \$plain; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub to_utf8 { |
107
|
|
|
|
|
|
|
# Convert given string to UTF-8 |
108
|
|
|
|
|
|
|
# @param [Scalar] argv1 String to be converted(reference to string) |
109
|
|
|
|
|
|
|
# @param [String] argv2 Encoding name before converting |
110
|
|
|
|
|
|
|
# @return [Scalar] UTF-8 encoded string(reference to string) |
111
|
60
|
|
|
60
|
1
|
125
|
my $class = shift; |
112
|
60
|
|
50
|
|
|
174
|
my $argv1 = shift || return \''; |
113
|
60
|
|
|
|
|
122
|
my $argv2 = shift; |
114
|
|
|
|
|
|
|
|
115
|
60
|
|
|
|
|
113
|
my $tobeutf8ed = $$argv1; |
116
|
60
|
|
50
|
|
|
183
|
my $encodefrom = lc $argv2 || ''; |
117
|
60
|
|
|
|
|
89
|
my $hasencoded = undef; |
118
|
60
|
|
|
|
|
517
|
my $hasguessed = Encode::Guess->guess($tobeutf8ed); |
119
|
60
|
100
|
|
|
|
53500
|
my $encodingto = ref $hasguessed ? lc($hasguessed->name) : ''; |
120
|
60
|
|
|
|
|
123
|
state $dontencode = qr/\A(?>utf[-]?8|(?:us[-])?ascii)\z/; |
121
|
|
|
|
|
|
|
|
122
|
60
|
50
|
|
|
|
199
|
if( $encodefrom ) { |
123
|
|
|
|
|
|
|
# The 2nd argument is an encoding name of the 1st argument |
124
|
60
|
|
|
|
|
98
|
while(1) { |
125
|
|
|
|
|
|
|
# Encode a given string when the encoding of the string is neither |
126
|
|
|
|
|
|
|
# utf8 nor ascii. |
127
|
60
|
100
|
|
|
|
440
|
last if $encodefrom =~ $dontencode; |
128
|
59
|
100
|
|
|
|
282
|
last if $encodingto =~ $dontencode; |
129
|
|
|
|
|
|
|
|
130
|
49
|
|
|
|
|
101
|
eval { |
131
|
|
|
|
|
|
|
# Try to convert the string to UTF-8 |
132
|
49
|
|
|
|
|
197
|
Encode::from_to($tobeutf8ed, $encodefrom, 'utf8'); |
133
|
49
|
|
|
|
|
7504
|
$hasencoded = 1; |
134
|
|
|
|
|
|
|
}; |
135
|
49
|
|
|
|
|
105
|
last; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
} |
138
|
60
|
100
|
|
|
|
272
|
return \$tobeutf8ed if $hasencoded; |
139
|
11
|
50
|
|
|
|
30
|
return \$tobeutf8ed unless $encodingto; |
140
|
11
|
50
|
|
|
|
92
|
return \$tobeutf8ed if $encodingto =~ $dontencode; |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# a. The 2nd argument was not given or failed to convert from $encodefrom to UTF-8 |
143
|
|
|
|
|
|
|
# b. Guessed encoding name is available, try to encode using it. |
144
|
|
|
|
|
|
|
# c. Encode a given string when the encoding of the string is neither utf8 nor ascii. |
145
|
0
|
|
|
|
|
|
eval { Encode::from_to($tobeutf8ed, $encodingto, 'utf8') }; |
|
0
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
|
return \$tobeutf8ed; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
1; |
150
|
|
|
|
|
|
|
__END__ |