| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Sisimai::String; |
|
2
|
99
|
|
|
99
|
|
88259
|
use v5.26; |
|
|
99
|
|
|
|
|
270
|
|
|
3
|
99
|
|
|
99
|
|
529
|
use strict; |
|
|
99
|
|
|
|
|
200
|
|
|
|
99
|
|
|
|
|
1759
|
|
|
4
|
99
|
|
|
99
|
|
320
|
use warnings; |
|
|
99
|
|
|
|
|
155
|
|
|
|
99
|
|
|
|
|
4046
|
|
|
5
|
99
|
|
|
99
|
|
5925
|
use Encode; |
|
|
99
|
|
|
|
|
187894
|
|
|
|
99
|
|
|
|
|
11270
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
my $EncodingsC = [qw/big5-eten gb2312/]; |
|
8
|
|
|
|
|
|
|
my $EncodingsE = [qw/iso-8859-1/]; |
|
9
|
|
|
|
|
|
|
my $EncodingsJ = [qw/7bit-jis iso-2022-jp euc-jp shiftjis/]; |
|
10
|
99
|
|
|
99
|
|
40241
|
use Encode::Guess; Encode::Guess->add_suspects(@$EncodingsC, @$EncodingsE, @$EncodingsJ); |
|
|
99
|
|
|
|
|
338148
|
|
|
|
99
|
|
|
|
|
305
|
|
|
11
|
38
|
|
|
38
|
0
|
333
|
sub encodenames { return [@$EncodingsC, @$EncodingsE, @$EncodingsJ] }; |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
sub is_8bit { |
|
14
|
|
|
|
|
|
|
# The argument is 8-bit text or not |
|
15
|
|
|
|
|
|
|
# @param [String] argv1 Any string to be checked |
|
16
|
|
|
|
|
|
|
# @return [Integer] 0: ASCII Characters only |
|
17
|
|
|
|
|
|
|
# 1: Including 8-bit character |
|
18
|
3298
|
|
|
3298
|
1
|
206410
|
my $class = shift; |
|
19
|
3298
|
50
|
100
|
|
|
6632
|
my $argv1 = shift // return 0; return 0 if ref $argv1 ne 'SCALAR'; |
|
|
3297
|
|
|
|
|
7354
|
|
|
20
|
3297
|
100
|
|
|
|
11924
|
return 1 unless $$argv1 =~ /\A[\x00-\x7f]+\z/; |
|
21
|
3247
|
|
|
|
|
8099
|
return 0; |
|
22
|
|
|
|
|
|
|
} |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
sub sweep { |
|
25
|
|
|
|
|
|
|
# Clean the string out |
|
26
|
|
|
|
|
|
|
# @param [String] argv1 String to be cleaned |
|
27
|
|
|
|
|
|
|
# @return [Scalar] Cleaned out string |
|
28
|
|
|
|
|
|
|
# @example Clean up text |
|
29
|
|
|
|
|
|
|
# sweep(' neko ') #=> 'neko' |
|
30
|
40907
|
|
|
40907
|
1
|
35999
|
my $class = shift; |
|
31
|
40907
|
|
100
|
|
|
54035
|
my $argv1 = shift // return ""; |
|
32
|
|
|
|
|
|
|
|
|
33
|
40906
|
|
|
|
|
39154
|
chomp $argv1; |
|
34
|
40906
|
|
|
|
|
161388
|
y/ //s, s/\A //g, s/ \z//g, s/ [-]{2,}[^ ].+\z// for $argv1; |
|
35
|
40906
|
|
|
|
|
76057
|
return $argv1; |
|
36
|
|
|
|
|
|
|
} |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub aligned { |
|
39
|
|
|
|
|
|
|
# Check if each element of the 2nd argument is aligned in the 1st argument or not |
|
40
|
|
|
|
|
|
|
# @param [String] argv1 String to be checked |
|
41
|
|
|
|
|
|
|
# @param [Array] argv2 List including the ordered strings |
|
42
|
|
|
|
|
|
|
# @return [Bool] 0, 1 |
|
43
|
|
|
|
|
|
|
# @since v5.0.0 |
|
44
|
277921
|
|
|
277921
|
0
|
234792
|
my $class = shift; |
|
45
|
277921
|
100
|
100
|
|
|
307239
|
my $argv1 = shift || return 0; return 0 unless length $$argv1; |
|
|
277920
|
|
|
|
|
307904
|
|
|
46
|
265448
|
100
|
100
|
|
|
288389
|
my $argv2 = shift || return 0; return 0 unless scalar @$argv2; |
|
|
265447
|
|
|
|
|
287216
|
|
|
47
|
265446
|
|
|
|
|
197136
|
my $align = -1; |
|
48
|
265446
|
|
|
|
|
186708
|
my $right = 0; |
|
49
|
|
|
|
|
|
|
|
|
50
|
265446
|
|
|
|
|
238178
|
for my $e ( @$argv2 ) { |
|
51
|
|
|
|
|
|
|
# Get the position of each element in the 1st argument using index() |
|
52
|
297982
|
|
|
|
|
317413
|
my $p = index($$argv1, $e, $align + 1); |
|
53
|
|
|
|
|
|
|
|
|
54
|
297982
|
100
|
|
|
|
317386
|
last if $p < 0; # Break this loop when there is no string in the 1st argument |
|
55
|
41243
|
|
|
|
|
38156
|
$align = length($e) + $p - 1; # There is an aligned string in the 1st argument |
|
56
|
41243
|
|
|
|
|
36607
|
$right++; |
|
57
|
|
|
|
|
|
|
} |
|
58
|
265446
|
100
|
|
|
|
289287
|
return 1 if $right == scalar @$argv2; |
|
59
|
256739
|
|
|
|
|
359010
|
return 0; |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub to_plain { |
|
63
|
|
|
|
|
|
|
# Convert given HTML text to plain text |
|
64
|
|
|
|
|
|
|
# @param [Scalar] argv1 HTML text(reference to string) |
|
65
|
|
|
|
|
|
|
# @param [Integer] loose Loose check flag |
|
66
|
|
|
|
|
|
|
# @return [Scalar] Plain text(reference to string) |
|
67
|
17
|
|
|
17
|
1
|
6463
|
my $class = shift; |
|
68
|
17
|
100
|
100
|
|
|
40
|
my $argv1 = shift // return undef; return undef if ref $argv1 ne 'SCALAR'; |
|
|
16
|
|
|
|
|
39
|
|
|
69
|
15
|
|
100
|
|
|
53
|
my $loose = shift // 0; |
|
70
|
15
|
|
|
|
|
22
|
my $plain = $$argv1; |
|
71
|
|
|
|
|
|
|
|
|
72
|
15
|
|
|
|
|
63
|
state $match = { |
|
73
|
|
|
|
|
|
|
'html' => qr|].+?|sim, |
|
74
|
|
|
|
|
|
|
'body' => qr|.+.*].+|sim, |
|
75
|
|
|
|
|
|
|
}; |
|
76
|
|
|
|
|
|
|
|
|
77
|
15
|
100
|
100
|
|
|
173
|
if( $loose || $plain =~ $match->{'html'} || $plain =~ $match->{'body'} ) { |
|
|
|
|
66
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# ... |
|
79
|
|
|
|
|
|
|
# 1. Remove ... |
|
80
|
|
|
|
|
|
|
# 2. Remove |
|
81
|
|
|
|
|
|
|
# 3. ... to " http://... " |
|
82
|
|
|
|
|
|
|
# 4. ... to " Value " |
|
83
|
|
|
|
|
|
|
s|.+||gsim, |
|
84
|
|
|
|
|
|
|
s|.+||gsim, |
|
85
|
|
|
|
|
|
|
s|(.*?)| [$2]($1) |gsim, |
|
86
|
|
|
|
|
|
|
s|(.*?)| [$2](mailto:$1) |gsim, |
|
87
|
|
|
|
|
|
|
s/<[^<@>]+?>\s*/ /g, # Delete HTML tags except |
|
88
|
|
|
|
|
|
|
s/</
|
|
89
|
|
|
|
|
|
|
s/>/>/g, # Convert to right angle brackets |
|
90
|
|
|
|
|
|
|
s/&/&/g, # Convert to "&" |
|
91
|
|
|
|
|
|
|
s/"/"/g, # Convert to '"' |
|
92
|
|
|
|
|
|
|
s/'/'/g, # Convert to "'" |
|
93
|
3
|
|
|
|
|
73
|
s/ / /g for $plain; |
|
94
|
|
|
|
|
|
|
|
|
95
|
3
|
50
|
|
|
|
8
|
if( length($$argv1) > length($plain) ) { |
|
96
|
3
|
|
|
|
|
6
|
$plain =~ y/ //s; |
|
97
|
3
|
|
|
|
|
4
|
$plain .= "\n" |
|
98
|
|
|
|
|
|
|
} |
|
99
|
|
|
|
|
|
|
} |
|
100
|
15
|
|
|
|
|
42
|
return \$plain; |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub to_utf8 { |
|
104
|
|
|
|
|
|
|
# Convert given string to UTF-8 |
|
105
|
|
|
|
|
|
|
# @param [String] argv1 String to be converted |
|
106
|
|
|
|
|
|
|
# @param [String] argv2 Encoding name before converting |
|
107
|
|
|
|
|
|
|
# @return [String] UTF-8 Encoded string |
|
108
|
67
|
|
|
67
|
1
|
103
|
my $class = shift; |
|
109
|
67
|
|
50
|
|
|
169
|
my $argv1 = shift || return ""; |
|
110
|
67
|
|
|
|
|
174
|
my $argv2 = shift; |
|
111
|
|
|
|
|
|
|
|
|
112
|
67
|
|
|
|
|
114
|
state $dontencode = ['utf8', 'utf-8', 'us-ascii', 'ascii']; |
|
113
|
67
|
|
|
|
|
137
|
my $tobeutf8ed = $$argv1; |
|
114
|
67
|
|
50
|
|
|
199
|
my $encodefrom = lc $argv2 || ''; |
|
115
|
67
|
|
|
|
|
85
|
my $hasencoded = 0; |
|
116
|
67
|
|
|
|
|
557
|
my $hasguessed = Encode::Guess->guess($tobeutf8ed); |
|
117
|
67
|
100
|
|
|
|
43127
|
my $encodingto = ref $hasguessed ? lc($hasguessed->name) : ''; |
|
118
|
|
|
|
|
|
|
|
|
119
|
67
|
50
|
|
|
|
206
|
if( $encodefrom ) { |
|
120
|
|
|
|
|
|
|
# The 2nd argument is a encoding name of the 1st argument |
|
121
|
67
|
|
|
|
|
103
|
while(1) { |
|
122
|
|
|
|
|
|
|
# Encode a given string when the encoding of the string is neigther |
|
123
|
|
|
|
|
|
|
# utf8 nor ascii. |
|
124
|
67
|
50
|
|
|
|
148
|
last if grep { $encodefrom eq $_ } @$dontencode; |
|
|
268
|
|
|
|
|
1673
|
|
|
125
|
67
|
100
|
|
|
|
104
|
last if grep { $encodingto eq $_ } @$dontencode; |
|
|
268
|
|
|
|
|
354
|
|
|
126
|
|
|
|
|
|
|
|
|
127
|
51
|
|
|
|
|
97
|
eval { |
|
128
|
|
|
|
|
|
|
# Try to convert the string to UTF-8 |
|
129
|
51
|
|
|
|
|
251
|
Encode::from_to($tobeutf8ed, $encodefrom, 'utf8'); |
|
130
|
51
|
|
|
|
|
7440
|
$hasencoded = 1; |
|
131
|
|
|
|
|
|
|
}; |
|
132
|
51
|
|
|
|
|
78
|
last; |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
} |
|
135
|
67
|
100
|
|
|
|
297
|
return \$tobeutf8ed if $hasencoded; |
|
136
|
16
|
50
|
|
|
|
43
|
return \$tobeutf8ed unless $encodingto; |
|
137
|
16
|
50
|
|
|
|
35
|
return \$tobeutf8ed if grep { $encodingto eq $_ } @$dontencode; |
|
|
64
|
|
|
|
|
131
|
|
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# a. The 2nd argument was not given or failed to convert from $encodefrom to UTF-8 |
|
140
|
|
|
|
|
|
|
# b. Guessed encoding name is available, try to encode using it. |
|
141
|
|
|
|
|
|
|
# c. Encode a given string when the encoding of the string is neigther utf8 nor ascii. |
|
142
|
0
|
|
|
|
|
|
eval { Encode::from_to($tobeutf8ed, $encodingto, 'utf8') }; |
|
|
0
|
|
|
|
|
|
|
|
143
|
0
|
|
|
|
|
|
return \$tobeutf8ed; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
1; |
|
147
|
|
|
|
|
|
|
__END__ |