| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Sisimai::String; |
|
2
|
97
|
|
|
97
|
|
87918
|
use v5.26; |
|
|
97
|
|
|
|
|
333
|
|
|
3
|
97
|
|
|
97
|
|
784
|
use strict; |
|
|
97
|
|
|
|
|
232
|
|
|
|
97
|
|
|
|
|
2629
|
|
|
4
|
97
|
|
|
97
|
|
523
|
use warnings; |
|
|
97
|
|
|
|
|
186
|
|
|
|
97
|
|
|
|
|
4956
|
|
|
5
|
97
|
|
|
97
|
|
7390
|
use Encode; |
|
|
97
|
|
|
|
|
252864
|
|
|
|
97
|
|
|
|
|
14628
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
my $EncodingsC = [qw/big5-eten gb2312/]; |
|
8
|
|
|
|
|
|
|
my $EncodingsE = [qw/iso-8859-1/]; |
|
9
|
|
|
|
|
|
|
my $EncodingsJ = [qw/7bit-jis iso-2022-jp euc-jp shiftjis/]; |
|
10
|
97
|
|
|
97
|
|
52285
|
use Encode::Guess; Encode::Guess->add_suspects(@$EncodingsC, @$EncodingsE, @$EncodingsJ); |
|
|
97
|
|
|
|
|
446513
|
|
|
|
97
|
|
|
|
|
391
|
|
|
11
|
37
|
|
|
37
|
0
|
614
|
sub encodenames { return [@$EncodingsC, @$EncodingsE, @$EncodingsJ] }; |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
sub is_8bit { |
|
14
|
|
|
|
|
|
|
# The argument is 8-bit text or not |
|
15
|
|
|
|
|
|
|
# @param [String] argv1 Any string to be checked |
|
16
|
|
|
|
|
|
|
# @return [Integer] 0: ASCII Characters only |
|
17
|
|
|
|
|
|
|
# 1: Including 8-bit character |
|
18
|
3257
|
|
|
3257
|
1
|
338773
|
my $class = shift; |
|
19
|
3257
|
50
|
100
|
|
|
8196
|
my $argv1 = shift // return 0; return 0 if ref $argv1 ne 'SCALAR'; |
|
|
3256
|
|
|
|
|
11211
|
|
|
20
|
3256
|
100
|
|
|
|
19505
|
return 1 unless $$argv1 =~ /\A[\x00-\x7f]+\z/; |
|
21
|
3206
|
|
|
|
|
13721
|
return 0; |
|
22
|
|
|
|
|
|
|
} |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
sub sweep { |
|
25
|
|
|
|
|
|
|
# Clean the string out |
|
26
|
|
|
|
|
|
|
# @param [String] argv1 String to be cleaned |
|
27
|
|
|
|
|
|
|
# @return [Scalar] Cleaned out string |
|
28
|
|
|
|
|
|
|
# @example Clean up text |
|
29
|
|
|
|
|
|
|
# sweep(' neko ') #=> 'neko' |
|
30
|
44743
|
|
|
44743
|
1
|
67018
|
my $class = shift; |
|
31
|
44743
|
|
100
|
|
|
89872
|
my $argv1 = shift // return ""; |
|
32
|
|
|
|
|
|
|
|
|
33
|
44742
|
|
|
|
|
65689
|
chomp $argv1; |
|
34
|
44742
|
|
|
|
|
261871
|
y/ //s, s/\A //g, s/ \z//g, s/ [-]{2,}[^ ].+\z// for $argv1; |
|
35
|
44742
|
|
|
|
|
133076
|
return $argv1; |
|
36
|
|
|
|
|
|
|
} |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub aligned { |
|
39
|
|
|
|
|
|
|
# Check if each element of the 2nd argument is aligned in the 1st argument or not |
|
40
|
|
|
|
|
|
|
# @param [String] argv1 String to be checked |
|
41
|
|
|
|
|
|
|
# @param [Array] argv2 List including the ordered strings |
|
42
|
|
|
|
|
|
|
# @return [Bool] 0, 1 |
|
43
|
|
|
|
|
|
|
# @since v5.0.0 |
|
44
|
254927
|
|
|
254927
|
0
|
340842
|
my $class = shift; |
|
45
|
254927
|
100
|
100
|
|
|
420425
|
my $argv1 = shift || return 0; return 0 unless length $$argv1; |
|
|
254926
|
|
|
|
|
436609
|
|
|
46
|
242577
|
100
|
100
|
|
|
398486
|
my $argv2 = shift || return 0; return 0 unless scalar @$argv2; |
|
|
242576
|
|
|
|
|
418993
|
|
|
47
|
242575
|
|
|
|
|
285592
|
my $align = -1; |
|
48
|
242575
|
|
|
|
|
281268
|
my $right = 0; |
|
49
|
|
|
|
|
|
|
|
|
50
|
242575
|
|
|
|
|
336309
|
for my $e ( @$argv2 ) { |
|
51
|
|
|
|
|
|
|
# Get the position of each element in the 1st argument using index() |
|
52
|
270626
|
|
|
|
|
493974
|
my $p = index($$argv1, $e, $align + 1); |
|
53
|
|
|
|
|
|
|
|
|
54
|
270626
|
100
|
|
|
|
478401
|
last if $p < 0; # Break this loop when there is no string in the 1st argument |
|
55
|
36635
|
|
|
|
|
50554
|
$align = length($e) + $p - 1; # There is an aligned string in the 1st argument |
|
56
|
36635
|
|
|
|
|
56920
|
$right++; |
|
57
|
|
|
|
|
|
|
} |
|
58
|
242575
|
100
|
|
|
|
416545
|
return 1 if $right == scalar @$argv2; |
|
59
|
233991
|
|
|
|
|
532325
|
return 0; |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub to_plain { |
|
63
|
|
|
|
|
|
|
# Convert given HTML text to plain text |
|
64
|
|
|
|
|
|
|
# @param [Scalar] argv1 HTML text(reference to string) |
|
65
|
|
|
|
|
|
|
# @param [Integer] loose Loose check flag |
|
66
|
|
|
|
|
|
|
# @return [Scalar] Plain text(reference to string) |
|
67
|
17
|
|
|
17
|
1
|
19024
|
my $class = shift; |
|
68
|
17
|
100
|
100
|
|
|
99
|
my $argv1 = shift // return undef; return undef if ref $argv1 ne 'SCALAR'; |
|
|
16
|
|
|
|
|
56
|
|
|
69
|
15
|
|
100
|
|
|
58
|
my $loose = shift // 0; |
|
70
|
15
|
|
|
|
|
28
|
my $plain = $$argv1; |
|
71
|
|
|
|
|
|
|
|
|
72
|
15
|
|
|
|
|
72
|
state $match = { |
|
73
|
|
|
|
|
|
|
'html' => qr|].+?|sim, |
|
74
|
|
|
|
|
|
|
'body' => qr|.+.*].+|sim, |
|
75
|
|
|
|
|
|
|
}; |
|
76
|
|
|
|
|
|
|
|
|
77
|
15
|
100
|
100
|
|
|
194
|
if( $loose || $plain =~ $match->{'html'} || $plain =~ $match->{'body'} ) { |
|
|
|
|
66
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# ... |
|
79
|
|
|
|
|
|
|
# 1. Remove ... |
|
80
|
|
|
|
|
|
|
# 2. Remove |
|
81
|
|
|
|
|
|
|
# 3. ... to " http://... " |
|
82
|
|
|
|
|
|
|
# 4. ... to " Value " |
|
83
|
|
|
|
|
|
|
s|.+||gsim, |
|
84
|
|
|
|
|
|
|
s|.+||gsim, |
|
85
|
|
|
|
|
|
|
s|(.*?)| [$2]($1) |gsim, |
|
86
|
|
|
|
|
|
|
s|(.*?)| [$2](mailto:$1) |gsim, |
|
87
|
|
|
|
|
|
|
s/<[^<@>]+?>\s*/ /g, # Delete HTML tags except |
|
88
|
|
|
|
|
|
|
s/</
|
|
89
|
|
|
|
|
|
|
s/>/>/g, # Convert to right angle brackets |
|
90
|
|
|
|
|
|
|
s/&/&/g, # Convert to "&" |
|
91
|
|
|
|
|
|
|
s/"/"/g, # Convert to '"' |
|
92
|
|
|
|
|
|
|
s/'/'/g, # Convert to "'" |
|
93
|
3
|
|
|
|
|
140
|
s/ / /g for $plain; |
|
94
|
|
|
|
|
|
|
|
|
95
|
3
|
50
|
|
|
|
14
|
if( length($$argv1) > length($plain) ) { |
|
96
|
3
|
|
|
|
|
8
|
$plain =~ y/ //s; |
|
97
|
3
|
|
|
|
|
6
|
$plain .= "\n" |
|
98
|
|
|
|
|
|
|
} |
|
99
|
|
|
|
|
|
|
} |
|
100
|
15
|
|
|
|
|
47
|
return \$plain; |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub to_utf8 { |
|
104
|
|
|
|
|
|
|
# Convert given string to UTF-8 |
|
105
|
|
|
|
|
|
|
# @param [String] argv1 String to be converted |
|
106
|
|
|
|
|
|
|
# @param [String] argv2 Encoding name before converting |
|
107
|
|
|
|
|
|
|
# @return [String] UTF-8 Encoded string |
|
108
|
67
|
|
|
67
|
1
|
152
|
my $class = shift; |
|
109
|
67
|
|
50
|
|
|
280
|
my $argv1 = shift || return ""; |
|
110
|
67
|
|
|
|
|
222
|
my $argv2 = shift; |
|
111
|
|
|
|
|
|
|
|
|
112
|
67
|
|
|
|
|
150
|
state $dontencode = ['utf8', 'utf-8', 'us-ascii', 'ascii']; |
|
113
|
67
|
|
|
|
|
167
|
my $tobeutf8ed = $$argv1; |
|
114
|
67
|
|
50
|
|
|
248
|
my $encodefrom = lc $argv2 || ''; |
|
115
|
67
|
|
|
|
|
111
|
my $hasencoded = 0; |
|
116
|
67
|
|
|
|
|
736
|
my $hasguessed = Encode::Guess->guess($tobeutf8ed); |
|
117
|
67
|
100
|
|
|
|
61059
|
my $encodingto = ref $hasguessed ? lc($hasguessed->name) : ''; |
|
118
|
|
|
|
|
|
|
|
|
119
|
67
|
50
|
|
|
|
236
|
if( $encodefrom ) { |
|
120
|
|
|
|
|
|
|
# The 2nd argument is a encoding name of the 1st argument |
|
121
|
67
|
|
|
|
|
109
|
while(1) { |
|
122
|
|
|
|
|
|
|
# Encode a given string when the encoding of the string is neigther |
|
123
|
|
|
|
|
|
|
# utf8 nor ascii. |
|
124
|
67
|
50
|
|
|
|
194
|
last if grep { $encodefrom eq $_ } @$dontencode; |
|
|
268
|
|
|
|
|
596
|
|
|
125
|
67
|
100
|
|
|
|
128
|
last if grep { $encodingto eq $_ } @$dontencode; |
|
|
268
|
|
|
|
|
2376
|
|
|
126
|
|
|
|
|
|
|
|
|
127
|
51
|
|
|
|
|
110
|
eval { |
|
128
|
|
|
|
|
|
|
# Try to convert the string to UTF-8 |
|
129
|
51
|
|
|
|
|
948
|
Encode::from_to($tobeutf8ed, $encodefrom, 'utf8'); |
|
130
|
51
|
|
|
|
|
11009
|
$hasencoded = 1; |
|
131
|
|
|
|
|
|
|
}; |
|
132
|
51
|
|
|
|
|
122
|
last; |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
} |
|
135
|
67
|
100
|
|
|
|
381
|
return \$tobeutf8ed if $hasencoded; |
|
136
|
16
|
50
|
|
|
|
64
|
return \$tobeutf8ed unless $encodingto; |
|
137
|
16
|
50
|
|
|
|
35
|
return \$tobeutf8ed if grep { $encodingto eq $_ } @$dontencode; |
|
|
64
|
|
|
|
|
170
|
|
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# a. The 2nd argument was not given or failed to convert from $encodefrom to UTF-8 |
|
140
|
|
|
|
|
|
|
# b. Guessed encoding name is available, try to encode using it. |
|
141
|
|
|
|
|
|
|
# c. Encode a given string when the encoding of the string is neigther utf8 nor ascii. |
|
142
|
0
|
|
|
|
|
|
eval { Encode::from_to($tobeutf8ed, $encodingto, 'utf8') }; |
|
|
0
|
|
|
|
|
|
|
|
143
|
0
|
|
|
|
|
|
return \$tobeutf8ed; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
1; |
|
147
|
|
|
|
|
|
|
__END__ |