File Coverage

lib/Sisimai/String.pm
Criterion Covered Total %
statement 74 77 96.1
branch 26 32 81.2
condition 19 22 86.3
subroutine 11 11 100.0
pod 4 6 66.6
total 134 148 90.5


line stmt bran cond sub pod time code
1             package Sisimai::String;
2 99     99   88259 use v5.26;
  99         270  
3 99     99   529 use strict;
  99         200  
  99         1759  
4 99     99   320 use warnings;
  99         155  
  99         4046  
5 99     99   5925 use Encode;
  99         187894  
  99         11270  
6              
7             my $EncodingsC = [qw/big5-eten gb2312/];
8             my $EncodingsE = [qw/iso-8859-1/];
9             my $EncodingsJ = [qw/7bit-jis iso-2022-jp euc-jp shiftjis/];
10 99     99   40241 use Encode::Guess; Encode::Guess->add_suspects(@$EncodingsC, @$EncodingsE, @$EncodingsJ);
  99         338148  
  99         305  
11 38     38 0 333 sub encodenames { return [@$EncodingsC, @$EncodingsE, @$EncodingsJ] };
12              
13             sub is_8bit {
14             # The argument is 8-bit text or not
15             # @param [String] argv1 Any string to be checked
16             # @return [Integer] 0: ASCII Characters only
17             # 1: Including 8-bit character
18 3298     3298 1 206410 my $class = shift;
19 3298 50 100     6632 my $argv1 = shift // return 0; return 0 if ref $argv1 ne 'SCALAR';
  3297         7354  
20 3297 100       11924 return 1 unless $$argv1 =~ /\A[\x00-\x7f]+\z/;
21 3247         8099 return 0;
22             }
23              
24             sub sweep {
25             # Clean the string out
26             # @param [String] argv1 String to be cleaned
27             # @return [Scalar] Cleaned out string
28             # @example Clean up text
29             # sweep(' neko ') #=> 'neko'
30 40907     40907 1 35999 my $class = shift;
31 40907   100     54035 my $argv1 = shift // return "";
32              
33 40906         39154 chomp $argv1;
34 40906         161388 y/ //s, s/\A //g, s/ \z//g, s/ [-]{2,}[^ ].+\z// for $argv1;
35 40906         76057 return $argv1;
36             }
37              
38             sub aligned {
39             # Check if each element of the 2nd argument is aligned in the 1st argument or not
40             # @param [String] argv1 String to be checked
41             # @param [Array] argv2 List including the ordered strings
42             # @return [Bool] 0, 1
43             # @since v5.0.0
44 277921     277921 0 234792 my $class = shift;
45 277921 100 100     307239 my $argv1 = shift || return 0; return 0 unless length $$argv1;
  277920         307904  
46 265448 100 100     288389 my $argv2 = shift || return 0; return 0 unless scalar @$argv2;
  265447         287216  
47 265446         197136 my $align = -1;
48 265446         186708 my $right = 0;
49              
50 265446         238178 for my $e ( @$argv2 ) {
51             # Get the position of each element in the 1st argument using index()
52 297982         317413 my $p = index($$argv1, $e, $align + 1);
53              
54 297982 100       317386 last if $p < 0; # Break this loop when there is no string in the 1st argument
55 41243         38156 $align = length($e) + $p - 1; # There is an aligned string in the 1st argument
56 41243         36607 $right++;
57             }
58 265446 100       289287 return 1 if $right == scalar @$argv2;
59 256739         359010 return 0;
60             }
61              
62             sub to_plain {
63             # Convert given HTML text to plain text
64             # @param [Scalar] argv1 HTML text(reference to string)
65             # @param [Integer] loose Loose check flag
66             # @return [Scalar] Plain text(reference to string)
67 17     17 1 6463 my $class = shift;
68 17 100 100     40 my $argv1 = shift // return undef; return undef if ref $argv1 ne 'SCALAR';
  16         39  
69 15   100     53 my $loose = shift // 0;
70 15         22 my $plain = $$argv1;
71              
72 15         63 state $match = {
73             'html' => qr|].+?|sim,
74             'body' => qr|.+.*].+|sim,
75             };
76              
77 15 100 100     173 if( $loose || $plain =~ $match->{'html'} || $plain =~ $match->{'body'} ) {
      66        
78             # ...
79             # 1. Remove ...
80             # 2. Remove
81             # 3. ... to " http://... "
82             # 4. ... to " Value "
83             s|.+||gsim,
84             s|.+||gsim,
85             s|(.*?)| [$2]($1) |gsim,
86             s|(.*?)| [$2](mailto:$1) |gsim,
87             s/<[^<@>]+?>\s*/ /g, # Delete HTML tags except
88             s/</
89             s/>/>/g, # Convert to right angle brackets
90             s/&/&/g, # Convert to "&"
91             s/"/"/g, # Convert to '"'
92             s/'/'/g, # Convert to "'"
93 3         73 s/ / /g for $plain;
94              
95 3 50       8 if( length($$argv1) > length($plain) ) {
96 3         6 $plain =~ y/ //s;
97 3         4 $plain .= "\n"
98             }
99             }
100 15         42 return \$plain;
101             }
102              
103             sub to_utf8 {
104             # Convert given string to UTF-8
105             # @param [String] argv1 String to be converted
106             # @param [String] argv2 Encoding name before converting
107             # @return [String] UTF-8 Encoded string
108 67     67 1 103 my $class = shift;
109 67   50     169 my $argv1 = shift || return "";
110 67         174 my $argv2 = shift;
111              
112 67         114 state $dontencode = ['utf8', 'utf-8', 'us-ascii', 'ascii'];
113 67         137 my $tobeutf8ed = $$argv1;
114 67   50     199 my $encodefrom = lc $argv2 || '';
115 67         85 my $hasencoded = 0;
116 67         557 my $hasguessed = Encode::Guess->guess($tobeutf8ed);
117 67 100       43127 my $encodingto = ref $hasguessed ? lc($hasguessed->name) : '';
118              
119 67 50       206 if( $encodefrom ) {
120             # The 2nd argument is a encoding name of the 1st argument
121 67         103 while(1) {
122             # Encode a given string when the encoding of the string is neigther
123             # utf8 nor ascii.
124 67 50       148 last if grep { $encodefrom eq $_ } @$dontencode;
  268         1673  
125 67 100       104 last if grep { $encodingto eq $_ } @$dontencode;
  268         354  
126              
127 51         97 eval {
128             # Try to convert the string to UTF-8
129 51         251 Encode::from_to($tobeutf8ed, $encodefrom, 'utf8');
130 51         7440 $hasencoded = 1;
131             };
132 51         78 last;
133             }
134             }
135 67 100       297 return \$tobeutf8ed if $hasencoded;
136 16 50       43 return \$tobeutf8ed unless $encodingto;
137 16 50       35 return \$tobeutf8ed if grep { $encodingto eq $_ } @$dontencode;
  64         131  
138              
139             # a. The 2nd argument was not given or failed to convert from $encodefrom to UTF-8
140             # b. Guessed encoding name is available, try to encode using it.
141             # c. Encode a given string when the encoding of the string is neigther utf8 nor ascii.
142 0           eval { Encode::from_to($tobeutf8ed, $encodingto, 'utf8') };
  0            
143 0           return \$tobeutf8ed;
144             }
145              
146             1;
147             __END__