File Coverage

lib/Sisimai/String.pm
Criterion Covered Total %
statement 74 77 96.1
branch 26 32 81.2
condition 19 22 86.3
subroutine 11 11 100.0
pod 4 6 66.6
total 134 148 90.5


line stmt bran cond sub pod time code
1             package Sisimai::String;
2 97     97   87918 use v5.26;
  97         333  
3 97     97   784 use strict;
  97         232  
  97         2629  
4 97     97   523 use warnings;
  97         186  
  97         4956  
5 97     97   7390 use Encode;
  97         252864  
  97         14628  
6              
7             my $EncodingsC = [qw/big5-eten gb2312/];
8             my $EncodingsE = [qw/iso-8859-1/];
9             my $EncodingsJ = [qw/7bit-jis iso-2022-jp euc-jp shiftjis/];
10 97     97   52285 use Encode::Guess; Encode::Guess->add_suspects(@$EncodingsC, @$EncodingsE, @$EncodingsJ);
  97         446513  
  97         391  
11 37     37 0 614 sub encodenames { return [@$EncodingsC, @$EncodingsE, @$EncodingsJ] };
12              
13             sub is_8bit {
14             # The argument is 8-bit text or not
15             # @param [String] argv1 Any string to be checked
16             # @return [Integer] 0: ASCII Characters only
17             # 1: Including 8-bit character
18 3257     3257 1 338773 my $class = shift;
19 3257 50 100     8196 my $argv1 = shift // return 0; return 0 if ref $argv1 ne 'SCALAR';
  3256         11211  
20 3256 100       19505 return 1 unless $$argv1 =~ /\A[\x00-\x7f]+\z/;
21 3206         13721 return 0;
22             }
23              
24             sub sweep {
25             # Clean the string out
26             # @param [String] argv1 String to be cleaned
27             # @return [Scalar] Cleaned out string
28             # @example Clean up text
29             # sweep(' neko ') #=> 'neko'
30 44743     44743 1 67018 my $class = shift;
31 44743   100     89872 my $argv1 = shift // return "";
32              
33 44742         65689 chomp $argv1;
34 44742         261871 y/ //s, s/\A //g, s/ \z//g, s/ [-]{2,}[^ ].+\z// for $argv1;
35 44742         133076 return $argv1;
36             }
37              
38             sub aligned {
39             # Check if each element of the 2nd argument is aligned in the 1st argument or not
40             # @param [String] argv1 String to be checked
41             # @param [Array] argv2 List including the ordered strings
42             # @return [Bool] 0, 1
43             # @since v5.0.0
44 254927     254927 0 340842 my $class = shift;
45 254927 100 100     420425 my $argv1 = shift || return 0; return 0 unless length $$argv1;
  254926         436609  
46 242577 100 100     398486 my $argv2 = shift || return 0; return 0 unless scalar @$argv2;
  242576         418993  
47 242575         285592 my $align = -1;
48 242575         281268 my $right = 0;
49              
50 242575         336309 for my $e ( @$argv2 ) {
51             # Get the position of each element in the 1st argument using index()
52 270626         493974 my $p = index($$argv1, $e, $align + 1);
53              
54 270626 100       478401 last if $p < 0; # Break this loop when there is no string in the 1st argument
55 36635         50554 $align = length($e) + $p - 1; # There is an aligned string in the 1st argument
56 36635         56920 $right++;
57             }
58 242575 100       416545 return 1 if $right == scalar @$argv2;
59 233991         532325 return 0;
60             }
61              
62             sub to_plain {
63             # Convert given HTML text to plain text
64             # @param [Scalar] argv1 HTML text(reference to string)
65             # @param [Integer] loose Loose check flag
66             # @return [Scalar] Plain text(reference to string)
67 17     17 1 19024 my $class = shift;
68 17 100 100     99 my $argv1 = shift // return undef; return undef if ref $argv1 ne 'SCALAR';
  16         56  
69 15   100     58 my $loose = shift // 0;
70 15         28 my $plain = $$argv1;
71              
72 15         72 state $match = {
73             'html' => qr|].+?|sim,
74             'body' => qr|.+.*].+|sim,
75             };
76              
77 15 100 100     194 if( $loose || $plain =~ $match->{'html'} || $plain =~ $match->{'body'} ) {
      66        
78             # ...
79             # 1. Remove ...
80             # 2. Remove
81             # 3. ... to " http://... "
82             # 4. ... to " Value "
83             s|.+||gsim,
84             s|.+||gsim,
85             s|(.*?)| [$2]($1) |gsim,
86             s|(.*?)| [$2](mailto:$1) |gsim,
87             s/<[^<@>]+?>\s*/ /g, # Delete HTML tags except
88             s/</
89             s/>/>/g, # Convert to right angle brackets
90             s/&/&/g, # Convert to "&"
91             s/"/"/g, # Convert to '"'
92             s/'/'/g, # Convert to "'"
93 3         140 s/ / /g for $plain;
94              
95 3 50       14 if( length($$argv1) > length($plain) ) {
96 3         8 $plain =~ y/ //s;
97 3         6 $plain .= "\n"
98             }
99             }
100 15         47 return \$plain;
101             }
102              
103             sub to_utf8 {
104             # Convert given string to UTF-8
105             # @param [String] argv1 String to be converted
106             # @param [String] argv2 Encoding name before converting
107             # @return [String] UTF-8 Encoded string
108 67     67 1 152 my $class = shift;
109 67   50     280 my $argv1 = shift || return "";
110 67         222 my $argv2 = shift;
111              
112 67         150 state $dontencode = ['utf8', 'utf-8', 'us-ascii', 'ascii'];
113 67         167 my $tobeutf8ed = $$argv1;
114 67   50     248 my $encodefrom = lc $argv2 || '';
115 67         111 my $hasencoded = 0;
116 67         736 my $hasguessed = Encode::Guess->guess($tobeutf8ed);
117 67 100       61059 my $encodingto = ref $hasguessed ? lc($hasguessed->name) : '';
118              
119 67 50       236 if( $encodefrom ) {
120             # The 2nd argument is a encoding name of the 1st argument
121 67         109 while(1) {
122             # Encode a given string when the encoding of the string is neigther
123             # utf8 nor ascii.
124 67 50       194 last if grep { $encodefrom eq $_ } @$dontencode;
  268         596  
125 67 100       128 last if grep { $encodingto eq $_ } @$dontencode;
  268         2376  
126              
127 51         110 eval {
128             # Try to convert the string to UTF-8
129 51         948 Encode::from_to($tobeutf8ed, $encodefrom, 'utf8');
130 51         11009 $hasencoded = 1;
131             };
132 51         122 last;
133             }
134             }
135 67 100       381 return \$tobeutf8ed if $hasencoded;
136 16 50       64 return \$tobeutf8ed unless $encodingto;
137 16 50       35 return \$tobeutf8ed if grep { $encodingto eq $_ } @$dontencode;
  64         170  
138              
139             # a. The 2nd argument was not given or failed to convert from $encodefrom to UTF-8
140             # b. Guessed encoding name is available, try to encode using it.
141             # c. Encode a given string when the encoding of the string is neigther utf8 nor ascii.
142 0           eval { Encode::from_to($tobeutf8ed, $encodingto, 'utf8') };
  0            
143 0           return \$tobeutf8ed;
144             }
145              
146             1;
147             __END__