File Coverage

blib/lib/Encode/Simple.pm
Criterion Covered Total %
statement 155 157 98.7
branch 35 54 64.8
condition 16 43 37.2
subroutine 26 26 100.0
pod 8 8 100.0
total 240 288 83.3


line stmt bran cond sub pod time code
1             package Encode::Simple;
2              
3 3     3   16446 use strict;
  3         7  
  3         155  
4 3     3   22 use warnings;
  3         12  
  3         167  
5 3     3   16 use Carp ();
  3         6  
  3         46  
6 3     3   409 use Encode ();
  3         60802  
  3         137  
7 3     3   22 use Exporter 'import';
  3         20  
  3         560  
8              
9             our $VERSION = '1.003';
10              
11             our @EXPORT = qw(encode encode_utf8 decode decode_utf8);
12             our @EXPORT_OK = qw(encode_lax encode_utf8_lax decode_lax decode_utf8_lax);
13             our %EXPORT_TAGS = (
14             all => [@EXPORT, @EXPORT_OK],
15             strict => [qw(encode encode_utf8 decode decode_utf8)],
16             lax => [qw(encode_lax encode_utf8_lax decode_lax decode_utf8_lax)],
17             utf8 => [qw(encode_utf8 encode_utf8_lax decode_utf8 decode_utf8_lax)],
18             );
19              
20 3     3   63 use constant HAS_UNICODE_UTF8 => do { local $@; !!eval { require Unicode::UTF8; 1 } };
  3         7  
  3         6  
  3         6  
  3         5  
  3         25  
  1         59  
21 3     3   936 use constant MASK_STRICT => Encode::FB_CROAK | Encode::LEAVE_SRC;
  3         7  
  3         182  
22 3     3   18 use constant MASK_LAX => Encode::FB_DEFAULT | Encode::LEAVE_SRC;
  3         4  
  3         388  
23              
24             my %ENCODINGS;
25              
26             sub encode {
27 8     8 1 370962 my ($encoding, $input) = @_;
28 8 50       31 return undef unless defined $input;
29 8   66     39 my $obj = $ENCODINGS{$encoding} || _find_encoding($encoding);
30 8         19 my ($output, $error);
31 3     3   2132 { local $@; use warnings FATAL => 'utf8'; # Encode::Unicode throws warnings in this category
  3         7  
  3         714  
  8         12  
  8         14  
32 8 100 50     18 unless (eval { $output = $obj->encode("$input", MASK_STRICT); 1 }) { $error = $@ || 'Error' }
  8         124  
  4         15  
  4         18  
33             }
34 8 100       32 _rethrow($error) if defined $error;
35 4         18 return $output;
36             }
37              
38             sub encode_lax {
39 4     4 1 1898 my ($encoding, $input) = @_;
40 4 50       16 return undef unless defined $input;
41 4   33     15 my $obj = $ENCODINGS{$encoding} || _find_encoding($encoding);
42 4         11 my ($output, $error);
43 3     3   21 { local $@; no warnings 'utf8'; # Encode::Unicode throws warnings in this category
  3         6  
  3         516  
  4         7  
  4         7  
44 4 50 0     10 unless (eval { $output = $obj->encode("$input", MASK_LAX); 1 }) { $error = $@ || 'Error' }
  4         41  
  4         17  
  0         0  
45             }
46 4 50       15 _rethrow($error) if defined $error;
47 4         22 return $output;
48             }
49              
50             sub encode_utf8 {
51 10     10 1 513277 my ($input) = @_;
52 10 50       37 return undef unless defined $input;
53 10         21 my ($output, $error);
54 10         18 if (HAS_UNICODE_UTF8) {
55 3     3   20 local $@; use warnings FATAL => 'utf8'; # Unicode::UTF8 throws warnings in this category
  3         15  
  3         731  
56             unless (eval { $output = Unicode::UTF8::encode_utf8($input); 1 }) { $error = $@ || 'Error' }
57             } else {
58 10   66     32 my $obj = $ENCODINGS{'UTF-8'} || _find_encoding('UTF-8');
59 10         59 local $@;
60 10 100 50     39 unless (eval { $output = $obj->encode("$input", MASK_STRICT); 1 }) { $error = $@ || 'Error' }
  9         56  
  8         31  
  3         16  
61             }
62 9 100       32 _rethrow($error) if defined $error;
63 7         76 return $output;
64             }
65              
66             sub encode_utf8_lax {
67 4     4 1 12 my ($input) = @_;
68 4 50       16 return undef unless defined $input;
69 4         10 my ($output, $error);
70 4         17 if (HAS_UNICODE_UTF8) {
71 3     3   22 local $@; no warnings 'utf8'; # Unicode::UTF8 throws warnings in this category
  3         5  
  3         684  
72             unless (eval { $output = Unicode::UTF8::encode_utf8($input); 1 }) { $error = $@ || 'Error' }
73             } else {
74 4   33     12 my $obj = $ENCODINGS{'UTF-8'} || _find_encoding('UTF-8');
75 4         8 local $@;
76 4 50 0     15 unless (eval { $output = $obj->encode("$input", MASK_LAX); 1 }) { $error = $@ || 'Error' }
  4         25  
  2         9  
  2         8  
77             }
78 4 50       15 _rethrow($error) if defined $error;
79 2         11 return $output;
80             }
81              
82             sub decode {
83 6     6 1 33 my ($encoding, $input) = @_;
84 6 50       23 return undef unless defined $input;
85 6   33     20 my $obj = $ENCODINGS{$encoding} || _find_encoding($encoding);
86 6         11 my ($output, $error);
87 3     3   53 { local $@; use warnings FATAL => 'utf8'; # Encode::Unicode throws warnings in this category
  3         6  
  3         662  
  6         10  
  6         12  
88 6 100 50     11 unless (eval { $output = $obj->decode("$input", MASK_STRICT); 1 }) { $error = $@ || 'Error' }
  6         51  
  4         20  
  2         24  
89             }
90 6 100       24 _rethrow($error) if defined $error;
91 4         15 return $output;
92             }
93              
94             sub decode_lax {
95 5     5 1 15 my ($encoding, $input) = @_;
96 5 50       17 return undef unless defined $input;
97 5   66     23 my $obj = $ENCODINGS{$encoding} || _find_encoding($encoding);
98 5         13 my ($output, $error);
99 3     3   29 { local $@; no warnings 'utf8'; # Encode::Unicode throws warnings in this category
  3         5  
  3         561  
  5         8  
  5         10  
100 5 50 0     12 unless (eval { $output = $obj->decode("$input", MASK_LAX); 1 }) { $error = $@ || 'Error' }
  5         35  
  5         48  
  0         0  
101             }
102 5 50       15 _rethrow($error) if defined $error;
103 5         22 return $output;
104             }
105              
106             sub decode_utf8 {
107 8     8 1 22 my ($input) = @_;
108 8 50       28 return undef unless defined $input;
109 8         18 my ($output, $error);
110 8         15 if (HAS_UNICODE_UTF8) {
111 3     3   22 local $@; use warnings FATAL => 'utf8'; # Unicode::UTF8 throws warnings in this category
  3         5  
  3         962  
112             unless (eval { $output = Unicode::UTF8::decode_utf8($input); 1 }) { $error = $@ || 'Error' }
113             } else {
114 8   33     25 my $obj = $ENCODINGS{'UTF-8'} || _find_encoding('UTF-8');
115 8         16 local $@;
116 8 100 50     27 unless (eval { $output = $obj->decode("$input", MASK_STRICT); 1 }) { $error = $@ || 'Error' }
  7         50  
  5         37  
  4         52  
117             }
118 7 100       29 _rethrow($error) if defined $error;
119 4         17 return $output;
120             }
121              
122             sub decode_utf8_lax {
123 8     8 1 22 my ($input) = @_;
124 8 50       26 return undef unless defined $input;
125 8         17 my ($output, $error);
126 8         15 if (HAS_UNICODE_UTF8) {
127 3     3   23 local $@; no warnings 'utf8'; # Unicode::UTF8 throws warnings in this category
  3         5  
  3         1411  
128             unless (eval { $output = Unicode::UTF8::decode_utf8($input); 1 }) { $error = $@ || 'Error' }
129             } else {
130 8   33     26 my $obj = $ENCODINGS{'UTF-8'} || _find_encoding('UTF-8');
131 8         16 local $@;
132 8 50 0     22 unless (eval { $output = $obj->decode("$input", MASK_LAX); 1 }) { $error = $@ || 'Error' }
  8         46  
  6         50  
  2         8  
133             }
134 8 50       31 _rethrow($error) if defined $error;
135 6         24 return $output;
136             }
137              
138             sub _find_encoding {
139 6     6   17 my ($encoding) = @_;
140 6 50       17 Carp::croak('Encoding name should not be undef') unless defined $encoding;
141 6         27 my $obj = Encode::find_encoding($encoding);
142 6 50       8993 Carp::croak("Unknown encoding '$encoding'") unless defined $obj;
143 6         51 return $ENCODINGS{$encoding} = $obj;
144             }
145              
146             sub _rethrow {
147 11     11   39 my ($error) = @_;
148 11 50 33     111 die $error if ref $error or $error =~ m/\n(?!\z)/;
149 11         95 $error =~ s/ at .+? line [0-9]+\.\n\z//;
150 11         1999 Carp::croak($error);
151             }
152              
153             1;
154              
155             =head1 NAME
156              
157             Encode::Simple - Encode and decode text, simply
158              
159             =head1 SYNOPSIS
160              
161             use Encode::Simple qw(encode encode_lax encode_utf8 decode decode_lax decode_utf8);
162             my $bytes = encode 'Shift_JIS', $characters;
163             my $bytes = encode_lax 'ASCII', $characters;
164             my $bytes = encode_utf8 $characters;
165             my $characters = decode 'cp1252', $bytes;
166             my $characters = decode_lax 'UTF-8', $bytes;
167             my $characters = decode_utf8 $bytes;
168              
169             =head1 DESCRIPTION
170              
171             This module is a simple wrapper around L that presents L and
172             L functions with straightforward behavior and error handling. See
173             L for a list of supported encodings.
174              
175             =head1 FUNCTIONS
176              
177             All functions are exported by name, as well as via the tags C<:all>,
178             C<:strict>, C<:lax>, and C<:utf8>. By default, L, L,
179             L, and L are exported as in L.
180              
181             =head2 encode
182              
183             my $bytes = encode $encoding, $characters;
184              
185             Encodes the input string of characters into a byte string using C<$encoding>.
186             Throws an exception if the input string contains characters that are not valid
187             or possible to represent in C<$encoding>.
188              
189             =head2 encode_lax
190              
191             my $bytes = encode_lax $encoding, $characters;
192              
193             Encodes the input string of characters into a byte string using C<$encoding>,
194             encoding any invalid characters as a substitution character (the substitution
195             character used depends on the encoding). Note that some encoders do not respect
196             this option and may throw an exception anyway, this notably includes
197             L (but not UTF-8).
198              
199             =head2 encode_utf8
200              
201             my $bytes = encode_utf8 $characters;
202              
203             I
204              
205             Encodes the input string of characters into a UTF-8 byte string. Throws an
206             exception if the input string contains characters that are not valid or
207             possible to represent in UTF-8.
208              
209             This function will use the more consistent and efficient
210             L if installed, and is otherwise equivalent to
211             L with an encoding of C. It is B equivalent to
212             L, which should be avoided.
213              
214             =head2 encode_utf8_lax
215              
216             my $bytes = encode_utf8_lax $characters;
217              
218             I
219              
220             Encodes the input string of characters into a UTF-8 byte string, encoding any
221             invalid characters as the Unicode replacement character C, represented
222             in UTF-8 as the three bytes C<0xEFBFBD>.
223              
224             This function will use the more consistent and efficient
225             L if installed, and is otherwise equivalent to
226             L with an encoding of C. It is B equivalent to
227             L, which should be avoided.
228              
229             =head2 decode
230              
231             my $characters = decode $encoding, $bytes;
232              
233             Decodes the input byte string into a string of characters using C<$encoding>.
234             Throws an exception if the input bytes are not valid for C<$encoding>.
235              
236             =head2 decode_lax
237              
238             my $characters = decode_lax $encoding, $bytes;
239              
240             Decodes the input byte string into a string of characters using C<$encoding>,
241             decoding any malformed bytes to the Unicode replacement character (U+FFFD).
242             Note that some encoders do not respect this option and may throw an exception
243             anyway, this notably includes L (but not UTF-8).
244              
245             =head2 decode_utf8
246              
247             my $characters = decode_utf8 $bytes;
248              
249             I
250              
251             Decodes the input UTF-8 byte string into a string of characters. Throws an
252             exception if the input bytes are not valid for UTF-8.
253              
254             This function will use the more consistent and efficient
255             L if installed, and is otherwise equivalent to
256             L with an encoding of C. It is B equivalent to
257             L, which should be avoided.
258              
259             =head2 decode_utf8_lax
260              
261             my $characters = decode_utf8_lax $bytes;
262              
263             I
264              
265             Decodes the input UTF-8 byte string into a string of characters, decoding any
266             malformed bytes to the Unicode replacement character C.
267              
268             This function will use the more consistent and efficient
269             L if installed, and is otherwise equivalent to
270             L with an encoding of C. It is B equivalent to
271             L, which should be avoided.
272              
273             =head1 BUGS
274              
275             Report any issues on the public bugtracker.
276              
277             =head1 AUTHOR
278              
279             Dan Book
280              
281             =head1 COPYRIGHT AND LICENSE
282              
283             This software is Copyright (c) 2018 by Dan Book.
284              
285             This is free software, licensed under:
286              
287             The Artistic License 2.0 (GPL Compatible)
288              
289             =head1 SEE ALSO
290              
291             L