File Coverage

blib/lib/Locale/Recode.pm
Criterion Covered Total %
statement 34 60 56.6
branch 9 20 45.0
condition 2 6 33.3
subroutine 4 7 57.1
pod 4 6 66.6
total 53 99 53.5


line stmt bran cond sub pod time code
1             #! /bin/false
2              
3             # vim: set autoindent shiftwidth=4 tabstop=4:
4              
5             # Portable character conversion for Perl.
6             # Copyright (C) 2002-2026 Guido Flohr <guido.flohr@cantanea.com>,
7             # all rights reserved.
8              
9             # This program is free software: you can redistribute it and/or modify
10             # it under the terms of the GNU General Public License as published by
11             # the Free Software Foundation; either version 3 of the License, or
12             # (at your option) any later version.
13              
14             # This program is distributed in the hope that it will be useful,
15             # but WITHOUT ANY WARRANTY; without even the implied warranty of
16             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17             # GNU General Public License for more details.
18              
19             # You should have received a copy of the GNU General Public License
20             # along with this program. If not, see <http://www.gnu.org/licenses/>.
21              
22             package Locale::Recode;
23              
24 161     161   2003689 use strict;
  161         426  
  161         151574  
25              
26             require Locale::Recode::_Conversions;
27              
28             my $loaded = {};
29             my $has_encode;
30              
31             sub new
32             {
33 549   33 549 0 24557559 my $class = ref($_[0]) || $_[0];
34 549         1376 shift;
35 549         2559 my %args = @_;
36              
37 549         1704 my $self = bless {}, $class;
38              
39 549         1922 my ($from_codeset, $to_codeset) = @args{qw (from to)};
40            
41 549 50 33     2927 unless ($from_codeset && $to_codeset) {
42 0         0 require Carp;
43 0         0 Carp::croak (<<EOF);
44             Usage: $class->new (from => FROM_CODESET, to => TO_CODESET);
45             EOF
46             }
47              
48             # Find a conversion path.
49 549         3400 my $path = Locale::Recode::_Conversions->findPath ($from_codeset,
50             $to_codeset);
51 549 50       1392 unless ($path) {
52 0         0 $self->{__error} = 'EINVAL';
53 0         0 return $self;
54             }
55              
56 549         1105 my @conversions = ();
57 549         1393 foreach (@$path) {
58 549         1712 my ($module, $from, $to) = @$_;
59            
60 549 100       1595 unless ($loaded->{$module}) {
61 139         9280 eval "require Locale::RecodeData::$module";
62 139 50       2043 if ($@) {
63 0         0 $self->{__error} = $@;
64 0         0 return $self;
65             }
66            
67 139         646 $loaded->{$module} = 1;
68             }
69            
70 549         1255 my $module_name = "Locale::RecodeData::$module";
71 549         985 my $method = 'new';
72 549         4498 my $object = $module_name->$method (from => $from,
73             to => $to);
74            
75 549         2085 push @conversions, $object;
76             }
77              
78 549         4725 $self->{__conversions} = \@conversions;
79            
80 549         2573 return $self;
81             }
82              
83             sub resolveAlias
84             {
85 0     0 0 0 my ($class, $alias) = @_;
86              
87 0         0 return Locale::Recode::_Conversions->resolveAlias ($alias);
88             }
89              
90             sub getSupported
91             {
92 0     0 1 0 return [ Locale::Recode::_Conversions->listSupported ];
93             }
94              
95             sub getCharsets
96             {
97 0     0 1 0 my $self = shift;
98 0         0 my %all = map { $_ => 1 } @{&getSupported};
  0         0  
  0         0  
99              
100 0         0 require Locale::Recode::_Aliases;
101              
102 0         0 my $conversions = Locale::Recode::_Conversions->listSupported;
103 0         0 foreach my $charset (keys %{Locale::Recode::_Aliases::ALIASES()}) {
  0         0  
104 0         0 my $mime_name = $self->resolveAlias ($charset);
105 0 0       0 next unless exists $all{$mime_name};
106 0         0 $all{$charset} = 1;
107             }
108            
109 0         0 return [ keys %all ];
110             }
111              
112             sub recode
113             {
114 199610     199610 1 1538091 my $self = $_[0];
115              
116 199610 50       366467 return if $self->{__error};
117              
118 199610 50       352571 return 1 unless defined $_[1];
119              
120 199610         271952 my $chain = $self->{__conversions};
121            
122 199610         297398 foreach my $module (@$chain) {
123 199610         394983 my $success = $module->_recode ($_[1]);
124            
125 199610 50       409810 unless ($success) {
126 0         0 $self->{__error} = $module->_getError;
127 0         0 return;
128             }
129             }
130              
131 199610         341299 return 1;
132             }
133              
134             sub getError
135             {
136 412     412 1 1971 my $self = shift;
137 412 50       3015 my $error = $self->{__error} or return;
138              
139 0 0         if ('EINVAL' eq $error) {
140 0           return 'Invalid conversion';
141             } else {
142 0           return $error;
143             }
144             }
145              
146             1;
147              
148             __END__
149              
150             =head1 NAME
151              
152             Locale::Recode - Object-Oriented Portable Charset Conversion
153              
154             =head1 SYNOPSIS
155              
156             use Locale::Recode;
157              
158             $cd = Locale::Recode->new (from => 'UTF-8',
159             to => 'ISO-8859-1');
160              
161             die $cd->getError if $cd->getError;
162              
163             $cd->recode ($text) or die $cd->getError;
164              
165             $mime_name = Locale::Recode->resolveAlias ('latin-1');
166              
167             $supported = Locale::Recode->getSupported;
168              
169             $complete = Locale::Recode->getCharsets;
170              
171             =head1 DESCRIPTION
172              
173             This module provides routines that convert textual data from one
174             codeset to another in a portable way. The module has been started
175             before Encode(3) was written. It's main purpose today is to provide
176             charset conversion even when Encode(3) is not available on the system.
177             It should also work for older Perl versions without Unicode support.
178              
179             Internally Locale::Recode(3) will use Encode(3) whenever possible,
180             to allow for a faster conversion and for a wider range of supported
181             charsets, and will only fall back to the Perl implementation when
182             Encode(3) is not available or does not support a particular charset
183             that Locale::Recode(3) does.
184              
185             Locale::Recode(3) is part of libintl-perl, and it's main purpose is
186             actually to implement a portable charset conversion framework for
187             the message translation facilities described in Locale::TextDomain(3).
188              
189             =head1 CONSTRUCTOR
190              
191             The constructor C<new()> requires two named arguments:
192              
193             =over 4
194              
195             =item B<from>
196              
197             The encoding of the original data. Case doesn't matter, aliases
198             are resolved.
199              
200             =item B<to>
201              
202             The target encoding. Again, case doesn't matter, and aliases
203             are resolved.
204              
205             =back
206              
207             The constructor will never fail. In case of an error, the object's
208             internal state is set to bad and it will refuse to do any conversions.
209             You can inquire the reason for the failure with the method
210             getError().
211              
212             =head1 OBJECT METHODS
213              
214             The following object methods are available.
215              
216             =over 4
217              
218             =item B<recode (STRING)>
219              
220             Converts B<STRING> from the source encoding into the destination
221             encoding. In case of success, a truth value is returned, false
222             otherwise. You can inquire the reason for the failure with the
223             method getError().
224              
225             =item B<getError>
226              
227             Returns either false if the object is not in an error state or
228             an error message.
229              
230             =back
231              
232             =head1 CLASS METHODS
233              
234             The object provides some additional class methods:
235              
236             =over 4
237              
238             =item B<getSupported>
239              
240             Returns a reference to a list of all supported charsets. This
241             may implicitely load additional Encode(3) conversions like
242             Encode::HanExtra(3) which may produce considerable load on your
243             system.
244              
245             The method is therefore not intended for regular use but rather
246             for getting resp. displaying I<once> a list of available encodings.
247              
248             The members of the list are all converted to uppercase!
249              
250             =item B<getCharsets>
251              
252             Like getSupported() but also returns all available aliases.
253              
254             =back
255              
256             =head1 SUPPORTED CHARSETS
257              
258             The range of supported charsets is system-dependent. The following
259             somewhat special charsets are always available:
260              
261             =over 4
262              
263             =item B<UTF-8>
264              
265             UTF-8 is available independently of your Perl version. For Perl 5.6
266             or better or in the presence of Encode(3), conversions are not done
267             in Perl but with the interfaces provided by these facilities which
268             are written in C, hence much faster.
269              
270             Encoding data I<into> UTF-8 is fast, even if it is done in Perl.
271             Decoding it in Perl may become quite slow. If you frequently have
272             to decode UTF-8 with B<Locale::Recode> you will probably want to
273             make sure that you do that with Perl 5.6 or beter, or install Encode(3) to
274             speed up things.
275              
276             =item B<INTERNAL>
277              
278             UTF-8 is fast to write but hard to read for applications. It is
279             therefore not the worst for internal string representation but not
280             far from that. Locale::Recode(3) stores strings internally as a
281             reference to an array of integer values like most programming languages
282             (Perl is an exception) do, trading memory for performance.
283              
284             The integer values are the UCS-4 codes of the characters in host
285             byte order.
286              
287             The encoding B<INTERNAL> is directly availabe via Locale::Recode(3)
288             but of course you should not really use it for data exchange, unless
289             you know what you are doing.
290              
291             =back
292              
293             Locale::Recode(3) has native support for a plethora of other encodings,
294             most of them 8 bit encodings that are fast to decode, including most
295             encodings used on popular micros like the ISO-8859-* series of encodings,
296             most Windows-* encodings (also known as CP*), Macintosh, Atari, etc.
297              
298             =head1 NAMES AND ALIASES
299              
300             Each charset resp. encoding is available internally under a unique
301             name. Whenever the information was available, the preferred MIME name
302             (see L<http://www.iana.org/assignments/character-sets/>) was chosen as
303             the internal name.
304              
305             Alias handling is quite strict. The module does not make wild guesses
306             at what you mean ("What's the meaning of the acronym JIS" is a valid
307             alias for "7bit-jis" in Encode(3) ....) but aims at providing common
308             aliases only. The same applies to so-called aliases that are really
309             mistakes, like "utf8" for UTF-8.
310              
311             The module knows all aliases that are listed with the IANA character
312             set registry (L<http://www.iana.org/assignments/character-sets/>), plus
313             those known to libiconv version 1.8, and a bunch of additional ones.
314              
315             =head1 CONVERSION TABLES
316              
317             The conversion tables have either been taken from official sources
318             like the IANA or the Unicode Consortium, from Bruno Haible's libiconv,
319             or from the sources of the GNU libc and the regression tests for
320             libintl-perl will check for conformance here. For some encodings this data
321             differs from Encode(3)'s data which would cause these tests to fail.
322             In these cases, the module will not invoke the Encode(3) methods, but
323             will fall back to the internal implementation for the sake of
324             consistency.
325              
326             The few encodings that are affected are so simple that you will not
327             experience any real performance penalty unless you convert large chunks
328             of data. But the package is not really intended for such use anyway, and
329             since Encode(3) is relatively new, I rather think that the differences
330             are bugs in Encode which will be fixed soon.
331              
332             =head1 BUGS
333              
334             The module should provide fall back conversions for other Unicode
335             encoding schemes like UCS-2, UCS-4 (big- and little-endian).
336              
337             The pure Perl UTF-8 decoder will not always handle corrupt UTF-8
338             correctly, especially at the end and at the beginning of the string.
339             This is not likely to be fixed, since the module's intention is not
340             to be a consistency checker for UTF-8 data.
341              
342             =head1 AUTHOR
343              
344             Copyright (C) 2002-2026 L<Guido Flohr|http://www.guido-flohr.net/>
345             (L<mailto:guido.flohr@cantanea.com>), all rights reserved. See the source
346             code for details!code for details!
347              
348             =head1 SEE ALSO
349              
350             Encode(3), iconv(3), iconv(1), recode(1), perl(1)
351              
352             =cut
353             Local Variables:
354             mode: perl
355             perl-indent-level: 4
356             perl-continued-statement-offset: 4
357             perl-continued-brace-offset: 0
358             perl-brace-offset: -4
359             perl-brace-imaginary-offset: 0
360             perl-label-offset: -4
361             cperl-indent-level: 4
362             cperl-continued-statement-offset: 2
363             tab-width: 4
364             End: