File Coverage

blib/lib/MIME/Charset.pm

Criterion	Covered	Total	%
statement	213	290	73.4
branch	90	166	54.2
condition	43	99	43.4
subroutine	29	35	82.8
pod	20	20	100.0
total	395	610	64.7

line	stmt	bran	cond	sub	pod	time	code
1							#-- perl --
2
3							package MIME::Charset;
4	9			9		69055	use 5.005;
	9					26
5
6							=head1 NAME
7
8							MIME::Charset - Charset Information for MIME
9
10							=head1 SYNOPSIS
11
12							use MIME::Charset:
13
14							$charset = MIME::Charset->new("euc-jp");
15
16							Getting charset information:
17
18							$benc = $charset->body_encoding; # e.g. "Q"
19							$cset = $charset->as_string; # e.g. "US-ASCII"
20							$henc = $charset->header_encoding; # e.g. "S"
21							$cset = $charset->output_charset; # e.g. "ISO-2022-JP"
22
23							Translating text data:
24
25							($text, $charset, $encoding) =
26							$charset->header_encode(
27							"\xc9\xc2\xc5\xaa\xc0\xde\xc3\xef\xc5\xaa".
28							"\xc7\xd1\xca\xaa\xbd\xd0\xce\xcf\xb4\xef",
29							Charset => 'euc-jp');
30							# ...returns e.g. (, "ISO-2022-JP", "B").
31
32							($text, $charset, $encoding) =
33							$charset->body_encode(
34							"Collectioneur path\xe9tiquement ".
35							"\xe9clectique de d\xe9chets",
36							Charset => 'latin1');
37							# ...returns e.g. (, "ISO-8859-1", "QUOTED-PRINTABLE").
38
39							$len = $charset->encoded_header_len(
40							"Perl\xe8\xa8\x80\xe8\xaa\x9e",
41							Charset => 'utf-8',
42							Encoding => "b");
43							# ...returns e.g. 28.
44
45							Manipulating module defaults:
46
47							MIME::Charset::alias("csEUCKR", "euc-kr");
48							MIME::Charset::default("iso-8859-1");
49							MIME::Charset::fallback("us-ascii");
50
51							Non-OO functions (may be deprecated in near future):
52
53							use MIME::Charset qw(:info);
54
55							$benc = body_encoding("iso-8859-2"); # "Q"
56							$cset = canonical_charset("ANSI X3.4-1968"); # "US-ASCII"
57							$henc = header_encoding("utf-8"); # "S"
58							$cset = output_charset("shift_jis"); # "ISO-2022-JP"
59
60							use MIME::Charset qw(:trans);
61
62							($text, $charset, $encoding) =
63							header_encode(
64							"\xc9\xc2\xc5\xaa\xc0\xde\xc3\xef\xc5\xaa".
65							"\xc7\xd1\xca\xaa\xbd\xd0\xce\xcf\xb4\xef",
66							"euc-jp");
67							# ...returns (, "ISO-2022-JP", "B");
68
69							($text, $charset, $encoding) =
70							body_encode(
71							"Collectioneur path\xe9tiquement ".
72							"\xe9clectique de d\xe9chets",
73							"latin1");
74							# ...returns (, "ISO-8859-1", "QUOTED-PRINTABLE");
75
76							$len = encoded_header_len(
77							"Perl\xe8\xa8\x80\xe8\xaa\x9e", "b", "utf-8"); # 28
78
79							=head1 DESCRIPTION
80
81							MIME::Charset provides information about character sets used for
82							MIME messages on Internet.
83
84							=head2 Definitions
85
86							The B is ``character set'' used in MIME to refer to a
87							method of converting a sequence of octets into a sequence of characters.
88							It includes both concepts of ``coded character set'' (CCS) and
89							``character encoding scheme'' (CES) of ISO/IEC.
90
91							The B is that used in MIME to refer to a method of representing
92							a body part or a header body as sequence(s) of printable US-ASCII
93							characters.
94
95							=cut
96
97	9			9		36	use strict;
	9					10
	9					255
98	9			9		29	use vars qw(@ISA $VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS $Config);
	9					15
	9					597
99	9			9		30	use Exporter;
	9					12
	9					735
100							@ISA = qw(Exporter);
101							@EXPORT = qw(body_encoding canonical_charset header_encoding output_charset
102							body_encode encoded_header_len header_encode);
103							@EXPORT_OK = qw(alias default fallback recommended);
104							%EXPORT_TAGS = (
105							"info" => [qw(body_encoding header_encoding
106							canonical_charset output_charset)],
107							"trans" =>[ qw(body_encode encoded_header_len
108							header_encode)],
109							);
110	9			9		34	use Carp qw(croak);
	9					9
	9					489
111
112	9	50		9		34	use constant USE_ENCODE => ($] >= 5.007003)? 'Encode': '';
	9					9
	9					1081
113
114							my @ENCODE_SUBS = qw(FB_CROAK FB_PERLQQ FB_HTMLCREF FB_XMLCREF
115							is_utf8 resolve_alias);
116							if (USE_ENCODE) {
117	9			9		4908	eval "use ".USE_ENCODE." \@ENCODE_SUBS;";
	9					75112
	9					664
118							if ($@) { # Perl 5.7.3 + Encode 0.40
119							eval "use ".USE_ENCODE." qw(is_utf8);";
120							require MIME::Charset::_Compat;
121							for my $sub (@ENCODE_SUBS) {
122	9			9		34	no strict "refs";
	9					13
	9					591
123							*{$sub} = \&{"MIME::Charset::_Compat::$sub"}
124							unless $sub eq 'is_utf8';
125							}
126							}
127							} else {
128							require MIME::Charset::_Compat;
129							for my $sub (@ENCODE_SUBS) {
130	9			9		28	no strict "refs";
	9					10
	9					27624
131							*{$sub} = \&{"MIME::Charset::_Compat::$sub"};
132							}
133							}
134
135							$VERSION = '1.012.2';
136
137							######## Private Attributes ########
138
139							my $DEFAULT_CHARSET = 'US-ASCII';
140							my $FALLBACK_CHARSET = 'UTF-8';
141
142							# This table was initially borrowed from Python email package.
143
144							my %CHARSETS = (# input header enc body enc output conv
145							'ISO-8859-1' => ['Q', 'Q', undef],
146							'ISO-8859-2' => ['Q', 'Q', undef],
147							'ISO-8859-3' => ['Q', 'Q', undef],
148							'ISO-8859-4' => ['Q', 'Q', undef],
149							# ISO-8859-5 is Cyrillic, and not especially used
150							# ISO-8859-6 is Arabic, also not particularly used
151							# ISO-8859-7 is Greek, 'Q' will not make it readable
152							# ISO-8859-8 is Hebrew, 'Q' will not make it readable
153							'ISO-8859-9' => ['Q', 'Q', undef],
154							'ISO-8859-10' => ['Q', 'Q', undef],
155							# ISO-8859-11 is Thai, 'Q' will not make it readable
156							'ISO-8859-13' => ['Q', 'Q', undef],
157							'ISO-8859-14' => ['Q', 'Q', undef],
158							'ISO-8859-15' => ['Q', 'Q', undef],
159							'ISO-8859-16' => ['Q', 'Q', undef],
160							'WINDOWS-1252' => ['Q', 'Q', undef],
161							'VISCII' => ['Q', 'Q', undef],
162							'US-ASCII' => [undef, undef, undef],
163							'BIG5' => ['B', 'B', undef],
164							'GB2312' => ['B', 'B', undef],
165							'HZ-GB-2312' => ['B', undef, undef],
166							'EUC-JP' => ['B', undef, 'ISO-2022-JP'],
167							'SHIFT_JIS' => ['B', undef, 'ISO-2022-JP'],
168							'ISO-2022-JP' => ['B', undef, undef],
169							'ISO-2022-JP-1' => ['B', undef, undef],
170							'ISO-2022-JP-2' => ['B', undef, undef],
171							'EUC-JISX0213' => ['B', undef, 'ISO-2022-JP-3'],
172							'SHIFT_JISX0213' => ['B', undef, 'ISO-2022-JP-3'],
173							'ISO-2022-JP-3' => ['B', undef, undef],
174							'EUC-JIS-2004' => ['B', undef, 'ISO-2022-JP-2004'],
175							'SHIFT_JIS-2004' => ['B', undef, 'ISO-2022-JP-2004'],
176							'ISO-2022-JP-2004' => ['B', undef, undef],
177							'KOI8-R' => ['B', 'B', undef],
178							'TIS-620' => ['B', 'B', undef], # cf. Mew
179							'UTF-16' => ['B', 'B', undef],
180							'UTF-16BE' => ['B', 'B', undef],
181							'UTF-16LE' => ['B', 'B', undef],
182							'UTF-32' => ['B', 'B', undef],
183							'UTF-32BE' => ['B', 'B', undef],
184							'UTF-32LE' => ['B', 'B', undef],
185							'UTF-7' => ['Q', undef, undef],
186							'UTF-8' => ['S', 'S', undef],
187							'GSM03.38' => [undef, undef, undef], # not for MIME
188							# We're making this one up to represent raw unencoded 8bit
189							'8BIT' => [undef, 'B', 'ISO-8859-1'],
190							);
191
192							# Fix some unexpected or unpreferred names returned by
193							# Encode::resolve_alias() or used by somebodies else.
194							my %CHARSET_ALIASES = (# unpreferred preferred
195							"ASCII" => "US-ASCII",
196							"BIG5-ETEN" => "BIG5",
197							"CP1250" => "WINDOWS-1250",
198							"CP1251" => "WINDOWS-1251",
199							"CP1252" => "WINDOWS-1252",
200							"CP1253" => "WINDOWS-1253",
201							"CP1254" => "WINDOWS-1254",
202							"CP1255" => "WINDOWS-1255",
203							"CP1256" => "WINDOWS-1256",
204							"CP1257" => "WINDOWS-1257",
205							"CP1258" => "WINDOWS-1258",
206							"CP874" => "WINDOWS-874",
207							"CP936" => "GBK",
208							"CP949" => "KS_C_5601-1987",
209							"EUC-CN" => "GB2312",
210							"HZ" => "HZ-GB-2312", # RFC 1842
211							"KS_C_5601" => "KS_C_5601-1987",
212							"SHIFTJIS" => "SHIFT_JIS",
213							"SHIFTJISX0213" => "SHIFT_JISX0213",
214							"TIS620" => "TIS-620", # IANA MIBenum 2259
215							"UNICODE-1-1-UTF-7" => "UTF-7", # RFC 1642 (obs.)
216							"UTF8" => "UTF-8",
217							"UTF-8-STRICT" => "UTF-8", # Perl internal use
218							"GSM0338" => "GSM03.38", # not for MIME
219							);
220
221							# Some vendors encode characters beyond standardized mappings using extended
222							# encoders. Some other standard encoders need additional encode modules.
223							my %ENCODERS = (
224							'EXTENDED' => {
225							'ISO-8859-1' => [['cp1252'], ], # Encode::Byte
226							'ISO-8859-2' => [['cp1250'], ], # Encode::Byte
227							'ISO-8859-5' => [['cp1251'], ], # Encode::Byte
228							'ISO-8859-6' => [
229							['cp1256'], # Encode::Byte
230							# ['cp1006'], # ditto, for Farsi
231							],
232							'ISO-8859-6-I'=>[['cp1256'], ], # ditto
233							'ISO-8859-7' => [['cp1253'], ], # Encode::Byte
234							'ISO-8859-8' => [['cp1255'], ], # Encode::Byte
235							'ISO-8859-8-I'=>[['cp1255'], ], # ditto
236							'ISO-8859-9' => [['cp1254'], ], # Encode::Byte
237							'ISO-8859-13'=> [['cp1257'], ], # Encode::Byte
238							'GB2312' => [
239							['gb18030', 'Encode::HanExtra'],
240							['cp936'], # Encode::CN
241							],
242							'EUC-JP' => [
243							['eucJP-ascii', 'Encode::EUCJPASCII'],
244							# ['cp51932', 'Encode::EUCJPMS'],
245							],
246							'ISO-2022-JP'=> [
247							['x-iso2022jp-ascii',
248							'Encode::EUCJPASCII'],
249							# ['iso-2022-jp-ms','Encode::ISO2022JPMS'],
250							# ['cp50220', 'Encode::EUCJPMS'],
251							# ['cp50221', 'Encode::EUCJPMS'],
252							['iso-2022-jp-1'], # Encode::JP (note*)
253							],
254							'SHIFT_JIS' => [
255							['cp932'], # Encode::JP
256							],
257							'EUC-JISX0213' => [['euc-jis-2004', 'Encode::JISX0213'], ],
258							'ISO-2022-JP-3' => [['iso-2022-jp-2004', 'Encode::JISX0213'], ],
259							'SHIFT_JISX0213'=> [['shift_jis-2004', 'Encode::ShiftJIS2004'], ],
260							'EUC-KR' => [['cp949'], ], # Encode::KR
261							'BIG5' => [
262							# ['big5plus', 'Encode::HanExtra'],
263							# ['big5-2003', 'Encode::HanExtra'],
264							['cp950'], # Encode::TW
265							# ['big5-1984', 'Encode::HanExtra'],
266							],
267							'TIS-620' => [['cp874'], ], # Encode::Byte
268							'UTF-8' => [['utf8'], ], # Special name on Perl
269							},
270							'STANDARD' => {
271							'ISO-8859-6-E' => [['iso-8859-6'],],# Encode::Byte
272							'ISO-8859-6-I' => [['iso-8859-6'],],# ditto
273							'ISO-8859-8-E' => [['iso-8859-8'],],# Encode::Byte
274							'ISO-8859-8-I' => [['iso-8859-8'],],# ditto
275							'GB18030' => [['gb18030', 'Encode::HanExtra'], ],
276							'ISO-2022-JP-2' => [['iso-2022-jp-2','Encode::ISO2022JP2'], ],
277							'EUC-JISX0213' => [['euc-jisx0213', 'Encode::JISX0213'], ],
278							'ISO-2022-JP-3' => [['iso-2022-jp-3', 'Encode::JISX0213'], ],
279							'EUC-JIS-2004' => [['euc-jis-2004', 'Encode::JISX0213'], ],
280							'ISO-2022-JP-2004' => [['iso-2022-jp-2004', 'Encode::JISX0213'], ],
281							'SHIFT_JIS-2004'=> [['shift_jis-2004', 'Encode::ShiftJIS2004'], ],
282							'EUC-TW' => [['euc-tw', 'Encode::HanExtra'], ],
283							'HZ-GB-2312' => [['hz'], ], # Encode::CN
284							'TIS-620' => [['tis620'], ], # (note*)
285							'UTF-16' => [['x-utf16auto', 'MIME::Charset::UTF'],],
286							'UTF-32' => [['x-utf32auto', 'MIME::Charset::UTF'],],
287							'GSM03.38' => [['gsm0338'], ], # Encode::GSM0338
288
289							# (note*) ISO-8859-11 was not registered by IANA.
290							# L treats it as canonical name of ``tis-?620''.
291							},
292							);
293
294							# ISO-2022-* escape sequences etc. to detect charset from unencoded data.
295							my @ESCAPE_SEQS = (
296							# ISO-2022-* sequences
297							# escape seq, possible charset
298							# Following sequences are commonly used.
299							["\033\$\@", "ISO-2022-JP"], # RFC 1468
300							["\033\$B", "ISO-2022-JP"], # ditto
301							["\033(J", "ISO-2022-JP"], # ditto
302							["\033(I", "ISO-2022-JP"], # ditto (nonstandard)
303							["\033\$(D", "ISO-2022-JP"], # RFC 2237 (note*)
304							# Following sequences are less commonly used.
305							["\033.A", "ISO-2022-JP-2"], # RFC 1554
306							["\033.F", "ISO-2022-JP-2"], # ditto
307							["\033\$(C", "ISO-2022-JP-2"], # ditto
308							["\033\$(O", "ISO-2022-JP-3"], # JIS X 0213:2000
309							["\033\$(P", "ISO-2022-JP-2004"], # JIS X 0213:2000/2004
310							["\033\$(Q", "ISO-2022-JP-2004"], # JIS X 0213:2004
311							["\033\$)C", "ISO-2022-KR"], # RFC 1557
312							["\033\$)A", "ISO-2022-CN"], # RFC 1922
313							["\033\$A", "ISO-2022-CN"], # ditto (nonstandard)
314							["\033\$)G", "ISO-2022-CN"], # ditto
315							["\033\$*H", "ISO-2022-CN"], # ditto
316							# Other sequences will be used with appropriate charset
317							# parameters, or hardly used.
318
319							# note*: This RFC defines ISO-2022-JP-1, superset of
320							# ISO-2022-JP. But that charset name is rarely used.
321							# OTOH many of encoders for ISO-2022-JP recognize this
322							# sequence so that comatibility with EUC-JP will be
323							# guaranteed.
324
325							# Singlebyte 7-bit sequences
326							# escape seq, possible charset
327							["\033e", "GSM03.38"], # ESTI GSM 03.38 (note*)
328							["\033\012", "GSM03.38"], # ditto
329							["\033<", "GSM03.38"], # ditto
330							["\033/", "GSM03.38"], # ditto
331							["\033>", "GSM03.38"], # ditto
332							["\033\024", "GSM03.38"], # ditto
333							["\033(", "GSM03.38"], # ditto
334							["\033\@", "GSM03.38"], # ditto
335							["\033)", "GSM03.38"], # ditto
336							["\033=", "GSM03.38"], # ditto
337
338							# note*: This is not used for MIME message.
339							);
340
341							######## Public Configuration Attributes ########
342
343							$Config = {
344							Detect7bit => 'YES',
345							Mapping => 'EXTENDED',
346							Replacement => 'DEFAULT',
347							};
348							local @INC = @INC;
349							pop @INC if $INC[-1] eq '.';
350							eval { require MIME::Charset::Defaults; };
351
352							######## Private Constants ########
353
354							my $NON7BITRE = qr{
355							[^\x01-\x7e]
356							}x;
357
358							my $NONASCIIRE = qr{
359							[^\x09\x0a\x0d\x20\x21-\x7e]
360							}x;
361
362							my $ISO2022RE = qr{
363							ISO-2022-.+
364							}ix;
365
366							my $ASCIITRANSRE = qr{
367							HZ-GB-2312 \| UTF-7
368							}ix;
369
370
371							######## Public Functions ########
372
373							=head2 Constructor
374
375							=over
376
377							=item $charset = MIME::Charset->new([CHARSET [, OPTS]])
378
379							Create charset object.
380
381							OPTS may accept following key-value pair.
382							B:
383							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
384							conversion will not be performed. So this option do not have any effects.
385
386							=over 4
387
388							=item Mapping => MAPTYPE
389
390							Whether to extend mappings actually used for charset names or not.
391							C<"EXTENDED"> uses extended mappings.
392							C<"STANDARD"> uses standardized strict mappings.
393							Default is C<"EXTENDED">.
394
395							=back
396
397							=cut
398
399							sub new {
400	75			75	1	700	my $class = shift;
401	75					91	my $charset = shift;
402	75	100				150	return bless {}, $class unless $charset;
403	70	50				131	return bless {}, $class if 75 < length $charset; # w/a for CPAN RT #65796.
404	70					97	my %params = @_;
405	70		33			348	my $mapping = uc($params{'Mapping'} \|\| $Config->{Mapping});
406
407	70	100				216	if ($charset =~ /\bhz.?gb.?2312$/i) {
		100
408							# workaround: "HZ-GB-2312" mistakenly treated as "EUC-CN" by Encode
409							# (2.12).
410	4					6	$charset = "HZ-GB-2312";
411							} elsif ($charset =~ /\btis-?620$/i) {
412							# workaround: "TIS620" treated as ISO-8859-11 by Encode.
413							# And "TIS-620" not known by some versions of Encode (cf.
414							# CPAN RT #20781).
415	1					2	$charset = "TIS-620";
416							} else {
417	65		66			142	$charset = resolve_alias($charset) \|\| $charset
418							}
419	70		66			66531	$charset = $CHARSET_ALIASES{uc($charset)} \|\| uc($charset);
420	70					68	my ($henc, $benc, $outcset);
421	70					106	my $spec = $CHARSETS{$charset};
422	70	100				98	if ($spec) {
423	60					123	($henc, $benc, $outcset) =
424							($$spec[0], $$spec[1], USE_ENCODE? $$spec[2]: undef);
425							} else {
426	10					14	($henc, $benc, $outcset) = ('S', 'B', undef);
427							}
428	70					61	my ($decoder, $encoder);
429	70					54	if (USE_ENCODE) {
430	70					106	$decoder = _find_encoder($charset, $mapping);
431	70					93	$encoder = _find_encoder($outcset, $mapping);
432							} else {
433							$decoder = $encoder = undef;
434							}
435
436	70		66			605	bless {
			100
437							InputCharset => $charset,
438							Decoder => $decoder,
439							HeaderEncoding => $henc,
440							BodyEncoding => $benc,
441							OutputCharset => ($outcset \|\| $charset),
442							Encoder => ($encoder \|\| $decoder),
443							}, $class;
444							}
445
446							my %encoder_cache = ();
447
448							sub _find_encoder($$) {
449	140		100	140		312	my $charset = uc(shift \|\| "");
450	140	100				214	return undef unless $charset;
451	78					77	my $mapping = uc(shift);
452	78					64	my ($spec, $name, $module, $encoder);
453
454	78					61	local($@);
455	78					143	$encoder = $encoder_cache{$charset, $mapping};
456	78	100				133	return $encoder if ref $encoder;
457
458	55					77	foreach my $m (('EXTENDED', 'STANDARD')) {
459	82	50	66			273	next if $m eq 'EXTENDED' and $mapping ne 'EXTENDED';
460	82					119	$spec = $ENCODERS{$m}->{$charset};
461	82	100				125	next unless $spec;
462	41					33	foreach my $s (@{$spec}) {
	41					70
463	47					33	($name, $module) = @{$s};
	47					74
464	47	100				105	if ($module) {
465	14	100				770	next unless eval "require $module;";
466							}
467	37					121	$encoder = Encode::find_encoding($name);
468	37	50				8902	last if ref $encoder;
469							}
470	41	100				73	last if ref $encoder;
471							}
472	55		100			167	$encoder \|\|= Encode::find_encoding($charset);
473	55	100				1060	$encoder_cache{$charset, $mapping} = $encoder if $encoder;
474	55					88	return $encoder;
475							}
476
477							=back
478
479							=head2 Getting Information of Charsets
480
481							=over
482
483							=item $charset->body_encoding
484
485							=item body_encoding CHARSET
486
487							Get recommended transfer-encoding of CHARSET for message body.
488
489							Returned value will be one of C<"B"> (BASE64), C<"Q"> (QUOTED-PRINTABLE),
490							C<"S"> (shorter one of either) or
491							C (might not be transfer-encoded; either 7BIT or 8BIT). This may
492							not be same as encoding for message header.
493
494							=cut
495
496							sub body_encoding($) {
497	4			4	1	13	my $self = shift;
498	4	50				13	return undef unless $self;
499	4	100				15	$self = __PACKAGE__->new($self) unless ref $self;
500	4					25	$self->{BodyEncoding};
501							}
502
503							=item $charset->as_string
504
505							=item canonical_charset CHARSET
506
507							Get canonical name for charset.
508
509							=cut
510
511							sub canonical_charset($) {
512	4			4	1	11	my $self = shift;
513	4	50				9	return undef unless $self;
514	4	100				9	$self = __PACKAGE__->new($self) unless ref $self;
515	4					16	$self->{InputCharset};
516							}
517
518							sub as_string($) {
519	29			29	1	82	my $self = shift;
520	29					77	$self->{InputCharset};
521							}
522
523							=item $charset->decoder
524
525							Get L<"Encode::Encoding"> object to decode strings to Unicode by charset.
526							If charset is not specified or not known by this module,
527							undef will be returned.
528
529							=cut
530
531							sub decoder($) {
532	12			12	1	26	my $self = shift;
533	12					29	$self->{Decoder};
534							}
535
536							=item $charset->dup
537
538							Get a copy of charset object.
539
540							=cut
541
542							sub dup($) {
543	2			2	1	3	my $self = shift;
544	2					7	my $obj = __PACKAGE__->new(undef);
545	2					3	%{$obj} = %{$self};
	2					8
	2					7
546	2					6	$obj;
547							}
548
549							=item $charset->encoder([CHARSET])
550
551							Get L<"Encode::Encoding"> object to encode Unicode string using compatible
552							charset recommended to be used for messages on Internet.
553
554							If optional CHARSET is specified, replace encoder (and output charset
555							name) of $charset object with those of CHARSET, therefore,
556							$charset object will be a converter between original charset and
557							new CHARSET.
558
559							=cut
560
561							sub encoder($$;) {
562	6			6	1	7	my $self = shift;
563	6					7	my $charset = shift;
564	6	100				15	if ($charset) {
565	2	50				7	$charset = __PACKAGE__->new($charset) unless ref $charset;
566	2					4	$self->{OutputCharset} = $charset->{InputCharset};
567	2					3	$self->{Encoder} = $charset->{Decoder};
568	2					4	$self->{BodyEncoding} = $charset->{BodyEncoding};
569	2					3	$self->{HeaderEncoding} = $charset->{HeaderEncoding};
570							}
571	6					17	$self->{Encoder};
572							}
573
574							=item $charset->header_encoding
575
576							=item header_encoding CHARSET
577
578							Get recommended encoding scheme of CHARSET for message header.
579
580							Returned value will be one of C<"B">, C<"Q">, C<"S"> (shorter one of either)
581							or C (might not be encoded). This may not be same as encoding
582							for message body.
583
584							=cut
585
586							sub header_encoding($) {
587	4			4	1	9	my $self = shift;
588	4	50				9	return undef unless $self;
589	4	100				9	$self = __PACKAGE__->new($self) unless ref $self;
590	4					16	$self->{HeaderEncoding};
591							}
592
593							=item $charset->output_charset
594
595							=item output_charset CHARSET
596
597							Get a charset which is compatible with given CHARSET and is recommended
598							to be used for MIME messages on Internet (if it is known by this module).
599
600							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
601							this function will simply
602							return the result of L<"canonical_charset">.
603
604							=cut
605
606							sub output_charset($) {
607	4			4	1	15	my $self = shift;
608	4	50				9	return undef unless $self;
609	4	100				12	$self = __PACKAGE__->new($self) unless ref $self;
610	4					21	$self->{OutputCharset};
611							}
612
613							=back
614
615							=head2 Translating Text Data
616
617							=over
618
619							=item $charset->body_encode(STRING [, OPTS])
620
621							=item body_encode STRING, CHARSET [, OPTS]
622
623							Get converted (if needed) data of STRING and recommended transfer-encoding
624							of that data for message body. CHARSET is the charset by which STRING
625							is encoded.
626
627							OPTS may accept following key-value pairs.
628							B:
629							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
630							conversion will not be performed. So these options do not have any effects.
631
632							=over 4
633
634							=item Detect7bit => YESNO
635
636							Try auto-detecting 7-bit charset when CHARSET is not given.
637							Default is C<"YES">.
638
639							=item Replacement => REPLACEMENT
640
641							Specifies error handling scheme. See L<"Error Handling">.
642
643							=back
644
645							3-item list of (I, I,
646							I) will be returned.
647							I will be either C<"BASE64">, C<"QUOTED-PRINTABLE">,
648							C<"7BIT"> or C<"8BIT">. If I could not be determined
649							and I contains non-ASCII byte(s), I will
650							be C and I will be C<"BASE64">.
651							I will be C<"US-ASCII"> if and only if string does not
652							contain any non-ASCII bytes.
653
654							=cut
655
656							sub body_encode {
657	4			4	1	263	my $self = shift;
658	4					4	my $text;
659	4	100				10	if (ref $self) {
660	2					3	$text = shift;
661							} else {
662	2					1	$text = $self;
663	2					10	$self = __PACKAGE__->new(shift);
664							}
665	4					17	my ($encoded, $charset) = $self->_text_encode($text, @_);
666							return ($encoded, undef, 'BASE64')
667	4	50	33			18	unless $charset and $charset->{InputCharset};
668	4					5	my $cset = $charset->{OutputCharset};
669
670							# Determine transfer-encoding.
671	4					4	my $enc = $charset->{BodyEncoding};
672
673	4	50	33			20	if (!$enc and $encoded !~ /\x00/) { # Eliminate hostile NUL character.
		0
		0
		0
674	4	50				93	if ($encoded =~ $NON7BITRE) { # String contains 8bit char(s).
		50
675	0					0	$enc = '8BIT';
676							} elsif ($cset =~ /^($ISO2022RE\|$ASCIITRANSRE)$/) { # 7BIT.
677	4					6	$enc = '7BIT';
678							} else { # Pure ASCII.
679	0					0	$enc = '7BIT';
680	0					0	$cset = 'US-ASCII';
681							}
682							} elsif ($enc eq 'S') {
683	0					0	$enc = _resolve_S($encoded, 1);
684							} elsif ($enc eq 'B') {
685	0					0	$enc = 'BASE64';
686							} elsif ($enc eq 'Q') {
687	0					0	$enc = 'QUOTED-PRINTABLE';
688							} else {
689	0					0	$enc = 'BASE64';
690							}
691	4					24	return ($encoded, $cset, $enc);
692							}
693
694							=item $charset->decode(STRING [,CHECK])
695
696							Decode STRING to Unicode.
697
698							B:
699							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
700							this function will die.
701
702							=cut
703
704							sub decode($$$;) {
705	14			14	1	831	my $self = shift;
706	14					16	my $s = shift;
707	14		100			46	my $check = shift \|\| 0;
708	14					53	$self->{Decoder}->decode($s, $check);
709							}
710
711							=item detect_7bit_charset STRING
712
713							Guess 7-bit charset that may encode a string STRING.
714							If STRING contains any 8-bit bytes, C will be returned.
715							Otherwise, Default Charset will be returned for unknown charset.
716
717							=cut
718
719							sub detect_7bit_charset($) {
720	4	50		4	1	11	return $DEFAULT_CHARSET unless &USE_ENCODE;
721	4					5	my $s = shift;
722	4	50				8	return $DEFAULT_CHARSET unless $s;
723
724							# Non-7bit string
725	4	50				11	return undef if $s =~ $NON7BITRE;
726
727							# Try to detect 7-bit escape sequences.
728	4					9	foreach (@ESCAPE_SEQS) {
729	8					12	my ($seq, $cset) = @$_;
730	8	100				17	if (index($s, $seq) >= 0) {
731	4					9	my $decoder = __PACKAGE__->new($cset);
732	4	50				9	next unless $decoder->{Decoder};
733	4					6	eval {
734	4					3	my $dummy = $s;
735	4					14	$decoder->decode($dummy, FB_CROAK());
736							};
737	4	50				205	if ($@) {
738	0					0	next;
739							}
740	4					23	return $decoder->{InputCharset};
741							}
742							}
743
744							# How about HZ, VIQR, UTF-7, ...?
745
746	0					0	return $DEFAULT_CHARSET;
747							}
748
749							sub _detect_7bit_charset {
750	0			0		0	detect_7bit_charset(@_);
751							}
752
753							=item $charset->encode(STRING [, CHECK])
754
755							Encode STRING (Unicode or non-Unicode) using compatible charset recommended
756							to be used for messages on Internet (if this module knows it).
757							Note that string will be decoded to Unicode then encoded even if compatible charset
758							was equal to original charset.
759
760							B:
761							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
762							this function will die.
763
764							=cut
765
766							sub encode($$$;) {
767	10			10	1	313	my $self = shift;
768	10					19	my $s = shift;
769	10		50			30	my $check = shift \|\| 0;
770
771	10	100	66			44	unless (is_utf8($s) or $s =~ /[^\x00-\xFF]/) {
772	4	50				42	$s = $self->{Decoder}->decode($s, ($check & 0x1)? FB_CROAK(): 0);
773							}
774	10					34	my $enc = $self->{Encoder}->encode($s, $check);
775	10	50				3399	Encode::_utf8_off($enc) if is_utf8($enc); # workaround for RT #35120
776	10					28	$enc;
777							}
778
779							=item $charset->encoded_header_len(STRING [, ENCODING])
780
781							=item encoded_header_len STRING, ENCODING, CHARSET
782
783							Get length of encoded STRING for message header
784							(without folding).
785
786							ENCODING may be one of C<"B">, C<"Q"> or C<"S"> (shorter
787							one of either C<"B"> or C<"Q">).
788
789							=cut
790
791							sub encoded_header_len($$$;) {
792	14			14	1	25	my $self = shift;
793	14					13	my ($encoding, $s);
794	14	100				18	if (ref $self) {
795	8					6	$s = shift;
796	8		66			19	$encoding = uc(shift \|\| $self->{HeaderEncoding});
797							} else {
798	6					6	$s = $self;
799	6					8	$encoding = uc(shift);
800	6					24	$self = shift;
801	6	50				24	$self = __PACKAGE__->new($self) unless ref $self;
802							}
803
804							#FIXME:$encoding === undef
805
806	14					12	my $enclen;
807	14	100	66			46	if ($encoding eq 'Q') {
		50
808	4					8	$enclen = _enclen_Q($s);
809							} elsif ($encoding eq 'S' and _resolve_S($s) eq 'Q') {
810	0					0	$enclen = _enclen_Q($s);
811							} else { # "B"
812	10					14	$enclen = _enclen_B($s);
813							}
814
815	14					59	length($self->{OutputCharset})+$enclen+7;
816							}
817
818							sub _enclen_B($) {
819	10			10		23	int((length(shift) + 2) / 3) * 4;
820							}
821
822							sub _enclen_Q($;$) {
823	4			4		8	my $s = shift;
824	4					4	my $in_body = shift;
825	4					5	my @o;
826	4	50				6	if ($in_body) {
827	0					0	@o = ($s =~ m{([^-\t\r\n !*+/0-9A-Za-z])}go);
828							} else {
829	4					26	@o = ($s =~ m{([^- !*+/0-9A-Za-z])}gos);
830							}
831	4					10	length($s) + scalar(@o) * 2;
832							}
833
834							sub _resolve_S($;$) {
835	6			6		7	my $s = shift;
836	6					5	my $in_body = shift;
837	6					6	my $e;
838	6	50				11	if ($in_body) {
839	0					0	$e = scalar(() = $s =~ m{[^-\t\r\n !*+/0-9A-Za-z]}g);
840	0	0				0	return (length($s) + 8 < $e * 6) ? 'BASE64' : 'QUOTED-PRINTABLE';
841							} else {
842	6					27	$e = scalar(() = $s =~ m{[^- !*+/0-9A-Za-z]}g);
843	6	50				27	return (length($s) + 8 < $e * 6) ? 'B' : 'Q';
844							}
845							}
846
847							=item $charset->header_encode(STRING [, OPTS])
848
849							=item header_encode STRING, CHARSET [, OPTS]
850
851							Get converted (if needed) data of STRING and recommended encoding scheme of
852							that data for message headers. CHARSET is the charset by which STRING
853							is encoded.
854
855							OPTS may accept following key-value pairs.
856							B:
857							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
858							conversion will not be performed. So these options do not have any effects.
859
860							=over 4
861
862							=item Detect7bit => YESNO
863
864							Try auto-detecting 7-bit charset when CHARSET is not given.
865							Default is C<"YES">.
866
867							=item Replacement => REPLACEMENT
868
869							Specifies error handling scheme. See L<"Error Handling">.
870
871							=back
872
873							3-item list of (I, I,
874							I) will be returned. I will be
875							either C<"B">, C<"Q"> or C (might not be encoded).
876							If I could not be determined and I
877							contains non-ASCII byte(s), I will be C<"8BIT">
878							(this is I charset name but a special value to represent unencodable
879							data) and I will be C (should not be encoded).
880							I will be C<"US-ASCII"> if and only if string does not
881							contain any non-ASCII bytes.
882
883							=cut
884
885							sub header_encode {
886	8			8	1	573	my $self = shift;
887	8					6	my $text;
888	8	100				17	if (ref $self) {
889	4					5	$text = shift;
890							} else {
891	4					2	$text = $self;
892	4					7	$self = __PACKAGE__->new(shift);
893							}
894	8					38	my ($encoded, $charset) = $self->_text_encode($text, @_);
895							return ($encoded, '8BIT', undef)
896	8	50	33			27	unless $charset and $charset->{InputCharset};
897	8					10	my $cset = $charset->{OutputCharset};
898
899							# Determine encoding scheme.
900	8					9	my $enc = $charset->{HeaderEncoding};
901
902	8	100	66			54	if (!$enc and $encoded !~ $NON7BITRE) {
		50
		50
903	2	50				89	unless ($cset =~ /^($ISO2022RE\|$ASCIITRANSRE)$/) { # 7BIT.
904	2					4	$cset = 'US-ASCII';
905							}
906							} elsif ($enc eq 'S') {
907	0					0	$enc = _resolve_S($encoded);
908							} elsif ($enc !~ /^[BQ]$/) {
909	0					0	$enc = 'B';
910							}
911	8					30	return ($encoded, $cset, $enc);
912							}
913
914							sub _text_encode {
915	12			12		12	my $charset = shift;
916	12					9	my $s = shift;
917	12					15	my %params = @_;
918	12		33			40	my $replacement = uc($params{'Replacement'} \|\| $Config->{Replacement});
919	12		33			29	my $detect7bit = uc($params{'Detect7bit'} \|\| $Config->{Detect7bit});
920							my $encoding = $params{'Encoding'} \|\|
921	12		33			33	(exists $params{'Encoding'}? undef: 'A'); # undocumented
922
923	12	50	33			46	if (!$encoding or $encoding ne 'A') { # no 7-bit auto-detection
924	0					0	$detect7bit = 'NO';
925							}
926	12	100				28	unless ($charset->{InputCharset}) {
927	4	50				25	if ($s =~ $NON7BITRE) {
		50
928	0					0	return ($s, undef);
929							} elsif ($detect7bit ne "NO") {
930	4					10	$charset = __PACKAGE__->new(&detect_7bit_charset($s));
931							} else {
932	0					0	$charset = __PACKAGE__->new($DEFAULT_CHARSET,
933							Mapping => 'STANDARD');
934							}
935							}
936	12	50	33			41	if (!$encoding or $encoding ne 'A') { # no conversion
937	0					0	$charset = $charset->dup;
938	0					0	$charset->encoder($charset);
939	0					0	$charset->{HeaderEncoding} = $encoding;
940	0					0	$charset->{BodyEncoding} = $encoding;
941							}
942							my $check = ($replacement and $replacement =~ /^\d+$/)?
943							$replacement:
944							{
945							'CROAK' => FB_CROAK(),
946							'STRICT' => FB_CROAK(),
947							'FALLBACK' => FB_CROAK(), # special
948							'PERLQQ' => FB_PERLQQ(),
949							'HTMLCREF' => FB_HTMLCREF(),
950							'XMLCREF' => FB_XMLCREF(),
951	12	50	33			153	}->{$replacement \|\| ""} \|\| 0;
			50
952
953							# Encode data by output charset if required. If failed, fallback to
954							# fallback charset.
955	12					22	my $encoded;
956	12	100	33			98	if (is_utf8($s) or $s =~ /[^\x00-\xFF]/ or
			50
			50
			66
957							($charset->{InputCharset} \|\| "") ne ($charset->{OutputCharset} \|\| "")) {
958	4	50				7	if ($check & 0x1) { # CROAK or FALLBACK
959	0					0	eval {
960	0					0	$encoded = $s;
961	0					0	$encoded = $charset->encode($encoded, FB_CROAK());
962							};
963	0	0				0	if ($@) {
964	0	0	0			0	if ($replacement eq "FALLBACK" and $FALLBACK_CHARSET) {
965	0					0	my $cset = __PACKAGE__->new($FALLBACK_CHARSET,
966							Mapping => 'STANDARD');
967							# croak unknown charset
968							croak "unknown charset ``$FALLBACK_CHARSET''"
969	0	0				0	unless $cset->{Decoder};
970							# charset translation
971	0					0	$charset = $charset->dup;
972	0					0	$charset->encoder($cset);
973	0					0	$encoded = $s;
974	0					0	$encoded = $charset->encode($encoded, 0);
975							# replace input & output charsets with fallback charset
976	0					0	$cset->encoder($cset);
977	0					0	$charset = $cset;
978							} else {
979	0					0	$@ =~ s/ at .+$//;
980	0					0	croak $@;
981							}
982							}
983							} else {
984	4					4	$encoded = $s;
985	4					10	$encoded = $charset->encode($encoded, $check);
986							}
987							} else {
988	8					10	$encoded = $s;
989							}
990
991	12	100				62	if ($encoded !~ /$NONASCIIRE/) { # maybe ASCII
992							# check ``ASCII transformation'' charsets
993	4	50				81	if ($charset->{OutputCharset} =~ /^($ASCIITRANSRE)$/) {
		0
994	4					8	my $u = $encoded;
995	4					4	if (USE_ENCODE) {
996	4					11	$u = $charset->encoder->decode($encoded); # dec. by output
997							} elsif ($encoded =~ /[+~]/) { # workaround for pre-Encode env.
998							$u = "x$u";
999							}
1000	4	100				143	if ($u eq $encoded) {
1001	2					6	$charset = $charset->dup;
1002	2					4	$charset->encoder($DEFAULT_CHARSET);
1003							}
1004							} elsif ($charset->{OutputCharset} ne "US-ASCII") {
1005	0					0	$charset = $charset->dup;
1006	0					0	$charset->encoder($DEFAULT_CHARSET);
1007							}
1008							}
1009
1010	12					28	return ($encoded, $charset);
1011							}
1012
1013							=item $charset->undecode(STRING [,CHECK])
1014
1015							Encode Unicode string STRING to byte string by input charset of $charset.
1016							This is equivalent to C<$charset-Edecoder-Eencode()>.
1017
1018							B:
1019							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
1020							this function will die.
1021
1022							=cut
1023
1024							sub undecode($$$;) {
1025	0			0	1		my $self = shift;
1026	0						my $s = shift;
1027	0		0				my $check = shift \|\| 0;
1028	0						my $enc = $self->{Decoder}->encode($s, $check);
1029	0						Encode::_utf8_off($enc); # workaround for RT #35120
1030	0						$enc;
1031							}
1032
1033							=back
1034
1035							=head2 Manipulating Module Defaults
1036
1037							=over
1038
1039							=item alias ALIAS [, CHARSET]
1040
1041							Get/set charset alias for canonical names determined by
1042							L<"canonical_charset">.
1043
1044							If CHARSET is given and isn't false, ALIAS will be assigned as an alias of
1045							CHARSET. Otherwise, alias won't be changed. In both cases,
1046							current charset name that ALIAS is assigned will be returned.
1047
1048							=cut
1049
1050							sub alias ($;$) {
1051	0			0	1		my $alias = uc(shift);
1052	0						my $charset = uc(shift);
1053
1054	0	0					return $CHARSET_ALIASES{$alias} unless $charset;
1055
1056	0						$CHARSET_ALIASES{$alias} = $charset;
1057	0						return $charset;
1058							}
1059
1060							=item default [CHARSET]
1061
1062							Get/set default charset.
1063
1064							B is used by this module when charset context is
1065							unknown. Modules using this module are recommended to use this
1066							charset when charset context is unknown or implicit default is
1067							expected. By default, it is C<"US-ASCII">.
1068
1069							If CHARSET is given and isn't false, it will be set to default charset.
1070							Otherwise, default charset won't be changed. In both cases,
1071							current default charset will be returned.
1072
1073							B: Default charset I be changed.
1074
1075							=cut
1076
1077							sub default(;$) {
1078	0			0	1		my $charset = &canonical_charset(shift);
1079
1080	0	0					if ($charset) {
1081	0	0					croak "Unknown charset '$charset'"
1082							unless resolve_alias($charset);
1083	0						$DEFAULT_CHARSET = $charset;
1084							}
1085	0						return $DEFAULT_CHARSET;
1086							}
1087
1088							=item fallback [CHARSET]
1089
1090							Get/set fallback charset.
1091
1092							B is used by this module when conversion by given
1093							charset is failed and C<"FALLBACK"> error handling scheme is specified.
1094							Modules using this module may use this charset as last resort of charset
1095							for conversion. By default, it is C<"UTF-8">.
1096
1097							If CHARSET is given and isn't false, it will be set to fallback charset.
1098							If CHARSET is C<"NONE">, fallback charset will be undefined.
1099							Otherwise, fallback charset won't be changed. In any cases,
1100							current fallback charset will be returned.
1101
1102							B: It I useful that C<"US-ASCII"> is specified as fallback charset,
1103							since result of conversion will be readable without charset information.
1104
1105							=cut
1106
1107							sub fallback(;$) {
1108	0			0	1		my $charset = &canonical_charset(shift);
1109
1110	0	0					if ($charset eq "NONE") {
		0
1111	0						$FALLBACK_CHARSET = undef;
1112							} elsif ($charset) {
1113	0	0					croak "Unknown charset '$charset'"
1114							unless resolve_alias($charset);
1115	0						$FALLBACK_CHARSET = $charset;
1116							}
1117	0						return $FALLBACK_CHARSET;
1118							}
1119
1120							=item recommended CHARSET [, HEADERENC, BODYENC [, ENCCHARSET]]
1121
1122							Get/set charset profiles.
1123
1124							If optional arguments are given and any of them are not false, profiles
1125							for CHARSET will be set by those arguments. Otherwise, profiles
1126							won't be changed. In both cases, current profiles for CHARSET will be
1127							returned as 3-item list of (HEADERENC, BODYENC, ENCCHARSET).
1128
1129							HEADERENC is recommended encoding scheme for message header.
1130							It may be one of C<"B">, C<"Q">, C<"S"> (shorter one of either) or
1131							C (might not be encoded).
1132
1133							BODYENC is recommended transfer-encoding for message body. It may be
1134							one of C<"B">, C<"Q">, C<"S"> (shorter one of either) or
1135							C (might not be transfer-encoded).
1136
1137							ENCCHARSET is a charset which is compatible with given CHARSET and
1138							is recommended to be used for MIME messages on Internet.
1139							If conversion is not needed (or this module doesn't know appropriate
1140							charset), ENCCHARSET is C.
1141
1142							B: This function in the future releases can accept more optional
1143							arguments (for example, properties to handle character widths, line folding
1144							behavior, ...). So format of returned value may probably be changed.
1145							Use L<"header_encoding">, L<"body_encoding"> or L<"output_charset"> to get
1146							particular profile.
1147
1148							=cut
1149
1150							sub recommended ($;$;$;$) {
1151	0			0	1		my $charset = &canonical_charset(shift);
1152	0		0				my $henc = uc(shift) \|\| undef;
1153	0		0				my $benc = uc(shift) \|\| undef;
1154	0						my $cset = &canonical_charset(shift);
1155
1156	0	0					croak "CHARSET is not specified" unless $charset;
1157	0	0	0				croak "Unknown header encoding" unless !$henc or $henc =~ /^[BQS]$/;
1158	0	0	0				croak "Unknown body encoding" unless !$benc or $benc =~ /^[BQ]$/;
1159
1160	0	0	0				if ($henc or $benc or $cset) {
			0
1161	0	0					$cset = undef if $charset eq $cset;
1162	0						my @spec = ($henc, $benc, USE_ENCODE? $cset: undef);
1163	0						$CHARSETS{$charset} = \@spec;
1164	0						return @spec;
1165							} else {
1166	0	0					$charset = __PACKAGE__->new($charset) unless ref $charset;
1167	0						return map { $charset->{$_} } qw(HeaderEncoding BodyEncoding
	0
1168							OutputCharset);
1169							}
1170							}
1171
1172							=back
1173
1174							=head2 Constants
1175
1176							=over
1177
1178							=item USE_ENCODE
1179
1180							Unicode/multibyte support flag.
1181							Non-empty string will be set when Unicode and multibyte support is enabled.
1182							Currently, this flag will be non-empty on Perl 5.7.3 or later and
1183							empty string on earlier versions of Perl.
1184
1185							=back
1186
1187							=head2 Error Handling
1188
1189							L<"body_encode"> and L<"header_encode"> accept following C
1190							options:
1191
1192							=over
1193
1194							=item C<"DEFAULT">
1195
1196							Put a substitution character in place of a malformed character.
1197							For UCM-based encodings, will be used.
1198
1199							=item C<"FALLBACK">
1200
1201							Try C<"DEFAULT"> scheme using I (see L<"fallback">).
1202							When fallback charset is undefined and conversion causes error,
1203							code will die on error with an error message.
1204
1205							=item C<"CROAK">
1206
1207							Code will die on error immediately with an error message.
1208							Therefore, you should trap the fatal error with eval{} unless you
1209							really want to let it die on error.
1210							Synonym is C<"STRICT">.
1211
1212							=item C<"PERLQQ">
1213
1214							=item C<"HTMLCREF">
1215
1216							=item C<"XMLCREF">
1217
1218							Use C, C or C
1219							scheme defined by L module.
1220
1221							=item numeric values
1222
1223							Numeric values are also allowed.
1224							For more details see L.
1225
1226							=back
1227
1228							If error handling scheme is not specified or unknown scheme is specified,
1229							C<"DEFAULT"> will be assumed.
1230
1231							=head2 Configuration File
1232
1233							Built-in defaults for option parameters can be overridden by configuration
1234							file: F.
1235							For more details read F.
1236
1237							=head1 VERSION
1238
1239							Consult $VERSION variable.
1240
1241							Development versions of this module may be found at
1242							L.
1243
1244							=head2 Incompatible Changes
1245
1246							=over 4
1247
1248							=item Release 1.001
1249
1250							=over 4
1251
1252							=item *
1253
1254							new() method returns an object when CHARSET argument is not specified.
1255
1256							=back
1257
1258							=item Release 1.005
1259
1260							=over 4
1261
1262							=item *
1263
1264							Restrict characters in encoded-word according to RFC 2047 section 5 (3).
1265							This also affects return value of encoded_header_len() method.
1266
1267							=back
1268
1269							=item Release 1.008.2
1270
1271							=over 4
1272
1273							=item *
1274
1275							body_encoding() method may also returns C<"S">.
1276
1277							=item *
1278
1279							Return value of body_encode() method for UTF-8 may include
1280							C<"QUOTED-PRINTABLE"> encoding item that in earlier versions was fixed to
1281							C<"BASE64">.
1282
1283							=back
1284
1285							=back
1286
1287							=head1 SEE ALSO
1288
1289							Multipurpose Internet Mail Extensions (MIME).
1290
1291							=head1 AUTHOR
1292
1293							Hatuka*nezumi - IKEDA Soji
1294
1295							=head1 COPYRIGHT
1296
1297							Copyright (C) 2006-2017 Hatuka*nezumi - IKEDA Soji.
1298							This program is free software; you can redistribute it and/or modify it
1299							under the same terms as Perl itself.
1300
1301							=cut
1302
1303							1;