File Coverage

blib/lib/MIME/Charset.pm

Criterion	Covered	Total	%
statement	214	291	73.5
branch	92	168	54.7
condition	43	99	43.4
subroutine	29	35	82.8
pod	20	20	100.0
total	398	613	64.9

line	stmt	bran	cond	sub	pod	time	code
1							#-- perl --
2
3							package MIME::Charset;
4	9			9		200987	use 5.005;
	9					81
5
6							=head1 NAME
7
8							MIME::Charset - Charset Information for MIME
9
10							=head1 SYNOPSIS
11
12							use MIME::Charset:
13
14							$charset = MIME::Charset->new("euc-jp");
15
16							Getting charset information:
17
18							$benc = $charset->body_encoding; # e.g. "Q"
19							$cset = $charset->as_string; # e.g. "US-ASCII"
20							$henc = $charset->header_encoding; # e.g. "S"
21							$cset = $charset->output_charset; # e.g. "ISO-2022-JP"
22
23							Translating text data:
24
25							($text, $charset, $encoding) =
26							$charset->header_encode(
27							"\xc9\xc2\xc5\xaa\xc0\xde\xc3\xef\xc5\xaa".
28							"\xc7\xd1\xca\xaa\xbd\xd0\xce\xcf\xb4\xef",
29							Charset => 'euc-jp');
30							# ...returns e.g. (, "ISO-2022-JP", "B").
31
32							($text, $charset, $encoding) =
33							$charset->body_encode(
34							"Collectioneur path\xe9tiquement ".
35							"\xe9clectique de d\xe9chets",
36							Charset => 'latin1');
37							# ...returns e.g. (, "ISO-8859-1", "QUOTED-PRINTABLE").
38
39							$len = $charset->encoded_header_len(
40							"Perl\xe8\xa8\x80\xe8\xaa\x9e",
41							Charset => 'utf-8',
42							Encoding => "b");
43							# ...returns e.g. 28.
44
45							Manipulating module defaults:
46
47							MIME::Charset::alias("csEUCKR", "euc-kr");
48							MIME::Charset::default("iso-8859-1");
49							MIME::Charset::fallback("us-ascii");
50
51							Non-OO functions (may be deprecated in near future):
52
53							use MIME::Charset qw(:info);
54
55							$benc = body_encoding("iso-8859-2"); # "Q"
56							$cset = canonical_charset("ANSI X3.4-1968"); # "US-ASCII"
57							$henc = header_encoding("utf-8"); # "S"
58							$cset = output_charset("shift_jis"); # "ISO-2022-JP"
59
60							use MIME::Charset qw(:trans);
61
62							($text, $charset, $encoding) =
63							header_encode(
64							"\xc9\xc2\xc5\xaa\xc0\xde\xc3\xef\xc5\xaa".
65							"\xc7\xd1\xca\xaa\xbd\xd0\xce\xcf\xb4\xef",
66							"euc-jp");
67							# ...returns (, "ISO-2022-JP", "B");
68
69							($text, $charset, $encoding) =
70							body_encode(
71							"Collectioneur path\xe9tiquement ".
72							"\xe9clectique de d\xe9chets",
73							"latin1");
74							# ...returns (, "ISO-8859-1", "QUOTED-PRINTABLE");
75
76							$len = encoded_header_len(
77							"Perl\xe8\xa8\x80\xe8\xaa\x9e", "b", "utf-8"); # 28
78
79							=head1 DESCRIPTION
80
81							MIME::Charset provides information about character sets used for
82							MIME messages on Internet.
83
84							=head2 Definitions
85
86							The B is ``character set'' used in MIME to refer to a
87							method of converting a sequence of octets into a sequence of characters.
88							It includes both concepts of ``coded character set'' (CCS) and
89							``character encoding scheme'' (CES) of ISO/IEC.
90
91							The B is that used in MIME to refer to a method of representing
92							a body part or a header body as sequence(s) of printable US-ASCII
93							characters.
94
95							=cut
96
97	9			9		42	use strict;
	9					12
	9					243
98	9			9		33	use vars qw(@ISA $VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS $Config);
	9					13
	9					656
99	9			9		53	use Exporter;
	9					17
	9					819
100							@ISA = qw(Exporter);
101							@EXPORT = qw(body_encoding canonical_charset header_encoding output_charset
102							body_encode encoded_header_len header_encode);
103							@EXPORT_OK = qw(alias default fallback recommended);
104							%EXPORT_TAGS = (
105							"info" => [qw(body_encoding header_encoding
106							canonical_charset output_charset)],
107							"trans" =>[ qw(body_encode encoded_header_len
108							header_encode)],
109							);
110	9			9		55	use Carp qw(croak);
	9					21
	9					667
111
112	9	50		9		52	use constant USE_ENCODE => ($] >= 5.007003)? 'Encode': '';
	9					22
	9					1375
113
114							my @ENCODE_SUBS = qw(FB_CROAK FB_PERLQQ FB_HTMLCREF FB_XMLCREF
115							is_utf8 resolve_alias);
116							if (USE_ENCODE) {
117	9			9		4662	eval "use ".USE_ENCODE." \@ENCODE_SUBS;";
	9					80726
	9					3718
118							if ($@) { # Perl 5.7.3 + Encode 0.40
119							eval "use ".USE_ENCODE." qw(is_utf8);";
120							require MIME::Charset::_Compat;
121							for my $sub (@ENCODE_SUBS) {
122	9			9		53	no strict "refs";
	9					13
	9					742
123							*{$sub} = \&{"MIME::Charset::_Compat::$sub"}
124							unless $sub eq 'is_utf8';
125							}
126							}
127							} else {
128							require MIME::Charset::_Compat;
129							for my $sub (@ENCODE_SUBS) {
130	9			9		61	no strict "refs";
	9					14
	9					33756
131							*{$sub} = \&{"MIME::Charset::_Compat::$sub"};
132							}
133							}
134
135							$VERSION = '1.013.1';
136
137							######## Private Attributes ########
138
139							my $DEFAULT_CHARSET = 'US-ASCII';
140							my $FALLBACK_CHARSET = 'UTF-8';
141
142							# This table was initially borrowed from Python email package.
143
144							my %CHARSETS = (# input header enc body enc output conv
145							'DIN_66003' => ['Q', undef, undef],
146							'ISO-8859-1' => ['Q', 'Q', undef],
147							'ISO-8859-2' => ['Q', 'Q', undef],
148							'ISO-8859-3' => ['Q', 'Q', undef],
149							'ISO-8859-4' => ['Q', 'Q', undef],
150							# ISO-8859-5 is Cyrillic, and not especially used
151							# ISO-8859-6 is Arabic, also not particularly used
152							# ISO-8859-7 is Greek, 'Q' will not make it readable
153							# ISO-8859-8 is Hebrew, 'Q' will not make it readable
154							'ISO-8859-9' => ['Q', 'Q', undef],
155							'ISO-8859-10' => ['Q', 'Q', undef],
156							# ISO-8859-11 is Thai, 'Q' will not make it readable
157							'ISO-8859-13' => ['Q', 'Q', undef],
158							'ISO-8859-14' => ['Q', 'Q', undef],
159							'ISO-8859-15' => ['Q', 'Q', undef],
160							'ISO-8859-16' => ['Q', 'Q', undef],
161							'WINDOWS-1252' => ['Q', 'Q', undef],
162							'VISCII' => ['Q', 'Q', undef],
163							'US-ASCII' => [undef, undef, undef],
164							'BIG5' => ['B', 'B', undef],
165							'GB2312' => ['B', 'B', undef],
166							'HZ-GB-2312' => ['B', undef, undef],
167							'EUC-JP' => ['B', undef, 'ISO-2022-JP'],
168							'SHIFT_JIS' => ['B', undef, 'ISO-2022-JP'],
169							'ISO-2022-JP' => ['B', undef, undef],
170							'ISO-2022-JP-1' => ['B', undef, undef],
171							'ISO-2022-JP-2' => ['B', undef, undef],
172							'EUC-JISX0213' => ['B', undef, 'ISO-2022-JP-3'],
173							'SHIFT_JISX0213' => ['B', undef, 'ISO-2022-JP-3'],
174							'ISO-2022-JP-3' => ['B', undef, undef],
175							'EUC-JIS-2004' => ['B', undef, 'ISO-2022-JP-2004'],
176							'SHIFT_JIS-2004' => ['B', undef, 'ISO-2022-JP-2004'],
177							'ISO-2022-JP-2004' => ['B', undef, undef],
178							'KOI8-R' => ['B', 'B', undef],
179							'TIS-620' => ['B', 'B', undef], # cf. Mew
180							'UTF-16' => ['B', 'B', undef],
181							'UTF-16BE' => ['B', 'B', undef],
182							'UTF-16LE' => ['B', 'B', undef],
183							'UTF-32' => ['B', 'B', undef],
184							'UTF-32BE' => ['B', 'B', undef],
185							'UTF-32LE' => ['B', 'B', undef],
186							'UTF-7' => ['Q', undef, undef],
187							'UTF-8' => ['S', 'S', undef],
188							'GSM03.38' => [undef, undef, undef], # not for MIME
189							# We're making this one up to represent raw unencoded 8bit
190							'8BIT' => [undef, 'B', 'ISO-8859-1'],
191							);
192
193							# Fix some unexpected or unpreferred names returned by
194							# Encode::resolve_alias() or used by somebodies else.
195							my %CHARSET_ALIASES = (# unpreferred preferred
196							"ASCII" => "US-ASCII",
197							"BIG5-ETEN" => "BIG5",
198							"CP1250" => "WINDOWS-1250",
199							"CP1251" => "WINDOWS-1251",
200							"CP1252" => "WINDOWS-1252",
201							"CP1253" => "WINDOWS-1253",
202							"CP1254" => "WINDOWS-1254",
203							"CP1255" => "WINDOWS-1255",
204							"CP1256" => "WINDOWS-1256",
205							"CP1257" => "WINDOWS-1257",
206							"CP1258" => "WINDOWS-1258",
207							"CP874" => "WINDOWS-874",
208							"CP936" => "GBK",
209							"CP949" => "KS_C_5601-1987",
210							"DIN66003" => "DIN_66003",
211							"EUC-CN" => "GB2312",
212							"HZ" => "HZ-GB-2312", # RFC 1842
213							"KS_C_5601" => "KS_C_5601-1987",
214							"SHIFTJIS" => "SHIFT_JIS",
215							"SHIFTJISX0213" => "SHIFT_JISX0213",
216							"TIS620" => "TIS-620", # IANA MIBenum 2259
217							"UNICODE-1-1-UTF-7" => "UTF-7", # RFC 1642 (obs.)
218							"UTF8" => "UTF-8",
219							"UTF-8-STRICT" => "UTF-8", # Perl internal use
220							"GSM0338" => "GSM03.38", # not for MIME
221							);
222
223							# Some vendors encode characters beyond standardized mappings using extended
224							# encoders. Some other standard encoders need additional encode modules.
225							my %ENCODERS = (
226							'EXTENDED' => {
227							'ISO-8859-1' => [['cp1252'], ], # Encode::Byte
228							'ISO-8859-2' => [['cp1250'], ], # Encode::Byte
229							'ISO-8859-5' => [['cp1251'], ], # Encode::Byte
230							'ISO-8859-6' => [
231							['cp1256'], # Encode::Byte
232							# ['cp1006'], # ditto, for Farsi
233							],
234							'ISO-8859-6-I'=>[['cp1256'], ], # ditto
235							'ISO-8859-7' => [['cp1253'], ], # Encode::Byte
236							'ISO-8859-8' => [['cp1255'], ], # Encode::Byte
237							'ISO-8859-8-I'=>[['cp1255'], ], # ditto
238							'ISO-8859-9' => [['cp1254'], ], # Encode::Byte
239							'ISO-8859-13'=> [['cp1257'], ], # Encode::Byte
240							'GB2312' => [
241							['gb18030', 'Encode::HanExtra'],
242							['cp936'], # Encode::CN
243							],
244							'EUC-JP' => [
245							['eucJP-ascii', 'Encode::EUCJPASCII'],
246							# ['cp51932', 'Encode::EUCJPMS'],
247							],
248							'ISO-2022-JP'=> [
249							['x-iso2022jp-ascii',
250							'Encode::EUCJPASCII'],
251							# ['iso-2022-jp-ms','Encode::ISO2022JPMS'],
252							# ['cp50220', 'Encode::EUCJPMS'],
253							# ['cp50221', 'Encode::EUCJPMS'],
254							['iso-2022-jp-1'], # Encode::JP (note*)
255							],
256							'SHIFT_JIS' => [
257							['cp932'], # Encode::JP
258							],
259							'EUC-JISX0213' => [['euc-jis-2004', 'Encode::JISX0213'], ],
260							'ISO-2022-JP-3' => [['iso-2022-jp-2004', 'Encode::JISX0213'], ],
261							'SHIFT_JISX0213'=> [['shift_jis-2004', 'Encode::ShiftJIS2004'], ],
262							'EUC-KR' => [['cp949'], ], # Encode::KR
263							'BIG5' => [
264							# ['big5plus', 'Encode::HanExtra'],
265							# ['big5-2003', 'Encode::HanExtra'],
266							['cp950'], # Encode::TW
267							# ['big5-1984', 'Encode::HanExtra'],
268							],
269							'TIS-620' => [['cp874'], ], # Encode::Byte
270							'UTF-8' => [['utf8'], ], # Special name on Perl
271							},
272							'STANDARD' => {
273							'DIN_66003' => [['din66003', 'Endode::DIN66003'], ],
274							'ISO-8859-6-E' => [['iso-8859-6'],],# Encode::Byte
275							'ISO-8859-6-I' => [['iso-8859-6'],],# ditto
276							'ISO-8859-8-E' => [['iso-8859-8'],],# Encode::Byte
277							'ISO-8859-8-I' => [['iso-8859-8'],],# ditto
278							'GB18030' => [['gb18030', 'Encode::HanExtra'], ],
279							'ISO-2022-JP-2' => [['iso-2022-jp-2','Encode::ISO2022JP2'], ],
280							'EUC-JISX0213' => [['euc-jisx0213', 'Encode::JISX0213'], ],
281							'ISO-2022-JP-3' => [['iso-2022-jp-3', 'Encode::JISX0213'], ],
282							'EUC-JIS-2004' => [['euc-jis-2004', 'Encode::JISX0213'], ],
283							'ISO-2022-JP-2004' => [['iso-2022-jp-2004', 'Encode::JISX0213'], ],
284							'SHIFT_JIS-2004'=> [['shift_jis-2004', 'Encode::ShiftJIS2004'], ],
285							'EUC-TW' => [['euc-tw', 'Encode::HanExtra'], ],
286							'HZ-GB-2312' => [['hz'], ], # Encode::CN
287							'TIS-620' => [['tis620'], ], # (note*)
288							'UTF-16' => [['x-utf16auto', 'MIME::Charset::UTF'],],
289							'UTF-32' => [['x-utf32auto', 'MIME::Charset::UTF'],],
290							'GSM03.38' => [['gsm0338'], ], # Encode::GSM0338
291
292							# (note*) ISO-8859-11 was not registered by IANA.
293							# L treats it as canonical name of ``tis-?620''.
294							},
295							);
296
297							# ISO-2022-* escape sequences etc. to detect charset from unencoded data.
298							my @ESCAPE_SEQS = (
299							# ISO-2022-* sequences
300							# escape seq, possible charset
301							# Following sequences are commonly used.
302							["\033\$\@", "ISO-2022-JP"], # RFC 1468
303							["\033\$B", "ISO-2022-JP"], # ditto
304							["\033(J", "ISO-2022-JP"], # ditto
305							["\033(I", "ISO-2022-JP"], # ditto (nonstandard)
306							["\033\$(D", "ISO-2022-JP"], # RFC 2237 (note*)
307							# Following sequences are less commonly used.
308							["\033.A", "ISO-2022-JP-2"], # RFC 1554
309							["\033.F", "ISO-2022-JP-2"], # ditto
310							["\033\$(C", "ISO-2022-JP-2"], # ditto
311							["\033\$(O", "ISO-2022-JP-3"], # JIS X 0213:2000
312							["\033\$(P", "ISO-2022-JP-2004"], # JIS X 0213:2000/2004
313							["\033\$(Q", "ISO-2022-JP-2004"], # JIS X 0213:2004
314							["\033\$)C", "ISO-2022-KR"], # RFC 1557
315							["\033\$)A", "ISO-2022-CN"], # RFC 1922
316							["\033\$A", "ISO-2022-CN"], # ditto (nonstandard)
317							["\033\$)G", "ISO-2022-CN"], # ditto
318							["\033\$*H", "ISO-2022-CN"], # ditto
319							# Other sequences will be used with appropriate charset
320							# parameters, or hardly used.
321
322							# note*: This RFC defines ISO-2022-JP-1, superset of
323							# ISO-2022-JP. But that charset name is rarely used.
324							# OTOH many of encoders for ISO-2022-JP recognize this
325							# sequence so that comatibility with EUC-JP will be
326							# guaranteed.
327
328							# Singlebyte 7-bit sequences
329							# escape seq, possible charset
330							["\033e", "GSM03.38"], # ESTI GSM 03.38 (note*)
331							["\033\012", "GSM03.38"], # ditto
332							["\033<", "GSM03.38"], # ditto
333							["\033/", "GSM03.38"], # ditto
334							["\033>", "GSM03.38"], # ditto
335							["\033\024", "GSM03.38"], # ditto
336							["\033(", "GSM03.38"], # ditto
337							["\033\@", "GSM03.38"], # ditto
338							["\033)", "GSM03.38"], # ditto
339							["\033=", "GSM03.38"], # ditto
340
341							# note*: This is not used for MIME message.
342							);
343
344							######## Public Configuration Attributes ########
345
346							$Config = {
347							Detect7bit => 'YES',
348							Mapping => 'EXTENDED',
349							Replacement => 'DEFAULT',
350							};
351							local @INC = @INC;
352							pop @INC if $INC[-1] eq '.';
353							eval { require MIME::Charset::Defaults; };
354
355							######## Private Constants ########
356
357							my $NON7BITRE = qr{
358							[^\x01-\x7e]
359							}x;
360
361							my $NONASCIIRE = qr{
362							[^\x09\x0a\x0d\x20\x21-\x7e]
363							}x;
364
365							my $ISO2022RE = qr{
366							ISO-2022-.+
367							}ix;
368
369							my $ASCIITRANSRE = qr{
370							HZ-GB-2312 \| UTF-7
371							}ix;
372
373
374							######## Public Functions ########
375
376							=head2 Constructor
377
378							=over
379
380							=item $charset = MIME::Charset->new([CHARSET [, OPTS]])
381
382							Create charset object.
383
384							OPTS may accept following key-value pair.
385							B:
386							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
387							conversion will not be performed. So this option do not have any effects.
388
389							=over 4
390
391							=item Mapping => MAPTYPE
392
393							Whether to extend mappings actually used for charset names or not.
394							C<"EXTENDED"> uses extended mappings.
395							C<"STANDARD"> uses standardized strict mappings.
396							Default is C<"EXTENDED">.
397
398							=back
399
400							=cut
401
402							sub new {
403	75			75	1	1531	my $class = shift;
404	75					99	my $charset = shift;
405	75	100				141	return bless {}, $class unless $charset;
406	70	50				136	return bless {}, $class if 75 < length $charset; # w/a for CPAN RT #65796.
407	70					110	my %params = @_;
408	70		33			258	my $mapping = uc($params{'Mapping'} \|\| $Config->{Mapping});
409
410	70	100				299	if ($charset =~ /\bhz.?gb.?2312$/i) {
		100
		100
411							# workaround: "HZ-GB-2312" mistakenly treated as "EUC-CN" by Encode
412							# (2.12).
413	4					7	$charset = "HZ-GB-2312";
414							} elsif ($charset =~ /\btis-?620$/i) {
415							# workaround: "TIS620" treated as ISO-8859-11 by Encode.
416							# And "TIS-620" not known by some versions of Encode (cf.
417							# CPAN RT #20781).
418	1					2	$charset = "TIS-620";
419							} elsif ($charset =~ /\biso[-_]8859[-_]8[-_]i$/i) {
420							# workaround: "ISO-8859-8-I" is treated as an alias of "ISO-8859-8"
421							# by Encode (3.19): See the note in
422							# https://encoding.spec.whatwg.org/#legacy-single-byte-encodings
423							# However we'll treat these as separate names for compatibility.
424	1					2	$charset = "ISO-8859-8-I";
425							} else {
426	64		66			159	$charset = resolve_alias($charset) \|\| $charset
427							}
428	70		66			114022	$charset = $CHARSET_ALIASES{uc($charset)} \|\| uc($charset);
429	70					115	my ($henc, $benc, $outcset);
430	70					145	my $spec = $CHARSETS{$charset};
431	70	100				118	if ($spec) {
432	60					149	($henc, $benc, $outcset) =
433							($$spec[0], $$spec[1], USE_ENCODE? $$spec[2]: undef);
434							} else {
435	10					19	($henc, $benc, $outcset) = ('S', 'B', undef);
436							}
437	70					93	my ($decoder, $encoder);
438	70					79	if (USE_ENCODE) {
439	70					128	$decoder = _find_encoder($charset, $mapping);
440	70					118	$encoder = _find_encoder($outcset, $mapping);
441							} else {
442							$decoder = $encoder = undef;
443							}
444
445	70		66			604	bless {
			100
446							InputCharset => $charset,
447							Decoder => $decoder,
448							HeaderEncoding => $henc,
449							BodyEncoding => $benc,
450							OutputCharset => ($outcset \|\| $charset),
451							Encoder => ($encoder \|\| $decoder),
452							}, $class;
453							}
454
455							my %encoder_cache = ();
456
457							sub _find_encoder($$) {
458	140		100	140		345	my $charset = uc(shift \|\| "");
459	140	100				243	return undef unless $charset;
460	78					116	my $mapping = uc(shift);
461	78					93	my ($spec, $name, $module, $encoder);
462
463	78					97	local($@);
464	78					180	$encoder = $encoder_cache{$charset, $mapping};
465	78	100				198	return $encoder if ref $encoder;
466
467	55					88	foreach my $m (('EXTENDED', 'STANDARD')) {
468	82	50	66			233	next if $m eq 'EXTENDED' and $mapping ne 'EXTENDED';
469	82					155	$spec = $ENCODERS{$m}->{$charset};
470	82	100				146	next unless $spec;
471	41					53	foreach my $s (@{$spec}) {
	41					68
472	47					67	($name, $module) = @{$s};
	47					96
473	47	100				89	if ($module) {
474	14	100				792	next unless eval "require $module;";
475							}
476	37					136	$encoder = Encode::find_encoding($name);
477	37	50				21314	last if ref $encoder;
478							}
479	41	100				90	last if ref $encoder;
480							}
481	55		100			186	$encoder \|\|= Encode::find_encoding($charset);
482	55	100				1405	$encoder_cache{$charset, $mapping} = $encoder if $encoder;
483	55					123	return $encoder;
484							}
485
486							=back
487
488							=head2 Getting Information of Charsets
489
490							=over
491
492							=item $charset->body_encoding
493
494							=item body_encoding CHARSET
495
496							Get recommended transfer-encoding of CHARSET for message body.
497
498							Returned value will be one of C<"B"> (BASE64), C<"Q"> (QUOTED-PRINTABLE),
499							C<"S"> (shorter one of either) or
500							C (might not be transfer-encoded; either 7BIT or 8BIT). This may
501							not be same as encoding for message header.
502
503							=cut
504
505							sub body_encoding($) {
506	4			4	1	15	my $self = shift;
507	4	50				22	return undef unless $self;
508	4	100				16	$self = __PACKAGE__->new($self) unless ref $self;
509	4					31	$self->{BodyEncoding};
510							}
511
512							=item $charset->as_string
513
514							=item canonical_charset CHARSET
515
516							Get canonical name for charset.
517
518							=cut
519
520							sub canonical_charset($) {
521	4			4	1	12	my $self = shift;
522	4	50				14	return undef unless $self;
523	4	100				11	$self = __PACKAGE__->new($self) unless ref $self;
524	4					25	$self->{InputCharset};
525							}
526
527							sub as_string($) {
528	29			29	1	91	my $self = shift;
529	29					78	$self->{InputCharset};
530							}
531
532							=item $charset->decoder
533
534							Get L<"Encode::Encoding"> object to decode strings to Unicode by charset.
535							If charset is not specified or not known by this module,
536							undef will be returned.
537
538							=cut
539
540							sub decoder($) {
541	15			15	1	34	my $self = shift;
542	15					46	$self->{Decoder};
543							}
544
545							=item $charset->dup
546
547							Get a copy of charset object.
548
549							=cut
550
551							sub dup($) {
552	2			2	1	4	my $self = shift;
553	2					6	my $obj = __PACKAGE__->new(undef);
554	2					4	%{$obj} = %{$self};
	2					7
	2					8
555	2					6	$obj;
556							}
557
558							=item $charset->encoder([CHARSET])
559
560							Get L<"Encode::Encoding"> object to encode Unicode string using compatible
561							charset recommended to be used for messages on Internet.
562
563							If optional CHARSET is specified, replace encoder (and output charset
564							name) of $charset object with those of CHARSET, therefore,
565							$charset object will be a converter between original charset and
566							new CHARSET.
567
568							=cut
569
570							sub encoder($$;) {
571	6			6	1	8	my $self = shift;
572	6					9	my $charset = shift;
573	6	100				13	if ($charset) {
574	2	50				10	$charset = __PACKAGE__->new($charset) unless ref $charset;
575	2					6	$self->{OutputCharset} = $charset->{InputCharset};
576	2					6	$self->{Encoder} = $charset->{Decoder};
577	2					3	$self->{BodyEncoding} = $charset->{BodyEncoding};
578	2					4	$self->{HeaderEncoding} = $charset->{HeaderEncoding};
579							}
580	6					45	$self->{Encoder};
581							}
582
583							=item $charset->header_encoding
584
585							=item header_encoding CHARSET
586
587							Get recommended encoding scheme of CHARSET for message header.
588
589							Returned value will be one of C<"B">, C<"Q">, C<"S"> (shorter one of either)
590							or C (might not be encoded). This may not be same as encoding
591							for message body.
592
593							=cut
594
595							sub header_encoding($) {
596	4			4	1	12	my $self = shift;
597	4	50				10	return undef unless $self;
598	4	100				12	$self = __PACKAGE__->new($self) unless ref $self;
599	4					27	$self->{HeaderEncoding};
600							}
601
602							=item $charset->output_charset
603
604							=item output_charset CHARSET
605
606							Get a charset which is compatible with given CHARSET and is recommended
607							to be used for MIME messages on Internet (if it is known by this module).
608
609							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
610							this function will simply
611							return the result of L<"canonical_charset">.
612
613							=cut
614
615							sub output_charset($) {
616	4			4	1	13	my $self = shift;
617	4	50				9	return undef unless $self;
618	4	100				13	$self = __PACKAGE__->new($self) unless ref $self;
619	4					17	$self->{OutputCharset};
620							}
621
622							=back
623
624							=head2 Translating Text Data
625
626							=over
627
628							=item $charset->body_encode(STRING [, OPTS])
629
630							=item body_encode STRING, CHARSET [, OPTS]
631
632							Get converted (if needed) data of STRING and recommended transfer-encoding
633							of that data for message body. CHARSET is the charset by which STRING
634							is encoded.
635
636							OPTS may accept following key-value pairs.
637							B:
638							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
639							conversion will not be performed. So these options do not have any effects.
640
641							=over 4
642
643							=item Detect7bit => YESNO
644
645							Try auto-detecting 7-bit charset when CHARSET is not given.
646							Default is C<"YES">.
647
648							=item Replacement => REPLACEMENT
649
650							Specifies error handling scheme. See L<"Error Handling">.
651
652							=back
653
654							3-item list of (I, I,
655							I) will be returned.
656							I will be either C<"BASE64">, C<"QUOTED-PRINTABLE">,
657							C<"7BIT"> or C<"8BIT">. If I could not be determined
658							and I contains non-ASCII byte(s), I will
659							be C and I will be C<"BASE64">.
660							I will be C<"US-ASCII"> if and only if string does not
661							contain any non-ASCII bytes.
662
663							=cut
664
665							sub body_encode {
666	4			4	1	394	my $self = shift;
667	4					7	my $text;
668	4	100				10	if (ref $self) {
669	2					3	$text = shift;
670							} else {
671	2					3	$text = $self;
672	2					15	$self = __PACKAGE__->new(shift);
673							}
674	4					16	my ($encoded, $charset) = $self->_text_encode($text, @_);
675							return ($encoded, undef, 'BASE64')
676	4	50	33			24	unless $charset and $charset->{InputCharset};
677	4					8	my $cset = $charset->{OutputCharset};
678
679							# Determine transfer-encoding.
680	4					14	my $enc = $charset->{BodyEncoding};
681
682	4	50	33			20	if (!$enc and $encoded !~ /\x00/) { # Eliminate hostile NUL character.
		0
		0
		0
683	4	50				111	if ($encoded =~ $NON7BITRE) { # String contains 8bit char(s).
		50
684	0					0	$enc = '8BIT';
685							} elsif ($cset =~ /^($ISO2022RE\|$ASCIITRANSRE)$/) { # 7BIT.
686	4					14	$enc = '7BIT';
687							} else { # Pure ASCII.
688	0					0	$enc = '7BIT';
689	0					0	$cset = 'US-ASCII';
690							}
691							} elsif ($enc eq 'S') {
692	0					0	$enc = _resolve_S($encoded, 1);
693							} elsif ($enc eq 'B') {
694	0					0	$enc = 'BASE64';
695							} elsif ($enc eq 'Q') {
696	0					0	$enc = 'QUOTED-PRINTABLE';
697							} else {
698	0					0	$enc = 'BASE64';
699							}
700	4					38	return ($encoded, $cset, $enc);
701							}
702
703							=item $charset->decode(STRING [,CHECK])
704
705							Decode STRING to Unicode.
706
707							B:
708							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
709							this function will die.
710
711							=cut
712
713							sub decode($$$;) {
714	14			14	1	1658	my $self = shift;
715	14					19	my $s = shift;
716	14		100			38	my $check = shift \|\| 0;
717	14					59	$self->{Decoder}->decode($s, $check);
718							}
719
720							=item detect_7bit_charset STRING
721
722							Guess 7-bit charset that may encode a string STRING.
723							If STRING contains any 8-bit bytes, C will be returned.
724							Otherwise, Default Charset will be returned for unknown charset.
725
726							=cut
727
728							sub detect_7bit_charset($) {
729	4	50		4	1	12	return $DEFAULT_CHARSET unless &USE_ENCODE;
730	4					8	my $s = shift;
731	4	50				8	return $DEFAULT_CHARSET unless $s;
732
733							# Non-7bit string
734	4	50				14	return undef if $s =~ $NON7BITRE;
735
736							# Try to detect 7-bit escape sequences.
737	4					10	foreach (@ESCAPE_SEQS) {
738	8					18	my ($seq, $cset) = @$_;
739	8	100				20	if (index($s, $seq) >= 0) {
740	4					12	my $decoder = __PACKAGE__->new($cset);
741	4	50				11	next unless $decoder->{Decoder};
742	4					7	eval {
743	4					13	my $dummy = $s;
744	4					25	$decoder->decode($dummy, FB_CROAK());
745							};
746	4	50				299	if ($@) {
747	0					0	next;
748							}
749	4					24	return $decoder->{InputCharset};
750							}
751							}
752
753							# How about HZ, VIQR, UTF-7, ...?
754
755	0					0	return $DEFAULT_CHARSET;
756							}
757
758							sub _detect_7bit_charset {
759	0			0		0	detect_7bit_charset(@_);
760							}
761
762							=item $charset->encode(STRING [, CHECK])
763
764							Encode STRING (Unicode or non-Unicode) using compatible charset recommended
765							to be used for messages on Internet (if this module knows it).
766							Note that string will be decoded to Unicode then encoded even if compatible charset
767							was equal to original charset.
768
769							B:
770							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
771							this function will die.
772
773							=cut
774
775							sub encode($$$;) {
776	10			10	1	666	my $self = shift;
777	10					13	my $s = shift;
778	10		50			32	my $check = shift \|\| 0;
779
780	10	100	66			50	unless (is_utf8($s) or $s =~ /[^\x00-\xFF]/) {
781	4	50				34	$s = $self->{Decoder}->decode($s, ($check & 0x1)? FB_CROAK(): 0);
782							}
783	10					37	my $enc = $self->{Encoder}->encode($s, $check);
784	10	50				4464	Encode::_utf8_off($enc) if is_utf8($enc); # workaround for RT #35120
785	10					34	$enc;
786							}
787
788							=item $charset->encoded_header_len(STRING [, ENCODING])
789
790							=item encoded_header_len STRING, ENCODING, CHARSET
791
792							Get length of encoded STRING for message header
793							(without folding).
794
795							ENCODING may be one of C<"B">, C<"Q"> or C<"S"> (shorter
796							one of either C<"B"> or C<"Q">).
797
798							=cut
799
800							sub encoded_header_len($$$;) {
801	14			14	1	33	my $self = shift;
802	14					16	my ($encoding, $s);
803	14	100				27	if (ref $self) {
804	8					11	$s = shift;
805	8		66			22	$encoding = uc(shift \|\| $self->{HeaderEncoding});
806							} else {
807	6					8	$s = $self;
808	6					9	$encoding = uc(shift);
809	6					7	$self = shift;
810	6	50				22	$self = __PACKAGE__->new($self) unless ref $self;
811							}
812
813							#FIXME:$encoding === undef
814
815	14					17	my $enclen;
816	14	100	66			39	if ($encoding eq 'Q') {
		50
817	4					10	$enclen = _enclen_Q($s);
818							} elsif ($encoding eq 'S' and _resolve_S($s) eq 'Q') {
819	0					0	$enclen = _enclen_Q($s);
820							} else { # "B"
821	10					19	$enclen = _enclen_B($s);
822							}
823
824	14					75	length($self->{OutputCharset})+$enclen+7;
825							}
826
827							sub _enclen_B($) {
828	10			10		27	int((length(shift) + 2) / 3) * 4;
829							}
830
831							sub _enclen_Q($;$) {
832	4			4		7	my $s = shift;
833	4					6	my $in_body = shift;
834	4					5	my @o;
835	4	50				7	if ($in_body) {
836	0					0	@o = ($s =~ m{([^-\t\r\n !*+/0-9A-Za-z])}go);
837							} else {
838	4					45	@o = ($s =~ m{([^- !*+/0-9A-Za-z])}gos);
839							}
840	4					14	length($s) + scalar(@o) * 2;
841							}
842
843							sub _resolve_S($;$) {
844	6			6		9	my $s = shift;
845	6					8	my $in_body = shift;
846	6					7	my $e;
847	6	50				10	if ($in_body) {
848	0					0	$e = scalar(() = $s =~ m{[^-\t\r\n !*+/0-9A-Za-z]}g);
849	0	0				0	return (length($s) + 8 < $e * 6) ? 'BASE64' : 'QUOTED-PRINTABLE';
850							} else {
851	6					48	$e = scalar(() = $s =~ m{[^- !*+/0-9A-Za-z]}g);
852	6	50				31	return (length($s) + 8 < $e * 6) ? 'B' : 'Q';
853							}
854							}
855
856							=item $charset->header_encode(STRING [, OPTS])
857
858							=item header_encode STRING, CHARSET [, OPTS]
859
860							Get converted (if needed) data of STRING and recommended encoding scheme of
861							that data for message headers. CHARSET is the charset by which STRING
862							is encoded.
863
864							OPTS may accept following key-value pairs.
865							B:
866							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
867							conversion will not be performed. So these options do not have any effects.
868
869							=over 4
870
871							=item Detect7bit => YESNO
872
873							Try auto-detecting 7-bit charset when CHARSET is not given.
874							Default is C<"YES">.
875
876							=item Replacement => REPLACEMENT
877
878							Specifies error handling scheme. See L<"Error Handling">.
879
880							=back
881
882							3-item list of (I, I,
883							I) will be returned. I will be
884							either C<"B">, C<"Q"> or C (might not be encoded).
885							If I could not be determined and I
886							contains non-ASCII byte(s), I will be C<"8BIT">
887							(this is I charset name but a special value to represent unencodable
888							data) and I will be C (should not be encoded).
889							I will be C<"US-ASCII"> if and only if string does not
890							contain any non-ASCII bytes.
891
892							=cut
893
894							sub header_encode {
895	8			8	1	1103	my $self = shift;
896	8					9	my $text;
897	8	100				20	if (ref $self) {
898	4					5	$text = shift;
899							} else {
900	4					4	$text = $self;
901	4					12	$self = __PACKAGE__->new(shift);
902							}
903	8					27	my ($encoded, $charset) = $self->_text_encode($text, @_);
904							return ($encoded, '8BIT', undef)
905	8	50	33			41	unless $charset and $charset->{InputCharset};
906	8					14	my $cset = $charset->{OutputCharset};
907
908							# Determine encoding scheme.
909	8					12	my $enc = $charset->{HeaderEncoding};
910
911	8	100	66			55	if (!$enc and $encoded !~ $NON7BITRE) {
		50
		50
912	2	50				70	unless ($cset =~ /^($ISO2022RE\|$ASCIITRANSRE)$/) { # 7BIT.
913	2					5	$cset = 'US-ASCII';
914							}
915							} elsif ($enc eq 'S') {
916	0					0	$enc = _resolve_S($encoded);
917							} elsif ($enc !~ /^[BQ]$/) {
918	0					0	$enc = 'B';
919							}
920	8					40	return ($encoded, $cset, $enc);
921							}
922
923							sub _text_encode {
924	12			12		18	my $charset = shift;
925	12					14	my $s = shift;
926	12					19	my %params = @_;
927	12		33			53	my $replacement = uc($params{'Replacement'} \|\| $Config->{Replacement});
928	12		33			42	my $detect7bit = uc($params{'Detect7bit'} \|\| $Config->{Detect7bit});
929							my $encoding = $params{'Encoding'} \|\|
930	12		33			43	(exists $params{'Encoding'}? undef: 'A'); # undocumented
931
932	12	50	33			47	if (!$encoding or $encoding ne 'A') { # no 7-bit auto-detection
933	0					0	$detect7bit = 'NO';
934							}
935	12	100				30	unless ($charset->{InputCharset}) {
936	4	50				37	if ($s =~ $NON7BITRE) {
		50
937	0					0	return ($s, undef);
938							} elsif ($detect7bit ne "NO") {
939	4					11	$charset = __PACKAGE__->new(&detect_7bit_charset($s));
940							} else {
941	0					0	$charset = __PACKAGE__->new($DEFAULT_CHARSET,
942							Mapping => 'STANDARD');
943							}
944							}
945	12	50	33			40	if (!$encoding or $encoding ne 'A') { # no conversion
946	0					0	$charset = $charset->dup;
947	0					0	$charset->encoder($charset);
948	0					0	$charset->{HeaderEncoding} = $encoding;
949	0					0	$charset->{BodyEncoding} = $encoding;
950							}
951							my $check = ($replacement and $replacement =~ /^\d+$/)?
952							$replacement:
953							{
954							'CROAK' => FB_CROAK(),
955							'STRICT' => FB_CROAK(),
956							'FALLBACK' => FB_CROAK(), # special
957							'PERLQQ' => FB_PERLQQ(),
958							'HTMLCREF' => FB_HTMLCREF(),
959							'XMLCREF' => FB_XMLCREF(),
960	12	50	33			155	}->{$replacement \|\| ""} \|\| 0;
			50
961
962							# Encode data by output charset if required. If failed, fallback to
963							# fallback charset.
964	12					29	my $encoded;
965	12	100	33			103	if (is_utf8($s) or $s =~ /[^\x00-\xFF]/ or
			50
			50
			66
966							($charset->{InputCharset} \|\| "") ne ($charset->{OutputCharset} \|\| "")) {
967	4	50				12	if ($check & 0x1) { # CROAK or FALLBACK
968	0					0	eval {
969	0					0	$encoded = $s;
970	0					0	$encoded = $charset->encode($encoded, FB_CROAK());
971							};
972	0	0				0	if ($@) {
973	0	0	0			0	if ($replacement eq "FALLBACK" and $FALLBACK_CHARSET) {
974	0					0	my $cset = __PACKAGE__->new($FALLBACK_CHARSET,
975							Mapping => 'STANDARD');
976							# croak unknown charset
977							croak "unknown charset ``$FALLBACK_CHARSET''"
978	0	0				0	unless $cset->{Decoder};
979							# charset translation
980	0					0	$charset = $charset->dup;
981	0					0	$charset->encoder($cset);
982	0					0	$encoded = $s;
983	0					0	$encoded = $charset->encode($encoded, 0);
984							# replace input & output charsets with fallback charset
985	0					0	$cset->encoder($cset);
986	0					0	$charset = $cset;
987							} else {
988	0					0	$@ =~ s/ at .+$//;
989	0					0	croak $@;
990							}
991							}
992							} else {
993	4					7	$encoded = $s;
994	4					11	$encoded = $charset->encode($encoded, $check);
995							}
996							} else {
997	8					13	$encoded = $s;
998							}
999
1000	12	100				72	if ($encoded !~ /$NONASCIIRE/) { # maybe ASCII
1001							# check ``ASCII transformation'' charsets
1002	4	50				124	if ($charset->{OutputCharset} =~ /^($ASCIITRANSRE)$/) {
		0
1003	4					10	my $u = $encoded;
1004	4					6	if (USE_ENCODE) {
1005	4					13	$u = $charset->encoder->decode($encoded); # dec. by output
1006							} elsif ($encoded =~ /[+~]/) { # workaround for pre-Encode env.
1007							$u = "x$u";
1008							}
1009	4	100				209	if ($u eq $encoded) {
1010	2					10	$charset = $charset->dup;
1011	2					5	$charset->encoder($DEFAULT_CHARSET);
1012							}
1013							} elsif ($charset->{OutputCharset} ne "US-ASCII") {
1014	0					0	$charset = $charset->dup;
1015	0					0	$charset->encoder($DEFAULT_CHARSET);
1016							}
1017							}
1018
1019	12					42	return ($encoded, $charset);
1020							}
1021
1022							=item $charset->undecode(STRING [,CHECK])
1023
1024							Encode Unicode string STRING to byte string by input charset of $charset.
1025							This is equivalent to C<$charset-Edecoder-Eencode()>.
1026
1027							B:
1028							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
1029							this function will die.
1030
1031							=cut
1032
1033							sub undecode($$$;) {
1034	0			0	1		my $self = shift;
1035	0						my $s = shift;
1036	0		0				my $check = shift \|\| 0;
1037	0						my $enc = $self->{Decoder}->encode($s, $check);
1038	0						Encode::_utf8_off($enc); # workaround for RT #35120
1039	0						$enc;
1040							}
1041
1042							=back
1043
1044							=head2 Manipulating Module Defaults
1045
1046							=over
1047
1048							=item alias ALIAS [, CHARSET]
1049
1050							Get/set charset alias for canonical names determined by
1051							L<"canonical_charset">.
1052
1053							If CHARSET is given and isn't false, ALIAS will be assigned as an alias of
1054							CHARSET. Otherwise, alias won't be changed. In both cases,
1055							current charset name that ALIAS is assigned will be returned.
1056
1057							=cut
1058
1059							sub alias ($;$) {
1060	0			0	1		my $alias = uc(shift);
1061	0						my $charset = uc(shift);
1062
1063	0	0					return $CHARSET_ALIASES{$alias} unless $charset;
1064
1065	0						$CHARSET_ALIASES{$alias} = $charset;
1066	0						return $charset;
1067							}
1068
1069							=item default [CHARSET]
1070
1071							Get/set default charset.
1072
1073							B is used by this module when charset context is
1074							unknown. Modules using this module are recommended to use this
1075							charset when charset context is unknown or implicit default is
1076							expected. By default, it is C<"US-ASCII">.
1077
1078							If CHARSET is given and isn't false, it will be set to default charset.
1079							Otherwise, default charset won't be changed. In both cases,
1080							current default charset will be returned.
1081
1082							B: Default charset I be changed.
1083
1084							=cut
1085
1086							sub default(;$) {
1087	0			0	1		my $charset = &canonical_charset(shift);
1088
1089	0	0					if ($charset) {
1090	0	0					croak "Unknown charset '$charset'"
1091							unless resolve_alias($charset);
1092	0						$DEFAULT_CHARSET = $charset;
1093							}
1094	0						return $DEFAULT_CHARSET;
1095							}
1096
1097							=item fallback [CHARSET]
1098
1099							Get/set fallback charset.
1100
1101							B is used by this module when conversion by given
1102							charset is failed and C<"FALLBACK"> error handling scheme is specified.
1103							Modules using this module may use this charset as last resort of charset
1104							for conversion. By default, it is C<"UTF-8">.
1105
1106							If CHARSET is given and isn't false, it will be set to fallback charset.
1107							If CHARSET is C<"NONE">, fallback charset will be undefined.
1108							Otherwise, fallback charset won't be changed. In any cases,
1109							current fallback charset will be returned.
1110
1111							B: It I useful that C<"US-ASCII"> is specified as fallback charset,
1112							since result of conversion will be readable without charset information.
1113
1114							=cut
1115
1116							sub fallback(;$) {
1117	0			0	1		my $charset = &canonical_charset(shift);
1118
1119	0	0					if ($charset eq "NONE") {
		0
1120	0						$FALLBACK_CHARSET = undef;
1121							} elsif ($charset) {
1122	0	0					croak "Unknown charset '$charset'"
1123							unless resolve_alias($charset);
1124	0						$FALLBACK_CHARSET = $charset;
1125							}
1126	0						return $FALLBACK_CHARSET;
1127							}
1128
1129							=item recommended CHARSET [, HEADERENC, BODYENC [, ENCCHARSET]]
1130
1131							Get/set charset profiles.
1132
1133							If optional arguments are given and any of them are not false, profiles
1134							for CHARSET will be set by those arguments. Otherwise, profiles
1135							won't be changed. In both cases, current profiles for CHARSET will be
1136							returned as 3-item list of (HEADERENC, BODYENC, ENCCHARSET).
1137
1138							HEADERENC is recommended encoding scheme for message header.
1139							It may be one of C<"B">, C<"Q">, C<"S"> (shorter one of either) or
1140							C (might not be encoded).
1141
1142							BODYENC is recommended transfer-encoding for message body. It may be
1143							one of C<"B">, C<"Q">, C<"S"> (shorter one of either) or
1144							C (might not be transfer-encoded).
1145
1146							ENCCHARSET is a charset which is compatible with given CHARSET and
1147							is recommended to be used for MIME messages on Internet.
1148							If conversion is not needed (or this module doesn't know appropriate
1149							charset), ENCCHARSET is C.
1150
1151							B: This function in the future releases can accept more optional
1152							arguments (for example, properties to handle character widths, line folding
1153							behavior, ...). So format of returned value may probably be changed.
1154							Use L<"header_encoding">, L<"body_encoding"> or L<"output_charset"> to get
1155							particular profile.
1156
1157							=cut
1158
1159							sub recommended ($;$;$;$) {
1160	0			0	1		my $charset = &canonical_charset(shift);
1161	0		0				my $henc = uc(shift) \|\| undef;
1162	0		0				my $benc = uc(shift) \|\| undef;
1163	0						my $cset = &canonical_charset(shift);
1164
1165	0	0					croak "CHARSET is not specified" unless $charset;
1166	0	0	0				croak "Unknown header encoding" unless !$henc or $henc =~ /^[BQS]$/;
1167	0	0	0				croak "Unknown body encoding" unless !$benc or $benc =~ /^[BQ]$/;
1168
1169	0	0	0				if ($henc or $benc or $cset) {
			0
1170	0	0					$cset = undef if $charset eq $cset;
1171	0						my @spec = ($henc, $benc, USE_ENCODE? $cset: undef);
1172	0						$CHARSETS{$charset} = \@spec;
1173	0						return @spec;
1174							} else {
1175	0	0					$charset = __PACKAGE__->new($charset) unless ref $charset;
1176	0						return map { $charset->{$_} } qw(HeaderEncoding BodyEncoding
	0
1177							OutputCharset);
1178							}
1179							}
1180
1181							=back
1182
1183							=head2 Constants
1184
1185							=over
1186
1187							=item USE_ENCODE
1188
1189							Unicode/multibyte support flag.
1190							Non-empty string will be set when Unicode and multibyte support is enabled.
1191							Currently, this flag will be non-empty on Perl 5.7.3 or later and
1192							empty string on earlier versions of Perl.
1193
1194							=back
1195
1196							=head2 Error Handling
1197
1198							L<"body_encode"> and L<"header_encode"> accept following C
1199							options:
1200
1201							=over
1202
1203							=item C<"DEFAULT">
1204
1205							Put a substitution character in place of a malformed character.
1206							For UCM-based encodings, will be used.
1207
1208							=item C<"FALLBACK">
1209
1210							Try C<"DEFAULT"> scheme using I (see L<"fallback">).
1211							When fallback charset is undefined and conversion causes error,
1212							code will die on error with an error message.
1213
1214							=item C<"CROAK">
1215
1216							Code will die on error immediately with an error message.
1217							Therefore, you should trap the fatal error with eval{} unless you
1218							really want to let it die on error.
1219							Synonym is C<"STRICT">.
1220
1221							=item C<"PERLQQ">
1222
1223							=item C<"HTMLCREF">
1224
1225							=item C<"XMLCREF">
1226
1227							Use C, C or C
1228							scheme defined by L module.
1229
1230							=item numeric values
1231
1232							Numeric values are also allowed.
1233							For more details see L.
1234
1235							=back
1236
1237							If error handling scheme is not specified or unknown scheme is specified,
1238							C<"DEFAULT"> will be assumed.
1239
1240							=head2 Configuration File
1241
1242							Built-in defaults for option parameters can be overridden by configuration
1243							file: F.
1244							For more details read F.
1245
1246							=head1 VERSION
1247
1248							Consult $VERSION variable.
1249
1250							Development versions of this module may be found at
1251							L.
1252
1253							=head2 Incompatible Changes
1254
1255							=over 4
1256
1257							=item Release 1.001
1258
1259							=over 4
1260
1261							=item *
1262
1263							new() method returns an object when CHARSET argument is not specified.
1264
1265							=back
1266
1267							=item Release 1.005
1268
1269							=over 4
1270
1271							=item *
1272
1273							Restrict characters in encoded-word according to RFC 2047 section 5 (3).
1274							This also affects return value of encoded_header_len() method.
1275
1276							=back
1277
1278							=item Release 1.008.2
1279
1280							=over 4
1281
1282							=item *
1283
1284							body_encoding() method may also returns C<"S">.
1285
1286							=item *
1287
1288							Return value of body_encode() method for UTF-8 may include
1289							C<"QUOTED-PRINTABLE"> encoding item that in earlier versions was fixed to
1290							C<"BASE64">.
1291
1292							=back
1293
1294							=back
1295
1296							=head1 SEE ALSO
1297
1298							Multipurpose Internet Mail Extensions (MIME).
1299
1300							=head1 AUTHOR
1301
1302							Hatuka*nezumi - IKEDA Soji
1303
1304							=head1 COPYRIGHT
1305
1306							Copyright (C) 2006-2017 Hatuka*nezumi - IKEDA Soji.
1307							This program is free software; you can redistribute it and/or modify it
1308							under the same terms as Perl itself.
1309
1310							=cut
1311
1312							1;