File Coverage

blib/lib/Convert/YText.pm

Criterion	Covered	Total	%
statement	73	73	100.0
branch	12	14	85.7
condition			n/a
subroutine	14	14	100.0
pod	4	9	44.4
total	103	110	93.6

line	stmt	bran	sub	pod	time	code
1						package Convert::YText;
2
3	2		2		4120	use strict;
	2				6
	2				88
4	2		2		14	use warnings;
	2				4
	2				96
5	2		2		25	use Carp;
	2				4
	2				213
6
7	2		2		14	use vars qw/$VERSION @ISA @EXPORT_OK/;
	2				4
	2				259
8						@ISA = 'Exporter';
9						@EXPORT_OK = qw( encode_ytext decode_ytext validate_ytext);
10
11	2		2		4900	use encoding "utf-8";
	2				33819
	2				13
12
13
14						$VERSION="0.1.1_1";
15
16						=head1 NAME
17
18						Convert::YText - Quotes strings suitably for rfc2822 local part
19
20						=head1 VERSION
21
22						Version 0.1
23
24						=head1 SYNOPSIS
25
26						use Convert::YText qw(encode_ytext decode_ytext);
27
28						$encoded=encode_ytext($string);
29						$decoded=decode_ytext($encoded);
30
31						($decoded eq $string) \|\| die "this should never happen!";
32
33						=head1 DESCRIPTION
34
35						Convert::YText converts strings to and from "YText", a format inspired
36						by xtext defined in RFC1894, the MIME base64 and quoted-printable
37						types (RFC 1394). The main goal is encode a UTF8 string into something safe
38						for use as the local part in an internet email address (RFC2822).
39
40						By default spaces are replaced with "+", "/" with "~", the characters
41						"A-Za-z0-9_.-" encode as themselves, and everything else is written
42						"=USTR=" where USTR is the base64 (using "A-Za-z0-9_." as digits)
43						encoding of the unicode character code. The encoding is configurable
44						(see below).
45
46						=head1 PROCEDURAL INTERFACE
47
48						The module can can export C which converts arbitrary
49						unicode string into a "safe" form, and C which recovers
50						the original text. C is a heuristic which returns 0
51						for bad input.
52
53
54						=cut
55
56
57						sub encode_ytext{
58	148		148	0	38003	my $str=shift;
59	148				321	my $object = Convert::YText->new();
60	148				267	return $object->encode($str);
61						}
62
63						sub decode_ytext{
64	92		92	0	95	my $str=shift;
65	92				160	my $object = Convert::YText->new();
66	92				131	return $object->decode($str);
67						}
68
69						sub validate_ytext{
70	56		56	0	29202	my $str=shift;
71	56				140	my $object = Convert::YText->new();
72	56				119	return $object->valid($str);
73						}
74
75						=head1 OBJECT ORIENTED INTERFACE.
76
77						For more control, you will need to use the OO interface.
78
79						=head2 new
80
81						Create a new encoding object.
82
83						=head3 Arguments
84
85						Arguments are by name (i.e. a hash).
86
87						=over
88
89						=item DIGIT_STRING ("A-Za-z0-9_.") Must be 64 characters long
90
91						=item ESCAPE_CHAR ('=') Must not be in digit string.
92
93						=item SPACE_CHAR ('+') Non digit to replace space. Can be the empty string.
94
95						=item SLASH_CHAR ( '~') Non digit to replace slash. Can be the empty string.
96
97						=item EXTRA_CHARS ('._\-') Other characters to leave unencoded.
98
99						=back
100
101						=cut
102
103						sub new {
104	300		300	1	2303	my $class = shift;
105
106	300				480	my %params=@_;
107
108	300				991	my $self = { ESCAPE_CHAR=>'=',
109						SPACE_CHAR=>'+',
110						SLASH_CHAR=>'~',
111						EXTRA_CHARS=>'-',
112						DIGIT_STRING=>
113						"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_."
114						};
115
116	300				905	while (my ($key,$val) = each %params){
117	11				156	$self->{$key} = $val;
118						};
119
120	300	50			799	croak("DIGIT_STRING must have 64 characters got: ".$self->{DIGIT_STRING}) if (length($self->{DIGIT_STRING})!=64);
121
122						# computed values. Setting directly is probably a bad idea.
123
124	300				5787	$self->{DIGITS}=[split "",$self->{DIGIT_STRING}];
125						$self->{NO_ESCAPE}= $self->{DIGIT_STRING}.$self->{EXTRA_CHARS}.( length($self->{SPACE_CHAR}) ? ' ' : '' )
126	300	100			2184	. (length($self->{SLASH_CHAR}) ? '/' : '');
		100
127
128	300				2187	$self->{ESCRX}=qr{\Q$self->{ESCAPE_CHAR}\E([\Q$self->{DIGIT_STRING}\E]+)\Q$self->{ESCAPE_CHAR}\E};
129
130	300				1238	$self->{MUST64}=qr{[^\Q$self->{NO_ESCAPE}\E]};
131
132	300				1718	$self->{VALIDRX}=qr{[\Q$self->{ESCAPE_CHAR}$self->{NO_ESCAPE}\E]+};
133
134	300				454	bless ($self, $class);
135	300				659	return $self;
136						}
137
138
139						sub encode_num{
140	3621		3621	0	15559	my $self=shift;
141	3621				2791	my $num=shift;
142	3621				3030	my $str="";
143
144	3621				5735	while ($num>0){
145	4659				6774	my $remainder=$num % 64;
146	4659				3214	$num=$num >> 6;
147	4659				16681	$str = $self->{DIGITS}->[$remainder].$str;
148						}
149	3621				26289	return $str;
150						}
151
152						sub decode_str{
153	3603		3603	0	3118	my $self=shift;
154	3603				4438	my $str=shift;
155	3603				6389	my @chars=split "",$str;
156	3603				2761	my $num=0;
157
158	3603				5929	while (scalar(@chars)>0){
159	4641				18377	my $remainder=index $self->{DIGIT_STRING},$chars[0];
160
161	4641	50			22339	croak("not a digit: ".$chars[0]. " in \"$str\"") if ($remainder <0);
162
163	4641				3777	$num=$num << 6;
164	4641				3154	$num+=$remainder;
165	4641				8809	shift @chars;
166						}
167
168	3603				19163	return chr($num);
169						}
170
171						=head2 encode
172
173						=head3 Arguments
174
175						a string to encode.
176
177						=head3 Returns
178
179						encoded string
180
181						=cut
182
183						sub encode{
184	516		516	1	174239	my $self=shift;
185	516				597	my $str=shift;
186
187	516				3909	$str=~ s/($self->{MUST64})/"$self->{ESCAPE_CHAR}".encode_num($self,ord($1))."$self->{ESCAPE_CHAR}"/ge;
	3621				53903
188	516	100			5030	$str=~ s\|/\|$self->{SLASH_CHAR}\|g if (length($self->{SLASH_CHAR}));
189	516				2051	$str=~ s/ /$self->{SPACE_CHAR}/g;
190
191	516				3898	return $str;
192						};
193
194						=head2 decode
195
196						=head3 Arguments
197
198						a string to decode.
199
200						=head3 Returns
201
202						encoded string
203
204						=cut
205
206						sub decode{
207	460		460	1	712	my $self=shift;
208	460				475	my $str = shift;
209
210	460	100			1946	$str=~ s/\Q$self->{SPACE_CHAR}\E/ /g if (length($self->{SPACE_CHAR}));
211	460	100			1056	$str=~ s\|\Q$self->{SLASH_CHAR}\E\|/\|g if (length($self->{SLASH_CHAR}));
212	460				3368	$str=~ s/$self->{ESCRX}/ decode_str($self,$1)/eg;
	3603				28116
213	460				4853	return $str;
214						}
215
216						=head2 valid
217
218						Simple necessary but not sufficient test for validity.
219
220						=cut
221
222						sub valid{
223	56		56	1	68	my $self=shift;
224	56				53	my $str = shift;
225
226	56				819	return $str =~ m/$self->{VALIDRX}/;
227						}
228
229						=head1 DISCUSSION
230
231						According to RFC 2822, the following non-alphanumerics are OK for the
232						local part of an address: "!#$%&'*+-/=?^_`{\|}~". On the other hand, it
233						seems common in practice to block addresses having "%!/\|`#&?" in the
234						local part. The idea is to restrict ourselves to basic ASCII
235						alphanumerics, plus a small set of printable ASCII, namely "=_+-~.".
236
237
238						The characters '+' and '-' are pretty widely used to attach suffixes
239						(although usually only one works on a given mail host). It seems ok to
240						use '+-', since the first marks the beginning of a suffix, and then is
241						a regular character. The character '.' also seems mostly permissable.
242
243
244						=head1 AUTHOR
245
246						David Bremner, Eddb@cpan.org
247
248						=head1 COPYRIGHT
249
250						Copyright (C) 2011 David Bremner. All Rights Reserved.
251
252						This module is free software; you can redistribute it and/or modify it
253						under the same terms as Perl itself.
254
255						=head1 SEE ALSO
256
257						L, L, L.
258
259						=head1 ACKNOWLEDGEMENTS
260
261						Thanks to jawnsy for encouraging me to OO-ize the module.
262
263						=cut
264
265						1;