File Coverage

blib/lib/Convert/YText.pm

Criterion	Covered	Total	%
statement	70	70	100.0
branch	12	14	85.7
condition			n/a
subroutine	13	13	100.0
pod	4	9	44.4
total	99	106	93.4

line	stmt	bran	sub	pod	time	code
1						package Convert::YText;
2
3	2		2		3694	use strict;
	2				2
	2				57
4	2		2		9	use warnings;
	2				2
	2				55
5	2		2		7	use Carp;
	2				2
	2				141
6
7	2		2		8	use vars qw/$VERSION @ISA @EXPORT_OK/;
	2				2
	2				1454
8						@ISA = 'Exporter';
9						@EXPORT_OK = qw( encode_ytext decode_ytext validate_ytext);
10
11						$VERSION="0.2";
12
13						=head1 NAME
14
15						Convert::YText - Quotes strings suitably for rfc2822 local part
16
17						=head1 VERSION
18
19						Version 0.2
20
21						=head1 SYNOPSIS
22
23						use Convert::YText qw(encode_ytext decode_ytext);
24
25						$encoded=encode_ytext($string);
26						$decoded=decode_ytext($encoded);
27
28						($decoded eq $string) \|\| die "this should never happen!";
29
30						=head1 DESCRIPTION
31
32						Convert::YText converts strings to and from "YText", a format inspired
33						by xtext defined in RFC1894, the MIME base64 and quoted-printable
34						types (RFC 1394). The main goal is encode a UTF8 string into something safe
35						for use as the local part in an internet email address (RFC2822).
36
37						By default spaces are replaced with "+", "/" with "~", the characters
38						"A-Za-z0-9_.-" encode as themselves, and everything else is written
39						"=USTR=" where USTR is the base64 (using "A-Za-z0-9_." as digits)
40						encoding of the unicode character code. The encoding is configurable
41						(see below).
42
43						=head1 PROCEDURAL INTERFACE
44
45						The module can can export C which converts arbitrary
46						unicode string into a "safe" form, and C which recovers
47						the original text. C is a heuristic which returns 0
48						for bad input.
49
50
51						=cut
52
53
54						sub encode_ytext{
55	155		155	0	37461	my $str=shift;
56	155				311	my $object = Convert::YText->new();
57	155				344	return $object->encode($str);
58						}
59
60						sub decode_ytext{
61	98		98	0	96	my $str=shift;
62	98				151	my $object = Convert::YText->new();
63	98				149	return $object->decode($str);
64						}
65
66						sub validate_ytext{
67	57		57	0	20509	my $str=shift;
68	57				110	my $object = Convert::YText->new();
69	57				96	return $object->valid($str);
70						}
71
72						=head1 OBJECT ORIENTED INTERFACE.
73
74						For more control, you will need to use the OO interface.
75
76						=head2 new
77
78						Create a new encoding object.
79
80						=head3 Arguments
81
82						Arguments are by name (i.e. a hash).
83
84						=over
85
86						=item DIGIT_STRING ("A-Za-z0-9_.") Must be 64 characters long
87
88						=item ESCAPE_CHAR ('=') Must not be in digit string.
89
90						=item SPACE_CHAR ('+') Non digit to replace space. Can be the empty string.
91
92						=item SLASH_CHAR ( '~') Non digit to replace slash. Can be the empty string.
93
94						=item EXTRA_CHARS ('._\-') Other characters to leave unencoded.
95
96						=back
97
98						=cut
99
100						sub new {
101	314		314	1	2107	my $class = shift;
102
103	314				459	my %params=@_;
104
105	314				1025	my $self = { ESCAPE_CHAR=>'=',
106						SPACE_CHAR=>'+',
107						SLASH_CHAR=>'~',
108						EXTRA_CHARS=>'-',
109						DIGIT_STRING=>
110						"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_."
111						};
112
113	314				825	while (my ($key,$val) = each %params){
114	11				39	$self->{$key} = $val;
115						};
116
117	314	50			908	croak("DIGIT_STRING must have 64 characters got: ".$self->{DIGIT_STRING}) if (length($self->{DIGIT_STRING})!=64);
118
119						# computed values. Setting directly is probably a bad idea.
120
121	314				6128	$self->{DIGITS}=[split "",$self->{DIGIT_STRING}];
122						$self->{NO_ESCAPE}= $self->{DIGIT_STRING}.$self->{EXTRA_CHARS}.( length($self->{SPACE_CHAR}) ? ' ' : '' )
123	314	100			1963	. (length($self->{SLASH_CHAR}) ? '/' : '');
		100
124
125	314				3002	$self->{ESCRX}=qr{\Q$self->{ESCAPE_CHAR}\E([\Q$self->{DIGIT_STRING}\E]+)\Q$self->{ESCAPE_CHAR}\E};
126
127	314				1209	$self->{MUST64}=qr{[^\Q$self->{NO_ESCAPE}\E]};
128
129	314				1090	$self->{VALIDRX}=qr{[\Q$self->{ESCAPE_CHAR}$self->{NO_ESCAPE}\E]+};
130
131	314				373	bless ($self, $class);
132	314				615	return $self;
133						}
134
135
136						sub encode_num{
137	3723		3723	0	3657	my $self=shift;
138	3723				3140	my $num=shift;
139	3723				3310	my $str="";
140
141	3723				6250	while ($num>0){
142	4737				4682	my $remainder=$num % 64;
143	4737				3899	$num=$num >> 6;
144	4737				12591	$str = $self->{DIGITS}->[$remainder].$str;
145						}
146	3723				13392	return $str;
147						}
148
149						sub decode_str{
150	3705		3705	0	3901	my $self=shift;
151	3705				5440	my $str=shift;
152	3705				6619	my @chars=split "",$str;
153	3705				3824	my $num=0;
154
155	3705				6457	while (scalar(@chars)>0){
156	4719				7375	my $remainder=index $self->{DIGIT_STRING},$chars[0];
157
158	4719	50			7700	croak("not a digit: ".$chars[0]. " in \"$str\"") if ($remainder <0);
159
160	4719				4098	$num=$num << 6;
161	4719				4610	$num+=$remainder;
162	4719				9575	shift @chars;
163						}
164
165	3705				16423	return chr($num);
166						}
167
168						=head2 encode
169
170						=head3 Arguments
171
172						a string to encode.
173
174						=head3 Returns
175
176						encoded string
177
178						=cut
179
180						sub encode{
181	547		547	1	169081	my $self=shift;
182	547				799	my $str=shift;
183
184	547				4123	$str=~ s/($self->{MUST64})/"$self->{ESCAPE_CHAR}".encode_num($self,ord($1))."$self->{ESCAPE_CHAR}"/ge;
	3723				8800
185	547	100			1732	$str=~ s\|/\|$self->{SLASH_CHAR}\|g if (length($self->{SLASH_CHAR}));
186	547				1763	$str=~ s/ /$self->{SPACE_CHAR}/g;
187
188	547				2650	return $str;
189						};
190
191						=head2 decode
192
193						=head3 Arguments
194
195						a string to decode.
196
197						=head3 Returns
198
199						encoded string
200
201						=cut
202
203						sub decode{
204	490		490	1	531	my $self=shift;
205	490				490	my $str = shift;
206
207	490	100			2009	$str=~ s/\Q$self->{SPACE_CHAR}\E/ /g if (length($self->{SPACE_CHAR}));
208	490	100			1089	$str=~ s\|\Q$self->{SLASH_CHAR}\E\|/\|g if (length($self->{SLASH_CHAR}));
209	490				3478	$str=~ s/$self->{ESCRX}/ decode_str($self,$1)/eg;
	3705				5122
210	490				3107	return $str;
211						}
212
213						=head2 valid
214
215						Simple necessary but not sufficient test for validity.
216
217						=cut
218
219						sub valid{
220	57		57	1	49	my $self=shift;
221	57				37	my $str = shift;
222
223	57				586	return $str =~ m/$self->{VALIDRX}/;
224						}
225
226						=head1 DISCUSSION
227
228						According to RFC 2822, the following non-alphanumerics are OK for the
229						local part of an address: "!#$%&'*+-/=?^_`{\|}~". On the other hand, it
230						seems common in practice to block addresses having "%!/\|`#&?" in the
231						local part. The idea is to restrict ourselves to basic ASCII
232						alphanumerics, plus a small set of printable ASCII, namely "=_+-~.".
233
234
235						The characters '+' and '-' are pretty widely used to attach suffixes
236						(although usually only one works on a given mail host). It seems ok to
237						use '+-', since the first marks the beginning of a suffix, and then is
238						a regular character. The character '.' also seems mostly permissable.
239
240
241						=head1 AUTHOR
242
243						David Bremner, Eddb@cpan.org
244
245						=head1 COPYRIGHT
246
247						Copyright (C) 2011 David Bremner. All Rights Reserved.
248
249						This module is free software; you can redistribute it and/or modify it
250						under the same terms as Perl itself.
251
252						=head1 SEE ALSO
253
254						L, L, L.
255
256						=cut
257
258						1;