File Coverage

blib/lib/Number/Nary.pm

Criterion	Covered	Total	%
statement	97	98	98.9
branch	31	34	91.1
condition	3	3	100.0
subroutine	20	20	100.0
pod	3	3	100.0
total	154	158	97.4

line	stmt	bran	cond	sub	pod	time	code
1							package Number::Nary 1.100313;
2							# ABSTRACT: encode and decode numbers as n-ary strings
3
4	5			5		291376	use strict;
	5					90
	5					132
5	5			5		21	use warnings;
	5					8
	5					153
6
7	5			5		33	use Carp qw(croak);
	5					8
	5					265
8	5			5		28	use Scalar::Util 0.90 qw(reftype);
	5					150
	5					247
9	5			5		2443	use List::MoreUtils 0.09 qw(uniq);
	5					59172
	5					34
10	5			5		6393	use UDCode ();
	5					2312
	5					209
11
12	5					60	use Sub::Exporter -setup => {
13							exports => [ qw(n_codec n_encode n_decode) ],
14							groups => {
15							default => [ qw(n_codec) ],
16							codec_pair => \&_generate_codec_pair,
17							}
18	5			5		2741	};
	5					57008
19
20							sub _generate_codec_pair {
21	2			2		295	my (undef, undef, $arg, undef) = @_;
22
23	2					6	my $local_arg = {%$arg};
24	2					4	my $digits = delete $local_arg->{digits};
25
26	2					5	my %pair;
27	2					4	@pair{qw(encode decode)} = n_codec($digits, $local_arg);
28	2					5	return \%pair;
29							}
30
31							#pod =head1 SYNOPSIS
32							#pod
33							#pod This module lets you convert numbers into strings that encode the number using
34							#pod the digit set of your choice. For example, you could get routines to convert
35							#pod to and from hex like so:
36							#pod
37							#pod my ($enc_hex, $dec_hex) = n_codec('0123456789ABCDEF');
38							#pod
39							#pod my $hex = $enc_hex->(255); # sets $hex to FF
40							#pod my $num = $dec_hex->('A0'); # sets $num to 160
41							#pod
42							#pod This would be slow and stupid, since Perl already provides the means to easily
43							#pod and quickly convert between decimal and hex representations of numbers.
44							#pod Number::Nary's utility comes from the fact that it can encode into bases
45							#pod composed of arbitrary digit sets.
46							#pod
47							#pod my ($enc, $dec) = n_codec('0123'); # base 4 (for working with nybbles?)
48							#pod
49							#pod # base64
50							#pod my ($enc, $dec) = n_codec(
51							#pod join('', 'A' .. 'Z', 'a' .. 'z', 0 .. 9, '+', '/', '=')
52							#pod );
53							#pod
54							#pod =func n_codec
55							#pod
56							#pod my ($encode_sub, $decode_sub) = n_codec($digit_string, \%arg);
57							#pod
58							#pod This routine returns a reference to a subroutine which will encode numbers into
59							#pod the given set of digits and a reference which will do the reverse operation.
60							#pod
61							#pod The digits may be given as a string or an arrayref. This routine will croak if
62							#pod the set of digits contains repeated digits, or if there could be ambiguity
63							#pod in decoding a string of the given digits. (Number::Nary is overly aggressive
64							#pod about weeding out possibly ambiguous digit sets, for the sake of the author's
65							#pod sanity.)
66							#pod
67							#pod The encode sub will croak if it is given input other than a non-negative
68							#pod integer.
69							#pod
70							#pod The decode sub will croak if given a string that contains characters not in the
71							#pod digit string, or, for fixed-string digit sets, if the lenth of the string to
72							#pod decode is not a multiple of the length of the component digits.
73							#pod
74							#pod Valid arguments to be passed in the second parameter are:
75							#pod
76							#pod predecode - if given, this coderef will be used to preprocess strings
77							#pod passed to the decoder
78							#pod
79							#pod postencode - if given, this coderef will be used to postprocess strings
80							#pod produced by the encoder
81							#pod
82							#pod =cut
83
84							sub _split_len_iterator {
85	10			10		20	my ($length) = @_;
86
87							return sub {
88	20			20		34	my ($string, $callback) = @_;
89
90	20					56	my $places = length($string) / $length;
91
92	20	100				130	croak "string length is not a multiple of digit length"
93							unless $places == int $places;
94
95	19					41	for my $position (1 .. $places) {
96	71					115	my $digit = substr $string, (-$length * $position), $length;
97	71					97	$callback->($digit, $position);
98							}
99							}
100	10					57	}
101
102							sub _split_digit_iterator {
103	2			2		5	my ($digits) = @_;
104
105							sub {
106	1			1		3	my ($string, $callback) = @_;
107	1					2	my @digits;
108	1					3	ITER: while (length $string) {
109	4					6	for (@$digits) {
110	14	100				28	if (index($string, $_) == 0) {
111	4					10	push @digits, substr($string, 0, length $_, '');
112	4					10	next ITER;
113							}
114							}
115	0					0	croak "could not decompose string '$string'";
116							}
117
118	1					3	for (1 .. @digits) {
119	4					8	$callback->($digits[-$_], $_);
120							}
121							}
122	2					22	}
123
124							sub _set_iterator {
125	14			14		26	my ($digits, $length_ref) = @_;
126
127	14	50				33	croak "digit set is empty" unless @$digits;
128							croak "digit set contains zero-length digit"
129	5	50		5		3800	if do { no warnings 'uninitialized'; grep { ! length $_ } @$digits };
	5					18
	5					3373
	14					20
	14					25
	160					244
130	14	100				228	croak "digit set contains repeated digits" if @$digits != uniq @$digits;
131
132	13					48	my @lengths = uniq map { length } @$digits;
	154					226
133
134	13	100				58	return _split_len_iterator($lengths[0]) if @lengths == 1;
135
136	3	100				12	croak "digit set may be ambiguous" if ! UDCode::is_udcode(@$digits);
137
138	2					265	return _split_digit_iterator($digits);
139							}
140
141							sub n_codec {
142	14			14	1	1744	my ($digit_set, $arg) = @_;
143
144	14					45	my @digits;
145
146	14	100				42	if (ref $digit_set) {
147	6	50				27	croak "digit set must be a string or arrayref"
148							unless reftype $digit_set eq 'ARRAY';
149	6					21	@digits = @$digit_set;
150							} else {
151	8					34	@digits = split //, $digit_set;
152							}
153
154	14					38	my $iterator = _set_iterator(\@digits);
155
156							my $encode_sub = sub {
157	22			22		3639	my ($value) = @_;
158
159	22	100	100			1040	croak "value isn't an non-negative integer"
160							if not defined $value
161							or $value !~ /\A\d+\z/;
162
163	14					27	my $string = '';
164
165	14	100				30	if (@digits == 1) {
166	2					5	$string = $digits[0] x $value;
167							} else {
168	12					18	while (1) {
169	32					46	my $digit = $value % @digits;
170	32					56	$value = int($value / @digits);
171	32					56	$string = "$digits[$digit]$string";
172	32	100				69	last unless $value;
173							}
174							}
175
176	14	100				38	$string = $arg->{postencode}->($string) if $arg->{postencode};
177	14					61	return $string;
178	12					46	};
179
180	12					21	my %digit_value = do { my $i = 0; map { $_ => $i++ } @digits; };
	12					15
	12					22
	151					257
181
182							my $decode_sub = sub {
183	22			22		4884	my ($string) = @_;
184	22	100				53	return unless defined $string;
185
186	21	100				53	$string = $arg->{predecode}->($string) if $arg->{predecode};
187
188	21					46	my $value = 0;
189
190							$iterator->($string, sub {
191	75			26		103	my ($digit, $position) = @_;
192							croak "string to decode contains invalid digits"
193	75	100				270	unless exists $digit_value{$digit};
194
195							# Stupid hack, but I'm just cramming unary support in here at the moment.
196							# It can be polished up later, if needed. -- rjbs, 2009-11-22
197							$value += @digits == 1
198							? 1
199	73	100				179	: ($digit_value{$digit} * @digits ** ($position++ - 1));
200	21					106	});
201
202	18					122	return $value;
203	12					46	};
204
205	12					48	return ($encode_sub, $decode_sub);
206							}
207
208							#pod =func n_encode
209							#pod
210							#pod my $string = n_encode($value, $digit_string);
211							#pod
212							#pod This encodes the given value into a string using the given digit string. It is
213							#pod written in terms of C, above, so it's not efficient at all for
214							#pod multiple uses in one process.
215							#pod
216							#pod =func n_decode
217							#pod
218							#pod my $number = n_decode($string, $digit_string);
219							#pod
220							#pod This is the decoding equivalent to C, above.
221							#pod
222							#pod =cut
223
224							# If you really can't stand using n_codec, you could memoize these.
225	1			1	1	277	sub n_encode { (n_codec($_[1]))[0]->($_[0]) }
226	3			3	1	747	sub n_decode { (n_codec($_[1]))[1]->($_[0]) }
227
228							#pod =head1 EXPORTS
229							#pod
230							#pod C is exported by default. C and C are exported.
231							#pod
232							#pod Pairs of routines to encode and decode may be imported by using the
233							#pod C group as follows:
234							#pod
235							#pod use Number::Nary -codec_pair => { digits => '01234567', -suffix => '8' };
236							#pod
237							#pod my $encoded = encode8($number);
238							#pod my $decoded = decode8($encoded);
239							#pod
240							#pod For more information on this kind of exporting, see L.
241							#pod
242							#pod =head1 SECRET ORIGINS
243							#pod
244							#pod I originally used this system to produce unique worksheet names in Excel. I
245							#pod had a large report generating system that used Win32::OLE, and to keep track of
246							#pod what was where I'd Storable-digest the options used to produce each worksheet
247							#pod and then n-ary encode them into the set of characters that were valid in
248							#pod worksheet names. Working out that set of characters was by far the hardest
249							#pod part.
250							#pod
251							#pod =head1 ACKNOWLEDGEMENTS
252							#pod
253							#pod Thanks, Jesse Vincent. When I remarked, on IRC, that this would be trivial to
254							#pod do, he said, "Great. Would you mind doing it?" (Well, more or less.) It was
255							#pod a fun little distraction.
256							#pod
257							#pod Mark Jason Dominus and Michael Peters offered some useful advice on how to weed
258							#pod out ambiguous digit sets, enabling me to allow digit sets made up of
259							#pod varying-length digits.
260							#pod
261							#pod =head1 SEE ALSO
262							#pod
263							#pod L is in the same problem space wth Number::Nary. It provides
264							#pod only an OO interface and does not reliably handle multicharacter digits or
265							#pod recognize ambiguous digit sets.
266							#pod
267							#pod =cut
268
269							1; # my ($encode_sub, $decode_sub) = n_codec('8675309'); # jennynary
270
271							__END__