File Coverage

blib/lib/Unicode/Digits.pm

Criterion	Covered	Total	%
statement	37	37	100.0
branch	16	20	80.0
condition	14	15	93.3
subroutine	7	7	100.0
pod	1	1	100.0
total	75	80	93.7

line	stmt	bran	cond	sub	pod	time	code
1							package Unicode::Digits;
2
3	2			2		42584	use warnings;
	2					4
	2					57
4	2			2		10	use strict;
	2					4
	2					76
5
6	2			2		12	use Carp;
	2					7
	2					177
7	2			2		2871	use Unicode::UCD qw/charinfo/;
	2					440268
	2					225
8	2			2		24	use Exporter qw/import/;
	2					6
	2					1303
9
10							our @EXPORT_OK = qw(digits_to_int);
11
12							=head1 NAME
13
14							Unicode::Digits - Convert UNICODE digits to integers you can do math with
15
16							=head1 VERSION
17
18							Version 20090607
19
20							=cut
21
22							our $VERSION = '20090607';
23
24							=head1 SYNOPSIS
25
26							So, you have matched a string with C<\d> and now want to do some math.
27							What is that you say? The number your captured plus 5 is 5? Oh, that
28							is right \d now matches UNICODE digits not [0-9]. What to do? Well,
29							You can just call C and all of your troubles* are over!
30
31							use Unicode::Digits qw/digits_to_int/;
32
33							my $string = "forty-two in Mongolian is \x{1814}\x{1812}";
34							my $num = digits_to_int $string =~ /(\d+)/;
35							print $num + 5, "\n";
36
37							=head1 FUNCTIONS
38
39							=head2 digits_to_int(STRING)
40
41							The digits_to_int function transliterates a string of UNICODE digit
42							characters to a number you can do math with, non-digit characters are
43							passed through, so C<"42 is \x{1814}\x{1812}"> becomes C<"42 is 42">.
44
45							=head2 digits_to_int(STRING, ERRORHANDLING)
46
47							You can optionally pass an argument that controls what happens when
48							the source string contains non-digit characters or characters from
49							different sets of digits. ERRORHANDLING can be one of C<"strict">,
50							C<"loose">, C<"looser">, or C<"loosest">. Their behaviours are as
51							follows:
52
53							=over
54
55							=item strict
56
57							All of the characters must be digit characters and they must all come
58							from the same range (so no mixing Monglian digits with Arabic-Indic
59							digits) or the function will die.
60
61							=item loose
62
63							All of the characters must be digit characters or it will die.
64							If there are characters from different ranges you will get a warning.
65
66							=item looser
67
68							If there are any non digit characters, or the characters are from
69							different ranges, you will get a warning.
70
71							=item loosest
72
73							This is the default mode, all non-digit characters are passed through
74							witout warning, and the digits do not have to come from the same range.
75
76							=back
77
78							=cut
79
80							sub _find_zero($) {
81	38			38		60	my $ord = ord shift;
82	38					92	return $ord - charinfo($ord)->{digit};
83							}
84
85							sub digits_to_int {
86	20	50	66	20	1	3029	croak "wrong number of arguments" unless @_ == 1 or @_ == 2;
87	20					40	my ($string, $mode) = @_;
88	20	100				50	$mode = "loosest" unless defined $mode;
89
90	20	50				113	croak "ERRORHANDLING must be strict, loose, looser, or loosest not '$mode'"
91							unless $mode =~ /^(?:strict\|loose(?:r\|st)?)$/;
92
93	20	100	100			380	croak "string '$string' contains non-digit characters"
94							if $mode =~ '^(?:strict\|loose)$' and $string =~ /\D/;
95
96	18	100	100			183	carp "string '$string' contains non-digit characters"
97							if $mode eq "looser" and $string =~ /\D/;
98
99	18					26	my $num;
100	18					67	my ($first_num) = $string =~ /(\d)/;
101	18	50				42	return $string unless defined $first_num;
102
103	18					34	my $zero = _find_zero $first_num;
104
105	18					272281	for my $d (split //, $string) {
106	51	100				151	if ($d =~ /\D/) {
107	15					19	$num .= $d;
108	15					17	next;
109							}
110
111	36					95	my $info = charinfo ord $d;
112
113	36	100	100			11616	croak "string '$string' contains digits from different ranges"
114							if $mode eq 'strict' and $zero != _find_zero $d;
115
116	35	100	100			1632	carp "string '$string' contains digits from different ranges"
117							if $mode =~ /^looser?$/ and $zero != _find_zero $d;
118
119	35	50				4702	die sprintf "U+%x claims to be a digit, but doesn't have a digit number", ord $d
120							unless $info->{digit} =~ /[0-9]/;
121
122	35					163	$num .= $info->{digit};
123							}
124	17					91	return $num;
125							}
126
127							=head1 AUTHOR
128
129							Chas. J. Owens IV, C<< >>
130
131							=head1 DIAGNOSTICS
132
133							=over
134
135							=item "wrong number of arguments"
136
137							C takes one or two arguments,
138							if you have more than two or no arguments you will recieve this error.
139
140							=item "ERRORHANDLING must be strict, loose, looser, or loosest not '%s'"
141
142							If you pass a second argument that is not strict, loose, looser,
143							or loosest to C, you will
144							recieve this error.
145
146							=item "string '%s' contains non-digit characters"
147
148							You will recieve this message as a warning or error (depending on what
149							mode you chose), if the string has characters that do not have the
150							UNICODE digit property.
151
152							=item "string '$s' contains digits from different ranges"
153
154							You will recieve this message as a warning or error (depending on what
155							mode you chose), if the string has characters that are not part of the
156							same range of digit characters.
157
158							=item "U+%x claims to be a digit, but doesn't have a digit number"
159
160							This error is unlikely to occur, if it does then the bug is either with
161							my code (the likely scenario) or C (not very likely).
162
163							=back
164
165							=head1 BUGS
166
167							My understanding of UNICODE is flawed, therefore, I have undoubtly done
168							something wrong. For instance, what should be done with "5\x{0308}"?
169							Also, there is a bunch of stuff relating to surrogates I don't understand.
170
171							=head1 SUPPORT
172
173							You can find documentation for this module with the perldoc command.
174
175							perldoc Unicode::Digits
176
177							=head1 COPYRIGHT & LICENSE
178
179							Copyright 2009 Chas. J. Owens IV, all rights reserved.
180
181							This program is free software; you can redistribute it and/or modify it
182							under the same terms as Perl itself.
183
184							=cut
185
186							"this is not an interesting return value";