line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Unicode::Digits; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
42584
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
57
|
|
4
|
2
|
|
|
2
|
|
10
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
76
|
|
5
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
12
|
use Carp; |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
177
|
|
7
|
2
|
|
|
2
|
|
2871
|
use Unicode::UCD qw/charinfo/; |
|
2
|
|
|
|
|
440268
|
|
|
2
|
|
|
|
|
225
|
|
8
|
2
|
|
|
2
|
|
24
|
use Exporter qw/import/; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
1303
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @EXPORT_OK = qw(digits_to_int); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Unicode::Digits - Convert UNICODE digits to integers you can do math with |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 VERSION |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Version 20090607 |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=cut |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
our $VERSION = '20090607'; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 SYNOPSIS |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
So, you have matched a string with C<\d> and now want to do some math. |
27
|
|
|
|
|
|
|
What is that you say? The number your captured plus 5 is 5? Oh, that |
28
|
|
|
|
|
|
|
is right \d now matches UNICODE digits not [0-9]. What to do? Well, |
29
|
|
|
|
|
|
|
You can just call C and all of your troubles* are over! |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
use Unicode::Digits qw/digits_to_int/; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
my $string = "forty-two in Mongolian is \x{1814}\x{1812}"; |
34
|
|
|
|
|
|
|
my $num = digits_to_int $string =~ /(\d+)/; |
35
|
|
|
|
|
|
|
print $num + 5, "\n"; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 FUNCTIONS |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head2 digits_to_int(STRING) |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
The digits_to_int function transliterates a string of UNICODE digit |
42
|
|
|
|
|
|
|
characters to a number you can do math with, non-digit characters are |
43
|
|
|
|
|
|
|
passed through, so C<"42 is \x{1814}\x{1812}"> becomes C<"42 is 42">. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head2 digits_to_int(STRING, ERRORHANDLING) |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
You can optionally pass an argument that controls what happens when |
48
|
|
|
|
|
|
|
the source string contains non-digit characters or characters from |
49
|
|
|
|
|
|
|
different sets of digits. ERRORHANDLING can be one of C<"strict">, |
50
|
|
|
|
|
|
|
C<"loose">, C<"looser">, or C<"loosest">. Their behaviours are as |
51
|
|
|
|
|
|
|
follows: |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=over |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=item strict |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
All of the characters must be digit characters and they must all come |
58
|
|
|
|
|
|
|
from the same range (so no mixing Monglian digits with Arabic-Indic |
59
|
|
|
|
|
|
|
digits) or the function will die. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item loose |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
All of the characters must be digit characters or it will die. |
64
|
|
|
|
|
|
|
If there are characters from different ranges you will get a warning. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=item looser |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
If there are any non digit characters, or the characters are from |
69
|
|
|
|
|
|
|
different ranges, you will get a warning. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=item loosest |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
This is the default mode, all non-digit characters are passed through |
74
|
|
|
|
|
|
|
witout warning, and the digits do not have to come from the same range. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=back |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=cut |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
sub _find_zero($) { |
81
|
38
|
|
|
38
|
|
60
|
my $ord = ord shift; |
82
|
38
|
|
|
|
|
92
|
return $ord - charinfo($ord)->{digit}; |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub digits_to_int { |
86
|
20
|
50
|
66
|
20
|
1
|
3029
|
croak "wrong number of arguments" unless @_ == 1 or @_ == 2; |
87
|
20
|
|
|
|
|
40
|
my ($string, $mode) = @_; |
88
|
20
|
100
|
|
|
|
50
|
$mode = "loosest" unless defined $mode; |
89
|
|
|
|
|
|
|
|
90
|
20
|
50
|
|
|
|
113
|
croak "ERRORHANDLING must be strict, loose, looser, or loosest not '$mode'" |
91
|
|
|
|
|
|
|
unless $mode =~ /^(?:strict|loose(?:r|st)?)$/; |
92
|
|
|
|
|
|
|
|
93
|
20
|
100
|
100
|
|
|
380
|
croak "string '$string' contains non-digit characters" |
94
|
|
|
|
|
|
|
if $mode =~ '^(?:strict|loose)$' and $string =~ /\D/; |
95
|
|
|
|
|
|
|
|
96
|
18
|
100
|
100
|
|
|
183
|
carp "string '$string' contains non-digit characters" |
97
|
|
|
|
|
|
|
if $mode eq "looser" and $string =~ /\D/; |
98
|
|
|
|
|
|
|
|
99
|
18
|
|
|
|
|
26
|
my $num; |
100
|
18
|
|
|
|
|
67
|
my ($first_num) = $string =~ /(\d)/; |
101
|
18
|
50
|
|
|
|
42
|
return $string unless defined $first_num; |
102
|
|
|
|
|
|
|
|
103
|
18
|
|
|
|
|
34
|
my $zero = _find_zero $first_num; |
104
|
|
|
|
|
|
|
|
105
|
18
|
|
|
|
|
272281
|
for my $d (split //, $string) { |
106
|
51
|
100
|
|
|
|
151
|
if ($d =~ /\D/) { |
107
|
15
|
|
|
|
|
19
|
$num .= $d; |
108
|
15
|
|
|
|
|
17
|
next; |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
36
|
|
|
|
|
95
|
my $info = charinfo ord $d; |
112
|
|
|
|
|
|
|
|
113
|
36
|
100
|
100
|
|
|
11616
|
croak "string '$string' contains digits from different ranges" |
114
|
|
|
|
|
|
|
if $mode eq 'strict' and $zero != _find_zero $d; |
115
|
|
|
|
|
|
|
|
116
|
35
|
100
|
100
|
|
|
1632
|
carp "string '$string' contains digits from different ranges" |
117
|
|
|
|
|
|
|
if $mode =~ /^looser?$/ and $zero != _find_zero $d; |
118
|
|
|
|
|
|
|
|
119
|
35
|
50
|
|
|
|
4702
|
die sprintf "U+%x claims to be a digit, but doesn't have a digit number", ord $d |
120
|
|
|
|
|
|
|
unless $info->{digit} =~ /[0-9]/; |
121
|
|
|
|
|
|
|
|
122
|
35
|
|
|
|
|
163
|
$num .= $info->{digit}; |
123
|
|
|
|
|
|
|
} |
124
|
17
|
|
|
|
|
91
|
return $num; |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head1 AUTHOR |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
Chas. J. Owens IV, C<< >> |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head1 DIAGNOSTICS |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=over |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=item "wrong number of arguments" |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
C takes one or two arguments, |
138
|
|
|
|
|
|
|
if you have more than two or no arguments you will recieve this error. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=item "ERRORHANDLING must be strict, loose, looser, or loosest not '%s'" |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
If you pass a second argument that is not strict, loose, looser, |
143
|
|
|
|
|
|
|
or loosest to C, you will |
144
|
|
|
|
|
|
|
recieve this error. |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=item "string '%s' contains non-digit characters" |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
You will recieve this message as a warning or error (depending on what |
149
|
|
|
|
|
|
|
mode you chose), if the string has characters that do not have the |
150
|
|
|
|
|
|
|
UNICODE digit property. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=item "string '$s' contains digits from different ranges" |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
You will recieve this message as a warning or error (depending on what |
155
|
|
|
|
|
|
|
mode you chose), if the string has characters that are not part of the |
156
|
|
|
|
|
|
|
same range of digit characters. |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=item "U+%x claims to be a digit, but doesn't have a digit number" |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
This error is unlikely to occur, if it does then the bug is either with |
161
|
|
|
|
|
|
|
my code (the likely scenario) or C (not very likely). |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=back |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=head1 BUGS |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
My understanding of UNICODE is flawed, therefore, I have undoubtly done |
168
|
|
|
|
|
|
|
something wrong. For instance, what should be done with "5\x{0308}"? |
169
|
|
|
|
|
|
|
Also, there is a bunch of stuff relating to surrogates I don't understand. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=head1 SUPPORT |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
perldoc Unicode::Digits |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
Copyright 2009 Chas. J. Owens IV, all rights reserved. |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
182
|
|
|
|
|
|
|
under the same terms as Perl itself. |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=cut |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
"this is not an interesting return value"; |