File Coverage

blib/lib/Lingua/RO/Numbers.pm

Criterion	Covered	Total	%
statement	141	153	92.1
branch	93	112	83.0
condition	22	30	73.3
subroutine	12	12	100.0
pod	3	3	100.0
total	271	310	87.4

line	stmt	bran	cond	sub	pod	time	code
1							package Lingua::RO::Numbers;
2
3							#
4							## See: http://ro.wikipedia.org/wiki/Sistem_zecimal#Denumiri_ale_numerelor
5							#
6
7	4			4		622117	use utf8;
	4					614
	4					23
8	4			4		170	use strict;
	4					7
	4					81
9	4			4		17	use warnings;
	4					7
	4					11402
10
11							require Exporter;
12							our @ISA = qw(Exporter);
13							our @EXPORT_OK = qw(number_to_ro ro_to_number);
14
15							=encoding utf8
16
17							=head1 NAME
18
19							Lingua::RO::Numbers - Convert numeric values into their Romanian string equivalents and viceversa
20
21							=head1 VERSION
22
23							Version 0.23
24
25							=cut
26
27							our $VERSION = '0.23';
28
29							#===========================================================================
30							# Data tables
31							#===========================================================================
32
33							# Numbers => text
34							our %DIGITS;
35							@DIGITS{'0' .. '19'} = qw(
36							zero unu doi trei patru cinci șase șapte opt nouă zece
37							unsprezece
38							doisprezece
39							treisprezece
40							paisprezece
41							cincisprezece
42							șaisprezece
43							șaptesprezece
44							optsprezece
45							nouăsprezece
46							);
47
48							# Text => numbers
49							our %WORDS;
50							@WORDS{ map { _remove_diacritics($_) } values %DIGITS } = keys %DIGITS;
51							@WORDS{qw(o un doua cin sai ob)} = (1, 1, 2, 5, 6, 8);
52
53							# Colocvial
54							@WORDS{qw(unspe doispe treispe paispe cinspe cinsprezece saispe saptespe saptuspe optspe optuspe nouaspe)} =
55							(11, 12, 13, 14, 15, 15, 16, 17, 17, 18, 18, 19);
56
57							# This array contains numbers greater than 99 and it's used to convert numbers into text
58							our @BIGNUMS = (
59							{ num => 10**2, sg => 'suta', pl => 'sute', fem => 1 },
60							{ num => 10**3, sg => 'mie', pl => 'mii', fem => 1 },
61							{ num => 10**6, sg => 'milion', pl => 'milioane' },
62							{ num => 10**9, sg => 'miliard', pl => 'miliarde' },
63							{ num => 10**12, sg => 'bilion', pl => 'bilioane' },
64							{ num => 10**15, sg => 'biliard', pl => 'biliarde' },
65							{ num => 10**18, sg => 'trilion', pl => 'trilioane' },
66							{ num => 10**21, sg => 'triliard', pl => 'triliarde' },
67							{ num => 10**24, sg => 'cvadrilion', pl => 'cvadrilioane' },
68							{ num => 10**27, sg => 'cvadriliard', pl => 'cvadriliarde' },
69							{ num => 'inf', sg => 'infinit', pl => 'infinit' },
70							);
71
72							# This hash is a reversed version of the above array and it's used to convert text into numbers
73							our %BIGWORDS = (map { $_->{sg} => $_->{num}, $_->{pl} => $_->{num} } @BIGNUMS);
74
75							# Change 'suta' to 'sută'
76							$BIGNUMS[0]{sg} = 'sută';
77
78							#===========================================================================
79							# POD
80							#===========================================================================
81
82							=head1 SYNOPSIS
83
84							use Lingua::RO::Numbers qw(number_to_ro ro_to_number);
85							print number_to_ro(315);
86							# prints: 'trei sute cincisprezece'
87
88							print ro_to_number('trei sute douazeci si cinci virgula doi');
89							# prints: 325.2
90
91							=head1 DESCRIPTION
92
93							Lingua::RO::Numbers converts arbitrary numbers into human-readable
94							Romanian text and viceversa, converting arbitrary Romanian text
95							into its corresponding numerical value.
96
97							=head2 EXPORT
98
99							Nothing is exported by default.
100							Only the functions B and B are exportable.
101
102							=over
103
104							=item B
105
106							Initialize an object.
107
108							my $obj = Lingua::RO::Numbers->new();
109
110							is equivalent with:
111
112							my $obj = Lingua::RO::Numbers->new(
113							diacritics => 1,
114							invalid_number => undef,
115							negative_sign => 'minus',
116							decimal_point => 'virgulă',
117							thousands_separator => '',
118							infinity => 'infinit',
119							not_a_number => 'NaN',
120							);
121
122							=item B
123
124							Converts a number to its Romanian string representation.
125
126							# Functional oriented usage
127							$string = number_to_ro($number);
128							$string = number_to_ro($number, %opts);
129
130							# Object oriented usage
131							my $obj = Lingua::RO::Numbers->new(%opts);
132							$string = $obj->number_to_ro($number);
133
134							# Example:
135							print number_to_ro(98_765, thousands_separator => q{,});
136							#=> 'nouăzeci și opt de mii, șapte sute șaizeci și cinci'
137
138							=item B
139
140							Converts a Romanian text into its numeric value.
141
142							# Functional oriented usage
143							$number = ro_to_number($text);
144							$number = ro_to_number($text, %opts);
145
146							# Object oriented usage
147							my $obj = Lingua::RO::Numbers->new(%opts);
148							$number = $obj->ro_to_number($text);
149
150							# Example:
151							print ro_to_number('patruzeci si doi'); #=> 42
152
153							=back
154
155							=cut
156
157							#===========================================================================
158							# Public methods
159							#===========================================================================
160
161							sub new {
162	4			4	1	1642	my ($class, %opts) = @_;
163
164	4					23	my $self = bless {
165							diacritics => 1,
166							invalid_number => undef,
167							negative_sign => 'minus',
168							decimal_point => 'virgulă',
169							thousands_separator => '',
170							infinity => 'infinit',
171							not_a_number => 'NaN',
172							}, $class;
173
174	4					6	foreach my $key (keys %{$self}) {
	4					19
175	28	100				55	if (exists $opts{$key}) {
176	4					12	$self->{$key} = delete $opts{$key};
177							}
178							}
179
180	4					10	foreach my $invalid_key (keys %opts) {
181	0					0	warn "Invalid option: <$invalid_key>";
182							}
183
184	4					13	return $self;
185							}
186
187							# This function is an interface to a private function
188							# which converts a mathematical number into its Romanian equivalent text.
189							sub number_to_ro {
190	34			34	1	9403	my ($self, $number) = _get_self_and_arg(@_);
191
192	34					92	my $word_number = $self->_number_to_ro($number + 0);
193
194	34	50				75	if (not $self->{diacritics}) {
195	0					0	$word_number = _remove_diacritics($word_number);
196							}
197
198							# Return the text-number
199	34					113	return $word_number;
200							}
201
202							# This function is an interface to a private function
203							# which converts a Romanian text-number into its mathematical value.
204							sub ro_to_number {
205	33			33	1	1106	my ($self, $text) = _get_self_and_arg(@_);
206
207							# Decode the text unless it is already UTF-8
208	33	100				132	utf8::is_utf8($text) \|\| do {
209	3					775	require Encode;
210	3					25254	$text = Encode::decode_utf8($text);
211							};
212
213							# Return the number
214	33					121	return $self->_ro_to_number($text);
215							}
216
217							#===========================================================================
218							# Private helpers
219							#===========================================================================
220
221							# Helper to support both object-oriented and functional calling styles.
222							# Returns ($self, $arg) in both cases.
223							sub _get_self_and_arg {
224	67	100		67		222	if (ref $_[0] eq __PACKAGE__) {
225	65					187	return ($_[0], $_[1]);
226							}
227	2					6	my ($arg, %opts) = @_;
228	2					8	return (__PACKAGE__->new(%opts), $arg);
229							}
230
231							# This function removes the Romanian diacritics from a given text.
232							sub _remove_diacritics {
233	181			181		347	my ($text) = @_;
234	181					413	$text =~ tr{ăâșțî}{aasti};
235	181					534	return $text;
236							}
237
238							# This function removes irrelevant characters from a text.
239							sub _normalize_text {
240
241							# Lowercase and remove the diacritics
242	101			101		368	my $text = _remove_diacritics(lc(shift));
243
244							# Replace irrelevant characters with a space
245	101					356	$text =~ tr/a-z / /c;
246
247	101					455	return $text;
248							}
249
250							# This function adds together a list of numbers.
251							sub _add_numbers {
252	38			38		102	my (@nums) = @_;
253
254	38					59	my $num = 0;
255	38					108	while (defined(my $i = shift @nums)) {
256
257							# When the current number is lower than the next number
258	83	100	100			271	if (@nums and $i < $nums[0]) {
259	21					38	my $n = shift @nums;
260
261							# This is a special case, where: int(log(1000)/log(10)) == 2
262	21					111	my $l = log($n) / log(10);
263	21	100				141	if (length($l) == length(int($l))) {
264	1					6	$l = sprintf('%.0f', $l);
265							}
266
267							# Factor (e.g.: 400 -> 4)
268	21					64	my $f = int($i / (10**int(log($i) / log(10))));
269
270							# When the next number is not really next to the current number
271							# e.g.: $i == 400 and $n == 5000 # should produce 405_000 not 45_000
272	21	50				63	if ((my $mod = length($n) % 3) != 0) {
273	21					44	$f = 10*(3 - $mod);
274							}
275
276							# Join the numbers and continue
277	21					43	$num += 10*int($l) $f + $n;
278	21					72	next;
279							}
280
281	62					174	$num += $i;
282							}
283
284	38					98	return $num;
285							}
286
287							# This function converts a Romanian text-number into a mathematical number.
288							sub _ro_to_number {
289	33			33		71	my ($self, $text) = @_;
290
291							# When no text has been provided
292	33	50				105	return if not defined $text;
293
294							# If a thousand separator is defined, remove it from text
295	33	50	33			185	if (defined($self->{thousands_separator}) and length($self->{thousands_separator})) {
296	0					0	$text =~ s/\Q$self->{thousands_separator}\E/ /g;
297							}
298
299							# Split the text into words
300	33					72	my @words = split(' ', _normalize_text($text));
301
302	33					86	my $dec_point = _normalize_text($self->{decimal_point});
303	33					79	my $neg_sign = _normalize_text($self->{negative_sign});
304
305	33					61	my @nums; # numbers
306							my @decs; # decimal numbers
307
308	33					51	my $neg = 0; # bool -- true when the number is negative
309	33					55	my $adec = 0; # bool -- true after the decimal point
310	33					55	my $amount = 0; # int -- current number
311	33					77	my $factor = 1; # int -- multiplication factor
312
313	33	50				79	if (@words) {
314
315							# Check for negative numbers
316	33	100				101	if ($words[0] eq $neg_sign) {
317	2					5	$neg = 1;
318	2					5	shift @words;
319							}
320
321							# Check for infinity and NaN
322	33	100				86	if (@words == 1) {
323
324							# Infinity
325	1					4	my $inf = _normalize_text($self->{infinity});
326	1	50				5	if ($words[0] eq $inf) {
327	0	0				0	return $neg ? -999 : 999;
328							}
329
330							# Not a number
331	1					3	my $nan = _normalize_text($self->{not_a_number});
332	1	50				4	if ($words[0] eq $nan) {
333	0					0	return -sin(999);
334							}
335							}
336							}
337
338							# Iterate over the words
339	33					83	WORD: while (@words) {
340
341							# It's a small number (lower than 100)
342	106	100				619	if (exists $WORDS{$words[0]}) {
		50
		0
343	72					134	$amount = shift @words;
344	72					126	$factor = 1;
345							}
346							elsif ($words[0] =~ s/zeci\z//) {
347	34					77	$amount = shift @words;
348	34					53	$factor = 10;
349							}
350							# It's a big number (e.g.: milion)
351							elsif (exists $BIGWORDS{$words[0]}) {
352	0					0	$factor = $BIGWORDS{shift @words};
353							}
354							# Ignore invalid words
355							else {
356	0					0	shift @words;
357	0					0	next WORD;
358							}
359
360							# Take and multiply the current number
361	106	50				377	my $num = exists($WORDS{$amount}) ? $WORDS{$amount} * $factor : next WORD;
362
363							# Check for some word-joining tokens
364	106	100				327	if (@words) {
365	98	100				226	if ($words[0] eq 'si') { # e.g.: patruzeci si doi
366	27					42	shift @words;
367	27					63	$num += $WORDS{shift @words};
368							}
369
370	98	100				205	if (@words) {
371							{
372	85	100				155	if ($words[0] eq 'de') { # e.g.: o suta de mii
	87					188
373	21					33	shift @words;
374							}
375
376	87	100				229	if (exists $BIGWORDS{$words[0]}) {
377	80					154	$num *= $BIGWORDS{shift @words};
378							}
379
380	87	100	100			426	if (@words && $words[0] eq 'de') {
381	2					6	redo;
382							}
383							}
384							}
385							}
386
387							# If we are after the decimal point, store the
388							# numbers in @decs, otherwise store them in @nums.
389	106	100				297	$adec ? push(@decs, $num) : push(@nums, $num);
390
391							# Check for the decimal point
392	106	100	100			409	if (@words and $words[0] eq $dec_point) {
393	5					11	$adec = 1;
394	5					14	shift @words;
395							}
396							}
397
398							# Return undef when no number has been converted
399	33	50				77	return if not @nums;
400
401							# Add all the numbers together (if any)
402	33					109	my $num = _add_numbers(@nums);
403
404							# If the number contains decimals,
405							# add them at the end of the number
406	33	100				71	if (@decs) {
407
408							# Special case -- check for leading zeros
409	5					11	my $zeros = '';
410	5		66			49	while (@decs and $decs[0] == 0) {
411	2					10	$zeros .= shift(@decs);
412							}
413
414	5					14	$num .= '.' . $zeros . _add_numbers(@decs);
415							}
416
417							# Return the number
418	33	100				369	return $neg ? -$num : $num + 0;
419							}
420
421							# This function converts numbers into their Romanian equivalent text.
422							sub _number_to_ro {
423	149			149		258	my ($self, $number) = @_;
424
425	149					200	my @words;
426
427	149	100	33			715	if (exists $DIGITS{$number}) { # example: 8
		50
		50
		100
		100
		100
		50
428	16					30	push @words, $DIGITS{$number};
429							}
430							elsif (lc($number) eq 'nan') { # not a number (NaN)
431	0					0	return $self->{not_a_number};
432							}
433							elsif ($number == 999) { # number is infinite
434	0					0	return $self->{infinity};
435							}
436							elsif ($number < 0) { # example: -43
437	2					4	push @words, $self->{negative_sign};
438	2					13	push @words, $self->_number_to_ro(abs($number));
439							}
440							elsif ($number != int($number)) { # example: 0.123 or 12.43
441	5					15	my $l = length($number) - 2;
442
443	5	50				17	if ((length($number) - length(int $number) - 1) < 1) { # special case
444	0					0	push @words, $self->_number_to_ro(sprintf('%.0f', $number));
445							}
446							else {
447	5					13	push @words, $self->_number_to_ro(int $number);
448	5					9	push @words, $self->{decimal_point};
449
450	5					9	$number -= int $number;
451
452	5					11	until ($number == int($number)) {
453	16					37	$number *= 10;
454	16					46	$number = sprintf('%.*f', --$l, $number); # because of imprecise multiplication
455	16	100				44	push @words, $DIGITS{0} if $number < 1;
456							}
457
458	5					10	push @words, $self->_number_to_ro(int $number);
459							}
460							}
461							elsif ($number >= $BIGNUMS[0]{num}) { # i.e.: >= 100
462	87					205	foreach my $j (reverse 1 .. $#BIGNUMS) {
463							next unless $number >= $BIGNUMS[$j - 1]{num}
464	797	100	66			1650	&& $number <= $BIGNUMS[$j]{num};
465
466	87					164	my $cat = int $number / $BIGNUMS[$j - 1]{num};
467	87					168	$number -= $BIGNUMS[$j - 1]{num} * int($number / $BIGNUMS[$j - 1]{num});
468
469	87	100				220	my @of = $cat <= 2 ? () : do {
470							my @w = exists $DIGITS{$cat}
471	59	100				183	? $DIGITS{$cat}
472							: ($self->_number_to_ro($cat), 'de');
473	59	100				133	if (@w > 2) {
474	30	100				62	$w[-2] = 'două' if $w[-2] eq $DIGITS{2};
475							}
476	59					137	@w;
477							};
478
479	87	100	66			195	if ($cat >= 100 && $cat < 1_000) {
480	26					48	my $rest = $cat - 100 * int($cat / 100);
481	26	100	66			117	if (@of and $rest != 0 and exists $DIGITS{$rest}) {
			100
482	5					9	splice @of, -1; # remove 'de'
483							}
484							}
485
486							push @words,
487							$cat == 1 ? ($BIGNUMS[$j - 1]{fem} ? 'o' : 'un', $BIGNUMS[$j - 1]{sg})
488							: $cat == 2 ? ('două', $BIGNUMS[$j - 1]{pl})
489	87	100				262	: (@of, $BIGNUMS[$j - 1]{pl});
		100
		100
490
491	87	100				176	if ($number > 0) {
492	73	100				176	$words[-1] .= $self->{thousands_separator} if $BIGNUMS[$j]{num} > 1_000;
493	73					170	push @words, $self->_number_to_ro($number);
494							}
495
496	87					202	last;
497							}
498							}
499							elsif ($number > 19 && $number < 100) { # example: 42
500	39					71	my $cat = int($number / 10);
501	39	100				106	my $tens = ($cat == 2 ? 'două' : $cat == 6 ? 'șai' : $DIGITS{$cat}) . 'zeci';
		100
502	39					73	push @words, $tens;
503	39	100				180	push @words, 'și', $DIGITS{$number % 10} if $number % 10 != 0;
504							}
505							else { # doesn't look like a number
506	0					0	return $self->{invalid_number};
507							}
508
509	149	50				582	return wantarray ? @words : @words ? join(' ', @words) : ();
		100
510							}
511
512							=head1 AUTHOR
513
514							Daniel Șuteu, C<< >>
515
516							=head1 SUPPORT
517
518							You can find documentation for this module with the perldoc command.
519
520							perldoc Lingua::RO::Numbers
521
522							=head1 REPOSITORY
523
524							L
525
526							=head1 REFERENCES
527
528							L
529
530							=head1 LICENSE AND COPYRIGHT
531
532							Copyright 2013-2018 Daniel Șuteu.
533
534							This program is free software; you can redistribute it and/or modify it
535							under the terms of the the Artistic License (2.0). You may obtain a
536							copy of the full license at:
537
538							L
539
540							=cut
541
542							1; # End of Lingua::RO::Numbers
543
544							__END__