File Coverage

blib/lib/Lingua/NOR/Word2Num.pm

Criterion	Covered	Total	%
statement	28	53	52.8
branch	1	18	5.5
condition	4	6	66.6
subroutine	9	10	90.0
pod	3	3	100.0
total	45	90	50.0

line	stmt	bran	cond	sub	pod	time	code
1							# For Emacs: -- mode:cperl; eval: (folding-mode 1); coding:utf-8; --
2
3							package Lingua::NOR::Word2Num;
4							# ABSTRACT: Word to number conversion in Norwegian
5
6	1			1		125898	use 5.16.0;
	1					4
7	1			1		4	use utf8;
	1					1
	1					12
8	1			1		23	use warnings;
	1					1
	1					54
9
10							# {{{ use block
11
12	1			1		518	use Export::Attrs;
	1					13639
	1					8
13	1			1		1503	use Parse::RecDescent;
	1					59983
	1					10
14
15							# }}}
16							# {{{ variable declarations
17							our $VERSION = '0.2603300';
18							my $parser = no_numerals();
19
20							# }}}
21
22							# {{{ w2n convert text to number
23
24							sub w2n :Export {
25	4		100	4	1	311336	my $input = shift // return;
26
27	3					56	$input =~ s/ og / /g; # Spoke only relevant keywords
28	3					13	$input =~ s/ million / millioner /g; # equal
29
30	3					11	$input =~ s/,//g;
31	3					23	$input =~ s/ //g;
32
33	3	50				12	return 0 if $input eq 'null';
34
35	3		100			44	return $parser->numeral($input) \|\| undef;
36	1			1		275	}
	1					2
	1					9
37
38							# }}}
39							# {{{ no_numerals create parser for numerals
40
41							sub no_numerals {
42	1			1	1	7	return Parse::RecDescent->new (q{
43							numeral:
44							numeral: million { return $item[1]; } # root parse. go from maximum to minimum value
45							\| millenium { return $item[1]; }
46							\| century { return $item[1]; }
47							\| decade { return $item[1]; }
48							\| { return undef; }
49
50							number: 'null' { $return = 0; } # try to find a word from 0 to 19
51							\| 'nitten' { $return = 19; }
52							\| 'atten' { $return = 18; }
53							\| 'sytten' { $return = 17; }
54							\| 'seksten' { $return = 16; }
55							\| 'femten' { $return = 15; }
56							\| 'fjorten' { $return = 14; }
57							\| 'tretten' { $return = 13; }
58							\| 'tolv' { $return = 12; }
59							\| 'ellve' { $return = 11; }
60							\| 'ti' { $return = 10; }
61							\| 'ni' { $return = 9; }
62							\| 'åtte' { $return = 8; }
63							\| 'sju' { $return = 7; }
64							\| 'seks' { $return = 6; }
65							\| 'fem' { $return = 5; }
66							\| 'fire' { $return = 4; }
67							\| 'tre' { $return = 3; }
68							\| 'to' { $return = 2; }
69							\| 'en' { $return = 1; }
70							\| 'ett' { $return = 1; }
71
72							tens: 'tjue' { $return = 20; } # try to find a word that represents
73							\| 'tretti' { $return = 30; } # values 20,30,..,90
74							\| 'førti' { $return = 40; }
75							\| 'femti' { $return = 50; }
76							\| 'seksti' { $return = 60; }
77							\| 'sytti' { $return = 70; }
78							\| 'åtti' { $return = 80; }
79							\| 'nitti' { $return = 90; }
80
81							decade: tens(?) number(?) # try to find words that represents values
82							{ $return = 0; # from 0 to 99
83							for (@item) {
84							$return += $$_[0] if (ref $_ && defined $$_[0]);
85							}
86							}
87
88							century: number(?) 'hundre' decade(?) # try to find words that represents values
89							{ $return = 0; # from 100 to 999
90							for (@item) {
91							if (ref $_ && defined $$_[0]) {
92							$return += $$_[0];
93							} elsif ($_ eq "hundre") {
94							$return = ($return>0) ? $return * 100 : 100;
95							}
96							}
97							}
98
99							millenium: century(?) decade(?) 'tusen' century(?) decade(?) # try to find words that represents values
100							{ $return = 0; # from 1.000 to 999.999
101							for (@item) {
102							if (ref $_ && defined $$_[0]) {
103							$return += $$_[0];
104							} elsif ($_ eq "tusen") {
105							$return = ($return>0) ? $return * 1000 : 1000;
106							}
107							}
108							}
109
110							million: millenium(?) century(?) decade(?) # try to find words that represents values
111							'millioner' # from 1.000.000 to 999.999.999
112							millenium(?) century(?) decade(?)
113							{ $return = 0;
114							for (@item) {
115							if (ref $_ && defined $$_[0]) {
116							$return += $$_[0];
117							} elsif ($_ eq "millioner") {
118							$return = ($return>0) ? $return * 1000000 : 1000000;
119							}
120							}
121							}
122							});
123							}
124
125							# }}}
126							# {{{ ordinal2cardinal convert ordinal text to cardinal text
127
128							sub ordinal2cardinal :Export {
129	0		0	0	1		my $input = shift // return;
130
131							# Norwegian ordinal→cardinal: reverse lookup for irregular forms,
132							# suffix stripping for regular/compound forms.
133
134							# Fully irregular 1-12
135	0						my %irregular = (
136							'første' => 'en',
137							'andre' => 'to',
138							'tredje' => 'tre',
139							'fjerde' => 'fire',
140							'femte' => 'fem',
141							'sjette' => 'seks',
142							'sjuende' => 'sju',
143							'åttende' => 'åtte',
144							'niende' => 'ni',
145							'tiende' => 'ti',
146							'ellevte' => 'ellve',
147							'tolvte' => 'tolv',
148							);
149
150							# Teens 13-19
151	0						my %teens = (
152							'trettende' => 'tretten',
153							'fjortende' => 'fjorten',
154							'femtende' => 'femten',
155							'sekstende' => 'seksten',
156							'syttende' => 'sytten',
157							'attende' => 'atten',
158							'nittende' => 'nitten',
159							);
160
161							# Tens ordinals
162	0						my %tens = (
163							'tjuende' => 'tjue',
164							'trettiende' => 'tretti',
165							'førtiende' => 'førti',
166							'femtiende' => 'femti',
167							'sekstiende' => 'seksti',
168							'syttiende' => 'sytti',
169							'åttiende' => 'åtti',
170							'nittiende' => 'nitti',
171							);
172
173							# Exact match: standalone ordinals
174	0	0					return $irregular{$input} if exists $irregular{$input};
175	0	0					return $teens{$input} if exists $teens{$input};
176	0	0					return $tens{$input} if exists $tens{$input};
177
178							# Round hundreds: "hundrede" → "hundre"
179	0	0					$input =~ s{hundrede\z}{hundre}xms and return $input;
180
181							# Thousands ordinal: "tusende" → "tusen" (e.g. "ett tusende" → "ett tusen")
182	0	0					$input =~ s{tusende\z}{tusen}xms and return $input;
183
184							# Compounds 21-99: tens cardinal prefix + ordinal unit tail
185							# e.g. "tjueførste" → "tjue" + "første" → "tjue" + "en" = "tjueen"
186	0						for my $ord (sort { length $b <=> length $a } keys %irregular) {
	0
187	0	0					if ($input =~ m{\A(.+)\Q$ord\E\z}xms) {
188	0						my $prefix = $1;
189	0						return $prefix . $irregular{$ord};
190							}
191							}
192
193							# Compound with teen tail (for hundreds + teen ordinal)
194	0						for my $ord (sort { length $b <=> length $a } keys %teens) {
	0
195	0	0					if ($input =~ m{\A(.+)\Q$ord\E\z}xms) {
196	0						my $prefix = $1;
197	0						return $prefix . $teens{$ord};
198							}
199							}
200
201							# Compound with tens tail (for hundreds + tens ordinal)
202	0						for my $ord (sort { length $b <=> length $a } keys %tens) {
	0
203	0	0					if ($input =~ m{\A(.+)\Q$ord\E\z}xms) {
204	0						my $prefix = $1;
205	0						return $prefix . $tens{$ord};
206							}
207							}
208
209	0						return; # not an ordinal
210	1			1		1047	}
	1					3
	1					7
211
212							# }}}
213
214							1;
215
216							__END__