| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Lingua::ID::Words2Nums; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 30646 | use 5.010; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 40 |  | 
| 4 | 1 |  |  | 1 |  | 6 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 37 |  | 
| 5 | 1 |  |  | 1 |  | 6 | use warnings; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 203 |  | 
| 6 |  |  |  |  |  |  | #use Log::Any qw($log); | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | our $VERSION = '0.15'; # VERSION | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | our %SPEC; | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | require Exporter; | 
| 13 |  |  |  |  |  |  | our @ISA       = qw(Exporter); | 
| 14 |  |  |  |  |  |  | our @EXPORT_OK = qw(words2nums words2nums_simple $Pat); | 
| 15 |  |  |  |  |  |  |  | 
| 16 | 1 |  |  | 1 |  | 888 | use Parse::Number::ID qw(parse_number_id); | 
|  | 1 |  |  |  |  | 983 |  | 
|  | 1 |  |  |  |  | 92 |  | 
| 17 | 1 |  |  | 1 |  | 7 | use Scalar::Util qw(looks_like_number); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 2338 |  | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | my %Digits = ( | 
| 20 |  |  |  |  |  |  | nol => 0, kosong => 0, ksg => 0, ksng => 0, | 
| 21 |  |  |  |  |  |  | se => 1, satu => 1, st => 1, | 
| 22 |  |  |  |  |  |  | dua => 2, | 
| 23 |  |  |  |  |  |  | tiga => 3, tg => 3, | 
| 24 |  |  |  |  |  |  | empat => 4, pat => 4, ampat => 4, mpat => 4, | 
| 25 |  |  |  |  |  |  | lima => 5, lm => 5, | 
| 26 |  |  |  |  |  |  | enam => 6, nam => 6, | 
| 27 |  |  |  |  |  |  | tujuh => 7, tjh => 7, | 
| 28 |  |  |  |  |  |  | delapan => 8, dlpn => 8, lapan => 8, | 
| 29 |  |  |  |  |  |  | sembilan => 9, smbln => 9, | 
| 30 |  |  |  |  |  |  | ); | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | my %Mults = ( | 
| 33 |  |  |  |  |  |  | puluh => 1e1, plh => 1e1, | 
| 34 |  |  |  |  |  |  | lusin => 12, | 
| 35 |  |  |  |  |  |  | kodi => 20, | 
| 36 |  |  |  |  |  |  | ratus => 1e2, rts => 1e2, | 
| 37 |  |  |  |  |  |  | gros => 144, gross => 144, | 
| 38 |  |  |  |  |  |  | ribu => 1e3, rb => 1e3, | 
| 39 |  |  |  |  |  |  | juta => 1e6, jt => 1e6, | 
| 40 |  |  |  |  |  |  | milyar => 1e9, milyard => 1e9, miliar => 1e9, miliard => 1e9, | 
| 41 |  |  |  |  |  |  | triliun => 1e12, trilyun => 1e12, | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | # -yun / kw- / etc variants? | 
| 44 |  |  |  |  |  |  | kuadriliun => 1e15, | 
| 45 |  |  |  |  |  |  | kuintiliun => 18, | 
| 46 |  |  |  |  |  |  | sekstiliun => 21, | 
| 47 |  |  |  |  |  |  | septiliun => 24, | 
| 48 |  |  |  |  |  |  | oktiliun => 27, | 
| 49 |  |  |  |  |  |  | noniliun => 30, | 
| 50 |  |  |  |  |  |  | desiliun => 33, | 
| 51 |  |  |  |  |  |  | undesiliun => 36, | 
| 52 |  |  |  |  |  |  | duodesiliun => 39, | 
| 53 |  |  |  |  |  |  | tredesiliun => 42, | 
| 54 |  |  |  |  |  |  | kuatuordesiliun => 45, | 
| 55 |  |  |  |  |  |  | kuindesiliun => 48, | 
| 56 |  |  |  |  |  |  | seksdesiliun => 51, | 
| 57 |  |  |  |  |  |  | septendesiliun => 54, | 
| 58 |  |  |  |  |  |  | oktodesiliun => 57, | 
| 59 |  |  |  |  |  |  | novemdesiliun => 60, | 
| 60 |  |  |  |  |  |  | vigintiliun => 63, | 
| 61 |  |  |  |  |  |  | googol => 100, gugol => 100, | 
| 62 |  |  |  |  |  |  | sentiliun => 303, | 
| 63 |  |  |  |  |  |  | ); | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | my %Teen_Words = ( | 
| 66 |  |  |  |  |  |  | belas => 0, | 
| 67 |  |  |  |  |  |  | bls => 0, | 
| 68 |  |  |  |  |  |  | ); | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | my %Words = ( | 
| 71 |  |  |  |  |  |  | %Digits, %Mults, %Teen_Words, | 
| 72 |  |  |  |  |  |  | ); | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | my %Se = ("se" => 0, "s" => 0); | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | # words that can be used with se- (or single digit), e.g. sebelas, tiga belas, | 
| 77 |  |  |  |  |  |  | # sepuluh, seratus, dua ratus, ... | 
| 78 |  |  |  |  |  |  | my %Se_Words = ( | 
| 79 |  |  |  |  |  |  | %Mults, %Teen_Words, | 
| 80 |  |  |  |  |  |  | ); | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | my $Pos_pat  = qr/(?:positif|plus|pos)/; | 
| 83 |  |  |  |  |  |  | my $Neg_pat  = qr/(?:negatif|ngtf|min|minus|mns)/; | 
| 84 |  |  |  |  |  |  | my $Exp_pat  = qr/(?:(?:di)?\s*(?:kali|kl)(?:kan)?\s+(?:sepuluh|splh) | 
| 85 |  |  |  |  |  |  | \s+(?:pangkat|pkt|pngkt))/x; | 
| 86 |  |  |  |  |  |  | my $Dec_pat  = qr/(?:koma|km|titik|ttk)/; | 
| 87 |  |  |  |  |  |  | my $Teen_pat = "(?:".join("|", sort keys %Teen_Words).")"; | 
| 88 |  |  |  |  |  |  | my $Mult_pat = "(?:" . join("|", sort keys %Se_Words).")"; | 
| 89 |  |  |  |  |  |  | my $Se_pat   = "(?:" . join("|", sort keys %Se).")"; | 
| 90 |  |  |  |  |  |  | my $Se_Mult_pat = "(?:(?:" . join("|", sort keys %Se).")". | 
| 91 |  |  |  |  |  |  | "(?:" . join("|", sort keys %Se_Words) . "))"; | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  | # quick pattern for extracting words ; | 
| 94 |  |  |  |  |  |  | my $_w = "(?:" . join("|", $Se_Mult_pat, | 
| 95 |  |  |  |  |  |  | (grep {$_ ne 'se'} sort keys(%Words)), | 
| 96 |  |  |  |  |  |  | $Parse::Number::ID::Pat, | 
| 97 |  |  |  |  |  |  | ) . ")"; | 
| 98 |  |  |  |  |  |  | our $Pat = qr/(?:$_w(?:,?\s*$_w)*)/; | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | $SPEC{words2nums} = { | 
| 101 |  |  |  |  |  |  | v => 1.1, | 
| 102 |  |  |  |  |  |  | summary => 'Convert Indonesian verbage to number', | 
| 103 |  |  |  |  |  |  | description => <<'_', | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | Parse Indonesian verbage and return number, or undef if failed (unknown verbage | 
| 106 |  |  |  |  |  |  | or 'syntax error'). In English, this is equivalent to converting "one hundred | 
| 107 |  |  |  |  |  |  | and twenty three" to 123. Currently can handle real numbers in normal and | 
| 108 |  |  |  |  |  |  | scientific form in the order of hundreds of trillions. | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | Will produce unexpected result if you feed it stupid verbage. | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | _ | 
| 113 |  |  |  |  |  |  | args => { | 
| 114 |  |  |  |  |  |  | str => { | 
| 115 |  |  |  |  |  |  | schema => 'str*', | 
| 116 |  |  |  |  |  |  | summary => 'The verbage to convert', | 
| 117 |  |  |  |  |  |  | req => 1, | 
| 118 |  |  |  |  |  |  | pos => 0, | 
| 119 |  |  |  |  |  |  | }, | 
| 120 |  |  |  |  |  |  | }, | 
| 121 |  |  |  |  |  |  | args_as => 'array', | 
| 122 |  |  |  |  |  |  | result_naked => 1, | 
| 123 |  |  |  |  |  |  | }; | 
| 124 | 45 |  |  | 45 | 0 | 316 | sub words2nums($) { _handle_exp(@_) } | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | $SPEC{words2nums_simple} = { | 
| 127 |  |  |  |  |  |  | v => 1.1, | 
| 128 |  |  |  |  |  |  | summary => 'Like words2nums, but can only parse sequence of digits', | 
| 129 |  |  |  |  |  |  | description => <<'_', | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | In English, this is equivalent to converting "one two three" to 123. | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | Will produce unexpected result if you feed it stupid verbage. | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | _ | 
| 136 |  |  |  |  |  |  | args => { | 
| 137 |  |  |  |  |  |  | str => { | 
| 138 |  |  |  |  |  |  | schema => 'str*', | 
| 139 |  |  |  |  |  |  | summary => 'The verbage to convert', | 
| 140 |  |  |  |  |  |  | req => 1, | 
| 141 |  |  |  |  |  |  | pos => 0, | 
| 142 |  |  |  |  |  |  | }, | 
| 143 |  |  |  |  |  |  | }, | 
| 144 |  |  |  |  |  |  | args_as => 'array', | 
| 145 |  |  |  |  |  |  | result_naked => 1, | 
| 146 |  |  |  |  |  |  | }; | 
| 147 | 7 |  |  | 7 | 0 | 22 | sub words2nums_simple($) { _handle_simple(@_) } | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | sub _handle_exp($) { | 
| 150 | 45 |  |  | 45 |  | 97 | my $words = lc shift; | 
| 151 | 45 |  |  |  |  | 58 | my ($num1, $num2); | 
| 152 |  |  |  |  |  |  |  | 
| 153 | 45 | 100 |  |  |  | 552 | if( $words =~ /(.+)\s+$Exp_pat\s+(.+)/ ) { | 
| 154 |  |  |  |  |  |  | #$log->trace("it's an exponent"); | 
| 155 | 4 |  |  |  |  | 9 | $num1 = _handle_neg($1); | 
| 156 | 4 |  |  |  |  | 10 | $num2 = _handle_neg($2); | 
| 157 |  |  |  |  |  |  | #$log->trace("num1 is $num1, num2 is $num2"); | 
| 158 | 4 | 50 | 33 |  |  | 36 | !defined($num1) || !defined($num2) and return undef; | 
| 159 | 4 |  |  |  |  | 52 | return $num1 * 10 ** $num2; | 
| 160 |  |  |  |  |  |  | } else { | 
| 161 |  |  |  |  |  |  | #$log->trace("not an exponent"); | 
| 162 | 41 |  |  |  |  | 79 | $num1 = _handle_neg($words); | 
| 163 | 41 | 50 |  |  |  | 96 | not defined $num1 and return undef; | 
| 164 |  |  |  |  |  |  | #$log->trace("num1 = $num1"); | 
| 165 | 41 |  |  |  |  | 279 | return $num1; | 
| 166 |  |  |  |  |  |  | } | 
| 167 |  |  |  |  |  |  | } | 
| 168 |  |  |  |  |  |  |  | 
| 169 |  |  |  |  |  |  | sub _handle_neg($) { | 
| 170 | 49 |  |  | 49 |  | 100 | my $words = lc shift; | 
| 171 | 49 |  |  |  |  | 55 | my $num1; | 
| 172 |  |  |  |  |  |  |  | 
| 173 | 49 | 100 |  |  |  | 488 | if( $words =~ /^\s*$Neg_pat\s+(.+)/ ) { | 
|  |  | 100 |  |  |  |  |  | 
| 174 |  |  |  |  |  |  | #$log->trace("it's negative"); | 
| 175 | 8 |  |  |  |  | 48 | $num1 = -_handle_dec($1); | 
| 176 | 8 | 50 |  |  |  | 19 | not defined $num1 and return undef; | 
| 177 |  |  |  |  |  |  | #$log->trace("num1 = $num1"); | 
| 178 | 8 |  |  |  |  | 15 | return $num1; | 
| 179 |  |  |  |  |  |  | } elsif( $words =~ /^\s*$Pos_pat\s+(.+)/ ) { | 
| 180 |  |  |  |  |  |  | #$log->trace("it's positif"); | 
| 181 | 1 |  |  |  |  | 7 | $num1 = _handle_dec($1); | 
| 182 | 1 | 50 |  |  |  | 5 | not defined $num1 and return undef; | 
| 183 |  |  |  |  |  |  | #$log->trace("num1 = $num1"); | 
| 184 | 1 |  |  |  |  | 3 | return $num1; | 
| 185 |  |  |  |  |  |  | } else { | 
| 186 |  |  |  |  |  |  | #$log->trace("it's not negative"); | 
| 187 | 40 |  |  |  |  | 78 | $num1 = _handle_dec($words); | 
| 188 | 40 | 50 |  |  |  | 85 | not defined $num1 and return undef; | 
| 189 |  |  |  |  |  |  | #$log->trace("num1 = $num1"); | 
| 190 | 40 |  |  |  |  | 85 | return $num1; | 
| 191 |  |  |  |  |  |  | } | 
| 192 |  |  |  |  |  |  | } | 
| 193 |  |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | sub _handle_dec($) { | 
| 195 | 49 |  |  | 49 |  | 83 | my $words = lc shift; | 
| 196 | 49 |  |  |  |  | 52 | my ($num1, $num2); | 
| 197 |  |  |  |  |  |  |  | 
| 198 | 49 | 100 |  |  |  | 1228 | if( $words =~ /(.+)\s+$Dec_pat\s+(.+)/ ) { | 
| 199 |  |  |  |  |  |  | #$log->trace("it has decimals (\$1=$1, \$2=$2)"); | 
| 200 | 8 |  |  |  |  | 17 | $num1 = _handle_int($1); | 
| 201 | 8 |  |  |  |  | 18 | $num2 = _handle_simple($2); | 
| 202 |  |  |  |  |  |  | #$log->trace("num1 is $num1, num2 is $num2"); | 
| 203 | 8 | 50 | 33 |  |  | 33 | !defined($num1) || !defined($num2) and return undef; | 
| 204 | 8 |  |  |  |  | 36 | return $num1 + ("0.".$num2); | 
| 205 |  |  |  |  |  |  | } else { | 
| 206 |  |  |  |  |  |  | #$log->trace("it's an integer"); | 
| 207 | 41 |  |  |  |  | 83 | $num1 = _handle_int($words); | 
| 208 | 41 | 50 |  |  |  | 90 | not defined $num1 and return undef; | 
| 209 |  |  |  |  |  |  | #$log->trace("num1 is $num1"); | 
| 210 | 41 |  |  |  |  | 98 | return $num1; | 
| 211 |  |  |  |  |  |  | } | 
| 212 |  |  |  |  |  |  | } | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | # handle words before decimal (e.g, 'seratus dua puluh tiga', ...) | 
| 216 |  |  |  |  |  |  | sub _handle_int($) { | 
| 217 | 49 |  |  | 49 |  | 139 | my @words = &_split_it( lc shift ); | 
| 218 | 49 |  |  |  |  | 75 | my ($num, $mult); | 
| 219 | 49 |  |  |  |  | 59 | my $seen_digits = 0; | 
| 220 | 49 |  |  |  |  | 54 | my ($w, $a, $subtot, $tot); | 
| 221 | 49 |  |  |  |  | 64 | my @nums = (); | 
| 222 |  |  |  |  |  |  |  | 
| 223 | 49 | 50 |  |  |  | 169 | $words[0] eq 'ERR' and return undef; | 
| 224 |  |  |  |  |  |  | #$log->trace("the words are @words"); | 
| 225 |  |  |  |  |  |  |  | 
| 226 | 49 |  |  |  |  | 87 | for $w (@words) { | 
| 227 | 135 | 100 |  |  |  | 649 | if( defined $Digits{$w} ) { # digits (satuan) | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 228 |  |  |  |  |  |  | #$log->trace("saw a digit: $w"); | 
| 229 | 0 |  |  |  |  | 0 | $seen_digits and do { push @nums, ((10 * (pop @nums)) + $Digits{$w}) } | 
| 230 | 65 | 50 | 33 |  |  | 164 | or do { push @nums, $Digits{$w}; $seen_digits = 1 } | 
|  | 65 |  |  |  |  | 121 |  | 
|  | 65 |  |  |  |  | 132 |  | 
| 231 |  |  |  |  |  |  | } | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  | elsif ( looks_like_number $w ) { # digits (satuan) as number | 
| 234 |  |  |  |  |  |  | #$log->trace("saw a number: $w"); | 
| 235 | 13 | 50 |  |  |  | 30 | return undef if $seen_digits; # 1  2 is considered an error | 
| 236 | 13 |  |  |  |  | 29 | push @nums, $w; | 
| 237 | 13 |  |  |  |  | 31 | $seen_digits = 1; | 
| 238 |  |  |  |  |  |  | } | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | elsif( $w =~ /^$Teen_pat$/ ) { # special case, teens (belasan) | 
| 241 |  |  |  |  |  |  | #$log->trace("saw a teen: $w"); | 
| 242 | 6 | 50 |  |  |  | 13 | return undef unless $seen_digits; # mistake in writing teens | 
| 243 | 6 |  |  |  |  | 10 | push @nums, 10 + pop @nums; | 
| 244 | 6 |  |  |  |  | 17 | $seen_digits = 0; | 
| 245 |  |  |  |  |  |  | } | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | else{ # must be a multiplier, or unknown | 
| 248 |  |  |  |  |  |  | #$log->trace( "saw a multiplier: $w"); | 
| 249 | 51 | 50 |  |  |  | 145 | return undef unless defined $Mults{$w}; # unknown word | 
| 250 | 51 | 50 |  |  |  | 116 | return undef unless @nums; # mistake in writing tens/multiplier | 
| 251 |  |  |  |  |  |  |  | 
| 252 | 51 |  |  |  |  | 83 | $a = 0; $subtot = 0; | 
|  | 51 |  |  |  |  | 62 |  | 
| 253 | 51 |  | 100 |  |  | 62 | do { $a = pop @nums; $subtot += $a } | 
|  | 77 |  |  |  |  | 158 |  | 
|  | 77 |  |  |  |  | 417 |  | 
| 254 |  |  |  |  |  |  | until ( $a > $Mults{$w} || !@nums ); | 
| 255 |  |  |  |  |  |  |  | 
| 256 | 51 | 100 |  |  |  | 124 | if( $a > $Mults{$w} ) { push @nums, $a; $subtot -= $a } | 
|  | 19 |  |  |  |  | 31 |  | 
|  | 19 |  |  |  |  | 28 |  | 
| 257 | 51 |  |  |  |  | 420 | push @nums, $Mults{$w}*$subtot; | 
| 258 | 51 |  |  |  |  | 122 | $seen_digits = 0; | 
| 259 |  |  |  |  |  |  | } | 
| 260 |  |  |  |  |  |  | } | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | # calculate total | 
| 263 | 49 |  |  |  |  | 75 | $tot = 0; | 
| 264 | 49 |  |  |  |  | 114 | while( @nums ){ $tot += shift @nums } | 
|  | 71 |  |  |  |  | 184 |  | 
| 265 | 49 |  |  |  |  | 143 | $tot; | 
| 266 |  |  |  |  |  |  | } | 
| 267 |  |  |  |  |  |  |  | 
| 268 |  |  |  |  |  |  |  | 
| 269 |  |  |  |  |  |  | # handle words after decimal (simple with no 'belas', 'puluh', 'ratus', ...) | 
| 270 |  |  |  |  |  |  | sub _handle_simple($) { | 
| 271 |  |  |  |  |  |  | #$log->tracef("-> _handle_simple(%s)", \@_); | 
| 272 | 15 |  |  | 15 |  | 57 | my @words = &_split_it( lc shift ); | 
| 273 |  |  |  |  |  |  | #$log->tracef("words = %s", \@words); | 
| 274 | 15 |  |  |  |  | 23 | my ($num, $w); | 
| 275 |  |  |  |  |  |  |  | 
| 276 | 15 | 50 |  |  |  | 43 | $words[0] eq 'ERR' and return undef; | 
| 277 |  |  |  |  |  |  |  | 
| 278 | 15 |  |  |  |  | 21 | $num = ""; | 
| 279 | 15 |  |  |  |  | 30 | for $w (@words) { | 
| 280 | 38 | 100 |  |  |  | 115 | if (looks_like_number $w) { | 
| 281 | 4 |  |  |  |  | 11 | $num .= $w; | 
| 282 |  |  |  |  |  |  | } else { | 
| 283 | 34 | 50 |  |  |  | 86 | not defined $Digits{$w} and return undef; | 
| 284 | 34 |  |  |  |  | 94 | $num .= $Digits{$w}; | 
| 285 |  |  |  |  |  |  | } | 
| 286 |  |  |  |  |  |  | } | 
| 287 |  |  |  |  |  |  |  | 
| 288 | 15 |  |  |  |  | 86 | $num; | 
| 289 |  |  |  |  |  |  | } | 
| 290 |  |  |  |  |  |  |  | 
| 291 |  |  |  |  |  |  |  | 
| 292 |  |  |  |  |  |  | # split string into array of words. also splits 'sepuluh' -> (se, puluh), | 
| 293 |  |  |  |  |  |  | # 'tigabelas' -> (tiga, belas), etc. | 
| 294 |  |  |  |  |  |  | sub _split_it($) { | 
| 295 | 64 |  |  | 64 |  | 117 | my $words = lc shift; | 
| 296 | 64 |  |  |  |  | 137 | my @words = (); | 
| 297 | 64 |  |  |  |  | 74 | my $w; | 
| 298 |  |  |  |  |  |  |  | 
| 299 | 64 |  |  |  |  | 236 | for $w (split /\s+/, $words) { | 
| 300 |  |  |  |  |  |  | ##$log->trace("saw $w"); | 
| 301 | 153 | 100 | 66 |  |  | 2361 | if ($w =~ /^([-+]?[0-9.,]+(?:[Ee][+-]?\d+)?)(\D?.*)$/) { | 
|  |  | 100 | 33 |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 302 | 17 |  |  |  |  | 61 | my ($n0, $w2) = ($1, $2); | 
| 303 |  |  |  |  |  |  | #print "n0=$n0, w2=$w2\n"; | 
| 304 | 17 |  |  |  |  | 68 | my $n = parse_number_id(text => $n0); | 
| 305 | 17 | 50 |  |  |  | 536 | unless (defined $n) { | 
| 306 | 0 |  |  |  |  | 0 | unshift @words, 'ERR'; | 
| 307 | 0 |  |  |  |  | 0 | last; | 
| 308 |  |  |  |  |  |  | } | 
| 309 | 17 |  |  |  |  | 32 | push @words, $n; | 
| 310 | 17 | 100 |  |  |  | 151 | push @words, $w2 if length($w2); | 
| 311 |  |  |  |  |  |  | } | 
| 312 |  |  |  |  |  |  | elsif( $w =~ /^($Se_pat)($Mult_pat)$/ and defined $Words{$1} ) { | 
| 313 |  |  |  |  |  |  | #$log->trace("i should split $w"); | 
| 314 | 18 |  |  |  |  | 126 | push @words, $1, $2 } | 
| 315 |  |  |  |  |  |  | elsif( $w =~ /^(.+)\s+($Mult_pat)$/ and defined $Words{$1} ) { | 
| 316 |  |  |  |  |  |  | #$log->trace("i should split $w"); | 
| 317 | 0 |  |  |  |  | 0 | push @words, $1, $2 } | 
| 318 |  |  |  |  |  |  | elsif( defined $Words{$w} ) { | 
| 319 | 118 |  |  |  |  | 1597 | push @words, $w } | 
| 320 |  |  |  |  |  |  | else { | 
| 321 |  |  |  |  |  |  | #$log->trace("i don't recognize $w"); | 
| 322 | 0 |  |  |  |  | 0 | unshift @words, 'ERR'; | 
| 323 | 0 |  |  |  |  | 0 | last; | 
| 324 |  |  |  |  |  |  | } | 
| 325 |  |  |  |  |  |  | } | 
| 326 |  |  |  |  |  |  |  | 
| 327 |  |  |  |  |  |  | #use Data::Dump; dd \@words; | 
| 328 | 64 |  |  |  |  | 293 | @words; | 
| 329 |  |  |  |  |  |  | } | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | 1; | 
| 332 |  |  |  |  |  |  | # ABSTRACT: Convert Indonesian verbage to number | 
| 333 |  |  |  |  |  |  |  | 
| 334 |  |  |  |  |  |  |  | 
| 335 |  |  |  |  |  |  | =pod | 
| 336 |  |  |  |  |  |  |  | 
| 337 |  |  |  |  |  |  | =head1 NAME | 
| 338 |  |  |  |  |  |  |  | 
| 339 |  |  |  |  |  |  | Lingua::ID::Words2Nums - Convert Indonesian verbage to number | 
| 340 |  |  |  |  |  |  |  | 
| 341 |  |  |  |  |  |  | =head1 VERSION | 
| 342 |  |  |  |  |  |  |  | 
| 343 |  |  |  |  |  |  | version 0.15 | 
| 344 |  |  |  |  |  |  |  | 
| 345 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 346 |  |  |  |  |  |  |  | 
| 347 |  |  |  |  |  |  | use Lingua::ID::Words2Nums qw(words2nums words2nums_simple); | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | print words2nums("seratus tiga koma dua");  # 103.2 | 
| 350 |  |  |  |  |  |  | print words2nums("minus 3 juta 100 ribu");  # 3100000 | 
| 351 |  |  |  |  |  |  | print words2nums("1,605 jt");               # 1605000 (abbreviations accepted) | 
| 352 |  |  |  |  |  |  | print words2nums("-1.3e4");                 # 13000 | 
| 353 |  |  |  |  |  |  |  | 
| 354 |  |  |  |  |  |  | print words2nums_simple("satu dua tiga");   # 123 | 
| 355 |  |  |  |  |  |  |  | 
| 356 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 357 |  |  |  |  |  |  |  | 
| 358 |  |  |  |  |  |  | This module provides two functions, B and B. They | 
| 359 |  |  |  |  |  |  | are the counterpart of L's B and | 
| 360 |  |  |  |  |  |  | B. | 
| 361 |  |  |  |  |  |  |  | 
| 362 |  |  |  |  |  |  | =head1 VARIABLES | 
| 363 |  |  |  |  |  |  |  | 
| 364 |  |  |  |  |  |  | None are exported by default, but they are exportable. | 
| 365 |  |  |  |  |  |  |  | 
| 366 |  |  |  |  |  |  | =head2 $Pat (REGEX) | 
| 367 |  |  |  |  |  |  |  | 
| 368 |  |  |  |  |  |  | A regex for quickly matching/extracting verbage from text; it looks for a string | 
| 369 |  |  |  |  |  |  | of words. It's not perfect (the extracted verbage might not be valid, e.g. "ribu | 
| 370 |  |  |  |  |  |  | tiga"), but it's simple and fast. | 
| 371 |  |  |  |  |  |  |  | 
| 372 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 373 |  |  |  |  |  |  |  | 
| 374 |  |  |  |  |  |  | L | 
| 375 |  |  |  |  |  |  |  | 
| 376 |  |  |  |  |  |  | L is used to parse numbers in the verbage. | 
| 377 |  |  |  |  |  |  |  | 
| 378 |  |  |  |  |  |  | =head1 AUTHOR | 
| 379 |  |  |  |  |  |  |  | 
| 380 |  |  |  |  |  |  | Steven Haryanto | 
| 381 |  |  |  |  |  |  |  | 
| 382 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 383 |  |  |  |  |  |  |  | 
| 384 |  |  |  |  |  |  | This software is copyright (c) 2012 by Steven Haryanto. | 
| 385 |  |  |  |  |  |  |  | 
| 386 |  |  |  |  |  |  | This is free software; you can redistribute it and/or modify it under | 
| 387 |  |  |  |  |  |  | the same terms as the Perl 5 programming language system itself. | 
| 388 |  |  |  |  |  |  |  | 
| 389 |  |  |  |  |  |  | =cut | 
| 390 |  |  |  |  |  |  |  | 
| 391 |  |  |  |  |  |  |  | 
| 392 |  |  |  |  |  |  | __END__ |