| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  |  | 
| 2 |  |  |  |  |  |  | package Lingua::EN::Numbers; | 
| 3 |  |  |  |  |  |  |  | 
| 4 |  |  |  |  |  |  | require Exporter; | 
| 5 |  |  |  |  |  |  | @ISA = qw(Exporter); | 
| 6 |  |  |  |  |  |  |  | 
| 7 | 4 |  |  | 4 |  | 34274 | use 5.006; | 
|  | 4 |  |  |  |  | 14 |  | 
|  | 4 |  |  |  |  | 157 |  | 
| 8 | 4 |  |  | 4 |  | 22 | use strict; | 
|  | 4 |  |  |  |  | 41 |  | 
|  | 4 |  |  |  |  | 155 |  | 
| 9 | 4 |  |  | 4 |  | 19 | use warnings; | 
|  | 4 |  |  |  |  | 10 |  | 
|  | 4 |  |  |  |  | 288 |  | 
| 10 | 4 | 50 |  | 4 |  | 164 | BEGIN { *DEBUG = sub () {0} unless defined &DEBUG } # setup a DEBUG constant | 
| 11 | 4 |  |  |  |  | 6828 | use vars qw( | 
| 12 |  |  |  |  |  |  | @EXPORT @EXPORT_OK $VERSION | 
| 13 |  |  |  |  |  |  | %D %Card2ord %Mult | 
| 14 | 4 |  |  | 4 |  | 26 | ); | 
|  | 4 |  |  |  |  | 6 |  | 
| 15 |  |  |  |  |  |  | $VERSION = '2.01'; | 
| 16 |  |  |  |  |  |  | @EXPORT    = (); | 
| 17 |  |  |  |  |  |  | @EXPORT_OK = qw( num2en num2en_ordinal ); | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | @D{0 .. 20, 30,40,50,60,70,80,90} = qw| | 
| 20 |  |  |  |  |  |  | zero | 
| 21 |  |  |  |  |  |  | one two three four five six seven eight nine ten | 
| 22 |  |  |  |  |  |  | eleven twelve thirteen fourteen fifteen | 
| 23 |  |  |  |  |  |  | sixteen seventeen eighteen nineteen | 
| 24 |  |  |  |  |  |  | twenty thirty forty fifty sixty seventy eighty ninety | 
| 25 |  |  |  |  |  |  | |; | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | @Card2ord{  qw| one   two    three five  eight  nine  twelve  |} | 
| 29 |  |  |  |  |  |  | =          qw| first second third fifth eighth ninth twelfth |; | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | { | 
| 33 |  |  |  |  |  |  | my $c = 0; | 
| 34 |  |  |  |  |  |  | for ( '', qw< | 
| 35 |  |  |  |  |  |  | thousand million billion trillion quadrillion quintillion sextillion | 
| 36 |  |  |  |  |  |  | septillion octillion nonillion | 
| 37 |  |  |  |  |  |  | > ) { | 
| 38 |  |  |  |  |  |  | $Mult{$c} = $_; | 
| 39 |  |  |  |  |  |  | $c++; | 
| 40 |  |  |  |  |  |  | } | 
| 41 |  |  |  |  |  |  | } | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | #========================================================================== | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | sub num2en_ordinal { | 
| 46 |  |  |  |  |  |  | #  Cardinals are [one two three...] | 
| 47 |  |  |  |  |  |  | #  Ordinals  are [first second third...] | 
| 48 |  |  |  |  |  |  |  | 
| 49 | 4 | 50 | 33 | 4 | 0 | 28 | return undef unless defined $_[0] and length $_[0]; | 
| 50 | 4 |  |  |  |  | 7 | my($x) = $_[0]; | 
| 51 |  |  |  |  |  |  |  | 
| 52 | 4 |  |  |  |  | 7 | $x = num2en($x); | 
| 53 | 4 | 50 |  |  |  | 10 | return $x unless $x; | 
| 54 | 4 | 50 |  |  |  | 26 | $x =~ s/(\w+)$//s   or return $x . "th"; | 
| 55 | 4 |  |  |  |  | 9 | my $last = $1; | 
| 56 |  |  |  |  |  |  |  | 
| 57 | 4 |  | 33 |  |  | 26 | $last = | 
| 58 |  |  |  |  |  |  | $Card2ord{$last} || ( $last =~ s/y$/ieth/ && $last ) || ( $last !~ /th$/ ? $last . "th" : $last ); | 
| 59 |  |  |  |  |  |  |  | 
| 60 | 4 |  |  |  |  | 16 | return "$x$last"; | 
| 61 |  |  |  |  |  |  | } | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | #========================================================================== | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | sub num2en { | 
| 66 | 80 |  |  | 80 | 0 | 1517 | my $x = $_[0]; | 
| 67 | 80 | 100 | 100 |  |  | 401 | return undef unless defined $x and length $x; | 
| 68 |  |  |  |  |  |  |  | 
| 69 | 78 | 50 |  |  |  | 160 | return 'not-a-number'      if $x eq 'NaN'; | 
| 70 | 78 | 50 |  |  |  | 164 | return 'positive infinity' if $x =~ m/^\+inf(?:inity)?$/si; | 
| 71 | 78 | 50 |  |  |  | 135 | return 'negative infinity' if $x =~ m/^\-inf(?:inity)?$/si; | 
| 72 | 78 | 50 |  |  |  | 150 | return          'infinity' if $x =~  m/^inf(?:inity)?$/si; | 
| 73 |  |  |  |  |  |  |  | 
| 74 | 78 | 100 |  |  |  | 311 | return $D{$x} if exists $D{$x};  # the most common cases | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | # Make sure it's not in scientific notation: | 
| 77 | 48 | 100 |  |  |  | 53 | {  my $e = _e2en($x);  return $e if defined $e; } | 
|  | 48 |  |  |  |  | 87 |  | 
|  | 48 |  |  |  |  | 149 |  | 
| 78 |  |  |  |  |  |  |  | 
| 79 | 45 |  |  |  |  | 43 | my $orig = $x; | 
| 80 |  |  |  |  |  |  |  | 
| 81 | 45 |  |  |  |  | 74 | $x =~ s/,//g; # nix any commas | 
| 82 |  |  |  |  |  |  |  | 
| 83 | 45 |  |  |  |  | 38 | my $sign; | 
| 84 | 45 | 100 |  |  |  | 142 | $sign = $1 if $x =~ s/^([-+])//s; | 
| 85 |  |  |  |  |  |  |  | 
| 86 | 45 |  |  |  |  | 46 | my($int, $fract); | 
| 87 | 45 | 100 |  |  |  | 191 | if(    $x =~ m<^\d+$>          ) { $int = $x } | 
|  | 31 | 100 |  |  |  | 40 |  | 
|  |  | 100 |  |  |  |  |  | 
| 88 | 7 |  |  |  |  | 17 | elsif( $x =~ m<^(\d+)\.(\d+)$> ) { $int = $1; $fract = $2 } | 
|  | 7 |  |  |  |  | 11 |  | 
| 89 | 3 |  |  |  |  | 7 | elsif( $x =~ m<^\.(\d+)$>      ) { $fract = $1 } | 
| 90 |  |  |  |  |  |  | else { | 
| 91 | 4 |  |  |  |  | 2 | DEBUG and print "Not a number: \"orig\"\n"; | 
| 92 | 4 |  |  |  |  | 23 | return undef; | 
| 93 |  |  |  |  |  |  | } | 
| 94 |  |  |  |  |  |  |  | 
| 95 | 41 |  |  |  |  | 35 | DEBUG and printf " Working on Sign[%s]  Int2en[%s]  Fract[%s]  < \"%s\"\n", | 
| 96 |  |  |  |  |  |  | map defined($_) ? $_ : "nil", $sign, $int, $fract, $orig; | 
| 97 |  |  |  |  |  |  |  | 
| 98 | 41 |  | 66 |  |  | 78 | return join ' ', grep defined($_) && length($_), | 
| 99 |  |  |  |  |  |  | _sign2en($sign), | 
| 100 |  |  |  |  |  |  | _int2en($int), | 
| 101 |  |  |  |  |  |  | _fract2en($fract), | 
| 102 |  |  |  |  |  |  | ; | 
| 103 |  |  |  |  |  |  | } | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | sub _sign2en { | 
| 108 | 41 | 100 | 66 | 41 |  | 159 | return undef unless defined $_[0] and length $_[0]; | 
| 109 | 11 | 100 |  |  |  | 35 | return 'negative' if $_[0] eq '-'; | 
| 110 | 4 | 50 |  |  |  | 16 | return 'positive' if $_[0] eq '+'; | 
| 111 | 0 |  |  |  |  | 0 | return "WHAT_IS_$_[0]"; | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | sub _fract2en {    # "1234" => "point one two three four" | 
| 115 | 41 | 100 | 66 | 41 |  | 451 | return undef unless defined $_[0] and length $_[0]; | 
| 116 | 10 |  |  |  |  | 12 | my $x = $_[0]; | 
| 117 | 10 |  |  |  |  | 188 | return join ' ', 'point', map $D{$_}, split '', $x; | 
| 118 |  |  |  |  |  |  | } | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | 
| 121 |  |  |  |  |  |  | # The real work: | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | sub _int2en { | 
| 124 | 70 | 50 | 66 | 70 |  | 514 | return undef unless defined $_[0] and length $_[0] | 
|  |  |  | 66 |  |  |  |  | 
| 125 |  |  |  |  |  |  | and $_[0] =~ m/^\d+$/s; | 
| 126 |  |  |  |  |  |  |  | 
| 127 | 67 |  |  |  |  | 82 | my($x) = $_[0]; | 
| 128 |  |  |  |  |  |  |  | 
| 129 | 67 | 100 |  |  |  | 190 | return $D{$x} if defined $D{$x};  # most common/irreg cases | 
| 130 |  |  |  |  |  |  |  | 
| 131 | 41 | 100 |  |  |  | 168 | if( $x =~ m/^(.)(.)$/ ) { | 
|  |  | 100 |  |  |  |  |  | 
| 132 | 21 |  |  |  |  | 147 | return  $D{$1 . '0'} . '-' . $D{$2}; | 
| 133 |  |  |  |  |  |  | # like    forty        -     two | 
| 134 |  |  |  |  |  |  | # note that neither bit can be zero at this point | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | } elsif( $x =~ m/^(.)(..)$/ ) { | 
| 137 | 11 |  |  |  |  | 42 | my($h, $rest) = ("$D{$1} hundred", $2); | 
| 138 | 11 | 100 |  |  |  | 29 | return $h if $rest eq '00'; | 
| 139 | 10 |  |  |  |  | 44 | return "$h and " . _int2en(0 + $rest); | 
| 140 |  |  |  |  |  |  | } else { | 
| 141 | 9 |  |  |  |  | 19 | return _bigint2en($x); | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  | } | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | sub _bigint2en { | 
| 148 | 9 | 50 | 33 | 9 |  | 88 | return undef unless defined $_[0] and length $_[0] | 
|  |  |  | 33 |  |  |  |  | 
| 149 |  |  |  |  |  |  | and $_[0] =~ m/^\d+$/s; | 
| 150 |  |  |  |  |  |  |  | 
| 151 | 9 |  |  |  |  | 13 | my($x) = $_[0]; | 
| 152 |  |  |  |  |  |  |  | 
| 153 | 9 |  |  |  |  | 434 | my @chunks;  # each:  [ string, exponent ] | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | { | 
| 156 | 9 |  |  |  |  | 50 | my $groupnum = 0; | 
|  | 9 |  |  |  |  | 11 |  | 
| 157 | 9 |  |  |  |  | 7 | my $num; | 
| 158 | 9 |  |  |  |  | 43 | while( $x =~ s<(\d{1,3})$><>s ) { # pull at most three digits from the end | 
| 159 | 23 |  |  |  |  | 45 | $num = $1 + 0; | 
| 160 | 23 | 100 |  |  |  | 53 | unshift @chunks, [ $num, $groupnum ] if $num; | 
| 161 | 23 |  |  |  |  | 59 | ++$groupnum; | 
| 162 |  |  |  |  |  |  | } | 
| 163 | 9 | 50 |  |  |  | 22 | return $D{'0'} unless @chunks;  # rare but possible | 
| 164 |  |  |  |  |  |  | } | 
| 165 |  |  |  |  |  |  |  | 
| 166 | 9 |  |  |  |  | 9 | my $and; | 
| 167 |  |  |  |  |  |  |  | 
| 168 | 9 | 100 | 100 |  |  | 47 | $and = 'and' if  $chunks[-1][1] == 0  and  $chunks[-1][0] < 100; | 
| 169 |  |  |  |  |  |  | # The special 'and' that shows up in like "one thousand and eight" | 
| 170 |  |  |  |  |  |  | # and "two billion and fifteen", but not "one thousand [*and] five hundred" | 
| 171 |  |  |  |  |  |  | # or "one million, [*and] nine" | 
| 172 |  |  |  |  |  |  |  | 
| 173 | 9 |  |  |  |  | 22 | _chunks2en( \@chunks ); | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | # bugfix: neilb: deals with case where we have at least millions, thousands, | 
| 176 |  |  |  |  |  |  | # but 00N for the last chunk. | 
| 177 |  |  |  |  |  |  | # $chunks[-2] .= " and" if $and and @chunks > 1; | 
| 178 | 9 | 100 | 66 |  |  | 35 | if ($and and @chunks > 1) { | 
| 179 | 6 |  |  |  |  | 15 | $chunks[-2] .= " and $chunks[-1]"; | 
| 180 | 6 |  |  |  |  | 7 | pop(@chunks); | 
| 181 |  |  |  |  |  |  | } | 
| 182 |  |  |  |  |  |  |  | 
| 183 | 9 | 100 | 100 |  |  | 32 | return "$chunks[0] $chunks[1]" if @chunks == 2 and !$and; | 
| 184 |  |  |  |  |  |  | # Avoid having a comma if just two units | 
| 185 | 8 |  |  |  |  | 30 | return join ", ", @chunks; | 
| 186 |  |  |  |  |  |  | } | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  |  | 
| 189 |  |  |  |  |  |  | sub _chunks2en { | 
| 190 | 9 |  |  | 9 |  | 13 | my $chunks = $_[0]; | 
| 191 | 9 | 50 |  |  |  | 21 | return unless @$chunks; | 
| 192 | 9 |  |  |  |  | 8 | my @out; | 
| 193 | 9 |  |  |  |  | 14 | foreach my $c (@$chunks) { | 
| 194 | 19 | 50 |  |  |  | 51 | push @out,   $c = _groupify( _int2en( $c->[0] ),  $c->[1] )  if $c->[0]; | 
| 195 |  |  |  |  |  |  | } | 
| 196 | 9 |  |  |  |  | 20 | @$chunks = @out; | 
| 197 | 9 |  |  |  |  | 12 | return; | 
| 198 |  |  |  |  |  |  | } | 
| 199 |  |  |  |  |  |  |  | 
| 200 |  |  |  |  |  |  | sub _groupify { | 
| 201 |  |  |  |  |  |  | # turn ("seventeen", 3) => "seventeen billion" | 
| 202 | 19 |  |  | 19 |  | 20 | my($basic, $multnum) = @_; | 
| 203 | 19 | 100 |  |  |  | 44 | return  $basic unless $multnum;  # the first group is unitless | 
| 204 | 12 |  |  |  |  | 13 | DEBUG > 2 and print "  Groupifying $basic x $multnum mults\n"; | 
| 205 | 12 | 50 |  |  |  | 65 | return "$basic $Mult{$multnum}"  if  $Mult{$multnum}; | 
| 206 |  |  |  |  |  |  | # Otherwise it must be huuuuuge, so fake it with scientific notation | 
| 207 | 0 |  |  |  |  | 0 | return "$basic " . "times ten to the " . num2en_ordinal($multnum * 3); | 
| 208 |  |  |  |  |  |  | } | 
| 209 |  |  |  |  |  |  |  | 
| 210 |  |  |  |  |  |  | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | 
| 211 |  |  |  |  |  |  | # | 
| 212 |  |  |  |  |  |  | # Because I can never remember this: | 
| 213 |  |  |  |  |  |  | # | 
| 214 |  |  |  |  |  |  | #  3.1E8 | 
| 215 |  |  |  |  |  |  | #  ^^^   is called the "mantissa" | 
| 216 |  |  |  |  |  |  | #      ^ is called the "exponent" | 
| 217 |  |  |  |  |  |  | #         (the implicit "10" is the "base" a/k/a "radix") | 
| 218 |  |  |  |  |  |  |  | 
| 219 |  |  |  |  |  |  | sub _e2en { | 
| 220 | 48 |  |  | 48 |  | 58 | my $x = $_[0]; | 
| 221 |  |  |  |  |  |  |  | 
| 222 | 48 |  |  |  |  | 70 | my($m, $e); | 
| 223 | 48 | 100 |  |  |  | 216 | if( $x =~ | 
| 224 |  |  |  |  |  |  | m< | 
| 225 |  |  |  |  |  |  | ^( | 
| 226 |  |  |  |  |  |  | [-+]?  # leading sign | 
| 227 |  |  |  |  |  |  | (?: | 
| 228 |  |  |  |  |  |  | [\d,]+  |  [\d,]*\.\d+  # number | 
| 229 |  |  |  |  |  |  | ) | 
| 230 |  |  |  |  |  |  | ) | 
| 231 |  |  |  |  |  |  | [eE] | 
| 232 |  |  |  |  |  |  | (-?\d+)   # mantissa, has to be an integer | 
| 233 |  |  |  |  |  |  | $ | 
| 234 |  |  |  |  |  |  | >x | 
| 235 |  |  |  |  |  |  | ) { | 
| 236 | 3 |  |  |  |  | 9 | ($m, $e) = ($1, $2); | 
| 237 | 3 |  |  |  |  | 4 | DEBUG and print "  Scientific notation: [$x] => $m E $e\n"; | 
| 238 | 3 |  |  |  |  | 6 | $e += 0; | 
| 239 | 3 |  |  |  |  | 9 | return num2en($m) . ' times ten to the ' . num2en_ordinal($e); | 
| 240 |  |  |  |  |  |  | } else { | 
| 241 | 45 |  |  |  |  | 32 | DEBUG and print "  Okay, $x isn't in exponential notation\n"; | 
| 242 | 45 |  |  |  |  | 109 | return undef; | 
| 243 |  |  |  |  |  |  | } | 
| 244 |  |  |  |  |  |  | } | 
| 245 |  |  |  |  |  |  |  | 
| 246 |  |  |  |  |  |  | #========================================================================== | 
| 247 |  |  |  |  |  |  | 1; | 
| 248 |  |  |  |  |  |  |  | 
| 249 |  |  |  |  |  |  | __END__ |