| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | ############################################################################# | 
| 2 |  |  |  |  |  |  | # Math/String/Charset.pm -- package which defines a charset for Math/String | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # Copyright (C) 1999-2008 by Tels. All rights reserved. | 
| 5 |  |  |  |  |  |  | ############################################################################# | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | package Math::String::Charset; | 
| 8 | 6 |  |  | 6 |  | 16913 | use base Exporter; | 
|  | 6 |  |  |  |  | 8 |  | 
|  | 6 |  |  |  |  | 656 |  | 
| 9 |  |  |  |  |  |  | @EXPORT_OK = qw/analyze/; | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | BEGIN | 
| 12 |  |  |  |  |  |  | { | 
| 13 | 6 |  |  | 6 |  | 127 | *analyze = \&study; | 
| 14 |  |  |  |  |  |  | } | 
| 15 |  |  |  |  |  |  |  | 
| 16 | 6 |  |  | 6 |  | 21 | use vars qw($VERSION); | 
|  | 6 |  |  |  |  | 12 |  | 
|  | 6 |  |  |  |  | 283 |  | 
| 17 |  |  |  |  |  |  | $VERSION = '1.29';	# Current version of this package | 
| 18 |  |  |  |  |  |  | require  5.008003;	# requires this Perl version or later | 
| 19 |  |  |  |  |  |  |  | 
| 20 | 6 |  |  | 6 |  | 19 | use strict; | 
|  | 6 |  |  |  |  | 11 |  | 
|  | 6 |  |  |  |  | 110 |  | 
| 21 | 6 |  |  | 6 |  | 4181 | use Math::BigInt; | 
|  | 6 |  |  |  |  | 79470 |  | 
|  | 6 |  |  |  |  | 29 |  | 
| 22 |  |  |  |  |  |  |  | 
| 23 | 6 |  |  | 6 |  | 72476 | use vars qw/$die_on_error $CALC/; | 
|  | 6 |  |  |  |  | 13 |  | 
|  | 6 |  |  |  |  | 323 |  | 
| 24 |  |  |  |  |  |  | $die_on_error = 1;		# set to 0 to not die | 
| 25 |  |  |  |  |  |  |  | 
| 26 | 6 |  |  | 6 |  | 1643 | use Math::String::Charset::Nested; | 
|  | 6 |  |  |  |  | 9 |  | 
|  | 6 |  |  |  |  | 368 |  | 
| 27 | 6 |  |  | 6 |  | 1677 | use Math::String::Charset::Grouped; | 
|  | 6 |  |  |  |  | 11 |  | 
|  | 6 |  |  |  |  | 488 |  | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | # following hash values are used: | 
| 30 |  |  |  |  |  |  | # _clen  : length of one character (all chars must have same len unless sep) | 
| 31 |  |  |  |  |  |  | # _start : contains array of all valid start characters | 
| 32 |  |  |  |  |  |  | # _ones  : list of one-character strings (cross of _end and _start) | 
| 33 |  |  |  |  |  |  | # _end   : contains hash (for easier lookup) of all valid end characters | 
| 34 |  |  |  |  |  |  | # _order : = 1 (1 = simple, 2 = nested) | 
| 35 |  |  |  |  |  |  | # _type  : = 0 (0 = simple, 1 = grouped, 2 = wordlist) | 
| 36 |  |  |  |  |  |  | # _error : error message or "" | 
| 37 |  |  |  |  |  |  | # _count : array of count of different strings with length x | 
| 38 |  |  |  |  |  |  | # _sum   : array of starting number for strings with length x | 
| 39 |  |  |  |  |  |  | #          _sum[x] = _sum[x-1]+_count[x-1] | 
| 40 |  |  |  |  |  |  | # _cnt   : number of elements in _count and _sum (as well as in _scnt & _ssum) | 
| 41 |  |  |  |  |  |  | # _cnum  : number of characters in _ones as BigInt (for speed) | 
| 42 |  |  |  |  |  |  | # _minlen: minimum string length (anything shorter is invalid), default -inf | 
| 43 |  |  |  |  |  |  | # _maxlen: maximum string length (anything longer is invalid), default undef | 
| 44 |  |  |  |  |  |  | # _scale : optional output/input scale | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | # simple ones: | 
| 47 |  |  |  |  |  |  | # _sep  : separator string (undef for none) | 
| 48 |  |  |  |  |  |  | # _map  : mapping character to number | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | # See the other Charset package files for the keys the higher-order charsets use. | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | my $ONE = Math::BigInt->bone(); | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | BEGIN | 
| 55 |  |  |  |  |  |  | { | 
| 56 |  |  |  |  |  |  | # this will fail if Math::BigInt is loaded with a different lib afterwards! | 
| 57 | 6 |  | 50 | 6 |  | 1518 | $CALC = Math::BigInt->config()->{lib} || 'Math::BigInt::Calc'; | 
| 58 |  |  |  |  |  |  | } | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | ############################################################################# | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | sub new | 
| 63 |  |  |  |  |  |  | { | 
| 64 | 250 |  |  | 250 | 1 | 3621 | my $class = shift; | 
| 65 | 250 |  | 50 |  |  | 854 | $class = ref($class) || $class || __PACKAGE__; | 
| 66 |  |  |  |  |  |  |  | 
| 67 | 250 |  |  |  |  | 293 | my $self = bless {}, $class; | 
| 68 |  |  |  |  |  |  |  | 
| 69 | 250 |  |  |  |  | 201 | my $value; | 
| 70 | 250 | 50 |  |  |  | 385 | if (!ref($_[0])) | 
| 71 |  |  |  |  |  |  | { | 
| 72 | 0 |  |  |  |  | 0 | $value = [ @_ ]; | 
| 73 |  |  |  |  |  |  | } | 
| 74 |  |  |  |  |  |  | else | 
| 75 |  |  |  |  |  |  | { | 
| 76 | 250 |  |  |  |  | 275 | $value = shift; | 
| 77 |  |  |  |  |  |  | } | 
| 78 | 250 | 100 |  |  |  | 845 | if (ref($value) !~ /^(ARRAY|HASH)$/) | 
| 79 |  |  |  |  |  |  | { | 
| 80 |  |  |  |  |  |  | # got an object, so make copy | 
| 81 | 2 |  |  |  |  | 20 | foreach my $k (keys %$value) | 
| 82 |  |  |  |  |  |  | { | 
| 83 | 30 | 100 |  |  |  | 44 | if (ref($value->{$k}) eq 'ARRAY') | 
|  |  | 100 |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | { | 
| 85 | 8 |  |  |  |  | 7 | $self->{$k} = [ @{$value->{$k}} ]; | 
|  | 8 |  |  |  |  | 22 |  | 
| 86 |  |  |  |  |  |  | } | 
| 87 |  |  |  |  |  |  | elsif (ref($value->{$k}) eq 'HASH') | 
| 88 |  |  |  |  |  |  | { | 
| 89 | 4 |  |  |  |  | 3 | foreach my $j (keys %{$value->{k}}) | 
|  | 4 |  |  |  |  | 9 |  | 
| 90 |  |  |  |  |  |  | { | 
| 91 | 0 |  |  |  |  | 0 | $self->{$k}->{$j} = $value->{$k}->{$j}; | 
| 92 |  |  |  |  |  |  | } | 
| 93 |  |  |  |  |  |  | } | 
| 94 |  |  |  |  |  |  | else | 
| 95 |  |  |  |  |  |  | { | 
| 96 | 18 |  |  |  |  | 21 | $self->{$k} = $value->{$k}; | 
| 97 |  |  |  |  |  |  | } | 
| 98 |  |  |  |  |  |  | } | 
| 99 | 2 |  |  |  |  | 5 | return $self; | 
| 100 |  |  |  |  |  |  | } | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | # convert ARRAY ref into HASH ref in the same go | 
| 103 | 248 |  |  |  |  | 412 | $value = $self->_check_params($value); | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | #  print "new $class type $self->{_type} order $self->{_order} $self->{_error}\n"; | 
| 106 |  |  |  |  |  |  |  | 
| 107 | 248 | 100 |  |  |  | 459 | if ($self->{_error} eq '') | 
| 108 |  |  |  |  |  |  | { | 
| 109 |  |  |  |  |  |  | # now route request for initialization to subclasses if we are in baseclass | 
| 110 | 229 | 100 |  |  |  | 330 | if ($class eq 'Math::String::Charset') | 
| 111 |  |  |  |  |  |  | { | 
| 112 |  |  |  |  |  |  | return Math::String::Charset::Grouped->new($value) | 
| 113 | 212 | 100 |  |  |  | 335 | if ($self->{_type} == 1); | 
| 114 | 204 | 50 | 33 |  |  | 347 | if (($self->{_type} == 2) && ($self->{_order} == 1)) | 
| 115 |  |  |  |  |  |  | { | 
| 116 | 0 |  |  |  |  | 0 | require Math::String::Charset::Wordlist; | 
| 117 | 0 |  |  |  |  | 0 | return Math::String::Charset::Wordlist->new($value); | 
| 118 |  |  |  |  |  |  | } | 
| 119 |  |  |  |  |  |  | return Math::String::Charset::Nested->new($value) | 
| 120 | 204 | 100 |  |  |  | 398 | if ($self->{_order} == 2); | 
| 121 |  |  |  |  |  |  | } | 
| 122 | 213 |  |  |  |  | 347 | $self->_strict_check($value); | 
| 123 | 213 |  |  |  |  | 369 | $self->_initialize($value); | 
| 124 |  |  |  |  |  |  | } | 
| 125 | 232 | 50 | 66 |  |  | 819 | die ($self->{_error}) if $die_on_error && $self->{_error} ne ''; | 
| 126 | 232 |  |  |  |  | 603 | $self; | 
| 127 |  |  |  |  |  |  | } | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | ############################################################################# | 
| 130 |  |  |  |  |  |  | # private, initialize self | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | sub _strict_check | 
| 133 |  |  |  |  |  |  | { | 
| 134 |  |  |  |  |  |  | # a per class check, to be overwritten by subclasses | 
| 135 | 196 |  |  | 196 |  | 166 | my $self = shift; | 
| 136 | 196 |  |  |  |  | 123 | my $value = shift; | 
| 137 |  |  |  |  |  |  |  | 
| 138 | 196 |  |  |  |  | 193 | my $class = ref($self); | 
| 139 |  |  |  |  |  |  | return $self->{_error} = "Wrong type '$self->{_type}' for $class" | 
| 140 | 196 | 50 |  |  |  | 288 | if $self->{_type} != 0; | 
| 141 |  |  |  |  |  |  | return $self->{_error} = "Wrong order'$self->{_order}' for $class" | 
| 142 | 196 | 50 |  |  |  | 262 | if $self->{_order} != 1; | 
| 143 | 196 |  |  |  |  | 443 | foreach my $key (keys %$value) | 
| 144 |  |  |  |  |  |  | { | 
| 145 | 218 | 50 |  |  |  | 720 | return $self->{_error} = "Illegal parameter '$key' for $class" | 
| 146 |  |  |  |  |  |  | if $key !~ /^(start|minlen|maxlen|sep|scale)$/; | 
| 147 |  |  |  |  |  |  | } | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | sub _check_params | 
| 151 |  |  |  |  |  |  | { | 
| 152 |  |  |  |  |  |  | # check params | 
| 153 | 248 |  |  | 248 |  | 207 | my $self = shift; | 
| 154 | 248 |  |  |  |  | 182 | my $value = shift; | 
| 155 |  |  |  |  |  |  |  | 
| 156 | 248 |  |  |  |  | 401 | $self->{_error} = ""; 			# no error | 
| 157 | 248 |  |  |  |  | 309 | $self->{_count} = [ ]; | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  | # convert array ref to hash | 
| 160 | 248 | 100 |  |  |  | 597 | $value = { start => $value } if (ref($value) eq 'ARRAY'); | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  | # from 1st take clen | 
| 163 | 248 |  |  |  |  | 581 | $self->{_clen} = $value->{charlen}; | 
| 164 | 248 |  |  |  |  | 261 | $self->{_sep} = $value->{sep}; | 
| 165 |  |  |  |  |  |  |  | 
| 166 |  |  |  |  |  |  | return $self->{_error} = "Can not have both 'sep' and 'charlen' in new()" | 
| 167 | 248 | 50 | 66 |  |  | 381 | if ((exists $value->{charlen}) && (exists $value->{sep})); | 
| 168 |  |  |  |  |  |  |  | 
| 169 | 247 |  |  |  |  | 276 | $self->{_order} = $value->{order}; | 
| 170 | 247 |  |  |  |  | 237 | $self->{_type} = $value->{type}; | 
| 171 |  |  |  |  |  |  |  | 
| 172 |  |  |  |  |  |  | $self->{_scale} = Math::BigInt->new($value->{scale}) | 
| 173 | 247 | 100 |  |  |  | 346 | if exists $value->{scale}; | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | return $self->{_error} = "Can not have both 'bi' and 'sets' in new()" | 
| 176 | 247 | 100 | 66 |  |  | 499 | if ((exists $value->{sets}) && (exists $value->{bi})); | 
| 177 |  |  |  |  |  |  |  | 
| 178 | 246 | 100 |  |  |  | 393 | if (!defined $self->{_type}) | 
| 179 |  |  |  |  |  |  | { | 
| 180 | 231 |  |  |  |  | 218 | $self->{_type} = 0; | 
| 181 | 231 | 100 |  |  |  | 322 | $self->{_type} = 1 if exists $value->{sets}; | 
| 182 |  |  |  |  |  |  | } | 
| 183 |  |  |  |  |  |  |  | 
| 184 | 246 | 100 |  |  |  | 381 | if (!defined $self->{_order}) | 
| 185 |  |  |  |  |  |  | { | 
| 186 | 241 |  |  |  |  | 190 | $self->{_order} = 1; | 
| 187 | 241 | 100 |  |  |  | 360 | $self->{_order} = 2 if exists $value->{bi}; | 
| 188 |  |  |  |  |  |  | } | 
| 189 |  |  |  |  |  |  |  | 
| 190 |  |  |  |  |  |  | return $self->{_error} = "Illegal type '$self->{_type}' used with 'bi'" | 
| 191 | 246 | 100 | 100 |  |  | 443 | if ((exists $value->{bi}) && ($self->{_type} != 0)); | 
| 192 |  |  |  |  |  |  |  | 
| 193 |  |  |  |  |  |  | return $self->{_error} = "Illegal type '$self->{_type}' used with 'sets'" | 
| 194 | 245 | 100 | 100 |  |  | 481 | if ((exists $value->{sets}) && ($self->{_type} == 0)); | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  | return $self->{_error} = "Illegal type '$self->{_type}'" | 
| 197 | 242 | 100 | 100 |  |  | 784 | if (($self->{_type} < 0) || ($self->{_type} > 2)); | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | return $self->{_error} = | 
| 200 |  |  |  |  |  |  | "Illegal combination of type '$self->{_type}' and order '$self->{_order}'" | 
| 201 | 236 | 100 | 100 |  |  | 478 | if (($self->{_type} == 1) && ($self->{_order} != 1)); | 
| 202 |  |  |  |  |  |  |  | 
| 203 | 235 | 100 |  |  |  | 355 | if ($self->{_order} == 1) | 
| 204 |  |  |  |  |  |  | { | 
| 205 |  |  |  |  |  |  | return $self->{_error} = | 
| 206 |  |  |  |  |  |  | "Illegal combination of order '$self->{_order}' and 'end'" | 
| 207 | 215 | 100 |  |  |  | 273 | if defined $value->{end}; | 
| 208 |  |  |  |  |  |  |  | 
| 209 |  |  |  |  |  |  | return $self->{_error} = | 
| 210 |  |  |  |  |  |  | "Illegal combination of order '$self->{_order}' and 'bi'" | 
| 211 | 214 | 50 |  |  |  | 320 | if defined $value->{bi}; | 
| 212 |  |  |  |  |  |  | } | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  | return $self->{_error} = "Illegal order '$self->{_order}'" | 
| 215 | 234 | 100 | 66 |  |  | 677 | if (($self->{_order} < 1) || ($self->{_order} > 2)); | 
| 216 |  |  |  |  |  |  |  | 
| 217 | 231 |  |  |  |  | 224 | $self->{_sep} = $value->{sep};			# sep char or undef | 
| 218 |  |  |  |  |  |  | return $self->{_error} = "Field 'sep' must not be empty" | 
| 219 | 231 | 100 | 100 |  |  | 449 | if (defined $self->{_sep} && $self->{_sep} eq ''); | 
| 220 |  |  |  |  |  |  |  | 
| 221 | 230 |  |  |  |  | 258 | $self->{_minlen} = $value->{minlen}; | 
| 222 | 230 |  |  |  |  | 406 | $self->{_maxlen} = $value->{maxlen}; | 
| 223 | 230 | 100 |  |  |  | 726 | $self->{_minlen} = Math::BigInt->binf('-') if !defined $self->{_minlen}; | 
| 224 | 230 | 100 |  |  |  | 5317 | $self->{_maxlen} = Math::BigInt->binf() if !defined $self->{_maxlen}; | 
| 225 |  |  |  |  |  |  | return $self->{_error} = 'Maxlen is smaller than minlen!' | 
| 226 | 230 | 100 |  |  |  | 3521 | if ($self->{_minlen} > $self->{_maxlen}); | 
| 227 |  |  |  |  |  |  |  | 
| 228 | 229 |  |  |  |  | 4031 | $value; | 
| 229 |  |  |  |  |  |  | } | 
| 230 |  |  |  |  |  |  |  | 
| 231 |  |  |  |  |  |  | sub _initialize | 
| 232 |  |  |  |  |  |  | { | 
| 233 |  |  |  |  |  |  | # init only for simple charsets, the rest is done in subclass | 
| 234 | 196 |  |  | 196 |  | 148 | my $self = shift; | 
| 235 | 196 |  |  |  |  | 130 | my $value = shift; | 
| 236 |  |  |  |  |  |  |  | 
| 237 | 196 |  |  |  |  | 246 | $self->{_start} = [ ]; | 
| 238 | 196 | 50 |  |  |  | 295 | $self->{_start} = [ @{$value->{start}} ] if defined $value->{start}; | 
|  | 196 |  |  |  |  | 712 |  | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | $self->{_clen} = CORE::length($self->{_start}->[0]) | 
| 241 | 196 | 100 |  |  |  | 412 | if !defined $self->{_sep}; | 
| 242 |  |  |  |  |  |  |  | 
| 243 | 196 |  |  |  |  | 213 | $self->{_ones} = $self->{_start}; | 
| 244 |  |  |  |  |  |  |  | 
| 245 |  |  |  |  |  |  | # XXX TODO: remove | 
| 246 |  |  |  |  |  |  | #  foreach (@{$self->{_start}}) { $self->{_end}->{$_} = 1; } | 
| 247 |  |  |  |  |  |  |  | 
| 248 |  |  |  |  |  |  | # some more tests for validity | 
| 249 | 196 | 100 |  |  |  | 314 | if (!defined $self->{_sep}) | 
| 250 |  |  |  |  |  |  | { | 
| 251 | 179 |  |  |  |  | 130 | foreach (@{$self->{_start}}) | 
|  | 179 |  |  |  |  | 279 |  | 
| 252 |  |  |  |  |  |  | { | 
| 253 |  |  |  |  |  |  | $self->{_error} = "Illegal char '$_', length not $self->{_clen}" | 
| 254 | 2794 | 100 |  |  |  | 3271 | if CORE::length($_) != $self->{_clen}; | 
| 255 |  |  |  |  |  |  | } | 
| 256 |  |  |  |  |  |  | } | 
| 257 |  |  |  |  |  |  | # initialize array of counts for len of 0..1 | 
| 258 | 196 |  |  |  |  | 194 | $self->{_cnt} = 1;				# cached amount of class-sizes | 
| 259 | 196 |  |  |  |  | 228 | $self->{_count}->[0] = 1;			# '' is one string | 
| 260 | 196 |  |  |  |  | 163 | $self->{_count}->[1] = Math::BigInt->new (scalar @{$self->{_ones}});	# 1 | 
|  | 196 |  |  |  |  | 473 |  | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | # init _sum array | 
| 263 | 196 |  |  |  |  | 4851 | $self->{_sum}->[0] = 0; | 
| 264 | 196 |  |  |  |  | 180 | $self->{_sum}->[1] = 1; | 
| 265 | 196 |  |  |  |  | 362 | $self->{_sum}->[2] = $self->{_count}->[1] + 1; | 
| 266 |  |  |  |  |  |  |  | 
| 267 |  |  |  |  |  |  | # from _ones, make mapping name => number | 
| 268 | 196 |  |  |  |  | 18258 | my $i = 1; | 
| 269 | 196 |  |  |  |  | 172 | foreach (@{$self->{_ones}}) | 
|  | 196 |  |  |  |  | 298 |  | 
| 270 |  |  |  |  |  |  | { | 
| 271 | 2858 |  |  |  |  | 3057 | $self->{_map}->{$_} = $i++; | 
| 272 |  |  |  |  |  |  | } | 
| 273 | 196 |  |  |  |  | 170 | $self->{_cnum} = Math::BigInt->new( scalar @{$self->{_ones}} ); | 
|  | 196 |  |  |  |  | 446 |  | 
| 274 |  |  |  |  |  |  |  | 
| 275 |  |  |  |  |  |  | # _end contains entries for all valid end characters, and since these are the | 
| 276 |  |  |  |  |  |  | # same than in _map, we can reuse _map to save memory and construction time | 
| 277 |  |  |  |  |  |  |  | 
| 278 | 196 |  |  |  |  | 4294 | $self->{_end} = $self->{_map}; | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  | return $self->{_error} = "Empty charset!" | 
| 281 | 196 | 50 | 33 |  |  | 387 | if ($self->{_cnum}->is_zero() && $self->{_minlen} > 0); | 
| 282 |  |  |  |  |  |  |  | 
| 283 | 196 |  |  |  |  | 1881 | $self; | 
| 284 |  |  |  |  |  |  | } | 
| 285 |  |  |  |  |  |  |  | 
| 286 |  |  |  |  |  |  | sub scale | 
| 287 |  |  |  |  |  |  | { | 
| 288 | 5 |  |  | 5 | 1 | 64 | my $self = shift; | 
| 289 |  |  |  |  |  |  |  | 
| 290 | 5 | 100 |  |  |  | 23 | $self->{_scale} = Math::BigInt->new($_[0]) if @_ > 0; | 
| 291 | 5 |  |  |  |  | 61 | $self->{_scale}; | 
| 292 |  |  |  |  |  |  | } | 
| 293 |  |  |  |  |  |  |  | 
| 294 |  |  |  |  |  |  | sub zero | 
| 295 |  |  |  |  |  |  | { | 
| 296 |  |  |  |  |  |  | # return the string representing zero. If no minlen is defined, this is | 
| 297 |  |  |  |  |  |  | # simple '', otherwise the first string of the first class after minlen which | 
| 298 |  |  |  |  |  |  | # is not empty | 
| 299 | 0 |  |  | 0 | 0 | 0 | my $self = shift; | 
| 300 |  |  |  |  |  |  |  | 
| 301 | 0 | 0 |  |  |  | 0 | return $self->{_zero} if defined $self->{_zero};	# already known? | 
| 302 |  |  |  |  |  |  |  | 
| 303 | 0 | 0 |  |  |  | 0 | return '' if $self->{_minlen} > 0; | 
| 304 | 0 |  |  |  |  | 0 | my $i = $self->{_minlen}; | 
| 305 | 0 |  |  |  |  | 0 | while ($self->class($i) == 0) { $i++; } | 
|  | 0 |  |  |  |  | 0 |  | 
| 306 | 0 |  |  |  |  | 0 | $self->{_minlen} = $i;				# adjust minlen | 
| 307 | 0 |  |  |  |  | 0 | $self->{_zero} = $self->first($i); | 
| 308 | 0 |  |  |  |  | 0 | $self->{_zero}; | 
| 309 |  |  |  |  |  |  | } | 
| 310 |  |  |  |  |  |  |  | 
| 311 |  |  |  |  |  |  | sub one | 
| 312 |  |  |  |  |  |  | { | 
| 313 |  |  |  |  |  |  | # return the string representing one. If no minlen is defined, this is | 
| 314 |  |  |  |  |  |  | # simple the first string with length(1), otherwise the first string of the | 
| 315 |  |  |  |  |  |  | # first class after minlen which is not empty | 
| 316 | 0 |  |  | 0 | 0 | 0 | my $self = shift; | 
| 317 |  |  |  |  |  |  |  | 
| 318 | 0 | 0 |  |  |  | 0 | return '' if $self->{_minlen} > 0; | 
| 319 | 0 |  |  |  |  | 0 | my $i = $self->{_minlen}; | 
| 320 | 0 |  |  |  |  | 0 | while ($self->class($i) == 0) { $i++; } | 
|  | 0 |  |  |  |  | 0 |  | 
| 321 | 0 |  |  |  |  | 0 | $self->{_minlen} = $i;				# adjust minlen | 
| 322 | 0 |  |  |  |  | 0 | $self->first($i)->next(); | 
| 323 |  |  |  |  |  |  | } | 
| 324 |  |  |  |  |  |  |  | 
| 325 |  |  |  |  |  |  | sub copy | 
| 326 |  |  |  |  |  |  | { | 
| 327 |  |  |  |  |  |  | # for speed reasons, do not make a copy of a charset, but share it instead | 
| 328 | 3 |  |  | 3 | 1 | 4 | my ($c,$x); | 
| 329 | 3 | 50 |  |  |  | 12 | if (@_ > 1) | 
| 330 |  |  |  |  |  |  | { | 
| 331 |  |  |  |  |  |  | # if two arguments, the first one is the class to "swallow" subclasses | 
| 332 | 0 |  |  |  |  | 0 | ($c,$x) = @_; | 
| 333 |  |  |  |  |  |  | } | 
| 334 |  |  |  |  |  |  | else | 
| 335 |  |  |  |  |  |  | { | 
| 336 | 3 |  |  |  |  | 5 | $x = shift; | 
| 337 | 3 |  |  |  |  | 6 | $c = ref($x); | 
| 338 |  |  |  |  |  |  | } | 
| 339 | 3 | 50 |  |  |  | 8 | return unless ref($x); # only for objects | 
| 340 |  |  |  |  |  |  |  | 
| 341 | 3 |  |  |  |  | 6 | my $self = {}; bless $self,$c; | 
|  | 3 |  |  |  |  | 7 |  | 
| 342 | 3 |  |  |  |  | 21 | foreach my $k (keys %$x) | 
| 343 |  |  |  |  |  |  | { | 
| 344 | 53 | 50 |  |  |  | 370 | if (ref($x->{$k}) eq 'SCALAR') | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 345 |  |  |  |  |  |  | { | 
| 346 | 0 |  |  |  |  | 0 | $self->{$k} = \${$x->{$k}}; | 
|  | 0 |  |  |  |  | 0 |  | 
| 347 |  |  |  |  |  |  | } | 
| 348 |  |  |  |  |  |  | elsif (ref($x->{$k}) eq 'ARRAY') | 
| 349 |  |  |  |  |  |  | { | 
| 350 | 13 |  |  |  |  | 6 | $self->{$k} = [ @{$x->{$k}} ]; | 
|  | 13 |  |  |  |  | 38 |  | 
| 351 |  |  |  |  |  |  | } | 
| 352 |  |  |  |  |  |  | elsif (ref($x->{$k}) eq 'HASH') | 
| 353 |  |  |  |  |  |  | { | 
| 354 |  |  |  |  |  |  | # only one level deep! | 
| 355 | 8 |  |  |  |  | 9 | foreach my $h (keys %{$x->{$k}}) | 
|  | 8 |  |  |  |  | 22 |  | 
| 356 |  |  |  |  |  |  | { | 
| 357 | 78 |  |  |  |  | 77 | $self->{$k}->{$h} = $x->{$k}->{$h}; | 
| 358 |  |  |  |  |  |  | } | 
| 359 |  |  |  |  |  |  | } | 
| 360 |  |  |  |  |  |  | elsif (ref($x->{$k})) | 
| 361 |  |  |  |  |  |  | { | 
| 362 | 14 |  |  |  |  | 15 | my $c = ref($x->{$k}); | 
| 363 | 14 |  |  |  |  | 35 | $self->{$k} = $c->new($x->{$k});  # no copy() due to deep rec | 
| 364 |  |  |  |  |  |  | } | 
| 365 |  |  |  |  |  |  | else | 
| 366 |  |  |  |  |  |  | { | 
| 367 |  |  |  |  |  |  | # simple scalar w/o reference | 
| 368 | 18 |  |  |  |  | 20 | $self->{$k} = $x->{$k}; | 
| 369 |  |  |  |  |  |  | } | 
| 370 |  |  |  |  |  |  | } | 
| 371 | 3 |  |  |  |  | 9 | $self; | 
| 372 |  |  |  |  |  |  | } | 
| 373 |  |  |  |  |  |  |  | 
| 374 |  |  |  |  |  |  | sub count | 
| 375 |  |  |  |  |  |  | { | 
| 376 |  |  |  |  |  |  | # Return count of all possible strings described by in charset as positive | 
| 377 |  |  |  |  |  |  | # bigint. Returns 'inf' if no maxlen is defined, because there should be no | 
| 378 |  |  |  |  |  |  | # upper bound on how many strings are possible. | 
| 379 |  |  |  |  |  |  | # if maxlen is defined, forces a calculation of all possible class() values | 
| 380 |  |  |  |  |  |  | # and may therefore be slow on the first call, also caches possible lot's of | 
| 381 |  |  |  |  |  |  | # values. | 
| 382 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 383 | 0 |  |  |  |  | 0 | my $count = Math::BigInt->bzero(); | 
| 384 |  |  |  |  |  |  |  | 
| 385 | 0 | 0 |  |  |  | 0 | return $count->binf() if $self->{_maxlen}->is_inf(); | 
| 386 |  |  |  |  |  |  |  | 
| 387 | 0 |  |  |  |  | 0 | for (my $i = 0; $i < $self->{_maxlen}; $i++) | 
| 388 |  |  |  |  |  |  | { | 
| 389 | 0 |  |  |  |  | 0 | $count += $self->class($i); | 
| 390 |  |  |  |  |  |  | } | 
| 391 | 0 |  |  |  |  | 0 | $count; | 
| 392 |  |  |  |  |  |  | } | 
| 393 |  |  |  |  |  |  |  | 
| 394 |  |  |  |  |  |  | sub dump | 
| 395 |  |  |  |  |  |  | { | 
| 396 | 3 |  |  | 3 | 0 | 4 | my $self = shift; | 
| 397 | 3 |  | 50 |  |  | 5 | my $indend = shift || ''; | 
| 398 |  |  |  |  |  |  |  | 
| 399 | 3 |  |  |  |  | 4 | my $txt = "type SIMPLE:\n"; | 
| 400 | 3 |  |  |  |  | 4 | $txt .= $indend . "start: " . join(' ',@{$self->{_start}}) . "\n"; | 
|  | 3 |  |  |  |  | 8 |  | 
| 401 | 3 |  |  |  |  | 3 | my $e = $self->{_end}; | 
| 402 | 3 |  |  |  |  | 10 | $txt .= $indend . "end  : " . join(' ', sort { $e->{$a} <=> $e->{$b} } keys %$e) . "\n"; | 
|  | 129 |  |  |  |  | 86 |  | 
| 403 | 3 |  |  |  |  | 5 | $txt .= $indend . "ones : " . join(' ',@{$self->{_ones}}) . "\n"; | 
|  | 3 |  |  |  |  | 7 |  | 
| 404 | 3 |  |  |  |  | 9 | $txt; | 
| 405 |  |  |  |  |  |  | } | 
| 406 |  |  |  |  |  |  |  | 
| 407 |  |  |  |  |  |  | sub error | 
| 408 |  |  |  |  |  |  | { | 
| 409 | 48 |  |  | 48 | 1 | 1650 | my $self = shift; | 
| 410 |  |  |  |  |  |  |  | 
| 411 | 48 |  |  |  |  | 164 | $self->{_error}; | 
| 412 |  |  |  |  |  |  | } | 
| 413 |  |  |  |  |  |  |  | 
| 414 |  |  |  |  |  |  | sub order | 
| 415 |  |  |  |  |  |  | { | 
| 416 |  |  |  |  |  |  | # return charset's order/class | 
| 417 | 3 |  |  | 3 | 1 | 6 | my $self = shift; | 
| 418 | 3 |  |  |  |  | 13 | $self->{_order}; | 
| 419 |  |  |  |  |  |  | } | 
| 420 |  |  |  |  |  |  |  | 
| 421 |  |  |  |  |  |  | sub type | 
| 422 |  |  |  |  |  |  | { | 
| 423 |  |  |  |  |  |  | # return charset's type | 
| 424 | 2 |  |  | 2 | 1 | 8 | my $self = shift; | 
| 425 | 2 |  |  |  |  | 8 | $self->{_type}; | 
| 426 |  |  |  |  |  |  | } | 
| 427 |  |  |  |  |  |  |  | 
| 428 |  |  |  |  |  |  | sub charlen | 
| 429 |  |  |  |  |  |  | { | 
| 430 |  |  |  |  |  |  | # return charset's length of one character | 
| 431 | 44 |  |  | 44 | 1 | 36 | my $self = shift; | 
| 432 | 44 |  |  |  |  | 89 | $self->{_clen}; | 
| 433 |  |  |  |  |  |  | } | 
| 434 |  |  |  |  |  |  |  | 
| 435 |  |  |  |  |  |  | sub length | 
| 436 |  |  |  |  |  |  | { | 
| 437 |  |  |  |  |  |  | # return number of characters in charset | 
| 438 | 177 |  |  | 177 | 1 | 322 | my $self = shift; | 
| 439 |  |  |  |  |  |  |  | 
| 440 | 177 |  |  |  |  | 121 | scalar @{$self->{_ones}}; | 
|  | 177 |  |  |  |  | 418 |  | 
| 441 |  |  |  |  |  |  | } | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | sub _calc | 
| 444 |  |  |  |  |  |  | { | 
| 445 |  |  |  |  |  |  | # given count of len 1..x, calculate count for y (y > x) and all between | 
| 446 |  |  |  |  |  |  | # x and y | 
| 447 | 56 |  |  | 56 |  | 52 | my $self = shift; | 
| 448 | 56 | 50 | 50 |  |  | 99 | my $max = shift || 1; $max = 1 if $max < 1; | 
|  | 56 |  |  |  |  | 88 |  | 
| 449 | 56 | 50 |  |  |  | 88 | return if $max <= $self->{_cnt}; | 
| 450 |  |  |  |  |  |  |  | 
| 451 | 56 |  |  |  |  | 46 | my $i = $self->{_cnt}; 		# last defined element | 
| 452 | 56 |  |  |  |  | 59 | my $last = $self->{_count}->[$i]; | 
| 453 | 56 |  |  |  |  | 57 | my $size = Math::BigInt->new ( scalar @{$self->{_ones}} ); | 
|  | 56 |  |  |  |  | 128 |  | 
| 454 | 56 |  |  |  |  | 1232 | while ($i <= $max) | 
| 455 |  |  |  |  |  |  | { | 
| 456 | 113 |  |  |  |  | 216 | $last = $last * $size; | 
| 457 | 113 |  |  |  |  | 4929 | $self->{_count}->[$i+1] = $last; | 
| 458 | 113 |  |  |  |  | 203 | $self->{_sum}->[$i+1] = $self->{_sum}->[$i] + $self->{_count}->[$i]; | 
| 459 | 113 |  |  |  |  | 6823 | $i++; | 
| 460 |  |  |  |  |  |  | } | 
| 461 | 56 |  |  |  |  | 125 | $self->{_cnt} = $i-1;		# store new cache size | 
| 462 |  |  |  |  |  |  | } | 
| 463 |  |  |  |  |  |  |  | 
| 464 |  |  |  |  |  |  | sub class | 
| 465 |  |  |  |  |  |  | { | 
| 466 |  |  |  |  |  |  | # return number of all combinations with a certain length | 
| 467 | 38 |  |  | 38 | 1 | 392 | my $self = shift; | 
| 468 | 38 | 50 |  |  |  | 40 | my $len = shift; $len = 0 if !defined $len; | 
|  | 38 |  |  |  |  | 85 |  | 
| 469 | 38 |  |  |  |  | 72 | $len = abs(int($len)); | 
| 470 |  |  |  |  |  |  |  | 
| 471 | 38 | 50 | 33 |  |  | 124 | return 0 if $len < $self->{_minlen} || $len > $self->{_maxlen}; | 
| 472 |  |  |  |  |  |  |  | 
| 473 |  |  |  |  |  |  | # print "$len $self->{_minlen}\n"; | 
| 474 | 38 | 50 |  |  |  | 3960 | $len -= $self->{_minlen} if $self->{_minlen} > 0;	# correct | 
| 475 |  |  |  |  |  |  | # not known yet, so calculate and cache | 
| 476 | 38 | 100 |  |  |  | 3903 | $self->_calc($len) if $self->{_cnt} < $len; | 
| 477 | 38 |  |  |  |  | 142 | $self->{_count}->[$len]; | 
| 478 |  |  |  |  |  |  | } | 
| 479 |  |  |  |  |  |  |  | 
| 480 |  |  |  |  |  |  | sub lowest | 
| 481 |  |  |  |  |  |  | { | 
| 482 |  |  |  |  |  |  | # return number of first string with $length characters | 
| 483 |  |  |  |  |  |  | # equivalent to $charset->first($length)->num2str(); | 
| 484 | 4 |  |  | 4 | 1 | 6 | my $self = shift; | 
| 485 | 4 |  | 50 |  |  | 10 | my $len = abs(int(shift || 1)); | 
| 486 |  |  |  |  |  |  |  | 
| 487 |  |  |  |  |  |  | # not known yet, so calculate and cache | 
| 488 | 4 | 50 |  |  |  | 8 | $self->_calc($len) if $self->{_cnt} < $len; | 
| 489 | 4 |  |  |  |  | 13 | $self->{_sum}->[$len]; | 
| 490 |  |  |  |  |  |  | } | 
| 491 |  |  |  |  |  |  |  | 
| 492 |  |  |  |  |  |  | sub highest | 
| 493 |  |  |  |  |  |  | { | 
| 494 |  |  |  |  |  |  | # return number of first string with $length characters | 
| 495 |  |  |  |  |  |  | # equivalent to $charset->first($length)->num2str(); | 
| 496 | 4 |  |  | 4 | 1 | 826 | my $self = shift; | 
| 497 | 4 |  | 50 |  |  | 9 | my $len = abs(int(shift || 1)); | 
| 498 |  |  |  |  |  |  |  | 
| 499 | 4 |  |  |  |  | 3 | $len++; | 
| 500 |  |  |  |  |  |  | # not known yet, so calculate and cache | 
| 501 | 4 | 100 |  |  |  | 9 | $self->_calc($len) if $self->{_cnt} < $len; | 
| 502 | 4 |  |  |  |  | 11 | $self->{_sum}->[$len]-1; | 
| 503 |  |  |  |  |  |  | } | 
| 504 |  |  |  |  |  |  |  | 
| 505 |  |  |  |  |  |  | sub norm | 
| 506 |  |  |  |  |  |  | { | 
| 507 |  |  |  |  |  |  | # normalize a string by removing separator char at front/end | 
| 508 | 117 |  |  | 117 | 1 | 255 | my $self = shift; | 
| 509 | 117 |  |  |  |  | 81 | my $str = shift; | 
| 510 |  |  |  |  |  |  |  | 
| 511 | 117 | 100 |  |  |  | 369 | return $str if !defined $self->{_sep}; | 
| 512 |  |  |  |  |  |  |  | 
| 513 | 11 |  |  |  |  | 63 | $str =~ s/$self->{_sep}\z//;		# remove at end | 
| 514 | 11 |  |  |  |  | 36 | $str =~ s/^$self->{_sep}//;		# remove at front | 
| 515 | 11 |  |  |  |  | 30 | $str; | 
| 516 |  |  |  |  |  |  | } | 
| 517 |  |  |  |  |  |  |  | 
| 518 |  |  |  |  |  |  | sub is_valid | 
| 519 |  |  |  |  |  |  | { | 
| 520 |  |  |  |  |  |  | # check wether a string conforms to the given charset set | 
| 521 | 138 |  |  | 138 | 1 | 110 | my $self = shift; | 
| 522 | 138 |  |  |  |  | 112 | my $str = shift; | 
| 523 |  |  |  |  |  |  |  | 
| 524 |  |  |  |  |  |  | # print "$str\n"; | 
| 525 | 138 | 100 |  |  |  | 207 | return 0 if !defined $str; | 
| 526 | 137 | 100 |  |  |  | 209 | if ($str eq '') | 
| 527 |  |  |  |  |  |  | { | 
| 528 | 12 | 100 |  |  |  | 35 | return $self->{_minlen} <= 0 ? 1 : 0; | 
| 529 |  |  |  |  |  |  | } | 
| 530 |  |  |  |  |  |  |  | 
| 531 | 125 |  |  |  |  | 214 | my $int = Math::BigInt->bzero(); | 
| 532 | 125 |  |  |  |  | 1397 | my @chars; | 
| 533 | 125 | 100 |  |  |  | 204 | if (defined $self->{_sep}) | 
| 534 |  |  |  |  |  |  | { | 
| 535 | 8 |  |  |  |  | 41 | @chars = split /$self->{_sep}/,$str; | 
| 536 | 8 | 100 |  |  |  | 18 | shift @chars if $chars[0] eq ''; | 
| 537 | 8 | 50 |  |  |  | 18 | pop @chars if $chars[-1] eq $self->{_sep}; | 
| 538 |  |  |  |  |  |  | } | 
| 539 |  |  |  |  |  |  | else | 
| 540 |  |  |  |  |  |  | { | 
| 541 | 117 |  |  |  |  | 87 | my $i = 0; my $len = CORE::length($str); my $clen = $self->{_clen}; | 
|  | 117 |  |  |  |  | 96 |  | 
|  | 117 |  |  |  |  | 104 |  | 
| 542 | 117 |  |  |  |  | 178 | while ($i < $len) | 
| 543 |  |  |  |  |  |  | { | 
| 544 | 235 |  |  |  |  | 274 | push @chars, substr($str,$i,$clen); $i += $clen; | 
|  | 235 |  |  |  |  | 338 |  | 
| 545 |  |  |  |  |  |  | } | 
| 546 |  |  |  |  |  |  | } | 
| 547 |  |  |  |  |  |  | # length okay? | 
| 548 | 125 | 100 | 100 |  |  | 286 | return 0 if scalar @chars < $self->{_minlen} || scalar @chars > $self->{_maxlen}; | 
| 549 |  |  |  |  |  |  |  | 
| 550 |  |  |  |  |  |  | # valid start char? | 
| 551 | 123 |  |  |  |  | 8841 | my $map = $self->{_map}; | 
| 552 |  |  |  |  |  |  | # XXX TODO: remove | 
| 553 |  |  |  |  |  |  | # return 0 unless exists $map->{$chars[0]}; | 
| 554 | 123 |  |  |  |  | 169 | foreach (@chars) | 
| 555 |  |  |  |  |  |  | { | 
| 556 | 237 | 100 |  |  |  | 459 | return 0 unless exists $map->{$_}; | 
| 557 |  |  |  |  |  |  | } | 
| 558 | 112 |  |  |  |  | 299 | 1; | 
| 559 |  |  |  |  |  |  | } | 
| 560 |  |  |  |  |  |  |  | 
| 561 |  |  |  |  |  |  | sub minlen | 
| 562 |  |  |  |  |  |  | { | 
| 563 | 5 |  |  | 5 | 1 | 9 | my $self = shift; | 
| 564 |  |  |  |  |  |  |  | 
| 565 | 5 |  |  |  |  | 12 | $self->{_minlen}; | 
| 566 |  |  |  |  |  |  | } | 
| 567 |  |  |  |  |  |  |  | 
| 568 |  |  |  |  |  |  | sub maxlen | 
| 569 |  |  |  |  |  |  | { | 
| 570 | 2 |  |  | 2 | 1 | 2 | my $self = shift; | 
| 571 |  |  |  |  |  |  |  | 
| 572 | 2 |  |  |  |  | 7 | $self->{_maxlen}; | 
| 573 |  |  |  |  |  |  | } | 
| 574 |  |  |  |  |  |  |  | 
| 575 |  |  |  |  |  |  | sub start | 
| 576 |  |  |  |  |  |  | { | 
| 577 |  |  |  |  |  |  | # this returns all the starting characters in a list, or in case of a simple | 
| 578 |  |  |  |  |  |  | # charset, simple the charset | 
| 579 |  |  |  |  |  |  | # in scalar context, returns length of starting set, for simple charsets this | 
| 580 |  |  |  |  |  |  | # equals the length | 
| 581 | 11 |  |  | 11 | 1 | 20 | my $self = shift; | 
| 582 |  |  |  |  |  |  |  | 
| 583 | 11 | 50 |  |  |  | 24 | wantarray ? @{$self->{_start}} : scalar @{$self->{_start}}; | 
|  | 11 |  |  |  |  | 87 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 584 |  |  |  |  |  |  | } | 
| 585 |  |  |  |  |  |  |  | 
| 586 |  |  |  |  |  |  | sub end | 
| 587 |  |  |  |  |  |  | { | 
| 588 |  |  |  |  |  |  | # this returns all the end characters in a list, or in case of a simple | 
| 589 |  |  |  |  |  |  | # charset, simple the charset | 
| 590 |  |  |  |  |  |  | # in scalar context, returns length of end set, for simple charsets this | 
| 591 |  |  |  |  |  |  | # equals the length | 
| 592 | 3 |  |  | 3 | 1 | 4 | my $self = shift; | 
| 593 |  |  |  |  |  |  |  | 
| 594 | 3 | 50 |  |  |  | 9 | wantarray ? sort keys %{$self->{_end}} : scalar keys %{$self->{_end}}; | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 3 |  |  |  |  | 12 |  | 
| 595 |  |  |  |  |  |  | } | 
| 596 |  |  |  |  |  |  |  | 
| 597 |  |  |  |  |  |  | sub ones | 
| 598 |  |  |  |  |  |  | { | 
| 599 |  |  |  |  |  |  | # this returns all the one-char strings (in scalar context the count of them) | 
| 600 | 2 |  |  | 2 | 1 | 4 | my $self = shift; | 
| 601 |  |  |  |  |  |  |  | 
| 602 | 2 | 50 |  |  |  | 5 | wantarray ? @{$self->{_ones}} : scalar @{$self->{_ones}}; | 
|  | 2 |  |  |  |  | 14 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 603 |  |  |  |  |  |  | } | 
| 604 |  |  |  |  |  |  |  | 
| 605 |  |  |  |  |  |  | sub num2str | 
| 606 |  |  |  |  |  |  | { | 
| 607 |  |  |  |  |  |  | # convert Math::BigInt/Math::String to string | 
| 608 |  |  |  |  |  |  | # in list context return string and stringlen | 
| 609 | 106 |  |  | 106 | 0 | 427 | my ($self,$x) = @_; | 
| 610 |  |  |  |  |  |  |  | 
| 611 | 106 | 100 |  |  |  | 200 | $x = Math::BigInt->new($x) unless ref $x; | 
| 612 |  |  |  |  |  |  |  | 
| 613 | 106 | 50 |  |  |  | 696 | return undef if $x->{sign} !~ /^[+-]$/; | 
| 614 |  |  |  |  |  |  |  | 
| 615 | 106 |  |  |  |  | 99 | my $j = $self->{_cnum};			# nr of chars | 
| 616 |  |  |  |  |  |  |  | 
| 617 | 106 | 100 |  |  |  | 228 | if ($self->{_minlen} <= $ONE) | 
| 618 |  |  |  |  |  |  | { | 
| 619 | 105 | 100 |  |  |  | 1923 | if ($x->is_zero()) | 
| 620 |  |  |  |  |  |  | { | 
| 621 | 7 | 50 |  |  |  | 84 | return wantarray ? ('',0) : ''; | 
| 622 |  |  |  |  |  |  | } | 
| 623 |  |  |  |  |  |  |  | 
| 624 |  |  |  |  |  |  | # single character? | 
| 625 | 98 | 100 | 66 |  |  | 912 | if ($x <= $j && $self->{_minlen} <= $ONE) | 
| 626 |  |  |  |  |  |  | { | 
| 627 | 39 |  |  |  |  | 1288 | my $c = $self->{_ones}->[$x->numify() - 1]; | 
| 628 | 39 | 50 |  |  |  | 722 | return wantarray ? ($c,1) : $c; 		# string len == 1 | 
| 629 |  |  |  |  |  |  | } | 
| 630 |  |  |  |  |  |  | } | 
| 631 |  |  |  |  |  |  |  | 
| 632 | 60 |  |  |  |  | 1246 | my $digits = $self->chars($x); my $d = $digits; | 
|  | 60 |  |  |  |  | 48 |  | 
| 633 |  |  |  |  |  |  |  | 
| 634 |  |  |  |  |  |  | # now treat the string as it were a zero-padded string of length $digits | 
| 635 |  |  |  |  |  |  |  | 
| 636 |  |  |  |  |  |  | # length is not right (too short or too long) | 
| 637 | 60 | 100 | 66 |  |  | 115 | if ($digits < $self->{_minlen} || $digits > $self->{_maxlen}) | 
| 638 |  |  |  |  |  |  | { | 
| 639 | 1 | 50 |  |  |  | 4 | return wantarray ? (undef,0) : undef; | 
| 640 |  |  |  |  |  |  | } | 
| 641 |  |  |  |  |  |  |  | 
| 642 | 59 |  |  |  |  | 4364 | my $es="";                    		# result | 
| 643 |  |  |  |  |  |  | # copy input, make positive number, correct to $digits and cater for 0 | 
| 644 | 59 |  |  |  |  | 105 | my $y = Math::BigInt->new($x); $y->babs(); | 
|  | 59 |  |  |  |  | 1175 |  | 
| 645 |  |  |  |  |  |  | #print "fac $j y: $y new: "; | 
| 646 | 59 |  |  |  |  | 348 | $y -= $self->{_sum}->[$digits]; | 
| 647 |  |  |  |  |  |  |  | 
| 648 |  |  |  |  |  |  | #print "y: $y\n"; | 
| 649 | 59 | 100 |  |  |  | 3456 | my $mod = 0; my $s = $self->{_sep}; $s = '' if !defined $s; | 
|  | 59 |  |  |  |  | 66 |  | 
|  | 59 |  |  |  |  | 125 |  | 
| 650 | 59 |  |  |  |  | 94 | while (!$y->is_zero()) | 
| 651 |  |  |  |  |  |  | { | 
| 652 |  |  |  |  |  |  | #print "bfore:  y/fac: $y / $j \n"; | 
| 653 | 84 |  |  |  |  | 721 | ($y,$mod) = $y->bdiv($j); | 
| 654 | 84 |  |  |  |  | 6787 | $es = $self->{_ones}->[$mod] . $s . $es; | 
| 655 |  |  |  |  |  |  | #print "after:  div: $y rem: $mod \n"; | 
| 656 | 84 |  |  |  |  | 1219 | $digits --;				# one digit done | 
| 657 |  |  |  |  |  |  | } | 
| 658 |  |  |  |  |  |  | # padd the remaining digits with the zero-symbol | 
| 659 | 59 | 100 |  |  |  | 529 | $es = ($self->{_ones}->[0].$s) x $digits . $es if ($digits > 0); | 
| 660 | 59 |  |  |  |  | 293 | $es =~ s/$s\z//;				# strip last sep 'char' | 
| 661 | 59 | 50 |  |  |  | 278 | wantarray ? ($es,$d) : $es; | 
| 662 |  |  |  |  |  |  | } | 
| 663 |  |  |  |  |  |  |  | 
| 664 |  |  |  |  |  |  | sub str2num | 
| 665 |  |  |  |  |  |  | { | 
| 666 |  |  |  |  |  |  | # convert Math::String to Math::BigInt (does not take scale into account) | 
| 667 | 124 |  |  | 124 | 0 | 373 | my ($self,$str) = @_; | 
| 668 |  |  |  |  |  |  |  | 
| 669 | 124 |  |  |  |  | 233 | my $int = Math::BigInt->bzero(); | 
| 670 | 124 |  |  |  |  | 1316 | my $i = CORE::length($str); | 
| 671 |  |  |  |  |  |  |  | 
| 672 | 124 | 100 |  |  |  | 185 | return $int if $i == 0; | 
| 673 | 114 |  |  |  |  | 97 | my $map = $self->{_map}; | 
| 674 | 114 |  | 100 |  |  | 185 | my $clen = $self->{_clen} || 0;	# len of one char | 
| 675 |  |  |  |  |  |  |  | 
| 676 | 114 | 100 |  |  |  | 157 | if ($i == $clen) | 
| 677 |  |  |  |  |  |  | { | 
| 678 | 43 |  |  |  |  | 101 | $int->{value} = $CALC->_new( $map->{$str} ); | 
| 679 | 43 |  |  |  |  | 229 | return $int; | 
| 680 |  |  |  |  |  |  | } | 
| 681 |  |  |  |  |  |  |  | 
| 682 | 71 |  |  |  |  | 64 | my $cnum = $self->{_cnum}; my $j; | 
|  | 71 |  |  |  |  | 56 |  | 
| 683 | 71 | 50 |  |  |  | 87 | if (ref($cnum)) | 
| 684 |  |  |  |  |  |  | { | 
| 685 | 71 |  |  |  |  | 85 | $j = $cnum->{value}; | 
| 686 |  |  |  |  |  |  | } | 
| 687 |  |  |  |  |  |  | else | 
| 688 |  |  |  |  |  |  | { | 
| 689 | 0 |  |  |  |  | 0 | $j = $CALC->_new($cnum); | 
| 690 |  |  |  |  |  |  | } | 
| 691 |  |  |  |  |  |  |  | 
| 692 | 71 | 100 |  |  |  | 95 | if (!defined $self->{_sep}) | 
| 693 |  |  |  |  |  |  | { | 
| 694 |  |  |  |  |  |  | # first step (mul = 1): | 
| 695 |  |  |  |  |  |  | # 0 + 1 * str => str | 
| 696 | 60 |  |  |  |  | 57 | $i -= $clen; | 
| 697 | 60 |  |  |  |  | 158 | $int->{value} = $CALC->_new( $map->{substr($str,$i,$clen)}); | 
| 698 | 60 |  |  |  |  | 342 | my $mul = $CALC->_copy($j); | 
| 699 |  |  |  |  |  |  |  | 
| 700 |  |  |  |  |  |  | # other steps: | 
| 701 | 60 |  |  |  |  | 190 | $i -= $clen; | 
| 702 |  |  |  |  |  |  | # while ($i >= 0) | 
| 703 | 60 |  |  |  |  | 108 | while ($i > 0) | 
| 704 |  |  |  |  |  |  | { | 
| 705 | 21 |  |  |  |  | 45 | $CALC->_add( $int->{value}, $CALC->_mul( $CALC->_copy($mul), $CALC->_new( $map->{substr($str,$i,$clen)} ))); | 
| 706 | 21 |  |  |  |  | 453 | $CALC->_mul( $mul , $j); | 
| 707 | 21 |  |  |  |  | 97 | $i -= $clen; | 
| 708 |  |  |  |  |  |  | #      print "s2n $int j: $j i: $i m: $mul c: ", | 
| 709 |  |  |  |  |  |  | #      substr($str,$i+$clen,$clen),"\n"; | 
| 710 |  |  |  |  |  |  | } | 
| 711 |  |  |  |  |  |  | # last step (no need to update $i or preserving/updating $mul) | 
| 712 | 60 |  |  |  |  | 133 | $CALC->_add( $int->{value}, $CALC->_mul( $CALC->_copy($mul), $CALC->_new( $map->{substr($str,$i,$clen)} ))); | 
| 713 |  |  |  |  |  |  | } | 
| 714 |  |  |  |  |  |  | else | 
| 715 |  |  |  |  |  |  | { | 
| 716 |  |  |  |  |  |  | # with sep char | 
| 717 | 11 |  |  |  |  | 31 | my $mul = $CALC->_one(); | 
| 718 | 11 |  |  |  |  | 73 | my @chars = split /$self->{_sep}/, $str; | 
| 719 | 11 | 100 |  |  |  | 19 | shift @chars if $chars[0] eq '';			# strip leading sep | 
| 720 | 11 |  |  |  |  | 20 | foreach (reverse @chars) | 
| 721 |  |  |  |  |  |  | { | 
| 722 | 28 |  |  |  |  | 106 | $CALC->_add( $int->{value}, $CALC->_mul( $CALC->_copy($mul), $CALC->_new( $map->{$_} ))); | 
| 723 | 28 |  |  |  |  | 450 | $CALC->_mul( $mul , $j); | 
| 724 |  |  |  |  |  |  | } | 
| 725 |  |  |  |  |  |  | } | 
| 726 |  |  |  |  |  |  |  | 
| 727 | 71 |  |  |  |  | 1222 | $int; | 
| 728 |  |  |  |  |  |  | } | 
| 729 |  |  |  |  |  |  |  | 
| 730 |  |  |  |  |  |  | sub char | 
| 731 |  |  |  |  |  |  | { | 
| 732 |  |  |  |  |  |  | # return nth char from charset (see also map()) | 
| 733 | 42 |  |  | 42 | 1 | 36 | my $self = shift; | 
| 734 | 42 |  | 100 |  |  | 90 | my $char = shift || 0; | 
| 735 |  |  |  |  |  |  |  | 
| 736 | 42 | 50 |  |  |  | 161 | return undef if $char > scalar @{$self->{_ones}}; 	# dont create spurios elems | 
|  | 42 |  |  |  |  | 74 |  | 
| 737 | 42 |  |  |  |  | 558 | $self->{_ones}->[$char]; | 
| 738 |  |  |  |  |  |  | } | 
| 739 |  |  |  |  |  |  |  | 
| 740 |  |  |  |  |  |  | sub map | 
| 741 |  |  |  |  |  |  | { | 
| 742 |  |  |  |  |  |  | # map char to number (see also char()) | 
| 743 | 125 |  |  | 125 | 1 | 1262 | my ($self,$char) = @_; | 
| 744 |  |  |  |  |  |  |  | 
| 745 | 125 | 100 | 33 |  |  | 378 | return undef unless defined $char && exists $self->{_map}->{$char}; | 
| 746 | 117 |  |  |  |  | 302 | $self->{_map}->{$char} - 1; | 
| 747 |  |  |  |  |  |  | } | 
| 748 |  |  |  |  |  |  |  | 
| 749 |  |  |  |  |  |  | sub chars | 
| 750 |  |  |  |  |  |  | { | 
| 751 |  |  |  |  |  |  | # return number of characters in output string | 
| 752 | 82 |  |  | 82 | 1 | 418 | my ($self,$x) = @_; | 
| 753 |  |  |  |  |  |  |  | 
| 754 | 82 | 50 | 66 |  |  | 139 | return 0 if $x->is_zero() || $x->is_nan() || $x->is_inf(); | 
|  |  |  | 66 |  |  |  |  | 
| 755 | 81 |  |  |  |  | 1572 | my $i = 1; | 
| 756 | 81 |  |  |  |  | 150 | my $y = $x->as_number()->babs(); | 
| 757 |  |  |  |  |  |  |  | 
| 758 | 81 |  |  |  |  | 1402 | while ($y >= $self->{_sum}->[$i]) | 
| 759 |  |  |  |  |  |  | { | 
| 760 | 202 | 100 |  |  |  | 6702 | $self->_calc($i) if $self->{_cnt} < $i; | 
| 761 | 202 |  |  |  |  | 359 | $i++; | 
| 762 |  |  |  |  |  |  | } | 
| 763 | 81 |  |  |  |  | 1587 | --$i;			# correct for last ++ | 
| 764 |  |  |  |  |  |  | } | 
| 765 |  |  |  |  |  |  |  | 
| 766 |  |  |  |  |  |  | sub first | 
| 767 |  |  |  |  |  |  | { | 
| 768 | 18 |  |  | 18 | 1 | 44 | my $self = shift; | 
| 769 | 18 |  | 100 |  |  | 48 | my $count = abs(shift || 0); | 
| 770 |  |  |  |  |  |  |  | 
| 771 | 18 | 50 |  |  |  | 50 | return if $count < $self->{_minlen}; | 
| 772 | 18 | 50 | 33 |  |  | 941 | return if defined $self->{_maxlen} && $count > $self->{_maxlen}; | 
| 773 | 18 | 100 |  |  |  | 759 | return '' if $count == 0; | 
| 774 |  |  |  |  |  |  |  | 
| 775 | 14 |  | 100 |  |  | 45 | my $t = ($self->{_sep}||'') . $self->{_ones}->[0]; | 
| 776 | 14 |  |  |  |  | 37 | my $es = $t x $count; | 
| 777 | 14 | 100 |  |  |  | 53 | $es =~ s/^$self->{_sep}// if defined $self->{_sep}; | 
| 778 | 14 |  |  |  |  | 35 | $es; | 
| 779 |  |  |  |  |  |  | } | 
| 780 |  |  |  |  |  |  |  | 
| 781 |  |  |  |  |  |  | sub last | 
| 782 |  |  |  |  |  |  | { | 
| 783 | 18 |  |  | 18 | 1 | 26 | my $self = shift; | 
| 784 | 18 |  | 100 |  |  | 41 | my $count = abs(shift || 0); | 
| 785 |  |  |  |  |  |  |  | 
| 786 | 18 | 50 |  |  |  | 50 | return if $count < $self->{_minlen}; | 
| 787 | 18 | 50 | 33 |  |  | 897 | return if defined $self->{_maxlen} && $count > $self->{_maxlen}; | 
| 788 | 18 | 100 |  |  |  | 722 | return '' if $count == 0; | 
| 789 |  |  |  |  |  |  |  | 
| 790 | 14 |  | 100 |  |  | 44 | my $t = ($self->{_sep}||'') . $self->{_ones}->[-1]; | 
| 791 | 14 |  |  |  |  | 19 | my $es = $t x $count; | 
| 792 | 14 | 100 |  |  |  | 45 | $es =~ s/^$self->{_sep}// if defined $self->{_sep}; | 
| 793 | 14 |  |  |  |  | 35 | $es; | 
| 794 |  |  |  |  |  |  | } | 
| 795 |  |  |  |  |  |  |  | 
| 796 |  |  |  |  |  |  | sub next | 
| 797 |  |  |  |  |  |  | { | 
| 798 |  |  |  |  |  |  | # take one string, and return the next string following it (without | 
| 799 |  |  |  |  |  |  | # converting the string to it's number form first for speed reasons) | 
| 800 | 88 |  |  | 88 | 1 | 95 | my ($self,$str) = @_; | 
| 801 |  |  |  |  |  |  |  | 
| 802 | 88 | 100 |  |  |  | 175 | if ($str->{_cache} eq '')				# 0 => 1 | 
| 803 |  |  |  |  |  |  | { | 
| 804 | 3 |  |  |  |  | 7 | my $min = $self->{_minlen}; | 
| 805 |  |  |  |  |  |  | #$str->{_cache} = $self->first($min) and return if $min->is_positive(); | 
| 806 | 3 |  |  |  |  | 5 | $str->{_cache} = $self->{_ones}->[0]; | 
| 807 | 3 |  |  |  |  | 7 | return; | 
| 808 |  |  |  |  |  |  | } | 
| 809 |  |  |  |  |  |  |  | 
| 810 |  |  |  |  |  |  | # only the rightmost digit is adjusted. If this overflows, we simple | 
| 811 |  |  |  |  |  |  | # invalidate the cache. The time saved by updating the cache would be to | 
| 812 |  |  |  |  |  |  | # small to be of use, especially since updating the cache takes more time | 
| 813 |  |  |  |  |  |  | # then. Also, if the cached isn't used later, we would have spent the | 
| 814 |  |  |  |  |  |  | # update-time in vain. | 
| 815 |  |  |  |  |  |  |  | 
| 816 |  |  |  |  |  |  | # simple charsets | 
| 817 | 85 |  |  |  |  | 71 | my $char; | 
| 818 | 85 |  |  |  |  | 80 | my $clen = $self->{_clen}; | 
| 819 | 85 |  |  |  |  | 94 | my $s = \$str->{_cache};		# ref to cache contents | 
| 820 | 85 |  |  |  |  | 93 | my $sep = $self->{_sep}; | 
| 821 | 85 | 100 |  |  |  | 119 | if (defined $sep) | 
| 822 |  |  |  |  |  |  | { | 
| 823 |  |  |  |  |  |  | # split last part | 
| 824 | 28 |  |  |  |  | 167 | $$s =~ /.*$sep(.*?)\z/; $char = $1; | 
|  | 28 |  |  |  |  | 41 |  | 
| 825 | 28 | 100 |  |  |  | 88 | $char = $$s unless $$s =~ /$sep/; | 
| 826 |  |  |  |  |  |  | } | 
| 827 |  |  |  |  |  |  | else | 
| 828 |  |  |  |  |  |  | { | 
| 829 |  |  |  |  |  |  | # extract last char | 
| 830 | 57 |  |  |  |  | 91 | $char = substr($$s,-$clen,$clen); | 
| 831 |  |  |  |  |  |  | } | 
| 832 | 85 |  |  |  |  | 120 | my $old = $char;	# for seperator replacement | 
| 833 | 85 |  |  |  |  | 107 | $char = $self->{_map}->{$char};	# map is +1 by default | 
| 834 | 85 | 100 |  |  |  | 160 | $char -=2 if $str->{sign} eq '-'; | 
| 835 | 85 | 100 | 66 |  |  | 170 | if ((!defined $char) || ($char >= @{$self->{_start}}) || ($char < 0)) | 
|  | 85 |  | 100 |  |  | 392 |  | 
| 836 |  |  |  |  |  |  | { | 
| 837 |  |  |  |  |  |  | # overflow | 
| 838 | 10 |  |  |  |  | 11 | $str->{_cache} = undef;		# invalidate cache | 
| 839 | 10 |  |  |  |  | 21 | return; | 
| 840 |  |  |  |  |  |  | } | 
| 841 | 75 |  |  |  |  | 100 | $char = $self->{_start}->[$char];	# num 2 char | 
| 842 | 75 | 100 |  |  |  | 106 | if (defined $sep) | 
| 843 |  |  |  |  |  |  | { | 
| 844 |  |  |  |  |  |  | # split last part and replace | 
| 845 | 22 |  |  |  |  | 302 | $$s =~ s/$old\z/$char/; | 
| 846 |  |  |  |  |  |  | } | 
| 847 |  |  |  |  |  |  | else | 
| 848 |  |  |  |  |  |  | { | 
| 849 |  |  |  |  |  |  | # replace the last char | 
| 850 | 53 |  |  |  |  | 200 | substr($$s,-$clen,$clen) = $char; | 
| 851 |  |  |  |  |  |  | } | 
| 852 |  |  |  |  |  |  | } | 
| 853 |  |  |  |  |  |  |  | 
| 854 |  |  |  |  |  |  | sub prev | 
| 855 |  |  |  |  |  |  | { | 
| 856 | 75 |  |  | 75 | 1 | 73 | my ($self,$str) = @_; | 
| 857 |  |  |  |  |  |  |  | 
| 858 | 75 | 100 |  |  |  | 106 | if ($str->{_cache} eq '')				# 0 => -1 | 
| 859 |  |  |  |  |  |  | { | 
| 860 | 2 |  |  |  |  | 3 | my $min = $self->{_minlen}; | 
| 861 | 2 | 50 | 0 |  |  | 9 | $str->{_cache} = undef, and return if $min->is_positive(); # >= 0; | 
| 862 | 2 |  |  |  |  | 17 | $str->{_cache} = $self->{_ones}->[0]; | 
| 863 | 2 |  |  |  |  | 2 | return; | 
| 864 |  |  |  |  |  |  | } | 
| 865 |  |  |  |  |  |  |  | 
| 866 |  |  |  |  |  |  | # simple charsets | 
| 867 | 73 |  |  |  |  | 50 | my $char; | 
| 868 | 73 |  |  |  |  | 56 | my $clen = $self->{_clen}; | 
| 869 | 73 |  |  |  |  | 61 | my $s = \$str->{_cache}; | 
| 870 | 73 |  |  |  |  | 66 | my $sep = $self->{_sep}; | 
| 871 | 73 | 100 |  |  |  | 80 | if (defined $sep) | 
| 872 |  |  |  |  |  |  | { | 
| 873 |  |  |  |  |  |  | # split last part and replace | 
| 874 | 46 |  |  |  |  | 217 | $$s =~ /.*$sep(.*?)\z/; $char = $1; | 
|  | 46 |  |  |  |  | 81 |  | 
| 875 | 46 | 100 |  |  |  | 119 | $char = $$s unless $$s =~ /$sep/; | 
| 876 |  |  |  |  |  |  | } | 
| 877 |  |  |  |  |  |  | else | 
| 878 |  |  |  |  |  |  | { | 
| 879 |  |  |  |  |  |  | # extract last char and replace | 
| 880 | 27 |  |  |  |  | 30 | $char = substr($$s,-$clen,$clen); | 
| 881 |  |  |  |  |  |  | } | 
| 882 |  |  |  |  |  |  |  | 
| 883 | 73 |  |  |  |  | 54 | my $old = $char;	# for seperator replacement | 
| 884 | 73 | 50 | 33 |  |  | 175 | if ((defined $char) && (exists $self->{_map}->{$char})) | 
| 885 |  |  |  |  |  |  | { | 
| 886 | 73 |  |  |  |  | 69 | $char = $self->{_map}->{$char} - 1; | 
| 887 | 73 | 100 |  |  |  | 91 | $char += $str->{sign} eq '-' ? 1 : -1; | 
| 888 | 73 | 100 | 100 |  |  | 109 | if ($char < 0 || $char >= @{$self->{_start}}) | 
|  | 65 |  |  |  |  | 183 |  | 
| 889 |  |  |  |  |  |  | { | 
| 890 | 15 |  |  |  |  | 21 | $str->{_cache} = undef;			# invalidate cache | 
| 891 | 15 |  |  |  |  | 21 | return; 					# under or overflow | 
| 892 |  |  |  |  |  |  | } | 
| 893 |  |  |  |  |  |  | } | 
| 894 |  |  |  |  |  |  | else | 
| 895 |  |  |  |  |  |  | { | 
| 896 | 0 |  |  |  |  | 0 | $str->{_cache} = undef;			# invalidate cache | 
| 897 | 0 |  |  |  |  | 0 | return; 					# underflow if char not defined | 
| 898 |  |  |  |  |  |  | } | 
| 899 | 58 |  |  |  |  | 60 | $char = $self->{_start}->[$char];		# map num back to char | 
| 900 | 58 | 100 |  |  |  | 73 | if (defined $self->{_sep}) | 
| 901 |  |  |  |  |  |  | { | 
| 902 | 33 |  |  |  |  | 399 | $$s =~ s/$old\z/$char/; 			# split last part and replace | 
| 903 |  |  |  |  |  |  | } | 
| 904 |  |  |  |  |  |  | else | 
| 905 |  |  |  |  |  |  | { | 
| 906 | 25 |  |  |  |  | 59 | substr($$s,-$clen,$clen) = $char;		# simple replace | 
| 907 |  |  |  |  |  |  | } | 
| 908 |  |  |  |  |  |  | } | 
| 909 |  |  |  |  |  |  |  | 
| 910 |  |  |  |  |  |  | sub merge | 
| 911 |  |  |  |  |  |  | { | 
| 912 |  |  |  |  |  |  | # merge yourself with another simple charset | 
| 913 | 0 |  |  | 0 | 0 | 0 | my $self = shift; | 
| 914 | 0 |  |  |  |  | 0 | my $other = shift; | 
| 915 |  |  |  |  |  |  |  | 
| 916 |  |  |  |  |  |  | # TODO | 
| 917 | 0 |  |  |  |  | 0 | $self; | 
| 918 |  |  |  |  |  |  | } | 
| 919 |  |  |  |  |  |  |  | 
| 920 |  |  |  |  |  |  | ############################################################################### | 
| 921 |  |  |  |  |  |  |  | 
| 922 |  |  |  |  |  |  | sub study | 
| 923 |  |  |  |  |  |  | { | 
| 924 |  |  |  |  |  |  | # study a list of words and return a hash describing them | 
| 925 |  |  |  |  |  |  | # study ( { order => $depth, words = \@words, sep => ''}, charlen => 1, | 
| 926 |  |  |  |  |  |  | # hist => 1, ); | 
| 927 |  |  |  |  |  |  |  | 
| 928 | 4 |  |  | 4 | 1 | 1202 | my $arg; | 
| 929 | 4 | 50 |  |  |  | 10 | if (ref $_[0] eq 'HASH') | 
| 930 |  |  |  |  |  |  | { | 
| 931 | 0 |  |  |  |  | 0 | $arg = shift; | 
| 932 |  |  |  |  |  |  | } | 
| 933 |  |  |  |  |  |  | else | 
| 934 |  |  |  |  |  |  | { | 
| 935 | 4 |  |  |  |  | 11 | $arg = { @_ }; | 
| 936 |  |  |  |  |  |  | } | 
| 937 |  |  |  |  |  |  |  | 
| 938 | 4 |  | 0 |  |  | 15 | my $depth = abs($arg->{order} || $arg->{depth} || 1); | 
| 939 | 4 |  | 50 |  |  | 8 | my $words = $arg->{words} || []; | 
| 940 | 4 |  |  |  |  | 6 | my $sep = $arg->{sep}; | 
| 941 | 4 |  | 50 |  |  | 17 | my $charlen = $arg->{charlen} || 1; | 
| 942 | 4 |  | 50 |  |  | 10 | my $cut = $arg->{cut} || 0; | 
| 943 | 4 |  | 50 |  |  | 11 | my $hist = $arg->{hist} || 0; | 
| 944 |  |  |  |  |  |  |  | 
| 945 | 4 | 50 | 33 |  |  | 15 | die "depth of study must be between 1..2" if ($depth < 1 || $depth > 2); | 
| 946 | 4 |  |  |  |  | 4 | my $starts = {};              # word starts | 
| 947 | 4 |  |  |  |  | 4 | my $ends = {};                # word ends | 
| 948 | 4 |  |  |  |  | 3 | my $chars = {};               # for depth 1 | 
| 949 | 4 |  |  |  |  | 4 | my $bi = { }; my ($l,@chars,$x,$y,$word,$i); | 
|  | 4 |  |  |  |  | 3 |  | 
| 950 | 4 |  |  |  |  | 7 | foreach $word (@$words) | 
| 951 |  |  |  |  |  |  | { | 
| 952 |  |  |  |  |  |  | # count starting chars and ending chars | 
| 953 | 18 |  |  |  |  | 21 | $starts->{substr($word,0,$charlen)} ++; | 
| 954 | 18 |  |  |  |  | 17 | $ends->{substr($word,-$charlen,$charlen)} ++; | 
| 955 | 18 |  |  |  |  | 18 | $l = CORE::length($word) / $charlen; | 
| 956 | 18 | 50 |  |  |  | 29 | next if (int($l) != $l);			# illegal word | 
| 957 | 18 | 50 |  |  |  | 22 | if ($depth == 1) | 
| 958 |  |  |  |  |  |  | { | 
| 959 | 0 |  |  |  |  | 0 | for (my $i = 0; $i < $l; $i += $charlen) | 
| 960 |  |  |  |  |  |  | { | 
| 961 | 0 |  |  |  |  | 0 | $chars->{substr($word,$i,$charlen)} ++; | 
| 962 |  |  |  |  |  |  | } | 
| 963 | 0 |  |  |  |  | 0 | next;					# next word | 
| 964 |  |  |  |  |  |  | } | 
| 965 | 18 |  |  |  |  | 18 | $l = $l - $depth + 1; | 
| 966 | 18 |  |  |  |  | 24 | for ($i = 0; $i < $l; $i += $charlen) | 
| 967 |  |  |  |  |  |  | { | 
| 968 | 90 |  |  |  |  | 71 | $x = substr($word,$i,$charlen); $y = substr($word,$i+$charlen,$charlen); | 
|  | 90 |  |  |  |  | 69 |  | 
| 969 | 90 |  |  |  |  | 149 | $bi->{$x}->{$y} ++; | 
| 970 |  |  |  |  |  |  | } | 
| 971 |  |  |  |  |  |  | } | 
| 972 | 4 |  |  |  |  | 6 | my $args = {}; | 
| 973 | 4 |  |  |  |  | 4 | my (@end,@start); | 
| 974 | 4 |  |  |  |  | 16 | foreach (sort { $starts->{$b} <=> $starts->{$a} } keys %$starts) | 
|  | 11 |  |  |  |  | 13 |  | 
| 975 |  |  |  |  |  |  | { | 
| 976 | 11 |  |  |  |  | 15 | push @start, $_; | 
| 977 |  |  |  |  |  |  | } | 
| 978 | 4 |  |  |  |  | 7 | $args->{start} = \@start; | 
| 979 | 4 |  |  |  |  | 8 | foreach (sort { $ends->{$b} <=> $ends->{$a} } keys %$ends) | 
|  | 10 |  |  |  |  | 9 |  | 
| 980 |  |  |  |  |  |  | { | 
| 981 | 11 |  |  |  |  | 10 | push @end, $_; | 
| 982 |  |  |  |  |  |  | } | 
| 983 | 4 |  |  |  |  | 6 | $args->{end} = \@end; | 
| 984 | 4 | 50 |  |  |  | 7 | if ($depth > 1) | 
| 985 |  |  |  |  |  |  | { | 
| 986 | 4 |  |  |  |  | 4 | my @sorted; | 
| 987 | 4 |  |  |  |  | 9 | foreach my $c (keys %$bi) | 
| 988 |  |  |  |  |  |  | { | 
| 989 | 35 |  |  |  |  | 23 | my $bc = $bi->{$c}; | 
| 990 |  |  |  |  |  |  | $args->{bi}->{$c} = [ | 
| 991 | 35 | 50 |  |  |  | 59 | sort { $bc->{$b} <=> $bc->{$a} or $a cmp $b } keys %$bc | 
|  | 22 |  |  |  |  | 45 |  | 
| 992 |  |  |  |  |  |  | ]; | 
| 993 |  |  |  |  |  |  | } | 
| 994 |  |  |  |  |  |  | } | 
| 995 |  |  |  |  |  |  | else | 
| 996 |  |  |  |  |  |  | { | 
| 997 | 0 |  |  |  |  | 0 | my @chars = (); | 
| 998 | 0 |  |  |  |  | 0 | foreach (sort { $chars->{$b} <=> $chars->{$a} } keys %$chars) | 
|  | 0 |  |  |  |  | 0 |  | 
| 999 |  |  |  |  |  |  | { | 
| 1000 | 0 |  |  |  |  | 0 | push @chars, $_; | 
| 1001 |  |  |  |  |  |  | } | 
| 1002 | 0 |  |  |  |  | 0 | $args->{chars} = \@chars; | 
| 1003 |  |  |  |  |  |  | } | 
| 1004 | 4 | 50 |  |  |  | 10 | if ($hist != 0) | 
| 1005 |  |  |  |  |  |  | { | 
| 1006 |  |  |  |  |  |  | # return histogram | 
| 1007 | 0 | 0 |  |  |  | 0 | if ($depth > 1) | 
| 1008 |  |  |  |  |  |  | { | 
| 1009 | 0 |  |  |  |  | 0 | $args->{hist} = $bi; | 
| 1010 |  |  |  |  |  |  | } | 
| 1011 |  |  |  |  |  |  | else | 
| 1012 |  |  |  |  |  |  | { | 
| 1013 | 0 |  |  |  |  | 0 | $args->{hist} = $chars; | 
| 1014 |  |  |  |  |  |  | } | 
| 1015 |  |  |  |  |  |  | } | 
| 1016 | 4 |  |  |  |  | 31 | $args; | 
| 1017 |  |  |  |  |  |  | } | 
| 1018 |  |  |  |  |  |  |  | 
| 1019 |  |  |  |  |  |  | __END__ |