File Coverage

lib/Math/String/Charset/Grouped.pm
Criterion Covered Total %
statement 232 258 89.9
branch 90 140 64.2
condition 26 40 65.0
subroutine 14 19 73.6
pod 10 13 76.9
total 372 470 79.1


line stmt bran cond sub pod time code
1             #############################################################################
2             # Math/String/Charset/Grouped.pm -- a charset of charsets for Math/String
3             #
4             # Copyright (C) 1999-2003 by Tels. All rights reserved.
5             #############################################################################
6              
7             package Math::String::Charset::Grouped;
8              
9             require 5.005; # requires this Perl version or later
10 7     7   46 use strict;
  7         12  
  7         245  
11              
12 7     7   39 use base 'Math::String::Charset';
  7         20  
  7         825  
13              
14             our $VERSION;
15             $VERSION = '1.30'; # Current version of this package
16              
17 7     7   47 use Math::BigInt;
  7         11  
  7         39  
18              
19             our $die_on_error;
20             $die_on_error = 1; # set to 0 to not die
21              
22             # following hash values are used:
23             # _clen : length of one character (all chars must have same len unless sep)
24             # _ones : list of one-character strings (cross of _end and _start)
25             # _start : contains array of all valid start characters
26             # _end : contains hash (for easier lookup) of all valid end characters
27             # _order : = 1
28             # _type : = 1
29             # _error : error message or ""
30             # _count : array of count of different strings with length x
31             # _sum : array of starting number for strings with length x
32             # _sum[x] = _sum[x-1]+_count[x-1]
33             # _cnt : number of elements in _count and _sum (as well as in _scnt & _ssum)
34             # _cnum : number of characters in _ones as BigInt (for speed)
35             # _minlen: minimum string length (anything shorter is invalid), default -inf
36             # _maxlen: maximum string length (anything longer is invalid), default +inf
37             # _scale : optional input/output scale
38              
39             # simple ones:
40             # _sep : separator string (undef for none)
41             # _map : mapping character to number
42              
43             # higher orders:
44             # _bi : hash with refs to array of bi-grams
45             # _bmap : hash with refs to hash of bi-grams
46             # _scnt : array of hashes, count of strings starting with this character
47              
48             # grouped:
49             # _spat : array with pattern of charsets 8for each stirnglen one ARRAY ref)
50              
51             #############################################################################
52             # private, initialize self
53              
54             sub _strict_check
55             {
56             # a per class check, to be overwritten by subclasses
57 8     8   15 my $self = shift;
58 8         12 my $value = shift;
59              
60 8         12 my $class = ref($self);
61             return $self->{_error} = "Wrong type '$self->{_type}' for $class"
62 8 50       20 if $self->{_type} != 1;
63             return $self->{_error} = "Wrong order'$self->{_order}' for $class"
64 8 50       19 if $self->{_order} != 1;
65 8         40 foreach my $key (keys %$value)
66             {
67 12 50       60 return $self->{_error} = "Illegal parameter '$key' for $class"
68             if $key !~ /^(start|minlen|maxlen|sep|sets|end|charlen|scale)$/;
69             }
70             }
71              
72             sub _initialize
73             {
74             # set yourself to the value represented by the given string
75 8     8   14 my $self = shift;
76 8         12 my $value = shift;
77              
78 8         13 $self->{_clen} = $value->{charlen};
79 8         12 $self->{_sep} = $value->{sep}; # separator char
80              
81             return $self->{_error} = "Need HASH ref as 'sets'"
82 8 50       20 if (ref($value->{sets}) ne 'HASH');
83              
84             # make copy at same time
85 8         12 foreach my $key (keys %{$value->{sets}})
  8         29  
86             {
87 23         58 $self->{_sets}->{$key} = $value->{sets}->{$key};
88             }
89              
90             # start/end are sets 1 and -1, respectively, and overwrite 'sets'
91 8 100       21 $self->{_sets}->{1} = $value->{start} if exists $value->{start};
92 8 50       15 $self->{_sets}->{-1} = $value->{end} if exists $value->{end};
93 8 50       20 $self->{_sets}->{0} = $value->{chars} if exists $value->{chars};
94             # default set
95 8 50       19 $self->{_sets}->{0} = ['a'..'z'] if !defined $self->{_sets}->{0};
96              
97 8         28 my $sets = $self->{_sets}; # shortcut
98 8         25 foreach my $set (keys %$sets)
99             {
100             return $self->{_error} =
101             "Entries in 'sets' must be ref to Math::String::Charset or ARRAY"
102             if ((ref($sets->{$set}) ne 'ARRAY') &&
103 24 50 66     77 (ref($sets->{$set}) ne 'Math::String::Charset'));
104              
105             # so for each set, make a Math::String::Charset
106             $sets->{$set} = Math::String::Charset->new($sets->{$set})
107 24 100       89 if ref($sets->{$set}) eq 'ARRAY';
108             }
109 8   33     28 $self->{_start} = $sets->{1} || $sets->{0};
110 8   66     61 $self->{_end} = $sets->{-1} || $sets->{0};
111              
112             $self->{_clen} = $self->{_start}->charlen() if
113 8 100 66     52 ((!defined $self->{_clen}) && (!defined $self->{_sep}));
114              
115             # build _ones list (cross from start/end)
116 8         16 $self->{_ones} = [];
117              
118             # _end is a simple charset, so use it's map directly
119 8         15 my $end = $self->{_end}->{_map};
120 8         20 my $o = $self->{_ones};
121 8         28 foreach ($self->{_start}->start())
122             {
123 187 100       347 push @$o, $_ if exists $end->{$_};
124             }
125             #print "\n";
126              
127             # some tests for validity
128 8 100       47 if (!defined $self->{_sep})
129             {
130 6         9 foreach (keys %{$self->{_sets}})
  6         24  
131             {
132 19         49 my $l = $self->{_sets}->{$_}->charlen();
133             return $self->{_error} =
134             "Illegal character length '$l' for charset '$_', expected '$self->{_clen}'"
135 19 50       37 if $self->{_sets}->{$_}->charlen() != $self->{_clen};
136              
137             }
138             }
139 8         16 $self->{_cnum} = Math::BigInt->new( scalar @{$self->{_ones}} );
  8         27  
140             # initialize array of counts for len of 0..1
141 8         582 $self->{_cnt} = 2; # cached amount of class-sizes
142 8 50       26 if ($self->{_minlen} <= 0)
143             {
144 8         1257 $self->{_count}->[0] = 1; # '' is one string
145 8         38 my $sl = $self->{_start}->length();
146 8         27 my $el = $self->{_end}->length();
147 8         16 $self->{_count}->[1] = $self->{_cnum};
148 8         18 $self->{_count}->[2] = $sl * $el;
149             # init _sum array
150 8         30 $self->{_sum}->[0] = Math::BigInt->bzero();
151 8         185 $self->{_sum}->[1] = Math::BigInt->bone(); # '' is 1 string
152 8         199 $self->{_sum}->[2] = $self->{_count}->[1] + $self->{_sum}->[1];
153 8         607 $self->{_sum}->[3] = $self->{_count}->[2] + $self->{_sum}->[2];
154             # init set patterns
155 8         1315 $self->{_spat}->[1] = [ undef, $self->{_sets}->{0} ];
156 8         24 $self->{_spat}->[2] = [ undef, $self->{_start}, $self->{_end} ];
157             }
158             else
159             {
160 0         0 $self->{_cnt} = 0; # cached amount of class-sizes
161             }
162              
163             # from _ones, make mapping name => number
164 8         23 my $i = Math::BigInt->bone();
165 8         185 foreach (@{$self->{_ones}})
  8         20  
166             {
167 73         3686 $self->{_map}->{$_} = $i++;
168             }
169              
170 8 100       215 if ($self->{_cnum}->is_zero())
171             {
172 4 50       64 $self->{_minlen} = 2 if $self->{_minlen} == 1; # no one's
173             # check whether charset can have 2-character long strings
174 4 50       412 if ($self->{_count}->[2] == 0)
175             {
176 0 0       0 $self->{_minlen} = 3 if $self->{_minlen} == 2; # no two's
177             # check whether some path from start to end set exists, if not: empty
178             }
179             }
180             return $self->{_error} =
181             "Minlen ($self->{_minlen} must be smaller than maxlen ($self->{_maxlen})"
182 8 50       76 if ($self->{_minlen} > $self->{_maxlen});
183 8         238 return $self;
184             }
185              
186             sub dump
187             {
188 1     1 0 3 my $self = shift;
189              
190 1         2 my $txt = "type: GROUPED\n";
191              
192 1         2 foreach my $set (sort { $b<=>$a } keys %{$self->{_sets}})
  3         10  
  1         8  
193             {
194 3         12 $txt .= " $set => ". $self->{_sets}->{$set}->dump(' ');
195             }
196 1         3 $txt .= "ones : " . join(' ',@{$self->{_ones}}) . "\n";
  1         4  
197 1         3 $txt;
198             }
199              
200             sub _calc
201             {
202             # given count of len 1..x, calculate count for y (y > x) and all between
203             # x and y
204             # currently re-calcs from 2 on, we could save the state and only calculate
205             # the missing counts.
206              
207             # print "calc ",caller(),"\n";
208 12     12   24 my $self = shift;
209 12 50 50     26 my $max = shift || 1; $max = 1 if $max < 1;
  12         25  
210 12 100       27 return if $max <= $self->{_cnt};
211              
212             # print "in _calc $self $max\n";
213 8         13 my $i = $self->{_cnt}; # last defined element
214 8         12 my $last = $self->{_count}->[$i];
215 8         32 while ($i++ <= $max)
216             {
217             # build list of charsets for this length
218 20         1387 my $spat = []; # set patterns
219 20         37 my $sets = $self->{_sets}; # shortcut
220 20         61 for (my $j = 1; $j <= $i; $j++)
221             {
222 87         110 my $r = $j-$i-1; # reverse
223             # print "$j reversed $r (for $i)\n";
224 87   100     248 $spat->[$j] = $sets->{$j} || $sets->{$r}; # one of both?
225             $spat->[$j] = $sets->{$j}->merge($sets->{$r}) if
226 87 50 66     193 exists $sets->{$j} && exists $sets->{$r}; # both?
227 87 100       209 $spat->[$j] = $sets->{0} unless defined $spat->[$j]; # none?
228             # print $spat->[$j]->dump(),"\n";
229             }
230 20         36 $self->{_spat}->[$i] = $spat; # store
231             # for each charset, take size and mul together
232 20         50 $last = Math::BigInt->bone();
233 20         483 for (my $j = 1; $j <= $i; $j++)
234             {
235             # print "$i $spat->[$j]\n";
236 87         8522 $last *= $spat->[$j]->length();
237             # print "last $last ",$spat->[$j]->length()," ($spat->[$j])\n";
238             }
239 20         2569 $self->{_count}->[$i] = $last;
240             # print "$i: count $last ";
241 20         67 $self->{_sum}->[$i] = $self->{_sum}->[$i-1] + $self->{_count}->[$i-1];
242             # print "sum $self->{_sum}->[$i]\n";
243             }
244 8         629 $self->{_cnt} = $i-1; # store new cache size
245 8         16 return;
246             }
247              
248             sub is_valid
249             {
250             # check wether a string conforms to the given charset sets
251 15     15 1 37 my $self = shift;
252 15         20 my $str = shift;
253              
254             # print "$str\n";
255 15 50       38 return 0 if !defined $str;
256 15 100 66     43 return 1 if $str eq '' && $self->{_minlen} <= 0;
257              
258 14         27 my @chars;
259 14 100       36 if (defined $self->{_sep})
260             {
261 1         13 @chars = split /$self->{_sep}/,$str;
262 1 50       5 shift @chars if $chars[0] eq '';
263 1 50       4 pop @chars if $chars[-1] eq $self->{_sep};
264             }
265             else
266             {
267 13         15 my $i = 0; my $len = CORE::length($str); my $clen = $self->{_clen};
  13         19  
  13         19  
268 13         37 while ($i < $len)
269             {
270 40         68 push @chars, substr($str,$i,$clen); $i += $clen;
  40         67  
271             }
272             }
273             # length okay?
274 14 50       48 return 0 if scalar @chars < $self->{_minlen};
275 14 50       1061 return 0 if scalar @chars > $self->{_maxlen};
276              
277             # valid start char?
278 14 100       849 return 0 unless defined $self->{_start}->map($chars[0]);
279 13 100       32 return 1 if @chars == 1;
280             # further checks for strings longer than 1
281 12         25 my $k = 1;
282 12         20 my $d = scalar @chars;
283 12 100       37 $self->_calc($d) if ($self->{_cnt} < $d);
284 12         22 my $spat = $self->{_spat}->[$d];
285 12         21 foreach my $c (@chars)
286             {
287 36 100       71 return 0 if !defined $spat->[$k++]->map($c);
288             }
289             # all tests passed
290 7         56 1;
291             }
292              
293             sub minlen
294             {
295 0     0 1 0 my $self = shift;
296              
297 0         0 $self->{_minlen};
298             }
299              
300             sub maxlen
301             {
302 0     0 1 0 my $self = shift;
303              
304 0         0 $self->{_maxlen};
305             }
306              
307             sub start
308             {
309             # this returns all the starting characters in a list, or in case of a simple
310             # charset, simple the charset
311             # in scalar context, returns length of starting set, for simple charsets this
312             # equals the length
313 0     0 1 0 my $self = shift;
314              
315 0 0       0 wantarray ? @{$self->{_start}} : scalar @{$self->{_start}};
  0         0  
  0         0  
316             }
317              
318             sub end
319             {
320             # this returns all the end characters in a list, or in case of a simple
321             # charset, simple the charset
322             # in scalar context, returns length of end set, for simple charsets this
323             # equals the length
324 0     0 1 0 my $self = shift;
325              
326 0 0       0 wantarray ? sort keys %{$self->{_end}} : scalar keys %{$self->{_end}};
  0         0  
  0         0  
327             }
328              
329             sub ones
330             {
331             # this returns all the one-char strings (in scalar context the count of them)
332 0     0 1 0 my $self = shift;
333              
334 0 0       0 wantarray ? @{$self->{_ones}} : scalar @{$self->{_ones}};
  0         0  
  0         0  
335             }
336              
337             sub num2str
338             {
339             # convert Math::BigInt/Math::String to string
340             # in list context, return (string,stringlen)
341 17     17 0 371 my $self = shift;
342 17         28 my $x = shift;
343              
344 17 100       73 $x = new Math::BigInt($x) unless ref $x;
345 17 50       748 return undef if ($x->sign() !~ /^[+-]$/);
346 17 100       137 if ($x->is_zero())
347             {
348 2 100       38 return wantarray ? ('',0) : '';
349             }
350 15         195 my $j = $self->{_cnum}; # nr of chars
351              
352 15 100       39 if ($x <= $j)
353             {
354 5         172 my $c = $self->{_ones}->[$x-1];
355 5 50       1087 return wantarray ? ($c,1) : $c; # string len == 1
356             }
357              
358 10         336 my $digits = $self->chars($x); my $d = $digits;
  10         15  
359             # now treat the string as it were a zero-padded string of length $digits
360              
361 10         14 my $es=""; # result
362             # copy input, make positive number, correct to $digits and cater for 0
363 10         27 my $y = Math::BigInt->new($x); $y->babs();
  10         364  
364             #print "fac $j y: $y new: ";
365 10         81 $y -= $self->{_sum}->[$digits];
366              
367 10 50       915 $self->_calc($d) if ($self->{_cnt} < $d);
368             #print "y: $y\n";
369 10 50       16 my $mod = 0; my $s = $self->{_sep}; $s = '' if !defined $s;
  10         16  
  10         36  
370 10         18 my $spat = $self->{_spat}->[$d]; # set pattern
371 10         17 my $k = $d;
372 10         23 while (!$y->is_zero())
373             {
374             #print "bfore: y/fac: $y / $j \n";
375 9         112 ($y,$mod) = $y->bdiv($spat->[$k]->length());
376             #$es = $self->{_ones}->[$mod] . $s.$es;
377 9         2007 $es = $spat->[$k--]->char($mod) . $s.$es; # find mod'th char
378             #print "after: div: $y rem: $mod \n";
379 9         199 $digits --; # one digit done
380             }
381             # padd the remaining digits with the zero-symbol
382 10         157 while ($digits-- > 0)
383             {
384 16         43 $es = $spat->[$k--]->char(0) . $s . $es;
385             }
386 10         61 $es =~ s/$s$//; # strip last sep 'char'
387 10 50       87 wantarray ? ($es,$d) : $es;
388             }
389              
390             sub str2num
391             {
392             # convert Math::String to Math::BigInt
393 24     24 0 2230 my $self = shift;
394 24         37 my $str = shift; # simple string
395              
396 24         64 my $int = Math::BigInt->bzero();
397 24         497 my $i = CORE::length($str);
398              
399 24 100       55 return $int if $i == 0;
400             # print "str2num $i $clen '$str'\n";
401 22         36 my $map = $self->{_map};
402 22         31 my $clen = $self->{_clen}; # len of one char
403              
404 22 100 100     82 if ((!defined $self->{_sep}) && ($i == $clen))
405             {
406 5 50       12 return $int->bnan() if !exists $map->{$str};
407 5         13 return $map->{$str}->copy();
408             }
409              
410 17         39 my $mul = Math::BigInt->bone();
411 17         376 my $cs; # charset at pos i
412 17         22 my $k = 1; # position
413 17         34 my $c = 0; # chars in string
414 17 100       38 if (!defined $self->{_sep})
415             {
416 13 50       35 return $int->bnan() if $i % $clen != 0; # not multiple times clen
417 13         28 $c = int($i/$clen);
418 13 100       29 $self->_calc($c) if ($self->{_cnt} < $c);
419 13         22 my $spat = $self->{_spat}->[$c];
420             # print "$c ($self->{_cnt}) spat: ",scalar @$spat,"\n";
421 13         25 $i -= $clen;
422 13         20 $k = $c;
423 13         27 while ($i >= 0)
424             {
425 52         89 $cs = $spat->[$k--]; # charset at pos k
426             # print "$i $k $cs nr $int ";
427             # print "mapped ",substr($str,$i,$clen)," => ",
428             # $cs->map(substr($str,$i,$clen)) || 0;
429             # print " mul $mul => ";
430 52         152 $int += $mul * $cs->map(substr($str,$i,$clen));
431 52         10957 $mul *= $cs->length();
432             # print "mul $mul\n";
433 52         6562 $i -= $clen;
434             }
435             }
436             else
437             {
438             # with sep char
439 4         40 my @chars = split /$self->{_sep}/, $str;
440 4 50       13 shift @chars if $chars[0] eq ''; # strip leading sep
441 4 50       11 pop @chars if $chars[-1] eq $self->{_sep}; # strip trailing sep
442 4         7 $c = scalar @chars;
443 4 50       10 $self->_calc($c) if ($self->{_cnt} < $c);
444 4         7 my $spat = $self->{_spat}->[$c];
445 4         6 $k = $c;
446 4         9 foreach (reverse @chars)
447             {
448 8         531 $cs = $spat->[$k--]; # charset at pos k
449 8         21 $int += $mul * $cs->map($_);
450 8         1804 $mul *= $cs->length();
451             }
452             }
453 17         536 $int + $self->{_sum}->[$c]; # add base sum
454             }
455              
456             #sub char
457             # {
458             # # return nth char from charset
459             # my $self = shift;
460             # my $char = shift || 0;
461             #
462             # return undef if $char > scalar @{$self->{_ones}}; # dont create spurios elems
463             # return $self->{_ones}->[$char];
464             # }
465              
466             sub first
467             {
468 4     4 1 9 my $self = shift;
469 4   100     14 my $count = abs(shift || 0);
470              
471 4 50       17 return if $count < $self->{_minlen};
472 4 50 33     348 return if defined $self->{_maxlen} && $count > $self->{_maxlen};
473 4 100       315 return '' if $count == 0;
474              
475 3 100       10 return $self->{_ones}->[0] if $count == 1;
476              
477 2         8 $self->_calc($count);
478 2         3 my $spat = $self->{_spat}->[$count];
479 2         4 my $es = '';
480 2   50     8 my $s = $self->{_sep} || '';
481 2         17 for (my $i = 1; $i <= $count; $i++)
482             {
483 5         14 $es .= $s . $spat->[$i]->char(0);
484             }
485 2         4 $s = quotemeta($s);
486 2 50       5 $es =~ s/^$s// if $s ne ''; # remove first sep
487 2         8 $es;
488             }
489              
490             sub last
491             {
492 4     4 1 10 my $self = shift;
493 4   100     12 my $count = abs(shift || 0);
494              
495 4 50       16 return if $count < $self->{_minlen};
496 4 50 33     354 return if defined $self->{_maxlen} && $count > $self->{_maxlen};
497 4 100       304 return '' if $count == 0;
498              
499 3 100       11 return $self->{_ones}->[-1] if $count == 1;
500              
501 2         16 $self->_calc($count);
502 2         4 my $spat = $self->{_spat}->[$count];
503 2         4 my $es = '';
504 2   50     8 my $s = $self->{_sep} || '';
505 2         6 for (my $i = 1; $i <= $count; $i++)
506             {
507 5         15 $es .= $s . $spat->[$i]->char(-1);
508             }
509 2         5 $s = quotemeta($s);
510 2 50       5 $es =~ s/^$s// if $s ne ''; # remove first sep
511 2         8 $es;
512             }
513              
514             sub next
515             {
516 1     1 1 2 my $self = shift;
517 1         3 my $str = shift;
518              
519 1 50       4 if ($str->{_cache} eq '') # 0 => 1
520             {
521 0 0       0 my $min = $self->{_minlen}; $min = 1 if $min <= 0;
  0         0  
522 0         0 $str->{_cache} = $self->first($min);
523 0         0 return;
524             }
525              
526             # only the rightmost digit is adjusted. If this overflows, we simple
527             # invalidate the cache. The time saved by updating the cache would be to
528             # small to be of use, especially since updating the cache takes more time
529             # then. Also, if the cached isn't used later, we would have spent the
530             # update-time in vain.
531              
532             # for higher orders not ready yet
533 1         2 $str->{_cache} = undef;
534             }
535              
536             sub prev
537             {
538 1     1 1 3 my $self = shift;
539 1         2 my $str = shift;
540              
541 1 50       5 if ($str->{_cache} eq '') # 0 => -1
542             {
543 0 0       0 my $min = $self->{_minlen}; $min = -1 if $min >= 0;
  0         0  
544 0         0 $str->{_cache} = $self->first($min);
545 0         0 return;
546             }
547              
548             # for higher orders not ready yet
549 1         3 $str->{_cache} = undef;
550             }
551              
552             __END__