File Coverage

blib/lib/Data/Entropy/Algorithms.pm
Criterion Covered Total %
statement 134 141 95.0
branch 61 74 82.4
condition 21 33 63.6
subroutine 21 21 100.0
pod 12 12 100.0
total 249 281 88.6


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Data::Entropy::Algorithms - basic entropy-using algorithms
4              
5             =head1 SYNOPSIS
6              
7             use Data::Entropy::Algorithms
8             qw(rand_bits rand_int rand_prob);
9              
10             $str = rand_bits(17);
11             $i = rand_int(12345);
12             $i = rand_int(Math::BigInt->new("1000000000000"));
13             $j = rand_prob(1, 2, 3);
14             $j = rand_prob([ 1, 2, 3 ]);
15              
16             use Data::Entropy::Algorithms qw(rand_fix rand rand_flt);
17              
18             $x = rand_fix(48);
19             $x = rand(7);
20             $x = rand_flt(0.0, 7.0);
21              
22             use Data::Entropy::Algorithms
23             qw(pick pick_r choose choose_r shuffle shuffle_r);
24              
25             $item = pick($item0, $item1, $item2);
26             $item = pick_r(\@items);
27             @chosen = choose(3, $item0, $item1, $item2, $item3, $item4);
28             $chosen = choose_r(3, \@items);
29             @shuffled = shuffle($item0, $item1, $item2, $item3, $item4);
30             $shuffled = shuffle_r(\@items);
31              
32             =head1 DESCRIPTION
33              
34             This module contains a collection of fundamental algorithms that
35             use entropy. They all use the entropy source mechanism described in
36             L.
37              
38             =cut
39              
40             package Data::Entropy::Algorithms;
41              
42 10     10   107739 { use 5.006; }
  10         51  
43 10     10   56 use warnings;
  10         24  
  10         670  
44 10     10   67 use strict;
  10         18  
  10         379  
45              
46 10     10   48 use Carp qw(croak);
  10         20  
  10         699  
47 10     10   386 use Data::Entropy qw(entropy_source);
  10         18  
  10         796  
48 10         1407 use Data::Float 0.008 qw(
49             have_subnormal min_normal_exp significand_bits
50             float_is_finite float_parts float_sign mult_pow2 copysign
51 10     10   5786 );
  10         81197  
52 10     10   79 use Params::Classify 0.000 qw(is_ref);
  10         142  
  10         686  
53              
54             our $VERSION = "0.008";
55              
56 10     10   50 use parent "Exporter";
  10         19  
  10         73  
57             our @EXPORT_OK = qw(
58             rand_bits rand_int rand_prob
59             rand_fix rand rand_flt
60             pick_r pick choose_r choose shuffle_r shuffle
61             );
62              
63             =head1 FUNCTIONS
64              
65             All of these functions use entropy. The entropy source is not an
66             explicit input in any case. All functions use the current entropy source
67             maintained by the C module. To select an entropy source
68             use the C function in that module, or alternatively
69             do nothing to use the default source.
70              
71             =head2 Fundamental entropy extraction
72              
73             =over
74              
75             =item rand_bits(NBITS)
76              
77             Returns NBITS bits of entropy, as a string of octets. If NBITS is
78             not a multiple of eight then the last octet in the string has its most
79             significant bits set to zero.
80              
81             =cut
82              
83             sub rand_bits($) {
84 42     42 1 31811 my($nbits) = @_;
85 42 100       349 croak "need a non-negative number of bits to dispense"
86             unless $nbits >= 0;
87 41         146 return entropy_source->get_bits($nbits);
88             }
89              
90             =item rand_int(LIMIT)
91              
92             LIMIT must be a positive integer. Returns a uniformly-distributed random
93             integer in the range [0, LIMIT). LIMIT may be either a native integer,
94             a C object, or an integer-valued C object;
95             the returned number is of the same type.
96              
97             =cut
98              
99             sub rand_int($) {
100 20645     20645 1 99951 my($limit) = @_;
101 20645 100       49427 croak "need a positive upper limit for random variable"
102             unless $limit > 0;
103 20644         65320 return entropy_source->get_int($limit);
104             }
105              
106             =item rand_prob(PROB ...)
107              
108             =item rand_prob(PROBS)
109              
110             Returns a random integer selected with non-uniform probability. The
111             relative probabilities are supplied as a list of non-negative integers
112             (multiple PROB arguments) or a reference to an array of integers (the
113             PROBS argument). The relative probabilities may be native integers,
114             C objects, or integer-valued C objects;
115             they must all be of the same type. At least one probability value must
116             be positive.
117              
118             The first relative probability value (the first PROB or the first element
119             of PROBS) is the relative probability of returning 0. The absolute
120             probability of returning 0 is this value divided by the total of all
121             the relative probability values. Similarly the second value controls
122             the probability of returning 1, and so on.
123              
124             =cut
125              
126             sub rand_prob(@) {
127 154 100 100 154 1 149889 my $probs = @_ == 1 && is_ref($_[0], "ARRAY") ? $_[0] : \@_;
128 154         380 my $total = 0;
129 154         512 for(my $i = @$probs; $i--; ) {
130 754         1295 my $prob = $probs->[$i];
131 754 100       1931 croak "probabilities must be non-negative" unless $prob >= 0;
132 753         1649 $total += $prob;
133             }
134 153 100       538 croak "can't have nothing possible" if $total == 0;
135 152         454 for(my $i = @$probs; $i--; ) {
136 370         689 my $prob = $probs->[$i];
137 370         631 $total -= $prob;
138 370 100       1045 return $i if entropy_source->get_prob($total, $prob);
139             }
140             }
141              
142             =back
143              
144             =head2 Numbers
145              
146             =over
147              
148             =item rand_fix(NBITS)
149              
150             Returns a uniformly-distributed random NBITS-bit fixed-point fraction in
151             the range [0, 1). That is, the result is a randomly-chosen multiple of
152             2^-NBITS, the multiplier being a random integer in the range [0, 2^NBITS).
153             The value is returned in the form of a native floating point number, so
154             NBITS can be at most one greater than the number of bits of significand
155             in the floating point format.
156              
157             With NBITS = 48 the range of output values is the same as that of the
158             Unix C function.
159              
160             =cut
161              
162             sub rand_fix($) {
163 516     516 1 198553 my($nbits) = @_;
164 516 100       1666 croak "need a non-negative number of bits to dispense"
165             unless $nbits >= 0;
166 515 50       1201 croak "can't generate more than ".(significand_bits+1).
167             " bits of fixed-point fraction"
168             if $nbits > significand_bits+1;
169 515         815 my $frac = 0.0;
170 515         1342 for(my $pos = 24; $pos <= $nbits; $pos += 24) {
171 516         10408 $frac += mult_pow2(rand_int(1 << 24), -$pos);
172             }
173 515         11240 $frac += mult_pow2(rand_int(1 << ($nbits % 24)), -$nbits);
174 515         10759 return $frac;
175             }
176              
177             =item rand([LIMIT])
178              
179             Generates a random fixed-point fraction by C and then multiplies
180             it by LIMIT, returning the result. LIMIT defaults to 1, and if it
181             is 0 then that is also treated as 1. The length of the fixed-point
182             fraction is 48 bits, unless that can't be represented in the native
183             floating point type, in which case the longest possible fraction will
184             be generated instead.
185              
186             This is a drop-in replacement for C: it produces exactly the
187             same range of output values, but using the current entropy source instead
188             of a sucky PRNG with linear relationships between successive outputs.
189             (C does the type of calculation described, but using the
190             PRNG C to generate the fixed-point fraction.) The details of
191             behaviour may change in the future if the behaviour of C
192             changes, to maintain the match.
193              
194             Where the source of a module can't be readily modified, it can be made
195             to use this C by an incantation such as
196              
197             *Foreign::Module::rand = \&Data::Entropy::Algorithms::rand;
198              
199             This must be done before the module is loaded, most likely in a C
200             block. It is also possible to override C for all modules,
201             by performing this similarly early:
202              
203             *CORE::GLOBAL::rand = \&Data::Entropy::Algorithms::rand;
204              
205             This function should not be used in any new code, because the kind
206             of output supplied by C is hardly ever the right thing to use.
207             The C idiom to generate a random integer has non-uniform
208             probabilities of generating each possible value, except when C<$n> is a
209             power of two. For floating point numbers, C can't generate most
210             representable numbers in its output range, and the output is biased
211             towards zero. In new code use C to generate integers and
212             C to generate floating point numbers.
213              
214             =cut
215              
216 10     10   4913 use constant RAND_NBITS => 48 > significand_bits+1 ? significand_bits+1 : 48;
  10         21  
  10         14062  
217              
218             sub rand(;$) {
219 258     258 1 287637 my($limit) = @_;
220 258 100 100     826 return rand_fix(RAND_NBITS) *
221             (!defined($limit) || $limit == 0.0 ? 1.0 : $limit);
222             }
223              
224             =item rand_flt(MIN, MAX)
225              
226             Selects a uniformly-distributed real number (with infinite precision)
227             in the range [MIN, MAX] and then rounds this number to the nearest
228             representable floating point value, which it returns. (Actually it is
229             only I the function worked this way: in fact it never generates
230             the number with infinite precision. It selects between the representable
231             floating point values with the probabilities implied by this process.)
232              
233             This can return absolutely any floating point value in the range [MIN,
234             MAX]; both MIN and MAX themselves are possible return values. All bits
235             of the floating point type are filled randomly, so the range of values
236             that can be returned depends on the details of the floating point format.
237             (See L for low-level floating point utilities.)
238              
239             The function Cs if MIN and MAX are not both finite. If MIN is
240             greater than MAX then their roles are swapped: the order of the limit
241             parameters actually doesn't matter. If the limits are identical then
242             that value is always returned. As a special case, if the limits are
243             positive zero and negative zero then a zero will be returned with a
244             randomly-chosen sign.
245              
246             =cut
247              
248             sub rand_flt($$) {
249 11000     11000 1 5778002 my($a, $b) = @_;
250 11000 50 33     395702 croak "bounds for rand_flt() must be finite"
251             unless float_is_finite($a) && float_is_finite($b);
252 11000 100       524480 if($a == $b) {
253 2000 100 66     31550 return $_[rand_int(2)]
254             if $a == 0.0 && float_sign($a) ne float_sign($b);
255 1000         2568 return $_[0];
256             }
257 9000 100       34184 ($a, $b) = ($b, $a) if abs($a) < abs($b);
258 9000 50       204563 my($prm_sign, $prm_max_exp, $prm_max_sgnf) =
259             $a == 0.0 ? ("+", min_normal_exp, 0.0) : float_parts($a);
260 9000 100       935139 my($b_sign, $b_exp, $b_sgnf) =
261             $b == 0.0 ? ("+", min_normal_exp, 0.0) : float_parts($b);
262 9000         212877 my($min_exp, $min_sgnf);
263 9000         0 my($opp_max_exp, $opp_max_sgnf);
264 9000 100       24633 if($b_sign eq $prm_sign) {
265 6000         12807 ($min_exp, $min_sgnf) = ($b_exp, $b_sgnf);
266 6000         11538 ($opp_max_exp, $opp_max_sgnf) = (min_normal_exp, 0.0);
267             } else {
268 3000         7502 ($min_exp, $min_sgnf) = (min_normal_exp, 0.0);
269 3000         6876 ($opp_max_exp, $opp_max_sgnf) = ($b_exp, $b_sgnf);
270             }
271 9000         16586 TRY_AGAIN:
272             my $exp = $prm_max_exp;
273 9000         17863 my $bdone = significand_bits;
274 9000 50       24596 $bdone = 28 if $bdone > 28;
275 9000         22739 my $prm_frng = $prm_max_sgnf * (1 << $bdone);
276 9000 50       24344 if($prm_max_sgnf < 1.0) {
277             # subnormal limit
278 0         0 my $desired_rng = 1 << $bdone;
279 0         0 while($bdone != significand_bits) {
280 0         0 $bdone++;
281 0         0 $prm_frng += $prm_frng;
282 0 0       0 last if $prm_frng >= $desired_rng;
283             }
284             }
285 9000         16947 my $prm_rng = int($prm_frng);
286 9000 50       20661 $prm_rng++ if $prm_frng != $prm_rng;
287 9000         15862 my $min_b = $bdone - ($exp - $min_exp);
288 9000 100       24414 my $min_rng = $min_b >= 0 ? int(mult_pow2($min_sgnf, $min_b)) : 0;
289 9000         149617 my $opp_b = $bdone - ($exp - $opp_max_exp);
290 9000 100       22843 my $opp_frng = $opp_b >= 0 ? mult_pow2($opp_max_sgnf, $opp_b) : 0;
291 9000         111277 my $opp_rng = int($opp_frng);
292 9000 50       21937 $opp_rng++ if $opp_frng != $opp_rng;
293 9000         26266 my $n = $min_rng + rand_int($prm_rng - $min_rng + $opp_rng);
294 9000         24168 my($sg, $max_exp, $max_sgnf) = ($a, $prm_max_exp, $prm_max_sgnf);
295 9000 100       26853 if($n >= $prm_rng) {
296 1182         2641 $n -= $prm_rng;
297 1182         3142 ($sg, $max_exp, $max_sgnf) = ($b, $opp_max_exp, $opp_max_sgnf);
298             }
299 9000   33     31846 while($n == 0 && $exp - $bdone - 1 >= min_normal_exp) {
300 0         0 $exp -= $bdone + 1;
301 0         0 $n = rand_int(2 << $bdone);
302             }
303 9000         20963 for(my $bit = 16; $bit; $bit >>= 1) {
304 45000 100 33     216308 if($bdone >= $bit && $exp - $bit >= min_normal_exp &&
      66        
305             $n < (2 << ($bdone - $bit))) {
306 8444         14971 $bdone -= $bit;
307 8444         21960 $exp -= $bit;
308             }
309             }
310 9000 50       19921 goto TRY_AGAIN if $exp < $min_exp;
311 9000 100       21403 my $top_sgnf = $exp == $max_exp ? $max_sgnf : 2.0;
312 9000 100       28020 my $bot_sgnf = $exp == $min_exp ? $min_sgnf : 1.0;
313 9000         27178 my $sgnf = mult_pow2($n, -$bdone);
314 9000         283445 if(!have_subnormal && $exp == min_normal_exp && $sgnf < 1.0) {
315             $top_sgnf = 1.0;
316             $sgnf = 0.0;
317             } else {
318 9000         12585 $bot_sgnf = 1.0 if !have_subnormal && $exp == min_normal_exp &&
319             $bot_sgnf < 1.0;
320 9000         20726 while($bdone != significand_bits) {
321 9304         25812 my $bseg = significand_bits - $bdone;
322 9304 100       21383 $bseg = 28 if $bseg > 28;
323 9304         14713 $bdone += $bseg;
324 9304         22204 $sgnf += mult_pow2(rand_int(1 << $bseg), -$bdone);
325             }
326             }
327 9000 50 33     341680 goto TRY_AGAIN if $sgnf < $bot_sgnf || $sgnf >= $top_sgnf;
328 9000 50 33     24953 $sgnf = $top_sgnf if $sgnf == $bot_sgnf && rand_int(2);
329 9000 50       24101 return copysign($sgnf == 0.0 ? 0.0 : mult_pow2($sgnf, $exp), $sg);
330             }
331              
332             =back
333              
334             =head2 Combinatorics
335              
336             =over
337              
338             =item pick(ITEM ...)
339              
340             Randomly selects and returns one of the ITEMs. Each ITEM has equal
341             probability of being selected.
342              
343             =item pick_r(ITEMS)
344              
345             ITEMS must be a reference to an array. Randomly selects and returns
346             one of the elements of the array. Each element has equal probability
347             of being selected.
348              
349             This is the same operation as that performed by C, but using
350             references to avoid expensive copying of arrays.
351              
352             =cut
353              
354             sub pick_r($) {
355 155     155 1 71038 my($a) = @_;
356 155 100 100     1413 croak "need a non-empty array to pick from"
357             unless is_ref($a, "ARRAY") && @$a;
358 152         442 return $a->[rand_int(@$a)];
359             }
360              
361 77     77 1 66009 sub pick(@) { pick_r(\@_) }
362              
363             =item choose(NCHOOSE, ITEM ...)
364              
365             Randomly selects NCHOOSE of the ITEMs. Each ITEM has equal probability
366             of being selected. The chosen items are returned in a list in the same
367             order in which they appeared in the argument list.
368              
369             =item choose_r(NCHOOSE, ITEMS)
370              
371             ITEMS must be a reference to an array. Randomly selects NCHOOSE of
372             the elements in the array. Each element has equal probability of being
373             selected. Returns a reference to an array containing the chosen items
374             in the same order in which they appeared in the input array.
375              
376             This is the same operation as that performed by C, but using
377             references to avoid expensive copying of arrays.
378              
379             =cut
380              
381             sub choose_r($$) {
382 51     51 1 14825 my($nchoose, $a) = @_;
383 51 100       555 croak "need a non-negative number of items to choose"
384             unless $nchoose >= 0;
385 49 100 100     694 croak "need a sufficiently large array to pick from"
386             unless is_ref($a, "ARRAY") && @$a >= $nchoose;
387 46         70 my $ntotal = @$a;
388 46         73 my $nleave = $ntotal - $nchoose;
389 46         69 my @chosen;
390 46         126 for(my $i = 0; $i != $ntotal; $i++) {
391 414 100       584 if(entropy_source->get_prob($nleave, $nchoose)) {
392 128         210 push @chosen, $a->[$i];
393 128         202 $nchoose--;
394             } else {
395 286         416 $nleave--;
396             }
397             }
398 46         254 return \@chosen;
399             }
400              
401 25     25 1 12011 sub choose(@) { @{choose_r(shift, \@_)} }
  25         66  
402              
403             =item shuffle(ITEM ...)
404              
405             Reorders the ITEMs randomly, and returns them in a list in random order.
406             Each possible order has equal probability.
407              
408             =item shuffle_r(ITEMS)
409              
410             ITEMS must be a reference to an array. Reorders the elements of the
411             array randomly. Each possible order has equal probability. Returns a
412             reference to an array containing the elements in random order.
413              
414             This is the same operation as that performed by C, but using
415             references to avoid expensive copying of arrays.
416              
417             =cut
418              
419             sub shuffle_r($) {
420 19     19 1 4589 my($a) = @_;
421 19 100       296 croak "need an array to shuffle"
422             unless is_ref($a, "ARRAY");
423 18         42 $a = [ @$a ];
424 18         48 for(my $i = @$a; $i > 1; ) {
425 126         191 my $j = rand_int($i--);
426 126         172 @{$a}[$i, $j] = @{$a}[$j, $i];
  126         293  
  126         164  
427             }
428 18         115 return $a;
429             }
430              
431 9     9 1 5474 sub shuffle(@) { @{shuffle_r(\@_)} }
  9         29  
432              
433             =back
434              
435             =head1 SEE ALSO
436              
437             L,
438             L
439              
440             =head1 AUTHOR
441              
442             Andrew Main (Zefram)
443              
444             Maintained by Robert Rothenberg
445              
446             =head1 COPYRIGHT
447              
448             Copyright (C) 2006, 2007, 2009, 2011, 2025
449             Andrew Main (Zefram)
450              
451             =head1 LICENSE
452              
453             This module is free software; you can redistribute it and/or modify it
454             under the same terms as Perl itself.
455              
456             =cut
457              
458             1;