File Coverage

blib/lib/Data/Entropy/Algorithms.pm
Criterion Covered Total %
statement 134 142 94.3
branch 61 74 82.4
condition 21 33 63.6
subroutine 21 21 100.0
pod 12 12 100.0
total 249 282 88.3


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Data::Entropy::Algorithms - basic entropy-using algorithms
4              
5             =head1 SYNOPSIS
6              
7             use Data::Entropy::Algorithms
8             qw(rand_bits rand_int rand_prob);
9              
10             $str = rand_bits(17);
11             $i = rand_int(12345);
12             $i = rand_int(Math::BigInt->new("1000000000000"));
13             $j = rand_prob(1, 2, 3);
14             $j = rand_prob([ 1, 2, 3 ]);
15              
16             use Data::Entropy::Algorithms qw(rand_fix rand rand_flt);
17              
18             $x = rand_fix(48);
19             $x = rand(7);
20             $x = rand_flt(0.0, 7.0);
21              
22             use Data::Entropy::Algorithms
23             qw(pick pick_r choose choose_r shuffle shuffle_r);
24              
25             $item = pick($item0, $item1, $item2);
26             $item = pick_r(\@items);
27             @chosen = choose(3, $item0, $item1, $item2, $item3, $item4);
28             $chosen = choose_r(3, \@items);
29             @shuffled = shuffle($item0, $item1, $item2, $item3, $item4);
30             $shuffled = shuffle_r(\@items);
31              
32             =head1 DESCRIPTION
33              
34             This module contains a collection of fundamental algorithms that
35             use entropy. They all use the entropy source mechanism described in
36             L.
37              
38             =cut
39              
40             package Data::Entropy::Algorithms;
41              
42 9     9   78458 { use 5.006; }
  9         36  
  9         424  
43 9     9   55 use warnings;
  9         18  
  9         302  
44 9     9   47 use strict;
  9         16  
  9         326  
45              
46 9     9   46 use Carp qw(croak);
  9         17  
  9         557  
47 9     9   628 use Data::Entropy qw(entropy_source);
  9         14  
  9         542  
48 9         1399 use Data::Float 0.008 qw(
49             have_subnormal min_normal_exp significand_bits
50             float_is_finite float_parts float_sign mult_pow2 copysign
51 9     9   10670 );
  9         89795  
52 9     9   83 use Params::Classify 0.000 qw(is_ref);
  9         154  
  9         585  
53              
54             our $VERSION = "0.007";
55              
56 9     9   52 use parent "Exporter";
  9         22  
  9         78  
57             our @EXPORT_OK = qw(
58             rand_bits rand_int rand_prob
59             rand_fix rand rand_flt
60             pick_r pick choose_r choose shuffle_r shuffle
61             );
62              
63             =head1 FUNCTIONS
64              
65             All of these functions use entropy. The entropy source is not an
66             explicit input in any case. All functions use the current entropy source
67             maintained by the C module. To select an entropy source
68             use the C function in that module, or alternatively
69             do nothing to use the default source.
70              
71             =head2 Fundamental entropy extraction
72              
73             =over
74              
75             =item rand_bits(NBITS)
76              
77             Returns NBITS bits of entropy, as a string of octets. If NBITS is
78             not a multiple of eight then the last octet in the string has its most
79             significant bits set to zero.
80              
81             =cut
82              
83             sub rand_bits($) {
84 42     42 1 14170 my($nbits) = @_;
85 42 100       296 croak "need a non-negative number of bits to dispense"
86             unless $nbits >= 0;
87 41         110 return entropy_source->get_bits($nbits);
88             }
89              
90             =item rand_int(LIMIT)
91              
92             LIMIT must be a positive integer. Returns a uniformly-distributed random
93             integer in the range [0, LIMIT). LIMIT may be either a native integer,
94             a C object, or an integer-valued C object;
95             the returned number is of the same type.
96              
97             =cut
98              
99             sub rand_int($) {
100 20640     20640 1 91631 my($limit) = @_;
101 20640 100       50836 croak "need a positive upper limit for random variable"
102             unless $limit > 0;
103 20639         58780 return entropy_source->get_int($limit);
104             }
105              
106             =item rand_prob(PROB ...)
107              
108             =item rand_prob(PROBS)
109              
110             Returns a random integer selected with non-uniform probability. The
111             relative probabilities are supplied as a list of non-negative integers
112             (multiple PROB arguments) or a reference to an array of integers (the
113             PROBS argument). The relative probabilities may be native integers,
114             C objects, or integer-valued C objects;
115             they must all be of the same type. At least one probability value must
116             be positive.
117              
118             The first relative probability value (the first PROB or the first element
119             of PROBS) is the relative probability of returning 0. The absolute
120             probability of returning 0 is this value divided by the total of all
121             the relative probability values. Similarly the second value controls
122             the probability of returning 1, and so on.
123              
124             =cut
125              
126             sub rand_prob(@) {
127 154 100 100 154 1 95373 my $probs = @_ == 1 && is_ref($_[0], "ARRAY") ? $_[0] : \@_;
128 154         211 my $total = 0;
129 154         401 for(my $i = @$probs; $i--; ) {
130 754         897 my $prob = $probs->[$i];
131 754 100       1675 croak "probabilities must be non-negative" unless $prob >= 0;
132 753         1504 $total += $prob;
133             }
134 153 100       432 croak "can't have nothing possible" if $total == 0;
135 152         411 for(my $i = @$probs; $i--; ) {
136 370         446 my $prob = $probs->[$i];
137 370         398 $total -= $prob;
138 370 100       1476 return $i if entropy_source->get_prob($total, $prob);
139             }
140             }
141              
142             =back
143              
144             =head2 Numbers
145              
146             =over
147              
148             =item rand_fix(NBITS)
149              
150             Returns a uniformly-distributed random NBITS-bit fixed-point fraction in
151             the range [0, 1). That is, the result is a randomly-chosen multiple of
152             2^-NBITS, the multiplier being a random integer in the range [0, 2^NBITS).
153             The value is returned in the form of a native floating point number, so
154             NBITS can be at most one greater than the number of bits of significand
155             in the floating point format.
156              
157             With NBITS = 48 the range of output values is the same as that of the
158             Unix C function.
159              
160             =cut
161              
162             sub rand_fix($) {
163 516     516 1 101117 my($nbits) = @_;
164 516 100       1475 croak "need a non-negative number of bits to dispense"
165             unless $nbits >= 0;
166 515 50       1056 croak "can't generate more than ".(significand_bits+1).
167             " bits of fixed-point fraction"
168             if $nbits > significand_bits+1;
169 515         595 my $frac = 0.0;
170 515         1179 for(my $pos = 24; $pos <= $nbits; $pos += 24) {
171 516         7514 $frac += mult_pow2(rand_int(1 << 24), -$pos);
172             }
173 515         8963 $frac += mult_pow2(rand_int(1 << ($nbits % 24)), -$nbits);
174 515         11299 return $frac;
175             }
176              
177             =item rand([LIMIT])
178              
179             Generates a random fixed-point fraction by C and then multiplies
180             it by LIMIT, returning the result. LIMIT defaults to 1, and if it
181             is 0 then that is also treated as 1. The length of the fixed-point
182             fraction is 48 bits, unless that can't be represented in the native
183             floating point type, in which case the longest possible fraction will
184             be generated instead.
185              
186             This is a drop-in replacement for C: it produces exactly the
187             same range of output values, but using the current entropy source instead
188             of a sucky PRNG with linear relationships between successive outputs.
189             (C does the type of calculation described, but using the
190             PRNG C to generate the fixed-point fraction.) The details of
191             behaviour may change in the future if the behaviour of C
192             changes, to maintain the match.
193              
194             Where the source of a module can't be readily modified, it can be made
195             to use this C by an incantation such as
196              
197             *Foreign::Module::rand = \&Data::Entropy::Algorithms::rand;
198              
199             This must be done before the module is loaded, most likely in a C
200             block. It is also possible to override C for all modules,
201             by performing this similarly early:
202              
203             *CORE::GLOBAL::rand = \&Data::Entropy::Algorithms::rand;
204              
205             This function should not be used in any new code, because the kind
206             of output supplied by C is hardly ever the right thing to use.
207             The C idiom to generate a random integer has non-uniform
208             probabilities of generating each possible value, except when C<$n> is a
209             power of two. For floating point numbers, C can't generate most
210             representable numbers in its output range, and the output is biased
211             towards zero. In new code use C to generate integers and
212             C to generate floating point numbers.
213              
214             =cut
215              
216 9     9   4802 use constant RAND_NBITS => 48 > significand_bits+1 ? significand_bits+1 : 48;
  9         20  
  9         16572  
217              
218             sub rand(;$) {
219 258     258 1 137514 my($limit) = @_;
220 258 100 100     559 return rand_fix(RAND_NBITS) *
221             (!defined($limit) || $limit == 0.0 ? 1.0 : $limit);
222             }
223              
224             =item rand_flt(MIN, MAX)
225              
226             Selects a uniformly-distributed real number (with infinite precision)
227             in the range [MIN, MAX] and then rounds this number to the nearest
228             representable floating point value, which it returns. (Actually it is
229             only I the function worked this way: in fact it never generates
230             the number with infinite precision. It selects between the representable
231             floating point values with the probabilities implied by this process.)
232              
233             This can return absolutely any floating point value in the range [MIN,
234             MAX]; both MIN and MAX themselves are possible return values. All bits
235             of the floating point type are filled randomly, so the range of values
236             that can be returned depends on the details of the floating point format.
237             (See L for low-level floating point utilities.)
238              
239             The function Cs if MIN and MAX are not both finite. If MIN is
240             greater than MAX then their roles are swapped: the order of the limit
241             parameters actually doesn't matter. If the limits are identical then
242             that value is always returned. As a special case, if the limits are
243             positive zero and negative zero then a zero will be returned with a
244             randomly-chosen sign.
245              
246             =cut
247              
248             sub rand_flt($$) {
249 11000     11000 1 6371939 my($a, $b) = @_;
250 11000 50 33     399273 croak "bounds for rand_flt() must be finite"
251             unless float_is_finite($a) && float_is_finite($b);
252 11000 100       535448 if($a == $b) {
253 2000 100 66     27211 return $_[rand_int(2)]
254             if $a == 0.0 && float_sign($a) ne float_sign($b);
255 1000         2459 return $_[0];
256             }
257 9000 100       28765 ($a, $b) = ($b, $a) if abs($a) < abs($b);
258 9000 50       244935 my($prm_sign, $prm_max_exp, $prm_max_sgnf) =
259             $a == 0.0 ? ("+", min_normal_exp, 0.0) : float_parts($a);
260 9000 100       826254 my($b_sign, $b_exp, $b_sgnf) =
261             $b == 0.0 ? ("+", min_normal_exp, 0.0) : float_parts($b);
262 9000         170637 my($min_exp, $min_sgnf);
263 0         0 my($opp_max_exp, $opp_max_sgnf);
264 9000 100       26450 if($b_sign eq $prm_sign) {
265 6000         11696 ($min_exp, $min_sgnf) = ($b_exp, $b_sgnf);
266 6000         10297 ($opp_max_exp, $opp_max_sgnf) = (min_normal_exp, 0.0);
267             } else {
268 3000         4671 ($min_exp, $min_sgnf) = (min_normal_exp, 0.0);
269 3000         4947 ($opp_max_exp, $opp_max_sgnf) = ($b_exp, $b_sgnf);
270             }
271 9000         11680 TRY_AGAIN:
272             my $exp = $prm_max_exp;
273 9000         12118 my $bdone = significand_bits;
274 9000 50       32301 $bdone = 28 if $bdone > 28;
275 9000         16197 my $prm_frng = $prm_max_sgnf * (1 << $bdone);
276 9000 50       19402 if($prm_max_sgnf < 1.0) {
277             # subnormal limit
278 0         0 my $desired_rng = 1 << $bdone;
279 0         0 while($bdone != significand_bits) {
280 0         0 $bdone++;
281 0         0 $prm_frng += $prm_frng;
282 0 0       0 last if $prm_frng >= $desired_rng;
283             }
284             }
285 9000         14875 my $prm_rng = int($prm_frng);
286 9000 50       21631 $prm_rng++ if $prm_frng != $prm_rng;
287 9000         16102 my $min_b = $bdone - ($exp - $min_exp);
288 9000 100       30268 my $min_rng = $min_b >= 0 ? int(mult_pow2($min_sgnf, $min_b)) : 0;
289 9000         159768 my $opp_b = $bdone - ($exp - $opp_max_exp);
290 9000 100       22543 my $opp_frng = $opp_b >= 0 ? mult_pow2($opp_max_sgnf, $opp_b) : 0;
291 9000         82356 my $opp_rng = int($opp_frng);
292 9000 50       25920 $opp_rng++ if $opp_frng != $opp_rng;
293 9000         23281 my $n = $min_rng + rand_int($prm_rng - $min_rng + $opp_rng);
294 9000         19493 my($sg, $max_exp, $max_sgnf) = ($a, $prm_max_exp, $prm_max_sgnf);
295 9000 100       20227 if($n >= $prm_rng) {
296 1184         1412 $n -= $prm_rng;
297 1184         2080 ($sg, $max_exp, $max_sgnf) = ($b, $opp_max_exp, $opp_max_sgnf);
298             }
299 9000   33     27516 while($n == 0 && $exp - $bdone - 1 >= min_normal_exp) {
300 0         0 $exp -= $bdone + 1;
301 0         0 $n = rand_int(2 << $bdone);
302             }
303 9000         19696 for(my $bit = 16; $bit; $bit >>= 1) {
304 45000 100 33     330314 if($bdone >= $bit && $exp - $bit >= min_normal_exp &&
      66        
305             $n < (2 << ($bdone - $bit))) {
306 8440         9632 $bdone -= $bit;
307 8440         18569 $exp -= $bit;
308             }
309             }
310 9000 50       34533 goto TRY_AGAIN if $exp < $min_exp;
311 9000 100       16698 my $top_sgnf = $exp == $max_exp ? $max_sgnf : 2.0;
312 9000 100       25068 my $bot_sgnf = $exp == $min_exp ? $min_sgnf : 1.0;
313 9000         27163 my $sgnf = mult_pow2($n, -$bdone);
314 9000         291077 if(!have_subnormal && $exp == min_normal_exp && $sgnf < 1.0) {
315             $top_sgnf = 1.0;
316             $sgnf = 0.0;
317             } else {
318 9000         9864 $bot_sgnf = 1.0 if !have_subnormal && $exp == min_normal_exp &&
319             $bot_sgnf < 1.0;
320 9000         23798 while($bdone != significand_bits) {
321 9299         22181 my $bseg = significand_bits - $bdone;
322 9299 100       19393 $bseg = 28 if $bseg > 28;
323 9299         14317 $bdone += $bseg;
324 9299         20848 $sgnf += mult_pow2(rand_int(1 << $bseg), -$bdone);
325             }
326             }
327 9000 50 33     308397 goto TRY_AGAIN if $sgnf < $bot_sgnf || $sgnf >= $top_sgnf;
328 9000 50 33     23123 $sgnf = $top_sgnf if $sgnf == $bot_sgnf && rand_int(2);
329 9000 50       31905 return copysign($sgnf == 0.0 ? 0.0 : mult_pow2($sgnf, $exp), $sg);
330             }
331              
332             =back
333              
334             =head2 Combinatorics
335              
336             =over
337              
338             =item pick(ITEM ...)
339              
340             Randomly selects and returns one of the ITEMs. Each ITEM has equal
341             probability of being selected.
342              
343             =item pick_r(ITEMS)
344              
345             ITEMS must be a reference to an array. Randomly selects and returns
346             one of the elements of the array. Each element has equal probability
347             of being selected.
348              
349             This is the same operation as that performed by C, but using
350             references to avoid expensive copying of arrays.
351              
352             =cut
353              
354             sub pick_r($) {
355 155     155 1 44692 my($a) = @_;
356 155 100 100     1427 croak "need a non-empty array to pick from"
357             unless is_ref($a, "ARRAY") && @$a;
358 152         356 return $a->[rand_int(@$a)];
359             }
360              
361 77     77 1 46523 sub pick(@) { pick_r(\@_) }
362              
363             =item choose(NCHOOSE, ITEM ...)
364              
365             Randomly selects NCHOOSE of the ITEMs. Each ITEM has equal probability
366             of being selected. The chosen items are returned in a list in the same
367             order in which they appeared in the argument list.
368              
369             =item choose_r(NCHOOSE, ITEMS)
370              
371             ITEMS must be a reference to an array. Randomly selects NCHOOSE of
372             the elements in the array. Each element has equal probability of being
373             selected. Returns a reference to an array containing the chosen items
374             in the same order in which they appeared in the input array.
375              
376             This is the same operation as that performed by C, but using
377             references to avoid expensive copying of arrays.
378              
379             =cut
380              
381             sub choose_r($$) {
382 51     51 1 21766 my($nchoose, $a) = @_;
383 51 100       583 croak "need a non-negative number of items to choose"
384             unless $nchoose >= 0;
385 49 100 100     955 croak "need a sufficiently large array to pick from"
386             unless is_ref($a, "ARRAY") && @$a >= $nchoose;
387 46         61 my $ntotal = @$a;
388 46         75 my $nleave = $ntotal - $nchoose;
389 46         57 my @chosen;
390 46         113 for(my $i = 0; $i != $ntotal; $i++) {
391 414 100       1002 if(entropy_source->get_prob($nleave, $nchoose)) {
392 128         246 push @chosen, $a->[$i];
393 128         367 $nchoose--;
394             } else {
395 286         777 $nleave--;
396             }
397             }
398 46         303 return \@chosen;
399             }
400              
401 25     25 1 12996 sub choose(@) { @{choose_r(shift, \@_)} }
  25         82  
402              
403             =item shuffle(ITEM ...)
404              
405             Reorders the ITEMs randomly, and returns them in a list in random order.
406             Each possible order has equal probability.
407              
408             =item shuffle_r(ITEMS)
409              
410             ITEMS must be a reference to an array. Reorders the elements of the
411             array randomly. Each possible order has equal probability. Returns a
412             reference to an array containing the elements in random order.
413              
414             This is the same operation as that performed by C, but using
415             references to avoid expensive copying of arrays.
416              
417             =cut
418              
419             sub shuffle_r($) {
420 19     19 1 5782 my($a) = @_;
421 19 100       326 croak "need an array to shuffle"
422             unless is_ref($a, "ARRAY");
423 18         64 $a = [ @$a ];
424 18         62 for(my $i = @$a; $i > 1; ) {
425 126         322 my $j = rand_int($i--);
426 126         193 @{$a}[$i, $j] = @{$a}[$j, $i];
  126         14973  
  126         210  
427             }
428 18         146 return $a;
429             }
430              
431 9     9 1 5023 sub shuffle(@) { @{shuffle_r(\@_)} }
  9         32  
432              
433             =back
434              
435             =head1 SEE ALSO
436              
437             L,
438             L
439              
440             =head1 AUTHOR
441              
442             Andrew Main (Zefram)
443              
444             =head1 COPYRIGHT
445              
446             Copyright (C) 2006, 2007, 2009, 2011
447             Andrew Main (Zefram)
448              
449             =head1 LICENSE
450              
451             This module is free software; you can redistribute it and/or modify it
452             under the same terms as Perl itself.
453              
454             =cut
455              
456             1;