File Coverage

blib/lib/Data/Entropy/Algorithms.pm

Criterion	Covered	Total	%
statement	134	142	94.3
branch	61	74	82.4
condition	21	33	63.6
subroutine	21	21	100.0
pod	12	12	100.0
total	249	282	88.3

line	stmt	bran	cond	sub	pod	time	code
1							=head1 NAME
2
3							Data::Entropy::Algorithms - basic entropy-using algorithms
4
5							=head1 SYNOPSIS
6
7							use Data::Entropy::Algorithms
8							qw(rand_bits rand_int rand_prob);
9
10							$str = rand_bits(17);
11							$i = rand_int(12345);
12							$i = rand_int(Math::BigInt->new("1000000000000"));
13							$j = rand_prob(1, 2, 3);
14							$j = rand_prob([ 1, 2, 3 ]);
15
16							use Data::Entropy::Algorithms qw(rand_fix rand rand_flt);
17
18							$x = rand_fix(48);
19							$x = rand(7);
20							$x = rand_flt(0.0, 7.0);
21
22							use Data::Entropy::Algorithms
23							qw(pick pick_r choose choose_r shuffle shuffle_r);
24
25							$item = pick($item0, $item1, $item2);
26							$item = pick_r(\@items);
27							@chosen = choose(3, $item0, $item1, $item2, $item3, $item4);
28							$chosen = choose_r(3, \@items);
29							@shuffled = shuffle($item0, $item1, $item2, $item3, $item4);
30							$shuffled = shuffle_r(\@items);
31
32							=head1 DESCRIPTION
33
34							This module contains a collection of fundamental algorithms that
35							use entropy. They all use the entropy source mechanism described in
36							L.
37
38							=cut
39
40							package Data::Entropy::Algorithms;
41
42	9			9		78458	{ use 5.006; }
	9					36
	9					424
43	9			9		55	use warnings;
	9					18
	9					302
44	9			9		47	use strict;
	9					16
	9					326
45
46	9			9		46	use Carp qw(croak);
	9					17
	9					557
47	9			9		628	use Data::Entropy qw(entropy_source);
	9					14
	9					542
48	9					1399	use Data::Float 0.008 qw(
49							have_subnormal min_normal_exp significand_bits
50							float_is_finite float_parts float_sign mult_pow2 copysign
51	9			9		10670	);
	9					89795
52	9			9		83	use Params::Classify 0.000 qw(is_ref);
	9					154
	9					585
53
54							our $VERSION = "0.007";
55
56	9			9		52	use parent "Exporter";
	9					22
	9					78
57							our @EXPORT_OK = qw(
58							rand_bits rand_int rand_prob
59							rand_fix rand rand_flt
60							pick_r pick choose_r choose shuffle_r shuffle
61							);
62
63							=head1 FUNCTIONS
64
65							All of these functions use entropy. The entropy source is not an
66							explicit input in any case. All functions use the current entropy source
67							maintained by the C module. To select an entropy source
68							use the C function in that module, or alternatively
69							do nothing to use the default source.
70
71							=head2 Fundamental entropy extraction
72
73							=over
74
75							=item rand_bits(NBITS)
76
77							Returns NBITS bits of entropy, as a string of octets. If NBITS is
78							not a multiple of eight then the last octet in the string has its most
79							significant bits set to zero.
80
81							=cut
82
83							sub rand_bits($) {
84	42			42	1	14170	my($nbits) = @_;
85	42	100				296	croak "need a non-negative number of bits to dispense"
86							unless $nbits >= 0;
87	41					110	return entropy_source->get_bits($nbits);
88							}
89
90							=item rand_int(LIMIT)
91
92							LIMIT must be a positive integer. Returns a uniformly-distributed random
93							integer in the range [0, LIMIT). LIMIT may be either a native integer,
94							a C object, or an integer-valued C object;
95							the returned number is of the same type.
96
97							=cut
98
99							sub rand_int($) {
100	20640			20640	1	91631	my($limit) = @_;
101	20640	100				50836	croak "need a positive upper limit for random variable"
102							unless $limit > 0;
103	20639					58780	return entropy_source->get_int($limit);
104							}
105
106							=item rand_prob(PROB ...)
107
108							=item rand_prob(PROBS)
109
110							Returns a random integer selected with non-uniform probability. The
111							relative probabilities are supplied as a list of non-negative integers
112							(multiple PROB arguments) or a reference to an array of integers (the
113							PROBS argument). The relative probabilities may be native integers,
114							C objects, or integer-valued C objects;
115							they must all be of the same type. At least one probability value must
116							be positive.
117
118							The first relative probability value (the first PROB or the first element
119							of PROBS) is the relative probability of returning 0. The absolute
120							probability of returning 0 is this value divided by the total of all
121							the relative probability values. Similarly the second value controls
122							the probability of returning 1, and so on.
123
124							=cut
125
126							sub rand_prob(@) {
127	154	100	100	154	1	95373	my $probs = @_ == 1 && is_ref($_[0], "ARRAY") ? $_[0] : \@_;
128	154					211	my $total = 0;
129	154					401	for(my $i = @$probs; $i--; ) {
130	754					897	my $prob = $probs->[$i];
131	754	100				1675	croak "probabilities must be non-negative" unless $prob >= 0;
132	753					1504	$total += $prob;
133							}
134	153	100				432	croak "can't have nothing possible" if $total == 0;
135	152					411	for(my $i = @$probs; $i--; ) {
136	370					446	my $prob = $probs->[$i];
137	370					398	$total -= $prob;
138	370	100				1476	return $i if entropy_source->get_prob($total, $prob);
139							}
140							}
141
142							=back
143
144							=head2 Numbers
145
146							=over
147
148							=item rand_fix(NBITS)
149
150							Returns a uniformly-distributed random NBITS-bit fixed-point fraction in
151							the range [0, 1). That is, the result is a randomly-chosen multiple of
152							2^-NBITS, the multiplier being a random integer in the range [0, 2^NBITS).
153							The value is returned in the form of a native floating point number, so
154							NBITS can be at most one greater than the number of bits of significand
155							in the floating point format.
156
157							With NBITS = 48 the range of output values is the same as that of the
158							Unix C function.
159
160							=cut
161
162							sub rand_fix($) {
163	516			516	1	101117	my($nbits) = @_;
164	516	100				1475	croak "need a non-negative number of bits to dispense"
165							unless $nbits >= 0;
166	515	50				1056	croak "can't generate more than ".(significand_bits+1).
167							" bits of fixed-point fraction"
168							if $nbits > significand_bits+1;
169	515					595	my $frac = 0.0;
170	515					1179	for(my $pos = 24; $pos <= $nbits; $pos += 24) {
171	516					7514	$frac += mult_pow2(rand_int(1 << 24), -$pos);
172							}
173	515					8963	$frac += mult_pow2(rand_int(1 << ($nbits % 24)), -$nbits);
174	515					11299	return $frac;
175							}
176
177							=item rand([LIMIT])
178
179							Generates a random fixed-point fraction by C and then multiplies
180							it by LIMIT, returning the result. LIMIT defaults to 1, and if it
181							is 0 then that is also treated as 1. The length of the fixed-point
182							fraction is 48 bits, unless that can't be represented in the native
183							floating point type, in which case the longest possible fraction will
184							be generated instead.
185
186							This is a drop-in replacement for C: it produces exactly the
187							same range of output values, but using the current entropy source instead
188							of a sucky PRNG with linear relationships between successive outputs.
189							(C does the type of calculation described, but using the
190							PRNG C to generate the fixed-point fraction.) The details of
191							behaviour may change in the future if the behaviour of C
192							changes, to maintain the match.
193
194							Where the source of a module can't be readily modified, it can be made
195							to use this C by an incantation such as
196
197							*Foreign::Module::rand = \&Data::Entropy::Algorithms::rand;
198
199							This must be done before the module is loaded, most likely in a C
200							block. It is also possible to override C for all modules,
201							by performing this similarly early:
202
203							*CORE::GLOBAL::rand = \&Data::Entropy::Algorithms::rand;
204
205							This function should not be used in any new code, because the kind
206							of output supplied by C is hardly ever the right thing to use.
207							The C idiom to generate a random integer has non-uniform
208							probabilities of generating each possible value, except when C<$n> is a
209							power of two. For floating point numbers, C can't generate most
210							representable numbers in its output range, and the output is biased
211							towards zero. In new code use C to generate integers and
212							C to generate floating point numbers.
213
214							=cut
215
216	9			9		4802	use constant RAND_NBITS => 48 > significand_bits+1 ? significand_bits+1 : 48;
	9					20
	9					16572
217
218							sub rand(;$) {
219	258			258	1	137514	my($limit) = @_;
220	258	100	100			559	return rand_fix(RAND_NBITS) *
221							(!defined($limit) \|\| $limit == 0.0 ? 1.0 : $limit);
222							}
223
224							=item rand_flt(MIN, MAX)
225
226							Selects a uniformly-distributed real number (with infinite precision)
227							in the range [MIN, MAX] and then rounds this number to the nearest
228							representable floating point value, which it returns. (Actually it is
229							only I the function worked this way: in fact it never generates
230							the number with infinite precision. It selects between the representable
231							floating point values with the probabilities implied by this process.)
232
233							This can return absolutely any floating point value in the range [MIN,
234							MAX]; both MIN and MAX themselves are possible return values. All bits
235							of the floating point type are filled randomly, so the range of values
236							that can be returned depends on the details of the floating point format.
237							(See L for low-level floating point utilities.)
238
239							The function Cs if MIN and MAX are not both finite. If MIN is
240							greater than MAX then their roles are swapped: the order of the limit
241							parameters actually doesn't matter. If the limits are identical then
242							that value is always returned. As a special case, if the limits are
243							positive zero and negative zero then a zero will be returned with a
244							randomly-chosen sign.
245
246							=cut
247
248							sub rand_flt($$) {
249	11000			11000	1	6371939	my($a, $b) = @_;
250	11000	50	33			399273	croak "bounds for rand_flt() must be finite"
251							unless float_is_finite($a) && float_is_finite($b);
252	11000	100				535448	if($a == $b) {
253	2000	100	66			27211	return $_[rand_int(2)]
254							if $a == 0.0 && float_sign($a) ne float_sign($b);
255	1000					2459	return $_[0];
256							}
257	9000	100				28765	($a, $b) = ($b, $a) if abs($a) < abs($b);
258	9000	50				244935	my($prm_sign, $prm_max_exp, $prm_max_sgnf) =
259							$a == 0.0 ? ("+", min_normal_exp, 0.0) : float_parts($a);
260	9000	100				826254	my($b_sign, $b_exp, $b_sgnf) =
261							$b == 0.0 ? ("+", min_normal_exp, 0.0) : float_parts($b);
262	9000					170637	my($min_exp, $min_sgnf);
263	0					0	my($opp_max_exp, $opp_max_sgnf);
264	9000	100				26450	if($b_sign eq $prm_sign) {
265	6000					11696	($min_exp, $min_sgnf) = ($b_exp, $b_sgnf);
266	6000					10297	($opp_max_exp, $opp_max_sgnf) = (min_normal_exp, 0.0);
267							} else {
268	3000					4671	($min_exp, $min_sgnf) = (min_normal_exp, 0.0);
269	3000					4947	($opp_max_exp, $opp_max_sgnf) = ($b_exp, $b_sgnf);
270							}
271	9000					11680	TRY_AGAIN:
272							my $exp = $prm_max_exp;
273	9000					12118	my $bdone = significand_bits;
274	9000	50				32301	$bdone = 28 if $bdone > 28;
275	9000					16197	my $prm_frng = $prm_max_sgnf * (1 << $bdone);
276	9000	50				19402	if($prm_max_sgnf < 1.0) {
277							# subnormal limit
278	0					0	my $desired_rng = 1 << $bdone;
279	0					0	while($bdone != significand_bits) {
280	0					0	$bdone++;
281	0					0	$prm_frng += $prm_frng;
282	0	0				0	last if $prm_frng >= $desired_rng;
283							}
284							}
285	9000					14875	my $prm_rng = int($prm_frng);
286	9000	50				21631	$prm_rng++ if $prm_frng != $prm_rng;
287	9000					16102	my $min_b = $bdone - ($exp - $min_exp);
288	9000	100				30268	my $min_rng = $min_b >= 0 ? int(mult_pow2($min_sgnf, $min_b)) : 0;
289	9000					159768	my $opp_b = $bdone - ($exp - $opp_max_exp);
290	9000	100				22543	my $opp_frng = $opp_b >= 0 ? mult_pow2($opp_max_sgnf, $opp_b) : 0;
291	9000					82356	my $opp_rng = int($opp_frng);
292	9000	50				25920	$opp_rng++ if $opp_frng != $opp_rng;
293	9000					23281	my $n = $min_rng + rand_int($prm_rng - $min_rng + $opp_rng);
294	9000					19493	my($sg, $max_exp, $max_sgnf) = ($a, $prm_max_exp, $prm_max_sgnf);
295	9000	100				20227	if($n >= $prm_rng) {
296	1184					1412	$n -= $prm_rng;
297	1184					2080	($sg, $max_exp, $max_sgnf) = ($b, $opp_max_exp, $opp_max_sgnf);
298							}
299	9000		33			27516	while($n == 0 && $exp - $bdone - 1 >= min_normal_exp) {
300	0					0	$exp -= $bdone + 1;
301	0					0	$n = rand_int(2 << $bdone);
302							}
303	9000					19696	for(my $bit = 16; $bit; $bit >>= 1) {
304	45000	100	33			330314	if($bdone >= $bit && $exp - $bit >= min_normal_exp &&
			66
305							$n < (2 << ($bdone - $bit))) {
306	8440					9632	$bdone -= $bit;
307	8440					18569	$exp -= $bit;
308							}
309							}
310	9000	50				34533	goto TRY_AGAIN if $exp < $min_exp;
311	9000	100				16698	my $top_sgnf = $exp == $max_exp ? $max_sgnf : 2.0;
312	9000	100				25068	my $bot_sgnf = $exp == $min_exp ? $min_sgnf : 1.0;
313	9000					27163	my $sgnf = mult_pow2($n, -$bdone);
314	9000					291077	if(!have_subnormal && $exp == min_normal_exp && $sgnf < 1.0) {
315							$top_sgnf = 1.0;
316							$sgnf = 0.0;
317							} else {
318	9000					9864	$bot_sgnf = 1.0 if !have_subnormal && $exp == min_normal_exp &&
319							$bot_sgnf < 1.0;
320	9000					23798	while($bdone != significand_bits) {
321	9299					22181	my $bseg = significand_bits - $bdone;
322	9299	100				19393	$bseg = 28 if $bseg > 28;
323	9299					14317	$bdone += $bseg;
324	9299					20848	$sgnf += mult_pow2(rand_int(1 << $bseg), -$bdone);
325							}
326							}
327	9000	50	33			308397	goto TRY_AGAIN if $sgnf < $bot_sgnf \|\| $sgnf >= $top_sgnf;
328	9000	50	33			23123	$sgnf = $top_sgnf if $sgnf == $bot_sgnf && rand_int(2);
329	9000	50				31905	return copysign($sgnf == 0.0 ? 0.0 : mult_pow2($sgnf, $exp), $sg);
330							}
331
332							=back
333
334							=head2 Combinatorics
335
336							=over
337
338							=item pick(ITEM ...)
339
340							Randomly selects and returns one of the ITEMs. Each ITEM has equal
341							probability of being selected.
342
343							=item pick_r(ITEMS)
344
345							ITEMS must be a reference to an array. Randomly selects and returns
346							one of the elements of the array. Each element has equal probability
347							of being selected.
348
349							This is the same operation as that performed by C, but using
350							references to avoid expensive copying of arrays.
351
352							=cut
353
354							sub pick_r($) {
355	155			155	1	44692	my($a) = @_;
356	155	100	100			1427	croak "need a non-empty array to pick from"
357							unless is_ref($a, "ARRAY") && @$a;
358	152					356	return $a->[rand_int(@$a)];
359							}
360
361	77			77	1	46523	sub pick(@) { pick_r(\@_) }
362
363							=item choose(NCHOOSE, ITEM ...)
364
365							Randomly selects NCHOOSE of the ITEMs. Each ITEM has equal probability
366							of being selected. The chosen items are returned in a list in the same
367							order in which they appeared in the argument list.
368
369							=item choose_r(NCHOOSE, ITEMS)
370
371							ITEMS must be a reference to an array. Randomly selects NCHOOSE of
372							the elements in the array. Each element has equal probability of being
373							selected. Returns a reference to an array containing the chosen items
374							in the same order in which they appeared in the input array.
375
376							This is the same operation as that performed by C, but using
377							references to avoid expensive copying of arrays.
378
379							=cut
380
381							sub choose_r($$) {
382	51			51	1	21766	my($nchoose, $a) = @_;
383	51	100				583	croak "need a non-negative number of items to choose"
384							unless $nchoose >= 0;
385	49	100	100			955	croak "need a sufficiently large array to pick from"
386							unless is_ref($a, "ARRAY") && @$a >= $nchoose;
387	46					61	my $ntotal = @$a;
388	46					75	my $nleave = $ntotal - $nchoose;
389	46					57	my @chosen;
390	46					113	for(my $i = 0; $i != $ntotal; $i++) {
391	414	100				1002	if(entropy_source->get_prob($nleave, $nchoose)) {
392	128					246	push @chosen, $a->[$i];
393	128					367	$nchoose--;
394							} else {
395	286					777	$nleave--;
396							}
397							}
398	46					303	return \@chosen;
399							}
400
401	25			25	1	12996	sub choose(@) { @{choose_r(shift, \@_)} }
	25					82
402
403							=item shuffle(ITEM ...)
404
405							Reorders the ITEMs randomly, and returns them in a list in random order.
406							Each possible order has equal probability.
407
408							=item shuffle_r(ITEMS)
409
410							ITEMS must be a reference to an array. Reorders the elements of the
411							array randomly. Each possible order has equal probability. Returns a
412							reference to an array containing the elements in random order.
413
414							This is the same operation as that performed by C, but using
415							references to avoid expensive copying of arrays.
416
417							=cut
418
419							sub shuffle_r($) {
420	19			19	1	5782	my($a) = @_;
421	19	100				326	croak "need an array to shuffle"
422							unless is_ref($a, "ARRAY");
423	18					64	$a = [ @$a ];
424	18					62	for(my $i = @$a; $i > 1; ) {
425	126					322	my $j = rand_int($i--);
426	126					193	@{$a}[$i, $j] = @{$a}[$j, $i];
	126					14973
	126					210
427							}
428	18					146	return $a;
429							}
430
431	9			9	1	5023	sub shuffle(@) { @{shuffle_r(\@_)} }
	9					32
432
433							=back
434
435							=head1 SEE ALSO
436
437							L,
438							L
439
440							=head1 AUTHOR
441
442							Andrew Main (Zefram)
443
444							=head1 COPYRIGHT
445
446							Copyright (C) 2006, 2007, 2009, 2011
447							Andrew Main (Zefram)
448
449							=head1 LICENSE
450
451							This module is free software; you can redistribute it and/or modify it
452							under the same terms as Perl itself.
453
454							=cut
455
456							1;