File Coverage

blib/lib/Data/Entropy/Algorithms.pm

Criterion	Covered	Total	%
statement	134	141	95.0
branch	61	74	82.4
condition	21	33	63.6
subroutine	21	21	100.0
pod	12	12	100.0
total	249	281	88.6

line	stmt	bran	cond	sub	pod	time	code
1							=head1 NAME
2
3							Data::Entropy::Algorithms - basic entropy-using algorithms
4
5							=head1 SYNOPSIS
6
7							use Data::Entropy::Algorithms
8							qw(rand_bits rand_int rand_prob);
9
10							$str = rand_bits(17);
11							$i = rand_int(12345);
12							$i = rand_int(Math::BigInt->new("1000000000000"));
13							$j = rand_prob(1, 2, 3);
14							$j = rand_prob([ 1, 2, 3 ]);
15
16							use Data::Entropy::Algorithms qw(rand_fix rand rand_flt);
17
18							$x = rand_fix(48);
19							$x = rand(7);
20							$x = rand_flt(0.0, 7.0);
21
22							use Data::Entropy::Algorithms
23							qw(pick pick_r choose choose_r shuffle shuffle_r);
24
25							$item = pick($item0, $item1, $item2);
26							$item = pick_r(\@items);
27							@chosen = choose(3, $item0, $item1, $item2, $item3, $item4);
28							$chosen = choose_r(3, \@items);
29							@shuffled = shuffle($item0, $item1, $item2, $item3, $item4);
30							$shuffled = shuffle_r(\@items);
31
32							=head1 DESCRIPTION
33
34							This module contains a collection of fundamental algorithms that
35							use entropy. They all use the entropy source mechanism described in
36							L.
37
38							=cut
39
40							package Data::Entropy::Algorithms;
41
42	10			10		107739	{ use 5.006; }
	10					51
43	10			10		56	use warnings;
	10					24
	10					670
44	10			10		67	use strict;
	10					18
	10					379
45
46	10			10		48	use Carp qw(croak);
	10					20
	10					699
47	10			10		386	use Data::Entropy qw(entropy_source);
	10					18
	10					796
48	10					1407	use Data::Float 0.008 qw(
49							have_subnormal min_normal_exp significand_bits
50							float_is_finite float_parts float_sign mult_pow2 copysign
51	10			10		5786	);
	10					81197
52	10			10		79	use Params::Classify 0.000 qw(is_ref);
	10					142
	10					686
53
54							our $VERSION = "0.008";
55
56	10			10		50	use parent "Exporter";
	10					19
	10					73
57							our @EXPORT_OK = qw(
58							rand_bits rand_int rand_prob
59							rand_fix rand rand_flt
60							pick_r pick choose_r choose shuffle_r shuffle
61							);
62
63							=head1 FUNCTIONS
64
65							All of these functions use entropy. The entropy source is not an
66							explicit input in any case. All functions use the current entropy source
67							maintained by the C module. To select an entropy source
68							use the C function in that module, or alternatively
69							do nothing to use the default source.
70
71							=head2 Fundamental entropy extraction
72
73							=over
74
75							=item rand_bits(NBITS)
76
77							Returns NBITS bits of entropy, as a string of octets. If NBITS is
78							not a multiple of eight then the last octet in the string has its most
79							significant bits set to zero.
80
81							=cut
82
83							sub rand_bits($) {
84	42			42	1	31811	my($nbits) = @_;
85	42	100				349	croak "need a non-negative number of bits to dispense"
86							unless $nbits >= 0;
87	41					146	return entropy_source->get_bits($nbits);
88							}
89
90							=item rand_int(LIMIT)
91
92							LIMIT must be a positive integer. Returns a uniformly-distributed random
93							integer in the range [0, LIMIT). LIMIT may be either a native integer,
94							a C object, or an integer-valued C object;
95							the returned number is of the same type.
96
97							=cut
98
99							sub rand_int($) {
100	20645			20645	1	99951	my($limit) = @_;
101	20645	100				49427	croak "need a positive upper limit for random variable"
102							unless $limit > 0;
103	20644					65320	return entropy_source->get_int($limit);
104							}
105
106							=item rand_prob(PROB ...)
107
108							=item rand_prob(PROBS)
109
110							Returns a random integer selected with non-uniform probability. The
111							relative probabilities are supplied as a list of non-negative integers
112							(multiple PROB arguments) or a reference to an array of integers (the
113							PROBS argument). The relative probabilities may be native integers,
114							C objects, or integer-valued C objects;
115							they must all be of the same type. At least one probability value must
116							be positive.
117
118							The first relative probability value (the first PROB or the first element
119							of PROBS) is the relative probability of returning 0. The absolute
120							probability of returning 0 is this value divided by the total of all
121							the relative probability values. Similarly the second value controls
122							the probability of returning 1, and so on.
123
124							=cut
125
126							sub rand_prob(@) {
127	154	100	100	154	1	149889	my $probs = @_ == 1 && is_ref($_[0], "ARRAY") ? $_[0] : \@_;
128	154					380	my $total = 0;
129	154					512	for(my $i = @$probs; $i--; ) {
130	754					1295	my $prob = $probs->[$i];
131	754	100				1931	croak "probabilities must be non-negative" unless $prob >= 0;
132	753					1649	$total += $prob;
133							}
134	153	100				538	croak "can't have nothing possible" if $total == 0;
135	152					454	for(my $i = @$probs; $i--; ) {
136	370					689	my $prob = $probs->[$i];
137	370					631	$total -= $prob;
138	370	100				1045	return $i if entropy_source->get_prob($total, $prob);
139							}
140							}
141
142							=back
143
144							=head2 Numbers
145
146							=over
147
148							=item rand_fix(NBITS)
149
150							Returns a uniformly-distributed random NBITS-bit fixed-point fraction in
151							the range [0, 1). That is, the result is a randomly-chosen multiple of
152							2^-NBITS, the multiplier being a random integer in the range [0, 2^NBITS).
153							The value is returned in the form of a native floating point number, so
154							NBITS can be at most one greater than the number of bits of significand
155							in the floating point format.
156
157							With NBITS = 48 the range of output values is the same as that of the
158							Unix C function.
159
160							=cut
161
162							sub rand_fix($) {
163	516			516	1	198553	my($nbits) = @_;
164	516	100				1666	croak "need a non-negative number of bits to dispense"
165							unless $nbits >= 0;
166	515	50				1201	croak "can't generate more than ".(significand_bits+1).
167							" bits of fixed-point fraction"
168							if $nbits > significand_bits+1;
169	515					815	my $frac = 0.0;
170	515					1342	for(my $pos = 24; $pos <= $nbits; $pos += 24) {
171	516					10408	$frac += mult_pow2(rand_int(1 << 24), -$pos);
172							}
173	515					11240	$frac += mult_pow2(rand_int(1 << ($nbits % 24)), -$nbits);
174	515					10759	return $frac;
175							}
176
177							=item rand([LIMIT])
178
179							Generates a random fixed-point fraction by C and then multiplies
180							it by LIMIT, returning the result. LIMIT defaults to 1, and if it
181							is 0 then that is also treated as 1. The length of the fixed-point
182							fraction is 48 bits, unless that can't be represented in the native
183							floating point type, in which case the longest possible fraction will
184							be generated instead.
185
186							This is a drop-in replacement for C: it produces exactly the
187							same range of output values, but using the current entropy source instead
188							of a sucky PRNG with linear relationships between successive outputs.
189							(C does the type of calculation described, but using the
190							PRNG C to generate the fixed-point fraction.) The details of
191							behaviour may change in the future if the behaviour of C
192							changes, to maintain the match.
193
194							Where the source of a module can't be readily modified, it can be made
195							to use this C by an incantation such as
196
197							*Foreign::Module::rand = \&Data::Entropy::Algorithms::rand;
198
199							This must be done before the module is loaded, most likely in a C
200							block. It is also possible to override C for all modules,
201							by performing this similarly early:
202
203							*CORE::GLOBAL::rand = \&Data::Entropy::Algorithms::rand;
204
205							This function should not be used in any new code, because the kind
206							of output supplied by C is hardly ever the right thing to use.
207							The C idiom to generate a random integer has non-uniform
208							probabilities of generating each possible value, except when C<$n> is a
209							power of two. For floating point numbers, C can't generate most
210							representable numbers in its output range, and the output is biased
211							towards zero. In new code use C to generate integers and
212							C to generate floating point numbers.
213
214							=cut
215
216	10			10		4913	use constant RAND_NBITS => 48 > significand_bits+1 ? significand_bits+1 : 48;
	10					21
	10					14062
217
218							sub rand(;$) {
219	258			258	1	287637	my($limit) = @_;
220	258	100	100			826	return rand_fix(RAND_NBITS) *
221							(!defined($limit) \|\| $limit == 0.0 ? 1.0 : $limit);
222							}
223
224							=item rand_flt(MIN, MAX)
225
226							Selects a uniformly-distributed real number (with infinite precision)
227							in the range [MIN, MAX] and then rounds this number to the nearest
228							representable floating point value, which it returns. (Actually it is
229							only I the function worked this way: in fact it never generates
230							the number with infinite precision. It selects between the representable
231							floating point values with the probabilities implied by this process.)
232
233							This can return absolutely any floating point value in the range [MIN,
234							MAX]; both MIN and MAX themselves are possible return values. All bits
235							of the floating point type are filled randomly, so the range of values
236							that can be returned depends on the details of the floating point format.
237							(See L for low-level floating point utilities.)
238
239							The function Cs if MIN and MAX are not both finite. If MIN is
240							greater than MAX then their roles are swapped: the order of the limit
241							parameters actually doesn't matter. If the limits are identical then
242							that value is always returned. As a special case, if the limits are
243							positive zero and negative zero then a zero will be returned with a
244							randomly-chosen sign.
245
246							=cut
247
248							sub rand_flt($$) {
249	11000			11000	1	5778002	my($a, $b) = @_;
250	11000	50	33			395702	croak "bounds for rand_flt() must be finite"
251							unless float_is_finite($a) && float_is_finite($b);
252	11000	100				524480	if($a == $b) {
253	2000	100	66			31550	return $_[rand_int(2)]
254							if $a == 0.0 && float_sign($a) ne float_sign($b);
255	1000					2568	return $_[0];
256							}
257	9000	100				34184	($a, $b) = ($b, $a) if abs($a) < abs($b);
258	9000	50				204563	my($prm_sign, $prm_max_exp, $prm_max_sgnf) =
259							$a == 0.0 ? ("+", min_normal_exp, 0.0) : float_parts($a);
260	9000	100				935139	my($b_sign, $b_exp, $b_sgnf) =
261							$b == 0.0 ? ("+", min_normal_exp, 0.0) : float_parts($b);
262	9000					212877	my($min_exp, $min_sgnf);
263	9000					0	my($opp_max_exp, $opp_max_sgnf);
264	9000	100				24633	if($b_sign eq $prm_sign) {
265	6000					12807	($min_exp, $min_sgnf) = ($b_exp, $b_sgnf);
266	6000					11538	($opp_max_exp, $opp_max_sgnf) = (min_normal_exp, 0.0);
267							} else {
268	3000					7502	($min_exp, $min_sgnf) = (min_normal_exp, 0.0);
269	3000					6876	($opp_max_exp, $opp_max_sgnf) = ($b_exp, $b_sgnf);
270							}
271	9000					16586	TRY_AGAIN:
272							my $exp = $prm_max_exp;
273	9000					17863	my $bdone = significand_bits;
274	9000	50				24596	$bdone = 28 if $bdone > 28;
275	9000					22739	my $prm_frng = $prm_max_sgnf * (1 << $bdone);
276	9000	50				24344	if($prm_max_sgnf < 1.0) {
277							# subnormal limit
278	0					0	my $desired_rng = 1 << $bdone;
279	0					0	while($bdone != significand_bits) {
280	0					0	$bdone++;
281	0					0	$prm_frng += $prm_frng;
282	0	0				0	last if $prm_frng >= $desired_rng;
283							}
284							}
285	9000					16947	my $prm_rng = int($prm_frng);
286	9000	50				20661	$prm_rng++ if $prm_frng != $prm_rng;
287	9000					15862	my $min_b = $bdone - ($exp - $min_exp);
288	9000	100				24414	my $min_rng = $min_b >= 0 ? int(mult_pow2($min_sgnf, $min_b)) : 0;
289	9000					149617	my $opp_b = $bdone - ($exp - $opp_max_exp);
290	9000	100				22843	my $opp_frng = $opp_b >= 0 ? mult_pow2($opp_max_sgnf, $opp_b) : 0;
291	9000					111277	my $opp_rng = int($opp_frng);
292	9000	50				21937	$opp_rng++ if $opp_frng != $opp_rng;
293	9000					26266	my $n = $min_rng + rand_int($prm_rng - $min_rng + $opp_rng);
294	9000					24168	my($sg, $max_exp, $max_sgnf) = ($a, $prm_max_exp, $prm_max_sgnf);
295	9000	100				26853	if($n >= $prm_rng) {
296	1182					2641	$n -= $prm_rng;
297	1182					3142	($sg, $max_exp, $max_sgnf) = ($b, $opp_max_exp, $opp_max_sgnf);
298							}
299	9000		33			31846	while($n == 0 && $exp - $bdone - 1 >= min_normal_exp) {
300	0					0	$exp -= $bdone + 1;
301	0					0	$n = rand_int(2 << $bdone);
302							}
303	9000					20963	for(my $bit = 16; $bit; $bit >>= 1) {
304	45000	100	33			216308	if($bdone >= $bit && $exp - $bit >= min_normal_exp &&
			66
305							$n < (2 << ($bdone - $bit))) {
306	8444					14971	$bdone -= $bit;
307	8444					21960	$exp -= $bit;
308							}
309							}
310	9000	50				19921	goto TRY_AGAIN if $exp < $min_exp;
311	9000	100				21403	my $top_sgnf = $exp == $max_exp ? $max_sgnf : 2.0;
312	9000	100				28020	my $bot_sgnf = $exp == $min_exp ? $min_sgnf : 1.0;
313	9000					27178	my $sgnf = mult_pow2($n, -$bdone);
314	9000					283445	if(!have_subnormal && $exp == min_normal_exp && $sgnf < 1.0) {
315							$top_sgnf = 1.0;
316							$sgnf = 0.0;
317							} else {
318	9000					12585	$bot_sgnf = 1.0 if !have_subnormal && $exp == min_normal_exp &&
319							$bot_sgnf < 1.0;
320	9000					20726	while($bdone != significand_bits) {
321	9304					25812	my $bseg = significand_bits - $bdone;
322	9304	100				21383	$bseg = 28 if $bseg > 28;
323	9304					14713	$bdone += $bseg;
324	9304					22204	$sgnf += mult_pow2(rand_int(1 << $bseg), -$bdone);
325							}
326							}
327	9000	50	33			341680	goto TRY_AGAIN if $sgnf < $bot_sgnf \|\| $sgnf >= $top_sgnf;
328	9000	50	33			24953	$sgnf = $top_sgnf if $sgnf == $bot_sgnf && rand_int(2);
329	9000	50				24101	return copysign($sgnf == 0.0 ? 0.0 : mult_pow2($sgnf, $exp), $sg);
330							}
331
332							=back
333
334							=head2 Combinatorics
335
336							=over
337
338							=item pick(ITEM ...)
339
340							Randomly selects and returns one of the ITEMs. Each ITEM has equal
341							probability of being selected.
342
343							=item pick_r(ITEMS)
344
345							ITEMS must be a reference to an array. Randomly selects and returns
346							one of the elements of the array. Each element has equal probability
347							of being selected.
348
349							This is the same operation as that performed by C, but using
350							references to avoid expensive copying of arrays.
351
352							=cut
353
354							sub pick_r($) {
355	155			155	1	71038	my($a) = @_;
356	155	100	100			1413	croak "need a non-empty array to pick from"
357							unless is_ref($a, "ARRAY") && @$a;
358	152					442	return $a->[rand_int(@$a)];
359							}
360
361	77			77	1	66009	sub pick(@) { pick_r(\@_) }
362
363							=item choose(NCHOOSE, ITEM ...)
364
365							Randomly selects NCHOOSE of the ITEMs. Each ITEM has equal probability
366							of being selected. The chosen items are returned in a list in the same
367							order in which they appeared in the argument list.
368
369							=item choose_r(NCHOOSE, ITEMS)
370
371							ITEMS must be a reference to an array. Randomly selects NCHOOSE of
372							the elements in the array. Each element has equal probability of being
373							selected. Returns a reference to an array containing the chosen items
374							in the same order in which they appeared in the input array.
375
376							This is the same operation as that performed by C, but using
377							references to avoid expensive copying of arrays.
378
379							=cut
380
381							sub choose_r($$) {
382	51			51	1	14825	my($nchoose, $a) = @_;
383	51	100				555	croak "need a non-negative number of items to choose"
384							unless $nchoose >= 0;
385	49	100	100			694	croak "need a sufficiently large array to pick from"
386							unless is_ref($a, "ARRAY") && @$a >= $nchoose;
387	46					70	my $ntotal = @$a;
388	46					73	my $nleave = $ntotal - $nchoose;
389	46					69	my @chosen;
390	46					126	for(my $i = 0; $i != $ntotal; $i++) {
391	414	100				584	if(entropy_source->get_prob($nleave, $nchoose)) {
392	128					210	push @chosen, $a->[$i];
393	128					202	$nchoose--;
394							} else {
395	286					416	$nleave--;
396							}
397							}
398	46					254	return \@chosen;
399							}
400
401	25			25	1	12011	sub choose(@) { @{choose_r(shift, \@_)} }
	25					66
402
403							=item shuffle(ITEM ...)
404
405							Reorders the ITEMs randomly, and returns them in a list in random order.
406							Each possible order has equal probability.
407
408							=item shuffle_r(ITEMS)
409
410							ITEMS must be a reference to an array. Reorders the elements of the
411							array randomly. Each possible order has equal probability. Returns a
412							reference to an array containing the elements in random order.
413
414							This is the same operation as that performed by C, but using
415							references to avoid expensive copying of arrays.
416
417							=cut
418
419							sub shuffle_r($) {
420	19			19	1	4589	my($a) = @_;
421	19	100				296	croak "need an array to shuffle"
422							unless is_ref($a, "ARRAY");
423	18					42	$a = [ @$a ];
424	18					48	for(my $i = @$a; $i > 1; ) {
425	126					191	my $j = rand_int($i--);
426	126					172	@{$a}[$i, $j] = @{$a}[$j, $i];
	126					293
	126					164
427							}
428	18					115	return $a;
429							}
430
431	9			9	1	5474	sub shuffle(@) { @{shuffle_r(\@_)} }
	9					29
432
433							=back
434
435							=head1 SEE ALSO
436
437							L,
438							L
439
440							=head1 AUTHOR
441
442							Andrew Main (Zefram)
443
444							Maintained by Robert Rothenberg
445
446							=head1 COPYRIGHT
447
448							Copyright (C) 2006, 2007, 2009, 2011, 2025
449							Andrew Main (Zefram)
450
451							=head1 LICENSE
452
453							This module is free software; you can redistribute it and/or modify it
454							under the same terms as Perl itself.
455
456							=cut
457
458							1;