File Coverage

blib/lib/Net/OnlineCode.pm

Criterion	Covered	Total	%
statement	110	190	57.8
branch	19	48	39.5
condition	0	3	0.0
subroutine	20	30	66.6
pod	0	17	0.0
total	149	288	51.7

line	stmt	bran	cond	sub	pod	time	code
1							#!/usr/bin/perl
2
3							package Net::OnlineCode;
4
5							# play nicely as a CPAN module
6
7	4			4		48863	use strict;
	4					10
	4					185
8	4			4		24	use warnings;
	4					8
	4					176
9	4			4		23	use vars qw(@ISA @EXPORT_OK @EXPORT %EXPORT_TAGS $VERSION);
	4					7
	4					445
10
11	4			4		26	use constant DEBUG => 0;
	4					6
	4					362
12	4			4		20	use constant ASSERT => 1;
	4					8
	4					972
13
14							require Exporter;
15
16							@ISA = qw(Exporter);
17
18							our @export_xor = qw (xor_strings safe_xor_strings fast_xor_strings);
19							our @export_default = qw();
20
21							%EXPORT_TAGS = ( all => [ @export_default, @export_xor ],
22							xor => [ @export_xor ],
23							);
24							@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
25							@EXPORT = ();
26							$VERSION = '0.03';
27
28							# Use XS for fast xors (TODO: make this optional)
29							require XSLoader;
30							XSLoader::load('Net::OnlineCode', $VERSION);
31
32							# on to our stuff ...
33
34
35							# Codec parameters
36							# q is the number of message blocks that each auxiliary block will
37							# link to
38							# e epsilon, the degree of "suboptimality". Unlike Reed-Solomon or
39							# Rabin's Information Dispersal Algorithm, Online Codes are not
40							# optimal. This means that slightly more data needs to be generated
41							# than either of these two codes. Also, whereas optimal codes
42							# guarantee that a certain fraction of the "check" blocks/digits
43							# suffice to reconstruct the original message, online codes only
44							# guarantee that it can be reconstructed with a certain
45							# probability
46							#
47							# Together with the number of blocks, n, these two variables define
48							# the online code such that (1+qe)n check blocks are sufficient to
49							# reconstruct the original message with a probability of 1 - (e/2) **
50							# (q+1).
51							#
52
53	4			4		25	use Carp;
	4					6
	4					373
54	4			4		3040	use POSIX qw(ceil floor);
	4					34720
	4					35
55	4			4		9068	use Digest::SHA qw(sha1 sha1_hex);
	4					16111
	4					519
56	4			4		36	use Fcntl;
	4					9
	4					13425
57
58
59							# Constructor for the base class
60							#
61							# This takes the three parameters that define the Online Code scheme,
62							# corrects the value of epsilon if needed (see below) and then derives
63							# the following:
64							#
65							# * max degree variable (F)
66							# * number of auxiliary blocks (0.55 *qen)
67							# * probability distribution p_1, p2, ... , p_F
68
69							sub new {
70
71	2			2	0	13	my $class = shift;
72							# The default parameters used here for q and e (epsilon) are as
73							# suggested in the paper "Rateless Codes and Big Downloads" by Petar
74							# Maymounkov and David Maziere. Note that the value of e may be
75							# overridden with a higher value if the lower value doesn't satisfy
76							# max_degree(epsilon) > ceil(0.55 * q.e.mblocks)
77	2					15	my %args = (
78							e => 0.01,
79							q => 3,
80							mblocks => undef,
81							expand_aux => 0,
82							e_warning => 0,
83
84							# We don't use or store any RNG parameter that's been
85							# passed into the constructor.
86							@_
87							);
88
89
90	2					8	my ($q,$e,$mblocks) = @args{qw(q e mblocks)};
91
92	2	50				7	unless (defined $args{mblocks}) {
93	0					0	carp __PACKAGE__ . ": mblocks => (# message blocks) must be set\n";
94	0					0	return undef;
95							}
96
97	2					2	print "Net::OnlineCode mblocks = $mblocks\n" if DEBUG;
98
99	2					3	my $P = undef;
100	2					3	my $e_changed = 0;
101
102							# how many auxiliary blocks would this scheme need?
103	2					7	my $ablocks = _count_auxiliary($q,$e,$mblocks);
104
105							# does epsilon value need updating?
106	2					4	my $f = _max_degree($e);
107
108	2	100				27	if ($f > $mblocks + $ablocks) {
109
110	1					2	$e_changed = 1;
111
112	1	50				3	if ($args{e_warning}) {
113	0					0	print "E CHANGED!!\nWas: $e\n";
114	0					0	print "Gave F value of $f\n";
115							}
116
117							# use a binary search to find a new epsilon such that
118							# get_max_degree($epsilon) <= mblocks + ablocks (ie, n)
119	1					2	my $epsilon = $e;
120
121							local *eval_f = sub {
122	11			11		6	my $t = shift;
123	11					31	return _max_degree(1/(1 + exp(-$t)));
124	1					5	};
125
126	1					4	my $l = -log(1/$e - 1);
127	1					2	my $r = $l + 1;
128
129							# expand right side of search until we get F <= n'
130	1					2	while (eval_f($r) > $mblocks + $ablocks) {
131							# $r = $l + ($r - $l) * 2;
132	1					3	$r = 2 * $r - $l;
133							}
134
135							# binary search between left and right to find a suitable lower
136							# value of epsilon still satisfying F <= n'
137	1					3	while ($r - $l > 0.01) {
138	8					8	my $m = ($l + $r) / 2;
139	8	100				10	if (eval_f($m) > $mblocks + $ablocks) {
140	3					7	$l = $m;
141							} else {
142	5					8	$r = $m;
143							}
144							}
145
146							# update e and ablocks
147	1					3	$epsilon = 1/(1 + exp(-$r));
148	1					2	$f = eval_f($r);
149							#$f=_max_degree($epsilon);
150	1	50				3	carp __PACKAGE__ . ": increased epsilon value from $e to $epsilon\n"
151							if $args{e_warning};
152	1					2	$e = $epsilon;
153	1					2	$ablocks = _count_auxiliary($q,$e,$mblocks);
154
155	1	50				5	if ($args{e_warning}) {
156
157	0					0	print "Is now: $e\n";
158	0					0	print "New F: $f\n";
159							}
160
161							}
162
163							# calculate the probability distribution
164	2					2	print "new: mblocks=$mblocks, ablocks=$ablocks, q=$q\n" if DEBUG;
165	2					7	$P = _probability_distribution($mblocks + $ablocks,$e);
166
167	2	50				9	die "Wrong number of elements in probability distribution (got "
168							. scalar(@$P) . ", expecting $f)\n"
169							unless @$P == $f;
170
171	2					323	my $self = { q => $q, e => $e, f => $f, P => $P,
172							mblocks => $mblocks, ablocks => $ablocks,
173							coblocks => $mblocks + $ablocks,
174							chblocks => 0, expand_aux=> $args{expand_aux},
175							e_changed => $e_changed, unique => {},
176							fisher_string => pack("L*", (0 .. $mblocks + $ablocks -1)),
177							};
178
179	2					68	print "expand_aux => $self->{expand_aux}\n" if DEBUG;
180
181	2					19	bless $self, $class;
182
183							}
184
185							# while it probably doesn't matter too much to the encoder whether the
186							# supplied e value needed to be changed, if the receiver plugs the
187							# received value of e into the constructor and it ends up changing,
188							# there will be a problem with receiving the file.
189							sub e_changed {
190	0			0	0	0	return shift ->{e_changed};
191							}
192
193							# convenience accessor functions
194							sub get_mblocks { # count message blocks; passed into new
195	2			2	0	11	return shift -> {mblocks};
196							}
197
198							sub get_ablocks { # count auxiliary blocks; set in new
199	2			2	0	12	return shift -> {ablocks};
200							}
201
202							sub get_coblocks { # count composite blocks
203	2			2	0	5	my $self = shift;
204	2					12	return $self->{mblocks} + $self->{ablocks};
205							}
206
207							# count checkblocks
208							sub get_chblocks {
209	0			0	0	0	return shift->{chblocks}
210							}
211
212							sub get_q { # q == reliability factor
213	0			0	0	0	return shift -> {q};
214							}
215
216							sub get_e { # e == suboptimality factor
217	2			2	0	700	return shift -> {e};
218							}
219
220							sub get_epsilon { # epsilon == e, as above
221	0			0	0	0	return shift -> {e};
222							}
223
224							sub get_f { # f == max (check block) degree
225	2			2	0	12	return shift -> {f};
226							}
227
228							sub get_P { # P == probability distribution
229	0			0	0	0	return shift -> {P}; # (array ref)
230							}
231
232
233							# "Private" routines
234
235							# calculate how many auxiliary blocks need to be generated for a given
236							# code setup
237							sub _count_auxiliary {
238	3			3		3	my ($q, $e, $n) = @_;
239
240	3					24	my $count = int(ceil(0.55 * $q * $e * $n));
241	3					4	my $delta = 0.55 * $e;
242
243	3					4	warn "failure probability " . ($delta ** $q) . "\n" if DEBUG;
244							#$count = int(ceil($q * $delta * $n));
245
246							# Is it better to change q or the number of aux blocks if q is too
247							# big? It's certainly easier to keep the q value and increase the
248							# number of aux blocks, as I'm doing here, and may even be the right
249							# thing to do rather than ignoring the user's q value.
250	3	50				6	if ($count < $q) {
251	0					0	$count = $q;
252	0					0	warn "updated _count_auxiliary output value to $q\n";
253							}
254	3					5	return $count;
255							}
256
257							# The max degree specifies the maximum number of blocks to be XORed
258							# together. This parameter is named F.
259							sub _max_degree {
260
261	15			15		11	my $epsilon = shift;
262
263	15					35	my $quotient = (2 * log ($epsilon / 2)) /
264							(log (1 - $epsilon / 2));
265
266	15					8	my $delta = 0.55 * $epsilon;
267							#$quotient = (log ($epsilon) + log($delta)) / (log (1 - $epsilon));
268
269	15					36	return int(ceil($quotient));
270							}
271
272							# Functions relating to probability distribution
273							#
274							# From the wikipedia page:
275							#
276							# http://en.wikipedia.org/wiki/Online_codes
277							#
278							# During the inner coding step the algorithm selects some number of
279							# composite messages at random and XORs them together to form a check
280							# block. In order for the algorithm to work correctly, both the number
281							# of blocks to be XORed together and their distribution over composite
282							# blocks must follow a particular probability distribution.
283							#
284							# Consult the references for the implementation details.
285							#
286							# The probability distribution is designed to map a random number in
287							# the range [0,1) and return a degree i between 1 and F. The
288							# probability distribution depends on a single input, n, which is the
289							# number of blocks in the original message. The fixed values for q and
290							# epsilon are also used.
291							#
292							# This code includes two changes from that described in the wikipedia
293							# page.
294							#
295							# 1) Rather than returning an array of individual probabilities p_i,
296							# the array includes the cumulative probabilities. For example, if
297							# the p_i probabilities were:
298							# (0.1, 0.2, 0.3, 0.2, 0.1, 0.1)
299							# then the returned array would be:
300							# (0.1, 0.3, 0.6, 0.8, 0.9, 1) (last element always has value 1)
301							# This is done simply to make selecting a value based on the random
302							# number more efficient, but the underlying probability distribution
303							# is the same.
304							# 2) Handling edge cases. These are:
305							# a) the case where n = 1; and
306							# b) the case where F > n
307							# In both cases, the default value for epsilon cannot be used, so a
308							# more suitable value is calculated.
309							#
310							# The return value is an array containing:
311							#
312							# * the max degree F
313							# * a possibly updated value of epsilon
314							# * the F values of the (cumulative) probability distribution
315
316							sub _probability_distribution {
317
318	2			2		2	my ($nblocks,$epsilon) = @_; # nblocks = number of composite blocks!
319
320							# after code reorganisation, this shouldn't happen:
321	2	50				5	if ($nblocks == 1) {
322	0					0	croak "BUG: " . __PACKAGE__ ." - number of composite blocks = 1\n";
323	0					0	return (1, 0, 1);
324							}
325
326	2					3	print "generating probability distribution from nblocks $nblocks, e $epsilon\n"
327							if DEBUG;
328
329	2					2	my $f = _max_degree($epsilon);
330
331							# after code reorganisation, this shouldn't happen:
332	2	50				6	if ($f > $nblocks) {
333	0					0	croak "BUG: " .__PACKAGE__ . " - epsilon still too small!\n";
334							}
335
336							# probability distribution
337
338							# Calculate the sum of the sequence:
339							#
340							# 1 + 1/F
341							# p_1 = 1 - ---------
342							# 1 + e
343							#
344							#
345							# F . (1 - p_1)
346							# p_i = ---------------------
347							# (F - 1) . (i^2 - i)
348							#
349							# Since the i term is the only thing that changes for each p_i, I
350							# optimise the calculation by keeping a fixed term involving only p
351							# and f with a variable one involving i, then dividing as
352							# appropriate.
353
354	2					5	my $p1 = 1 - (1 + 1/$f)/(1 + $epsilon);
355	2					4	my $pfterm = (1-$p1) * $f / ($f - 1);
356
357	2	50				6	die "p1 is negative\n" if $p1 < 0;
358
359							# hard-code simple cases where f = 1 or 2
360	2	50				9	if ($f == 1) {
		50
361	0					0	return [1];
362							} elsif ($f == 2) {
363	0					0	return [$p1, 1];
364							}
365
366							# calculate sum(p_i) for 2 <= i < F.
367							# p_i=F is simply set to 1 to avoid rounding errors in the sum
368	2					3	my $sum = $p1;
369	2					3	my @P = ($sum);
370
371	2					7	my $i = 2;
372	2					6	while ($i < $f) {
373	2515					1842	my $iterm = $i * ($i - 1);
374	2515					1749	my $p_i = $pfterm / $iterm;
375
376	2515					1605	$sum += $p_i;
377
378	2515	50				3122	die "p_$i is negative\n" if $p_i < 0;
379
380	2515					1967	push @P, $sum;
381	2515					3282	$i++;
382							}
383
384	2					4	if (DEBUG) {
385							# Make sure of the assumption that the sum of terms approaches 1.
386							# If the "rounding error" below is not a very small number, we
387							# know there is a problem with the assumption!
388							my $p_last = $sum + $pfterm / ($f * $f - $f);
389							my $absdiff = abs (1 - $p_last);
390							warn "Absolute difference of 1,sum to p_F = $absdiff\n" if $absdiff >1e-8;
391							}
392
393	2					262	return [@P,1];
394							}
395
396
397							# Fisher-Yates shuffle algorithm, based on recipe 4.17 from the Perl
398							# Cookbook. Takes an input array it randomises the order (ie,
399							# shuffles) and then truncates the array to "picks" elements.
400							#
401							# This is much more efficient than the usual approach of "keep picking
402							# new elements until we get k distinct ones" particularly as k
403							# approaches the size of the array. That algorithm could make
404							# exponentially many calls to rand, whereas this just makes one call
405							# per item to be picked.
406							#
407							#
408
409							sub fisher_yates_shuffle {
410
411	0			0	0	0	my ($rng, $array, $picks) = @_;
412
413	0					0	if (ASSERT) {
414	0	0				0	die "fisher_yates_shuffle: 1st arg not an RNG object\n"
415							unless ref($rng);
416							}
417
418							# length in 32-bit words
419	0					0	my $len = length($array) >> 2;
420
421							# Change recipe to pick subset of list
422	0	0	0			0	$picks=$len unless
423							defined($picks) and $picks >= 0;
424
425							# algorithm fills picks into the end of the array
426	0					0	my $i=$len;
427	0					0	while (--$i >= $len - $picks) {
428	0					0	my $j=int($rng->rand($i + 1)); # range [0,$i]
429							#next if $i==$j; # not worth checking, probably
430							# @$array[$i,$j]=@$array[$j,$i]
431	0					0	my $tmp1 = substr $array, $i << 2, 4;
432	0					0	my $tmp2 = substr $array, $j << 2, 4;
433	0					0	substr $array, $i << 2, 4, $tmp2;
434	0					0	substr $array, $j << 2, 4, $tmp1;
435							}
436
437	0					0	return (unpack "L*", substr $array, ($len - $picks) << 2);
438							}
439
440							#
441							# Routine to calculate the auxiliary block -> message block* mapping.
442							# The passed rng object must already have been seeded, and both sender
443							# and receiver should use the same seed. Returns [[..],[..],..]
444							# representing which message blocks each of the auxiliary block is
445							# composed of.
446							#
447
448							sub auxiliary_mapping {
449
450	0			0	0	0	my $self = shift;
451	0					0	my $rng = shift;
452
453	0	0				0	croak "auxiliary_mapping: rng is not a reference\n" unless ref($rng);
454
455							#print "auxiliary_mapping: entering RNG value: " . ($rng->as_hex). "\n";
456
457							# hash slices: powerful, but syntax is sometimes confusing
458	0					0	my ($mblocks,$ablocks,$q) = @{$self}{"mblocks","ablocks","q"};
	0					0
459
460							# make sure hash(ref) slice above actually did something sensible:
461							# die "weird mblocks/ablocks" unless $nblocks + $aux_blocks >= 2;
462
463							# I made a big mistake when reading the description for creating aux
464							# blocks. What I implemented first (in the commented-out section
465							# below) was to link each of the auxiliary blocks to q message
466							# blocks. What I should have done was to link each message block
467							# to q auxiliary blocks. As a result, it was taking much more than
468							# the expected number of check blocks to decode the message.
469
470							# as a result of the new algorithm, it makes sense to work out
471							# reciprocal links between message blocks and auxiliary blocks
472							# within the base class. Storing them here won't work out very well,
473							# though: the encoder doesn't care about the message block to aux
474							# block mapping, so it would be a waste of memory, but more
475							# importantly, the decoder object stores all mappings in a private
476							# GraphDecoder object (so duplicating the structure here would be a
477							# waste).
478
479							# I will make one change to the output, though: instead of just
480							# returning the mappings for the 0.55qen auxiliary blocks, I will
481							# return a list of message block and auxiliary block mappings. The
482							# encoder and decoder will have to be changed: encoder immediately
483							# splices the array to remove unwanted message block mappings, while
484							# the decoder will be simplified by only having to pass the full
485							# list to the graph decoder (which will have to be modified
486							# appropriately).
487
488	0					0	my $aux_mapping = [];
489
490	0					0	my $ab_string = pack "L*", ($mblocks .. $mblocks + $ablocks -1);
491
492							# list of empty hashes
493	0					0	my @hashes;
494	0					0	for (0 .. $mblocks + $ablocks -1) { $hashes[$_] = {}; }
	0					0
495
496	0					0	for my $msg (0 .. $mblocks - 1) {
497							# list of all aux block indices
498
499	0					0	foreach my $aux (fisher_yates_shuffle($rng, $ab_string, $q)) {
500	0					0	$hashes[$aux]->{$msg}=undef;
501	0					0	$hashes[$msg]->{$aux}=undef;
502							}
503							}
504
505							# convert list of hashes into a list of lists
506	0					0	for my $i (0 .. $mblocks + $ablocks -1) {
507	0					0	print "map $i: " . (join " ", keys %{$hashes[$i]}) . "\n" if DEBUG;
508	0					0	push @$aux_mapping, [ keys %{$hashes[$i]} ];
	0					0
509							}
510
511							# save and return aux_mapping
512	0					0	$self->{aux_mapping} = $aux_mapping;
513							}
514
515							# Until I get the auto expand_aux working, this will have to do
516							sub blklist_to_msglist {
517
518	0			0	0	0	my ($self,@xor_list) = @_;
519
520	0					0	my $mblocks = $self->{mblocks};
521
522	0					0	my %blocks;
523	0					0	while (@xor_list) {
524	0					0	my $entry = shift(@xor_list);
525	0	0				0	if ($entry < $mblocks) { # is it a message block index?
526							# toggle entry in the hash
527	0	0				0	if (exists($blocks{$entry})) {
528	0					0	delete $blocks{$entry};
529							} else {
530	0					0	$blocks{$entry}= undef;
531							}
532							} else {
533							# aux block : push all message blocks it's composed of
534	0					0	my @expansion = @{$self->{aux_mapping}->[$entry]};
	0					0
535	0					0	if (DEBUG) {
536							print "expand_aux: expanding $entry to " .
537							(join " ", @expansion) . "\n";
538							}
539	0					0	push @xor_list, @expansion;
540							}
541							}
542	0					0	return keys %blocks;
543							}
544
545							# Calculate the composition of a single check block based on the
546							# supplied RNG. Returns a reference to a list of composite blocks
547							# indices.
548
549							sub checkblock_mapping {
550
551	0			0	0	0	my $self = shift;
552	0					0	my $rng = shift;
553
554	0	0				0	croak "rng is not an object reference\n" unless ref($rng);
555
556	0					0	my ($mblocks,$coblocks,$P) = @{$self}{"mblocks","coblocks","P"};
	0					0
557	0					0	my @coblocks;
558
559							# use weighted distribution to find how many blocks to link
560	0					0	my $i = 0;
561	0					0	my $r = $rng->rand;
562	0					0	++$i while($r > $P->[$i]); # terminates since r < P[last]
563	0					0	++$i;
564
565							# select i composite blocks uniformly
566	0					0	@coblocks = fisher_yates_shuffle($rng, $self->{fisher_string} , $i);
567
568	0					0	if (ASSERT) {
569	0	0				0	die "fisher_yates_shuffle: created empty check block\n!" unless @coblocks;
570							}
571
572	0					0	print "CHECKblock mapping: " . (join " ", @coblocks) . "\n" if DEBUG;
573
574	0					0	return \@coblocks;
575
576							}
577
578							# non-method sub for xoring a source string (passed by reference) with
579							# one or more target strings. I may reimplement this using XS later to
580							# make it more efficient, but will keep a pure-perl version with this
581							# name.
582							sub safe_xor_strings {
583
584	4			4	0	25	my $source = shift;
585
586							# die if user forgot to pass in a reference (to allow updating) or
587							# called $self->safe_xor_strings by mistake
588	4	50				18	croak "xor_strings: arg 1 should be a reference to a SCALAR!\n"
589							unless ref($source) eq "SCALAR";
590
591	4					8	my $len = length ($$source);
592
593	4	50				13	croak "xor_strings: source string can't have zero length!\n"
594							unless $len;
595
596	4					10	foreach my $target (@_) {
597	4	50				15	croak "xor_strings: targets not all same size as source\n"
598							unless length($target) == $len;
599	4					11	map { substr ($$source, $_, 1) ^= substr ($target, $_, 1) }
	28					95
600							(0 .. $len-1);
601							}
602
603	4					24	return $$source;
604							}
605
606							# Later, xor_strings could be replaced with an C version with reduced
607							# error checking, so make a backward-compatible version and an
608							# explicit fast/unsafe version.
609	0			0	0		sub xor_strings { safe_xor_strings(@_) }
610							#sub fast_xor_strings { safe_xor_strings(@_) } # implemented in OnlineCode.xs.
611
612
613							1;
614
615							__END__