File Coverage

blib/lib/Algorithm/BitVector.pm

Criterion	Covered	Total	%
statement	114	664	17.1
branch	32	218	14.6
condition	20	155	12.9
subroutine	17	75	22.6
pod	37	39	94.8
total	220	1151	19.1

line	stmt	bran	cond	sub	pod	time	code
1							package Algorithm::BitVector;
2
3							#!/usr/bin/perl -w
4
5							#------------------------------------------------------------------------------------
6							# Copyright (c) 2015 Avinash Kak. All rights reserved. This program is free
7							# software. You may modify and/or distribute it under the same terms as Perl itself.
8							# This copyright notice must remain attached to the file.
9							#
10							# Algorithm::BitVector is a Perl module for creating a memory efficient packed
11							# representation of bit arrays and for logical and numerical operations on such
12							# arrays.
13							# -----------------------------------------------------------------------------------
14
15							#use 5.10.0;
16	1			1		14782	use strict;
	1					4
	1					43
17	1			1		7	use warnings;
	1					1
	1					41
18	1			1		6	use Carp;
	1					7
	1					104
19	1			1		6	use List::Util qw(pairmap max reduce any);
	1					1
	1					141
20	1			1		2949792	use Math::BigInt;
	1					18325
	1					3
21	1			1		606888	use Math::Random;
	1					4961
	1					95
22	1			1		5	use Fcntl 'SEEK_CUR';
	1					2
	1					67
23
24							our $VERSION = '1.23';
25
26	1					7	use overload '+' => '_join',
27							'""' => '_str',
28							'0+' => '_int',
29							'~' => '_invert',
30							'\|' => '_or',
31							'&' => '_and',
32							'^' => '_xor',
33							'<=>' => '_compare',
34							'<<' => '_lshift',
35							'>>' => '_rshift',
36							'<>' => '_iter',
37	1			1		4	'fallback' => 1;
	1					2
38
39							sub _readblock {
40	0			0		0	my $blocksize = shift;
41	0					0	my $bitvector = shift;
42	0					0	my $block;
43							my $more_to_read;
44	0					0	my $i = 0;
45	0					0	my $byte_as_bits;
46	0					0	my $bitstring = '';
47	0					0	while ( $i < $blocksize / 8 ) {
48	0					0	$i++;
49	0					0	my $num_bytes_read = sysread($bitvector->{FILEIN}, my $byte, 1);
50	0	0				0	unless ($num_bytes_read) {
51	0	0				0	if (length($bitstring) < $blocksize) {
52	0					0	$bitvector->{more_to_read} = 0;
53	0					0	$more_to_read = 0;
54							}
55	0					0	return $bitstring;
56							} else {
57	0					0	my $bits_as_string = sprintf "%vb", $byte;
58	0					0	$bits_as_string = '0' x (8 - length($bits_as_string)) . $bits_as_string;
59	0					0	$bitstring .= $bits_as_string;
60							}
61							}
62	0					0	my $file_pos = tell $bitvector->{FILEIN};
63							# peek at the next byte; moves file position only if a
64							# byte is read
65	0					0	my $num_bytes_read = sysread($bitvector->{FILEIN}, my $next_byte, 1);
66	0	0				0	if ($num_bytes_read) {
67							# pretend we never read the byte
68	0					0	sysseek $bitvector->{FILEIN}, $file_pos - 1, SEEK_CUR;
69							} else {
70	0					0	$bitvector->{more_to_read} = 0;
71							}
72	0					0	return $bitstring;
73							}
74
75							# Constructor:
76							sub new {
77	3			3	0	20	my ($class, %args) = @_;
78	3					7	my @params = keys %args;
79	3	50				8	croak "\nYou have used a wrong name for a keyword argument " .
80							"--- perhaps a misspelling\n"
81							if _check_for_illegal_params(@params) == 0;
82	3					18	my $self = {
83							filename => $args{filename},
84							size => $args{size},
85							intVal => $args{intVal},
86							bitlist => $args{bitlist},
87							bitstring => $args{bitstring},
88							hexstring => $args{hexstring},
89							textstring => $args{textstring},
90							};
91	3					6	bless $self, $class;
92	3	50				69	if ( $self->{filename} ) {
		100
		100
		50
		50
		50
		0
93	0	0	0			0	die "When using the `filename' option in the constructor call, you cannot use any " .
			0
			0
			0
			0
94							"other option at the same time: $!"
95							if $self->{intVal} or $self->{size} or $self->{bitlist}
96							or $self->{bitstring} or $self->{hexstring} or $self->{textstring};
97	0	0				0	open $self->{FILEIN}, "< $self->{filename}"
98							or die "unable to open file $self->{filename}: $!";
99	0					0	$self->{more_to_read} = 1;
100	0					0	return $self;
101							} elsif ( defined $self->{intVal} ) {
102	1	50	33			16	die "When using the `intVal' option in the constructor call, you CANNOT use `filename', " .
			33
			33
			33
103							"`bitlist', 'bitstring', `hexstring', or `textstring' options: $!"
104							if $self->{filename} or $self->{bitlist} or $self->{bitstring}
105							or $self->{hexstring} or $self->{textstring};
106	1	50	33			4	if ($self->{intVal} == 0 && ! defined $self->{size}) {
107	0					0	$self->{bitlist} = [0];
108	0					0	$self->{size} = 1;
109							} else {
110	1					2	my @bitarray;
111	1	50				3	if (ref($self->{intVal}) eq 'Math::BigInt') {
112	0					0	my $bitlist_str = $self->{intVal}->as_bin();
113	0					0	@bitarray = split //, $bitlist_str;
114							# Now get rid of the `0b' prefix on the string returned by as_bin():
115	0					0	shift @bitarray;
116	0					0	shift @bitarray;
117							} else {
118	1					7	my $bitlist_str = sprintf "%b", $self->{intVal};
119	1	50				3	if (defined $self->{size}) {
120	0	0				0	die "The value specified for size must be at least as large as for the " .
121							"smallest bitvector possible for the intVal integer: $!"
122							if $self->{size} < length $bitlist_str;
123	0					0	my $n = $self->{size} - length $bitlist_str;
124	0					0	my $extended_bitlist_str = '0' x $n . $bitlist_str;
125	0					0	@bitarray = split //, $extended_bitlist_str;
126							} else {
127	1					5	@bitarray = split //, $bitlist_str;
128							}
129							}
130	1					3	$self->{bitlist} = \@bitarray;
131	1					1	$self->{size} = scalar @{$self->{bitlist}};
	1					20
132							}
133							} elsif (defined $self->{size}) {
134	1	50	33			25	die "When using the `size' option in the constructor call, you CANNOT use `filename', " .
			33
			33
			33
			33
135							"`bitlist', 'bitstring', `hexstring', or `textstring' options: $!"
136							if $self->{filename} or $self->{intVal} or $self->{bitlist}
137							or $self->{bitstring} or $self->{hexstring} or $self->{textstring};
138	1					4	my $bitstring = "0" x $self->{size};
139	1					8	my @bitlist_from_bitstring = split '', $bitstring;
140	1					3	$self->{bitlist} = \@bitlist_from_bitstring;
141							} elsif ($self->{bitlist}) {
142	0	0	0			0	die "When using the `bitlist' option in the constructor call, you cannot use any " .
			0
			0
			0
			0
143							"other option at the same time: $!"
144							if $self->{filename} or $self->{intVal} or $self->{size}
145							or $self->{bitstring} or $self->{hexstring} or $self->{textstring};
146	0					0	$self->{size} = scalar @{$self->{bitlist}};
	0					0
147							} elsif (defined $self->{bitstring}) {
148	0	0	0			0	die "When using the `bitstring' option in the constructor call, you cannot use any " .
			0
			0
			0
			0
149							"other option at the same time: $!"
150							if $self->{filename} or $self->{intVal} or $self->{size}
151							or $self->{bitlist} or $self->{hexstring} or $self->{textstring};
152	0					0	my @bitlist_from_bitstring = split //, $self->{bitstring};
153	0					0	$self->{bitlist} = \@bitlist_from_bitstring;
154	0					0	$self->{size} = scalar @{$self->{bitlist}};
	0					0
155							} elsif (defined $self->{textstring}) {
156	1	50	33			19	die "When using the `textstring' option in the constructor call, you cannot use any " .
			33
			33
			33
			33
157							"other option at the same time: $!"
158							if $self->{filename} or $self->{intVal} or $self->{size}
159							or $self->{bitlist} or $self->{hexstring} or $self->{bitstring};
160	5	50				12	my $bitstring_from_text = join '', map {length($_) == 8 ? $_
	5					17
161	1					6	: ('0' x (8 - length($_))) . $_} map {sprintf "%b", $_}
162							map ord, split //, $self->{textstring};
163	1					10	my @bitlist_from_text = split //, $bitstring_from_text;
164	1					3	$self->{bitlist} = \@bitlist_from_text;
165	1					1	$self->{size} = scalar @{$self->{bitlist}};
	1					3
166							} elsif (defined $self->{hexstring}) {
167	0	0	0			0	die "When using the `hexstring' option in the constructor call, you cannot use any " .
			0
			0
			0
			0
168							"other option at the same time: $!"
169							if $self->{filename} or $self->{intVal} or $self->{size}
170							or $self->{bitlist} or $self->{textstring} or $self->{bitstring};
171	0	0				0	my $bitstring_from_hex = join '', map {length($_) == 4 ? $_
	0					0
172	0					0	: ('0' x (4 - length($_))) . $_} map {sprintf "%b", $_}
173	0					0	map {hex $_} split //, $self->{hexstring};
174	0					0	my @bitlist_from_hex = split //, $bitstring_from_hex;
175	0					0	$self->{bitlist} = \@bitlist_from_hex;
176	0					0	$self->{size} = scalar @{$self->{bitlist}};
	0					0
177							}
178	3					3	my $shorts_needed = int( (@{$self->{bitlist}} + 15) / 16 );
	3					11
179	3					9	@{$self->{_vector}} = map {unpack "n", pack("n", 0)} 0 .. $shorts_needed-1;
	3					9
	5					12
180	3					23	my @interleaved = (0..$self->{size}-1, @{$self->{bitlist}})
	60					73
181	3					8	[map {$_,$_+$self->{size}} (0 .. $self->{size}-1)];
182	3			60		35	pairmap {$self->set_bit($a,$b)} @interleaved;
	60					74
183	3					11	$self->{bitlist} = undef;
184	3					16	return $self;
185							}
186
187							## Set the bit at the designated position to the value shown
188							sub set_bit {
189	60			60	1	52	my $self = shift;
190	60					39	my $posn = shift;
191	60					47	my $val = shift;
192	60	50	66			200	die "incorrect value for a bit" unless $val == 0 or $val == 1;
193	60	50	33			363	die "index range error" if ($posn >= $self->{size}) or ($posn < - $self->{size});
194	60	50				75	$posn = $self->{size} + $posn if $posn < 0;
195	60					75	my $block_index = int($posn / 16);
196	60					44	my $shift = $posn & 0xF;
197	60					45	my $cv = $self->{_vector}->[$block_index];
198	60	100				122	if ( ( ( $cv >> $shift ) & 1 ) != $val) {
199	25					57	$self->{_vector}->[$block_index] = $cv ^ (1 << $shift);
200							}
201							}
202
203							## Get the bit from the designated position. This method can also return a slice of a
204							## bitvector. However, note that the slice is returned as a list of the bits in the
205							## index range you specified. You can easily convert the list of bits returned into a
206							## bitvector in your own code.
207							sub get_bit {
208	65			65	1	47	my $self = shift;
209	65					49	my $pos = shift;
210	65	100				91	unless (ref($pos) eq "ARRAY") {
211	60	50	33			182	die "index range error" if ($pos >= $self->{size}) or ($pos < - $self->{size});
212	60	50				77	$pos = $self->{size} + $pos if $pos < 0;
213	60					131	return ( $self->{_vector}->[int($pos/16)] >> ($pos&15) ) & 1;
214							}
215	5					15	my @slice = map $self->get_bit($_), (@$pos)[0..@$pos-1];
216	5					28	return \@slice;
217							}
218
219							## Overloading of the string conversion operator. Return the string representation
220							## of a bitvector.
221							sub _str {
222	1			1		10	my $self = shift;
223	1					4	return join '', map $self->get_bit($_), 0 .. $self->{size}-1;
224							}
225
226							## Overloading of the `+' operator. Concatenate the argument bitvectors. Return the
227							## concatenated bitvector as a new BitVector instance.
228							sub _join {
229	0			0		0	my ($bv1, $bv2) = @_;
230	0	0	0			0	die "Abort: The concatenation operator invoked with either undefined " .
231							"or wrong types of operands: $!"
232							unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
233							UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
234	0					0	my $allbits = $bv1->_str() . $bv2->_str();
235	0					0	return Algorithm::BitVector->new( bitstring => $allbits );
236							}
237
238							sub _compare {
239	0			0		0	my ($bv1, $bv2) = @_;
240	0	0	0			0	die "Abort: The comparison operator invoked with either undefined " .
241							"or wrong types of operands: $!"
242							unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
243							UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
244	0					0	my $bigint1 = Math::BigInt->from_bin( "$bv1" );
245	0					0	my $bigint2 = Math::BigInt->from_bin( "$bv2" );
246	0					0	return $bigint1->bcmp($bigint2);
247							}
248
249							sub deep_copy {
250	0			0	1	0	my $self = shift;
251	0					0	my $result_bv = Algorithm::BitVector->new( size => $self->{size} );
252	0					0	foreach my $i (0..@{$result_bv->{_vector}}-1) {
	0					0
253	0					0	$result_bv->{_vector}->[$i] = $self->{_vector}->[$i];
254							}
255	0					0	return $result_bv;
256							}
257
258							## Invert the bits in the bitvector on which the method is invoked
259							## and return the result as a new bitvector.
260							sub _invert {
261	0			0		0	my $self = shift;
262	0	0				0	die "Abort: The operator '~' for bit inversion invoked on an object that is " .
263							"not of type Algorithm::BitVector"
264							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
265	0					0	my $result_bv = $self->deep_copy();
266	0					0	foreach my $i (0..@{$result_bv->{_vector}}-1) {
	0					0
267							# The unary `~' operator may assume a 32-bit wide field:
268	0					0	$result_bv->{_vector}->[$i] = ~ $result_bv->{_vector}->[$i] & 0x0000FFFF;
269							}
270	0					0	return $result_bv;
271							}
272
273							## Take a bitwise 'OR' of two bitvectors. Return the result as a new bitvector. If
274							## the two bitvectors are not of the same size, pad the shorter one with zeros from
275							## the left.
276							sub _or {
277	0			0		0	my ($bv1, $bv2) = @_;
278	0	0	0			0	die "Abort: The `\|' operator invoked with either undefined " .
279							"or wrong types of operands: $!"
280							unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
281							UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
282	0					0	my ($bv3, $bv4);
283	0	0				0	if ( $bv1->{size} < $bv2->{size} ) {
		0
284	0					0	$bv3 = $bv1->_resize_pad_from_left($bv2->{size} - $bv1->{size});
285	0					0	$bv4 = $bv2;
286							} elsif ( $bv1->{size} > $bv2->{size} ) {
287	0					0	$bv3 = $bv1;
288	0					0	$bv4 = $bv2->_resize_pad_from_left($bv1->{size} - $bv2->{size});
289							} else {
290	0					0	$bv3 = $bv1;
291	0					0	$bv4 = $bv2;
292							}
293	0					0	my $result_bv = Algorithm::BitVector->new( size => $bv3->{size} );
294	0					0	foreach my $i (0..@{$result_bv->{_vector}}-1) {
	0					0
295							# The binary `\|' operator may assume 32-bit wide fields:
296	0					0	$result_bv->{_vector}->[$i] = ($bv3->{_vector}->[$i] \| $bv4->{_vector}->[$i] ) & 0x0000FFFF;
297							}
298	0					0	return $result_bv;
299							}
300
301							## Resize a bitvector by padding with n 0's from the left. Return the result as a
302							## new bitvector.
303							sub _resize_pad_from_left {
304	0			0		0	my $self = shift;
305	0					0	my $n = shift;
306	0					0	my $new_str = '0' x $n . "$self";
307	0					0	return Algorithm::BitVector->new( bitstring => $new_str );
308							}
309
310							## Take a bitwise 'AND' of the bitvector on which the method is invoked with
311							## the argument bitvector. Return the result as a new bitvector. If the two
312							## bitvectors are not of the same size, pad the shorter one with zeros from the
313							## left.
314							sub _and {
315	0			0		0	my ($bv1, $bv2) = @_;
316	0	0	0			0	die "Abort: The concatenation operator invoked with either undefined " .
317							"or wrong types of operands: $!"
318							unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
319							UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
320	0					0	my ($bv3, $bv4);
321	0	0				0	if ( $bv1->{size} < $bv2->{size} ) {
		0
322	0					0	$bv3 = $bv1->_resize_pad_from_left($bv2->{size} - $bv1->{size});
323	0					0	$bv4 = $bv2;
324							} elsif ( $bv1->{size} > $bv2->{size} ) {
325	0					0	$bv3 = $bv1;
326	0					0	$bv4 = $bv2->_resize_pad_from_left($bv1->{size} - $bv2->{size});
327							} else {
328	0					0	$bv3 = $bv1;
329	0					0	$bv4 = $bv2;
330							}
331	0					0	my $result_bv = Algorithm::BitVector->new( size => $bv3->{size} );
332	0					0	foreach my $i (0..@{$result_bv->{_vector}}-1) {
	0					0
333							# The binary bitwise `&' operator may assume 32-bit wide fields:
334	0					0	$result_bv->{_vector}->[$i] = ($bv3->{_vector}->[$i] & $bv4->{_vector}->[$i] ) & 0x0000FFFF;
335							}
336	0					0	return $result_bv;
337							}
338
339							## Take a bitwise 'XOR' of the bitvector on which the method is invoked with
340							## the argument bitvector. Return the result as a new bitvector. If the two
341							## bitvectors are not of the same size, pad the shorter one with zeros from the
342							## left.
343							sub _xor {
344	0			0		0	my ($bv1, $bv2) = @_;
345	0	0	0			0	die "Abort: The xor operator invoked with either undefined " .
346							"or wrong types of operands: $!"
347							unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
348							UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
349	0					0	my ($bv3, $bv4);
350	0	0				0	if ( $bv1->{size} < $bv2->{size} ) {
		0
351	0					0	$bv3 = $bv1->_resize_pad_from_left($bv2->{size} - $bv1->{size});
352	0					0	$bv4 = $bv2;
353							} elsif ( $bv1->{size} > $bv2->{size} ) {
354	0					0	$bv3 = $bv1;
355	0					0	$bv4 = $bv2->_resize_pad_from_left($bv1->{size} - $bv2->{size});
356							} else {
357	0					0	$bv3 = $bv1;
358	0					0	$bv4 = $bv2;
359							}
360	0					0	my $result_bv = Algorithm::BitVector->new( size => $bv3->{size} );
361	0					0	foreach my $i (0..@{$result_bv->{_vector}}-1) {
	0					0
362							# The binary bitwise `^' operator may assume 32-bit wide fields:
363	0					0	$result_bv->{_vector}->[$i] = ($bv3->{_vector}->[$i] ^ $bv4->{_vector}->[$i] ) & 0x0000FFFF;
364							}
365	0					0	return $result_bv;
366							}
367
368							# For the overloading of the iterator '<>' operator:
369							sub _iter {
370	0			0		0	my $self = shift;
371	0					0	my $pos = 0;
372	0	0				0	$self->{_iterator} = BitVecIterator->new($self, $pos) unless $self->{_iter_called};
373	0					0	&{$self->{_iterator}->next()};
	0					0
374							}
375
376							{
377							# This inner class needed for implementing iterator overloading:
378							package BitVecIterator;
379							sub new {
380	0			0		0	my $self = [ $_[1], $_[2] ];
381	0					0	bless $self, $_[0];
382	0					0	$_[1]->{_iter_called} = 1;
383	0					0	return $self;
384							}
385							sub next {
386	0			0		0	my $self = shift;
387	0					0	my $bitvec = $self->[0];
388							# The anonymous subroutine that follows is a closure over the variables
389							# incorporated in it:
390							return sub {
391	0	0		0		0	if ($self->[1] >= $bitvec->{size}) {
392	0					0	delete $bitvec->{_iter_called};
393	0					0	return;
394							}
395	0					0	my $bit = $bitvec->get_bit($self->[1]);
396	0					0	$self->[1] = $self->[1] + 1;
397	0					0	return $bit;
398							}
399	0					0	}
400							}
401
402							# for the overloading of the numification operator:
403							sub _int {
404	1			1		6	my $self = shift;
405	1					4	return $self->int_value();
406							}
407
408							## Divides an even-sized bitvector into two and returns the two halves as a
409							## list of two bitvectors.
410							sub divide_into_two {
411	0			0	1	0	my $self = shift;
412	0	0				0	die "Abort: The divide_into_two() method invoked on an object that is " .
413							"not of type Algorithm::BitVector"
414							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
415	0	0				0	die "The bitvector to be divided must have even number of bits: $!"
416							if $self->{size} % 2;
417	0					0	my @outlist1 = ();
418	0					0	foreach my $i (0..$self->{size} / 2 - 1) {
419	0					0	push @outlist1, $self->get_bit($i);
420							}
421	0					0	my @outlist2 = ();
422	0					0	foreach my $i ( ($self->{size} / 2) .. ($self->{size} - 1) ) {
423	0					0	push @outlist2, $self->get_bit($i);
424							}
425	0					0	return Algorithm::BitVector->new( bitlist => \@outlist1 ),
426							Algorithm::BitVector->new( bitlist => \@outlist2 );
427							}
428
429							## Permute a bitvector according to the indices shown in the second argument list.
430							## Return the permuted bitvector as a new bitvector.
431							sub permute {
432	0			0	1	0	my $self = shift;
433	0	0				0	die "Abort: The permute() method invoked on an object that is " .
434							"not of type Algorithm::BitVector"
435							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
436	0					0	my $permute_list = shift;
437	0	0				0	die "Bad permutation index in your permutation list"
438							if max(@$permute_list) > $self->{size} - 1;
439	0					0	my @outlist = ();
440	0					0	foreach my $index (@$permute_list) {
441	0					0	push @outlist, $self->get_bit($index);
442							}
443	0					0	return Algorithm::BitVector->new( bitlist => \@outlist );
444							}
445
446							## Unpermute the bitvector according to the permutation list supplied as the
447							## second argument. If you first permute a bitvector by using permute() and
448							## then unpermute() it using the same permutation list, you will get back the
449							## original bitvector.
450							sub unpermute {
451	0			0	1	0	my $self = shift;
452	0	0				0	die "Abort: The unpermute() method invoked on an object that is " .
453							"not of type Algorithm::BitVector"
454							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
455	0					0	my $permute_list = shift;
456	0	0				0	die "Bad permutation index in your permutation list: $!"
457							if max(@$permute_list) > $self->{size} - 1;
458	0	0				0	die "Size of the permute list for unpermuting not consistent with the size of the bet vector:$!"
459							unless $self->{size} == @$permute_list;
460	0					0	my $out_bv = Algorithm::BitVector->new( size => $self->{size} );
461	0					0	foreach my $i (0..@$permute_list-1) {
462	0					0	$out_bv->set_bit( $permute_list->[$i], $self->get_bit($i) );
463							}
464	0					0	return $out_bv;
465							}
466
467							## Write the bitvector to the file object file_out. (A file object is returned
468							## by a call to open()). Since all file I/O is byte oriented, the bitvector must
469							## be multiple of 8 bits. Each byte treated as MSB first (0th index).
470							sub write_to_file {
471	0			0	1	0	my $self = shift;
472	0	0				0	die "Abort: The write_to_file() method invoked on an object that is " .
473							"not of type Algorithm::BitVector"
474							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
475	0					0	my $file_out = shift;
476	0					0	my $err_str = "Only a bitvector whose length is a multiple of 8 can " .
477							"be written to a file. Use the padding functions to satisfy " .
478							"this constraint.";
479	0	0				0	$self->{FILEOUT} = $file_out unless $self->{FILEOUT};
480	0	0				0	die $err_str if $self->{size} % 8;
481	0					0	for my $byte (0..$self->{size}/8 - 1){
482	0					0	my $value = 0;
483	0					0	foreach my $bit (0..7) {
484	0					0	$value += $self->get_bit( $byte*8+(7 - $bit) ) << $bit;
485							}
486	0					0	syswrite $file_out, chr($value), 1;
487							}
488							}
489
490							## For closing a file object that was used for reading the bits into one or more
491							## BitVector objects.
492							sub close_file_handle {
493	0			0	1	0	my $self = shift;
494	0	0				0	die "Abort: The close_file_handle() method invoked on an object that is " .
495							"not of type Algorithm::BitVector"
496							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
497	0	0				0	die "No open file currently associated with the file handle: $!"
498							unless $self->{FILEIN};
499	0					0	close $self->{FILEIN};
500							}
501
502							## Return the integer value of a bitvector. If the original integer from which the
503							## bitvector was constructed is a Math::BigInt object, then return the string
504							## representation of the integer value.
505							sub int_value {
506	1			1	1	1	my $self = shift;
507	1	50				6	die "Abort: The int() method invoked on an object that is " .
508							"not of type Algorithm::BitVector"
509							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
510	1					2	my $int_value;
511	1	50	33			7	if (defined($self->{intVal}) && (ref($self->{intVal}) eq 'Math::BigInt')) {
512	0					0	$int_value = $self->{intVal}->bstr();
513							} else {
514	1					1	$int_value = 0;
515	1					4	foreach my $i (0..$self->{size}-1) {
516	4					7	$int_value += $self->get_bit($i) * ( 2 ** ( $self->{size} - $i - 1 ) );
517							}
518							}
519	1					7	return $int_value;
520							}
521
522							## Return the text string formed by dividing the bitvector into bytes from the
523							## left and replacing each byte by its ASCII character (this is a useful thing
524							## to do only if the length of the vector is an integral multiple of 8 and every
525							## byte in your bitvector has a print representation)
526							sub get_text_from_bitvector {
527	1			1	0	7	my $self = shift;
528	1	50				6	die "Abort: The get_text_from_bitvector() method invoked on an object that is " .
529							"not of type Algorithm::BitVector"
530							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
531	1	50				10	die "\nThe bitvector for get_text_from_bitvector() must be an integral multiple of 8 bits: $!"
532							if $self->{size} % 8;
533	1					2	my $ascii = '';
534	1					5	for (my $i=0; $i < $self->{size}; $i += 8) {
535	5					4	$ascii .= chr oct "0b". join '', @{$self->get_bit([$i..$i+7])};
	5					15
536							}
537	1					3	return $ascii;
538							}
539
540							## Return a string of hex digits by scanning the bits from the left and
541							## replacing each sequence of 4 bits by its corresponding hex digit (this is a
542							## useful thing to do only if the length of the vector is an integral multiple
543							## of 4)
544							sub get_hex_string_from_bitvector {
545	0			0	1	0	my $self = shift;
546	0	0				0	die "Abort: The get_hex_from_bitvector() method invoked on an object that is " .
547							"not of type Algorithm::BitVector"
548							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
549	0	0				0	die "\nThe bitvector for get_hex_from_bitvector() must be a multiple of 4 bits: $!"
550							if $self->{size} % 4;
551	0					0	my $hex = '';
552	0					0	for (my $i=0; $i < $self->{size}; $i += 4) {
553	0					0	$hex .= sprintf "%x", oct "0b". join '', @{$self->get_bit([$i..$i+3])};
	0					0
554							}
555	0					0	return $hex;
556							}
557
558							## Read blocksize bits from a disk file and return a BitVector object containing
559							## the bits. If the file contains fewer bits than blocksize, construct the
560							## BitVector object from however many bits there are in the file. If the file
561							## contains zero bits, return a BitVector object of size attribute set to 0.
562							sub read_bits_from_file {
563	0			0	1	0	my $self = shift;
564	0	0				0	die "Abort: The read_bits_from_file() method invoked on an object that is " .
565							"not of type Algorithm::BitVector"
566							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
567	0					0	my $blocksize = shift;
568	0					0	my $error_str = "You need to first construct a BitVector object with a filename as argument";
569	0	0				0	die "$error_str" unless $self->{filename};
570	0	0				0	die "block size must be a multiple of 8" if $blocksize % 8;
571	0					0	my $bitstr = _readblock( $blocksize, $self );
572	0	0				0	if (length $bitstr == 0) {
573	0					0	return Algorithm::BitVector->new( size => 0 );
574	0					0	print "file has no bits\n";
575							} else {
576	0					0	return Algorithm::BitVector->new( bitstring => $bitstr );
577							}
578							}
579
580							## For an in-place left circular shift by n bit positions
581							sub _lshift {
582	0			0		0	my $self = shift;
583	0					0	my $n = shift;
584	0	0				0	die "Circular shift of an empty vector makes no sense" unless $self->{size};
585	0	0				0	return $self >> abs($n) if $n < 0;
586	0					0	foreach my $i (0..$n-1) {
587	0					0	$self->_circular_rotate_left_by_one();
588							}
589	0					0	return $self;
590							}
591
592							## For an in-place right circular shift by n bit positions
593							sub _rshift {
594	0			0		0	my $self = shift;
595	0					0	my $n = shift;
596	0	0				0	die "Circular shift of an empty vector makes no sense" unless $self->{size};
597	0	0				0	return $self << abs($n) if $n < 0;
598	0					0	foreach my $i (0..$n-1) {
599	0					0	$self->_circular_rotate_right_by_one();
600							}
601	0					0	return $self;
602							}
603
604							## For a one-bit in-place left circular shift
605							sub _circular_rotate_left_by_one {
606	0			0		0	my $self = shift;
607	0					0	my $max_index = int( ($self->{size} - 1) / 16 );
608	0					0	my $left_most_bit = $self->{_vector}->[0] & 1;
609	0					0	$self->{_vector}->[0] = $self->{_vector}->[0] >> 1;
610	0					0	foreach my $i (1 .. $max_index) {
611	0					0	my $left_bit = $self->{_vector}->[$i] & 1;
612	0					0	$self->{_vector}->[$i] = $self->{_vector}->[$i] >> 1;
613	0					0	$self->{_vector}->[$i-1] \|= $left_bit << 15;
614							}
615	0					0	$self->set_bit($self->{size} - 1, $left_most_bit);
616							}
617
618							## For a one-bit in-place right circular shift
619							sub _circular_rotate_right_by_one {
620	0			0		0	my $self = shift;
621	0					0	my $max_index = int( ($self->{size} - 1) / 16 );
622	0					0	my $right_most_bit = $self->get_bit( $self->{size} - 1);
623	0					0	$self->{_vector}->[$max_index] &= ~0x8000;
624	0					0	$self->{_vector}->[$max_index] = $self->{_vector}->[$max_index] << 1;
625	0					0	for (my $i=$max_index-1; $i > -1; $i -= 1) {
626	0					0	my $right_bit = $self->{_vector}->[$i] & 0x8000;
627	0					0	$self->{_vector}->[$i] &= ~0x8000;
628	0					0	$self->{_vector}->[$i] = $self->{_vector}->[$i] << 1;
629	0					0	$self->{_vector}->[$i+1] \|= $right_bit >> 15;
630							}
631	0					0	$self->set_bit(0, $right_most_bit);
632							}
633
634							## Pad a bitvector with n zeros from the left
635							sub pad_from_left {
636	0			0	1	0	my $self = shift;
637	0					0	my $n = shift;
638	0					0	my $new_str = ('0' x $n) . "$self";
639	0					0	my $new_bitvec = Algorithm::BitVector->new( bitstring => $new_str );
640	0					0	$self->{size} = length $new_str;
641	0					0	$self->{_vector} = $new_bitvec->{_vector};
642	0					0	return $self;
643							}
644
645							## Pad a bitvector with n zeros from the right
646							sub pad_from_right {
647	0			0	1	0	my $self = shift;
648	0					0	my $n = shift;
649	0					0	my $new_str = "$self" . ('0' x $n);
650	0					0	my $new_bitvec = Algorithm::BitVector->new( bitstring => $new_str );
651	0					0	$self->{size} = length $new_str;
652	0					0	$self->{_vector} = $new_bitvec->{_vector};
653	0					0	return $self;
654							}
655
656							## Resets a previously created BitVector to either all zeros or all ones
657							## depending on the argument val.
658							sub reset {
659	0			0	1	0	my $self = shift;
660	0					0	my $val = shift;
661	0	0	0			0	die "Incorrect argument to reset(): $!" unless ($val == 0) or ($val == 1);
662	0					0	my $reset_bv = Algorithm::BitVector->new( bitstring => ("$val" x $self->{size}) );
663	0					0	$self->{_vector} = $reset_bv->{_vector};
664							}
665
666							## Return the number of bits set in a BitVector instance.
667							sub count_bits {
668	0			0	1	0	my $self = shift;
669	0			0		0	return reduce {$a + $b} split //, "$self";
	0					0
670							}
671
672							## Changes the bit pattern associated with a previously constructed BitVector
673							## instance. The allowable modes for changing the internally stored bit pattern
674							## are the same as for the constructor.
675							sub set_value {
676	0			0	1	0	my $self = shift;
677	0					0	my $reset_bv = Algorithm::BitVector->new( @_ );
678	0					0	$self->{_vector} = $reset_bv->{_vector};
679	0					0	$self->{size} = $reset_bv->{size};
680							}
681
682
683							## This method for counting the set bits is much faster for sparse bitvectors.
684							## Note, however, that count_bits() may work much faster for dense-packed
685							## bitvectors.
686							sub count_bits_sparse {
687	0			0	1	0	my $self = shift;
688	0					0	my $num = 0;
689	0					0	foreach my $intval (@{$self->{_vector}}) {
	0					0
690	0	0				0	next if $intval == 0;
691	0					0	my ($c, $iv) = (0, $intval);
692	0					0	while ($iv > 0) {
693	0					0	$iv = $iv & ($iv - 1);
694	0					0	$c++;
695							}
696	0					0	$num += $c;
697							}
698	0					0	return $num;
699							}
700
701							## Computes the Jaccard similarity coefficient between two bitvectors
702							sub jaccard_similarity {
703	0			0	1	0	my $self = shift;
704	0					0	my $other = shift;
705	0	0	0			0	die "Jaccard called on two zero vectors --- NOT ALLOWED: $!"
706							if (int($self) == 0) && (int($other) == 0);
707	0	0				0	die "Jaccard called on vectors of unequal size --- NOT ALLOWED: $!"
708							if $self->{size} != $other->{size};
709	0					0	my $intersection = $self & $other;
710	0					0	my $union = $self \| $other;
711	0					0	return $intersection->count_bits_sparse() / $union->count_bits_sparse();
712							}
713
714							## Computes the Jaccard distance between two bitvectors
715							sub jaccard_distance {
716	0			0	1	0	my $self = shift;
717	0					0	my $other = shift;
718	0	0				0	die "Jaccard called on vectors of unequal size --- NOT ALLOWED: $!"
719							if $self->{size} != $other->{size};
720	0					0	return 1 - $self->jaccard_similarity( $other );
721							}
722
723							## Computes the Hamming distance between two bitvectors
724							sub hamming_distance {
725	0			0	1	0	my $self = shift;
726	0					0	my $other = shift;
727	0	0				0	die "hamming_distance() called on vectors of unequal size --- NOT ALLOWED: $!"
728							if $self->{size} != $other->{size};
729	0					0	my $diff = $self ^ $other;
730	0					0	return $diff->count_bits_sparse();
731							}
732
733							## This method calculates the position of the next set bit at or after the current
734							## position index. It returns -1 if there is no next set bit. Logic based on the
735							## contributions by Jason Allum and John Gleeson to the Python version of this
736							## module.
737							sub next_set_bit {
738	0			0	1	0	my $self = shift;
739	0		0			0	my $from_index = shift \|\| 0;
740	0	0				0	die "The from_index must be nonnegative: $!" unless $from_index >= 0;
741	0					0	my $i = $from_index;
742	0					0	my $v = $self->{_vector};
743	0					0	my $l = scalar @$v;
744	0					0	my $o = $i >> 4;
745	0					0	my $s = $i & 0x0F;
746	0					0	$i = $o << 4;
747	0					0	while ($o < $l) {
748	0					0	my $h = $v->[$o];
749	0	0				0	if ($h) {
750	0					0	$i += $s;
751	0					0	my $m = 1 << $s;
752	0					0	while ($m != (1 << 0x10)) {
753	0	0				0	return $i if $h & $m;
754	0					0	$m <<= 1;
755	0					0	$i += 1;
756							}
757							} else {
758	0					0	$i += 0x10;
759							}
760	0					0	$s = 0;
761	0					0	$o += 1;
762							}
763	0					0	return -1;
764							}
765
766							## For a bit that is set at the argument 'position', this method returns how many
767							## bits are set to the left of that bit. For example, in the bit pattern
768							## 000101100100, a call to this method with position set to 9 will return 4.
769							sub rank_of_bit_set_at_index {
770	0			0	1	0	my $self = shift;
771	0					0	my $position = shift;
772	0	0				0	die "The bitvector has no bits set at the position in the call to rank_of_bit_set_at_index(): $!"
773							unless $self->get_bit($position);
774	0					0	my $bv_slice = Algorithm::BitVector->new( bitlist => $self->get_bit([0..$position]) );
775	0					0	return $bv_slice->count_bits();
776							}
777
778							## Determines whether the integer value of a bitvector is a power of 2.
779							sub is_power_of_2 {
780	0			0	1	0	my $self = shift;
781	0	0				0	return 0 if int($self) == 0;
782	0					0	my $bv = $self & Algorithm::BitVector->new( intVal => int($self) - 1);
783	0	0				0	return 1 if int($bv) == 0;
784	0					0	return 0;
785							}
786
787							## Faster version of is_power_of2() for sparse bitvectors
788							sub is_power_of_2_sparse {
789	0			0	1	0	my $self = shift;
790	0	0				0	return 1 if $self->count_bits_sparse() == 1;
791	0					0	return 0;
792							}
793
794							## Returns a new bitvector by reversing the bits in the bitvector on which the
795							## method is invoked.
796							sub reverse {
797	0			0	1	0	my $self = shift;
798	0					0	my @reverseList = ();
799	0					0	for (my $i=1; $i < $self->{size} + 1; $i++) {
800	0					0	push @reverseList, $self->get_bit( -$i );
801							}
802	0					0	return Algorithm::BitVector->new( bitlist => \@reverseList );
803							}
804
805							## Using Euclid's Algorithm, returns the greatest common divisor of the integer
806							## value of the bitvector on which the method is invoked and the integer value
807							## of the argument bitvector.
808							sub gcd {
809	0			0	1	0	my $self = shift;
810	0					0	my $other = shift;
811	0					0	my ($a,$b) = (int($self), int($other));
812	0	0				0	($a,$b) = ($b,$a) if $a < $b;
813	0					0	while ($b != 0) {
814	0					0	($a, $b) = ($b, $a % $b);
815							}
816	0					0	return Algorithm::BitVector->new( intVal => $a );
817							}
818
819							## Calculates the multiplicative inverse of the bitvector on which the method is
820							## invoked modulo the bitvector that is supplied as the argument. Code based on
821							## Extended Euclid's Algorithm.
822							sub multiplicative_inverse {
823	0			0	1	0	my $self = int( shift );
824	0					0	my $modulus = int( shift );
825	0					0	my ($mod, $num) = ($modulus, $self);
826	0					0	my ($x, $x_old) = (0, 1);
827	0					0	my ($y, $y_old) = (1, 0);
828	0					0	while ($mod) {
829	0					0	my $quotient = int($num / $mod);
830	0					0	($num, $mod) = ($mod, $num % $mod);
831	0					0	($x, $x_old) = ($x_old - $x * $quotient, $x);
832	0					0	($y, $y_old) = ($y_old - $y * $quotient, $y);
833							}
834	0	0				0	if ($num != 1) {
835	0					0	return 0;
836							} else {
837	0					0	my $MI = ($x_old + $modulus) % $modulus;
838	0					0	return Algorithm::BitVector->new( intVal => $MI );
839							}
840							}
841
842							## For a one-bit in-place left non-circular shift. Note that the bitvector size
843							## does not change. The leftmost bit that moves past the first element of the
844							## bitvector is discarded and rightmost bit of the returned vector is set to zero.
845							sub _shift_left_by_one {
846	0			0		0	my $self = shift;
847	0					0	my $size = @{$self->{_vector}};
	0					0
848	0					0	my @left_most_bits = map {$_ & 1} @{$self->{_vector}};
	0					0
	0					0
849	0					0	push @left_most_bits, $left_most_bits[0];
850	0					0	shift @left_most_bits;
851	0					0	@{$self->{_vector}} = map {$_ >> 1} @{$self->{_vector}};
	0					0
	0					0
	0					0
852	0					0	my @a = @{$self->{_vector}};
	0					0
853	0					0	my @b = map {$_ << 15} @left_most_bits;
	0					0
854	0					0	my @interleaved = ( @a, @b )[ map { $_, $_ + @a } ( 0 .. $#a ) ];
	0					0
855	0			0		0	@{$self->{_vector}} = pairmap {$a \| $b} @interleaved;
	0					0
	0					0
856	0					0	$self->set_bit(-1,0);
857							}
858
859							## For an in-place left non-circular shift by n bit positions. The bits shifted
860							## out at the left are discarded. The new bit positions on the right are filled with
861							## zeros
862							sub shift_left {
863	0			0	1	0	my $self = shift;
864	0					0	my $n = shift;
865	0					0	foreach my $i (0..$n-1) {
866	0					0	$self->_shift_left_by_one();
867							}
868	0					0	return $self;
869							}
870
871							## For a one-bit in-place right non-circular shift. Note that bitvector size does
872							## not change. The rightmost bit that moves past the last element of the bitvector
873							## on the right is discarded and the leftmost bit of the returned vector is set to
874							## zero.
875							sub _shift_right_by_one {
876	0			0		0	my $self = shift;
877	0					0	my $size = @{$self->{_vector}};
	0					0
878	0					0	my @right_most_bits = map {$_ & 0x8000} @{$self->{_vector}};
	0					0
	0					0
879	0					0	@{$self->{_vector}} = map { $_ & ~0x8000 } @{$self->{_vector}};
	0					0
	0					0
	0					0
880	0					0	unshift @right_most_bits, 0;
881	0					0	pop @right_most_bits;
882	0					0	@{$self->{_vector}} = map {$_ << 1} @{$self->{_vector}};
	0					0
	0					0
	0					0
883	0					0	my @a = @{$self->{_vector}};
	0					0
884	0					0	my @b = map {$_ >> 15} @right_most_bits;
	0					0
885	0					0	my @interleaved = ( @a, @b )[ map { $_, $_ + @a } ( 0 .. $#a ) ];
	0					0
886	0			0		0	@{$self->{_vector}} = pairmap {$a \| $b} @interleaved;
	0					0
	0					0
887	0					0	$self->set_bit(0,0);
888							}
889
890							## For an in-place right non-circular shift by n bit positions. The n bits that
891							## move past the last element of the bitvector on the right are discarded and
892							## leftmost new n bit positions filled with zeros.
893							sub shift_right {
894	0			0	1	0	my $self = shift;
895	0					0	my $n = shift;
896	0					0	foreach my $i (0..$n-1) {
897	0					0	$self->_shift_right_by_one();
898							}
899	0					0	return $self;
900							}
901
902							## In the set of polynomials defined over GF(2), multiplies the bitvector on which
903							## the method is invoked with the argument bitvector. Returns the product
904							## bitvector. Note that this method carries out straight polynomial multiplication
905							## as opposed to modulo polynomial multiplication. As a result, the highest power of
906							## the result polynomial will ALWAYS be greater than the highest powers of the
907							## argument polynomials for non-trivial multiplications.
908							sub gf_multiply {
909	0			0	1	0	my $self = shift;
910	0					0	my $b = shift;
911	0					0	my $a = $self->deep_copy();
912	0					0	my $b_copy = $b->deep_copy();
913	0					0	my $a_highest_power = $a->{size} - $a->next_set_bit(0) - 1;
914	0					0	my $b_highest_power = $b->{size} - $b_copy->next_set_bit(0) - 1;
915	0					0	my $result = Algorithm::BitVector->new( size => $a->{size} + $b_copy->{size} );
916	0					0	$a->pad_from_left( $result->{size} - $a->{size} );
917	0					0	$b_copy->pad_from_left( $result->{size} - $b_copy->{size} );
918	0					0	my @b_list = split //, "$b_copy";
919	0					0	my @enum = (0..@b_list-1, @b_list) [map {$_, $_+ @b_list} (0 .. @b_list - 1)];
	0					0
920	0					0	for (my $i=0; $i < @enum-1; $i = $i + 2) {
921	0	0				0	if ($enum[$i+1]) {
922	0					0	my $power = $b_copy->{size} - $enum[$i] - 1;
923	0					0	my $a_copy = $a->deep_copy();
924	0					0	$a_copy->shift_left( $power );
925	0					0	$result ^= $a_copy;
926							}
927							}
928	0					0	return $result;
929							}
930
931							## Carries out modular division of a bitvector by the modulus bitvector mod in
932							## GF(2^n) finite field. Returns both the quotient and the remainder.
933							sub gf_divide_by_modulus {
934	0			0	1	0	my $num = shift;
935	0					0	my $mod = shift;
936	0					0	my $n = shift;
937	0	0				0	die "Modulus bit pattern too long" if $mod->{size} > $n + 1;
938	0					0	my $quotient = Algorithm::BitVector->new( intVal => 0, size => $num->{size} );
939	0					0	my $remainder = $num->deep_copy();
940	0					0	for (my $i=0; $i < $num->{size}; $i++) {
941	0					0	my $mod_highest_power = $mod->{size} - $mod->next_set_bit(0) - 1;
942	0					0	my $remainder_highest_power;
943	0	0				0	if ($remainder->next_set_bit(0) == -1) {
944	0					0	$remainder_highest_power = 0;
945							} else {
946	0					0	$remainder_highest_power = $remainder->{size} - $remainder->next_set_bit(0) - 1;
947							}
948	0	0	0			0	if (($remainder_highest_power < $mod_highest_power) or (int($remainder) == 0)) {
949	0					0	last;
950							} else {
951	0					0	my $exponent_shift = $remainder_highest_power - $mod_highest_power;
952	0					0	$quotient->set_bit($quotient->{size} - $exponent_shift - 1, 1);
953	0					0	my $quotient_mod_product = $mod->deep_copy();
954	0					0	$quotient_mod_product->pad_from_left($remainder->{size} - $mod->{size});
955	0					0	$quotient_mod_product->shift_left($exponent_shift);
956	0					0	$remainder ^= $quotient_mod_product;
957							}
958							}
959	0	0				0	if ($remainder->{size} > $n) {
960	0					0	$remainder = Algorithm::BitVector->new(
961							bitlist => $remainder->get_bit([$remainder->{size}-$n .. $remainder->{size}-1]));
962							}
963	0					0	return ($quotient, $remainder);
964							}
965
966							## Multiplies a bitvector with the bitvector b in GF(2^n) finite field with the
967							## modulus bit pattern set to mod
968							sub gf_multiply_modular {
969	0			0	1	0	my $self = shift;
970	0					0	my $b = shift;
971	0					0	my $mod = shift;
972	0					0	my $n = shift;
973	0					0	my $a_copy = $self->deep_copy();
974	0					0	my $b_copy = $b->deep_copy();
975	0					0	my $product = $a_copy->gf_multiply($b_copy);
976	0					0	my ($quotient, $remainder) = $product->gf_divide_by_modulus($mod, $n);
977	0					0	return $remainder
978							}
979
980							## Returns the multiplicative inverse of a vector in the GF(2^n) finite field
981							## with the modulus polynomial set to mod
982							sub gf_MI {
983	0			0	1	0	my $num = shift;
984	0					0	my $mod = shift;
985	0					0	my $n = shift;
986	0					0	my ($NUM, $MOD) = ($num->deep_copy(), $mod->deep_copy());
987	0					0	my $x = Algorithm::BitVector->new( size => $mod->{size} );
988	0					0	my $x_old = Algorithm::BitVector->new( intVal => 1, size => $mod->{size} );
989	0					0	my $y = Algorithm::BitVector->new( intVal => 1, size => $mod->{size} );
990	0					0	my $y_old = Algorithm::BitVector->new( size => $mod->{size} );
991	0					0	my ($quotient, $remainder);
992	0					0	while (int($mod)) {
993	0					0	($quotient, $remainder) = $num->gf_divide_by_modulus($mod, $n);
994	0					0	($num, $mod) = ($mod, $remainder);
995	0					0	($x, $x_old) = ($x_old ^ $quotient->gf_multiply($x), $x);
996	0					0	($y, $y_old) = ($y_old ^ $quotient->gf_multiply($y), $y);
997							}
998	0	0				0	if (int($num) != 1) {
999	0					0	return "NO MI. However, the GCD of $NUM and $MOD is $num\n";
1000							} else {
1001	0					0	my $z = $x_old ^ $MOD;
1002	0					0	($quotient, $remainder) = $z->gf_divide_by_modulus($MOD, $n);
1003	0					0	return $remainder;
1004							}
1005							}
1006
1007							## Returns a list of the consecutive runs of 1's and 0's in the bitvector.
1008							## Each run is either a string of all 1's or a string of all 0's.
1009							sub runs {
1010	0			0	1	0	my $self = shift;
1011	0	0				0	die "An empty vector has no runs" if $self->{size} == 0;
1012	0					0	my @allruns = ();
1013	0					0	my $run = '';
1014	0					0	my $previous_bit = $self->get_bit(0);
1015	0	0				0	if ($previous_bit == 0) {
1016	0					0	$run = '0';
1017							} else {
1018	0					0	$run = '1';
1019							}
1020	0					0	my @bitlist = split //, "$self";
1021	0					0	shift @bitlist;
1022	0					0	foreach my $bit (@bitlist) {
1023	0	0	0			0	if (($bit == 0) && ($previous_bit == 0)) {
		0	0
		0	0
1024	0					0	$run .= '0';
1025							} elsif (($bit == 1) && ($previous_bit == 0)) {
1026	0					0	push @allruns, $run;
1027	0					0	$run = '1';
1028							} elsif (($bit == 0) && ($previous_bit == 1)) {
1029	0					0	push @allruns, $run;
1030	0					0	$run = '0';
1031							} else {
1032	0					0	$run .= '1';
1033							}
1034	0					0	$previous_bit = $bit;
1035							}
1036	0					0	push @allruns, $run;
1037							return @allruns
1038	0					0	}
1039
1040							## Check if the integer value of the bitvector is a prime through the Miller-Rabin
1041							## probabilistic test of primality. If not found to be a composite, estimate the
1042							## probability of the bitvector being a prime using this test.
1043							sub test_for_primality {
1044	0			0	1	0	my $p = int(shift);
1045	0	0				0	die "\nThe primality test method test_for_primality() is intended for only " .
1046							"small integers --- integers that are relatively small in relation to " .
1047							"the largest integer that can fit Perl's 4-byte int representation:$!"
1048							if $p > 0x7f_ff_ff_ff;
1049	0	0				0	return "is NOT a prime" if $p == 1;
1050	0					0	my @probes = (2,3,5,7,11,13,17);
1051	0					0	foreach my $a (@probes) {
1052	0	0				0	return "is a prime" if $a == $p;
1053							}
1054	0	0		0		0	return "is NOT a prime" if any {$p % $_ == 0} @probes;
	0					0
1055	0					0	my ($k, $q) = (0, $p-1);
1056	0					0	while (! ($q & 1)) {
1057	0					0	$q >>= 1;
1058	0					0	$k++;
1059							}
1060	0					0	foreach my $a (@probes) {
1061	0					0	my $a_raised_to_q = _powmod_small_ints($a, int($q), $p);
1062	0	0	0			0	next if $a_raised_to_q == 1 or $a_raised_to_q == $p - 1;
1063	0					0	my $a_raised_to_jq = $a_raised_to_q;
1064	0					0	my $primeflag = 0;
1065	0					0	foreach my $j (0..$k-1) {
1066	0					0	$a_raised_to_jq = _powmod_small_ints($a_raised_to_jq, 2, $p);
1067	0	0				0	if ($a_raised_to_jq == $p-1) {
1068	0					0	$primeflag = 1;
1069	0					0	last;
1070							}
1071							}
1072	0	0				0	return "is NOT a prime" unless $primeflag;
1073							}
1074	0					0	my $probability_of_prime = 1 - 1.0/(4 ** @probes);
1075	0					0	return "is a prime with probability $probability_of_prime";
1076							}
1077
1078							## This routine is for modular exponentiation of small integers, meaning the
1079							## integers that can be accommodated (before and after exponentiation) in the native
1080							## 4-byte int representation. For larger integers, a call to this function should
1081							## be replaced by a call to modular exponentiation of the Math::BigInt module.
1082							sub _powmod_small_ints {
1083	0			0		0	my $base = shift;
1084	0					0	my $exponent = shift;
1085	0					0	my $mod = int(shift);
1086	0			0		0	warn "This is just a warning: The modular exponentiation method is not meant for very large numbers (IMPORTANT: This is just a very rough check on the size of the numbers involved)"
1087	0	0				0	if any {$_ > 0x0f_ff_ff_ff} (int($base), int($exponent), int($mod));
1088	0					0	my $result = 1;
1089	0					0	my $a = int($base);
1090	0					0	my $b = $exponent;
1091	0					0	while (int($b) > 0) {
1092	0	0				0	$result = ($result * $a) % $mod if int($b) & 1;
1093	0					0	$b = $b >> 1;
1094	0					0	$a = ($a * $a) % $mod;
1095							}
1096	0					0	return $result;
1097							}
1098
1099							## This method is for a generating a bit pattern of a given size with random bits.
1100							sub gen_random_bits {
1101	0			0	1	0	my $self = shift;
1102	0					0	my $width = shift;
1103	0					0	my $candidate_bits = "0b" . join '', Math::Random::random_uniform_integer($width, 0, 1);
1104	0					0	my $candidate = oct $candidate_bits;
1105	0					0	$candidate \|= 1;
1106	0					0	$candidate \|= (1 << $width-1);
1107	0					0	$candidate \|= (2 << $width-3);
1108	0					0	return Algorithm::BitVector->new( intVal => $candidate );
1109							}
1110
1111							sub length {
1112	0			0	1	0	my $self = shift;
1113	0					0	return $self->{size};
1114							}
1115
1116							sub _check_for_illegal_params {
1117	3			3		4	my @params = @_;
1118	3					6	my @legal_params = qw / filename
1119							size
1120							intVal
1121							bitlist
1122							bitstring
1123							hexstring
1124							textstring
1125							/;
1126	3					4	my $found_match_flag;
1127	3					5	foreach my $param (@params) {
1128	3					4	foreach my $legal (@legal_params) {
1129	12					10	$found_match_flag = 0;
1130	12	100				18	if ($param eq $legal) {
1131	3					3	$found_match_flag = 1;
1132	3					4	last;
1133							}
1134							}
1135	3	50				9	last if $found_match_flag == 0;
1136							}
1137	3					10	return $found_match_flag;
1138							}
1139
1140							1;
1141
1142
1143							=pod
1144
1145							=head1 NAME
1146
1147							Algorithm::BitVector --- A memory efficient packed representation of arbitrary sized
1148							bit arrays and for logical and arithmetic operations on such arrays.
1149
1150							=head1 SYNOPSIS
1151
1152							use Algorithm::BitVector;
1153
1154							# Constructing a given sized bitvector of all zeros:
1155							$bv = Algorithm::BitVector->new( size => 7 );
1156							print "$bv\n"; # 0000000
1157
1158							# Constructing a bitvector whose integer value is specified:
1159							$bv = Algorithm::BitVector->new( intVal => 123456 );
1160							print "$bv\n"; # 11110001001000000
1161							print int($bv); # 123456
1162
1163							# Constructing a bitvector from a very large integer:
1164							use Math::BigInt;
1165							$x = Math::BigInt->new('12345678901234567890123456789012345678901234567890');
1166							$bv = Algorithm::BitVector->new( intVal => $x );
1167
1168							# Constructing a bitvector from a given bit string:
1169							$bv = Algorithm::BitVector->new( bitstring => '00110011' );
1170
1171							# Constructing a bitvector from an ASCII text string:
1172							$bv = Algorithm::BitVector->new( textstring => "hello\njello" );
1173
1174							# Constructing a bitvector from a hex string:
1175							$bv = Algorithm::BitVector->new( hexstring => "68656c6c6f" );
1176
1177							# Constructing a bitvector from a bit list passed as an anonymous array:
1178							$bv = Algorithm::BitVector->new( bitlist => [1, 1, 0, 1] );
1179
1180							# Constructing a bitvector from the contents of a disk file:
1181							$bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1182							$bv1 = $bv->read_bits_from_file(64); # bitvector from the first 64 bits
1183							# and so on
1184
1185
1186							=head1 CHANGES
1187
1188							Version 1.23 mentions the required modules in the C file but with no
1189							minimum version numbers. Additionally, the documentation associated with the methods
1190							was significantly upgraded in this version.
1191
1192							Version 1.22 removes the Perl version restriction from the module and the
1193							C files and the C restrictions from the C file.
1194
1195							Version 1.21 fixes a bug in the code for the Miller-Rabin primality test function
1196							C. This version also places a hard limit on the size of the
1197							integers that are allowed to be tested for primality.
1198
1199							Version 1.2 fixes an important bug in creating bitvectors from the contents of a disk
1200							file. This version also includes corrections for some of the documentation errors
1201							discovered.
1202
1203							Version 1.1 incorporates additional type checking on the operands for the overloaded
1204							operators. Also fixed some minor documentation formatting issues.
1205
1206							=head1 DESCRIPTION
1207
1208							My main motivation for creating this module was to provide the students at Purdue and
1209							elsewhere with a Perl class whose API is the same as that of my Python based
1210							C module that appears to have become popular for prototyping algorithms
1211							for cryptography and hash functions.
1212
1213							This module stores the bits of a bitvector in 16-bit unsigned shorts. As you can see
1214							in the constructor code for C, after resolving the arguments with which the
1215							constructor is called, the very first thing the constructor does is to figure out how
1216							many of those 2-byte shorts it needs for the bits. That does not imply that the size
1217							of a bit array that is stored in a bitvector must be a multiple of 16. B
1218							can be of any size whatsoever.> The C class keeps track of the
1219							number of bits through its C instance variable.
1220
1221							Note that, except for one case, the constructor must be called with a single keyword
1222							argument, which determines how the bitvector will be constructed. The single
1223							exception to this rule is for the keyword argument C which you would normally
1224							use for constructing a bitvector from an integer. The additional option you can
1225							supply with C is C. When both C and C are specified in a
1226							constructor call, you get a bitvector of the specified size provided the value
1227							supplied for C is larger than what it takes to accommodate the bits for the
1228							C integer.
1229
1230							In addition to constructing bitvectors from integers, this module can also construct
1231							bitvectors from bit strings, from ASCII text strings, from hex strings, from a list
1232							of bits, and from the contents of a file. With regards to constructing bitvectors
1233							from integers, the module can construct very large bitvectors from very large
1234							integers stored as C objects.
1235
1236							=head1 OVERLOADED OPERATORS
1237
1238							The following C declaration in the module gives the list of the
1239							overloaded operators. Since C is set to 1, several other operators become
1240							overloaded by autogeneration from those shown below. For example, overloading of the
1241							3-way numerical comparison operator C<< <=> >> automatically overloads the C<< < >>,
1242							C<< <= >>, C<< > >>, C<< >= >>, C<< == >>, and C<< != >> operators.
1243
1244							use overload '+' => '_add',
1245							'""' => '_str',
1246							'0+' => '_int',
1247							'~' => '_invert',
1248							'\|' => '_or',
1249							'&' => '_and',
1250							'^' => '_xor',
1251							'<=>' => '_compare',
1252							'<<' => '_lshift',
1253							'>>' => '_rshift',
1254							'<>' => '_iter',
1255							'fallback' => 1;
1256
1257							It is B to remember that the overloadings for the C<<< << >>> and C<<< >>
1258							>>> operators are for B left and right shifts (their usual meaning as
1259							bitwise operators is for non-circular shifts). This was done because the
1260							applications for which this module is intended is more likely to use circular shifts
1261							of bit fields than non-circular shifts. You can still carry out non-circular shifts
1262							by calling the methods C and C as illustrated elsewhere
1263							in this documentation.
1264
1265							Another B thing to bear in mind is the overloading of the C<+> operator.
1266							It is B addition. On the other hand, it is a concatenation of the two operand
1267							bitvectors. This was done to keep the usage of this operator the same as in the
1268							Python version of this module.
1269
1270							By virtue of how the operators are overloaded, you can make the calls listed in the
1271							rest of this section. To illustrate these calls, I will use the following two bit
1272							vectors:
1273
1274							$bv1 = Algorithm::BitVector->new( bitstring => "111000" );
1275							$bv2 = Algorithm::BitVector->new( bitstring => "000101000" );
1276
1277							These two bitvectors are intentionally of different lengths to illustrate what role
1278							the size differences play in how the various operators work.
1279
1280							=over 4
1281
1282							=item B
1283
1284							$bv3 = $bv1 + $bv2; # 111000000101000
1285
1286							The concatenation of two bitvectors is returned as a new bitvector. This is made
1287							possible by the overload definition for the C<+> operator.
1288
1289							Note that the following also works:
1290
1291							print $bv1 . "hello"; # 111000hello
1292
1293							In this case, Perl implicitly converts the left operand of the `.' operator into a
1294							string (which is made possible by the overloading for the stringification operator in
1295							this module) and then returns the result as a string.
1296
1297							=item B
1298
1299							print "$bv1"; # 111000
1300
1301							This is made possible for the overload definition for the C<""> operator.
1302
1303							=item B
1304
1305							print int($bv1); # 56
1306
1307							This is made possible by the overload definition for the C<0+> operator.
1308
1309							=item B
1310
1311							$bv3 = ~ $bv1;
1312							print $bv3; # 000111
1313
1314							This is made possible by the overload definition for the C<~> operator.
1315
1316							=item B
1317
1318							$bv3 = $bv1 \| $bv2; # 000111000
1319
1320							When two bitvectors are of unequal length (as is the case here), the shorter vector
1321							is zero-padded on the left to equalize their lengths before the application of the
1322							logical operation. If this auto-padding property is not something you want, you
1323							should check the lengths of the argument bitvectors in your own script before
1324							invoking this operator.
1325
1326							=item B
1327
1328							$bv3 = $bv1 & $bv2; # 000101000
1329
1330							When two bitvectors are of unequal length (as is the case here), the shorter vector
1331							is zero-padded on the left to equalize their lengths before the application of the
1332							logical operation. If this auto-padding property is not something you want, you
1333							should check the lengths of the argument bitvectors in your own script before
1334							invoking this operator.
1335
1336							=item B
1337
1338							$bv3 = $bv1 ^ $bv2; # 000010000
1339
1340							When two bitvectors are of unequal length (as is the case here), the shorter vector
1341							is zero-padded on the left to equalize their lengths before the application of the
1342							logical operation. If this auto-padding property is not something you want, you
1343							should check the lengths of the argument bitvectors in your own script before
1344							invoking this operator.
1345
1346							=item B
1347
1348							$bv1 < $bv2 ? print "yes\n" : print "no\n"; # no
1349
1350							$bv1 > $bv2 ? print "yes\n" : print "no\n"; # yes
1351
1352							$bv1 <= $bv2 ? print "yes\n" : print "no\n"; # no
1353
1354							$bv1 >= $bv2 ? print "yes\n" : print "no\n"; # yes
1355
1356							$bv1 == $bv2 ? print "yes\n" : print "no\n"; # no
1357
1358							$bv1 != $bv2 ? print "yes\n" : print "no\n"; # yes
1359
1360							The overload definitions for all these operators are autogenerated from the overload
1361							definition for the 3-way numerical comparison operator C<< <=> >>. B
1362							compared on the basis of their integer values.> That is, C<$bv1> is less than C<$bv2>
1363							is C is less than C.
1364
1365							=item B
1366
1367							$n = 3;
1368
1369							$bv1 << $n; # $bv1 is now 000111
1370							$bv1 >> $n; # $bv1 is now 111000
1371
1372							Since Perl does not expect these two operators to be invoked in a void context, such
1373							statements in your code will elicit a warning from Perl to that effect. If these
1374							warnings annoy you, you can turn them off by surrounding such statements with C
1375							warnings "void";> and C directives. The other option is to invoke
1376							such statements in the following manner:
1377
1378							$bv1 = $bv1 << $n;
1379							$bv2 = $bv1 >> $n;
1380
1381							That works because the overload definitions for these bit shift operators return the
1382							bitvector object on which they are invoked. As it turns out, this also allows for
1383							chained invocation of these operators, as in
1384
1385							$bv1 = $bv1 << 3 << 1 >> 2; # 100011
1386
1387							$bv1 = $bv1 << 2 >> 1 >> 3; # 111000
1388
1389							=item B
1390
1391							while (<$bv1>) {
1392							print "$_ ";
1393							} # 1 1 1 0 0 0
1394
1395							This is made possible by the overload definition for the C<< <> >> operator. The
1396							C class includes an inner class C for this
1397							purpose.
1398
1399							=back
1400
1401
1402							=head1 CONSTRUCTING BITVECTORS
1403
1404							=over 4
1405
1406							=item B
1407
1408							$bv = Algorithm::BitVector->new( size => 0 );
1409
1410							print "$bv\n"; # no output
1411
1412							=item B
1413
1414							$bv = Algorithm::BitVector->new( size => 13 ); # 0000000000000
1415
1416							=item B
1417
1418							$bv = Algorithm::BitVector->new( intVal => 5006 ); # 1001110001110
1419
1420							The module returns the smallest possible bitvector that would accommodate the
1421							integer value provided with the C option.
1422
1423							=item B
1424
1425							As mentioned, with the C option, you get the smallest possible bitvector
1426							that can be generated from that integer. If you want a I bitvector, you can
1427							also supply the C option as shown below:
1428
1429							$bv = Algorithm::BitVector->new( intVal => 5006, size => 16 ); # 0001001110001110
1430
1431							If the value supplied for the C option in such a call is not larger than the
1432							smallest bit array that represents the C value, the constructor will throw an
1433							exception.
1434
1435							=item B
1436
1437							use Math::BigInt;
1438							$x = Math::BigInt->new('12345678901234567890123456789012345678901234567890');
1439							$bv = Algorithm::BitVector->new( intVal => $x );
1440							#1000011100100111111101100011011010011010101011111000001111001010000\
1441							#10101000000100110011101000111101011111000110001111111000110010110110\
1442							#01110001111110000101011010010
1443
1444							=item B
1445
1446							$bv = Algorithm::BitVector->new( bitstring => '00110011' ); # 00110011
1447
1448							=item B
1449
1450							$bv = Algorithm::BitVector->new( textstring => "hello\n" );
1451							# 011010000110010101101100011011000110111100001010
1452
1453							=item B
1454
1455							$bv = Algorithm::BitVector->new( hexstring => "68656c6c6f" );
1456							# 0110100001100101011011000110110001101111
1457
1458							=item B
1459
1460							$bv = Algorithm::BitVector->new( bitlist => [1, 1, 0, 1] ); # 1101
1461
1462							=item B
1463
1464							$bv = Algorithm::BitVector->new( filename => 'testinput1.txt' );
1465
1466							print "$bv\n"; # Nothing to show yet
1467
1468							$bv1 = $bv->read_bits_from_file(64); # Now you have a bitvector from the
1469							# first 64 bits
1470
1471							Note that it takes two calls to create bitvectors from the contents of a file. The
1472							first merely creates an empty bitvector just to set the necessary file handle for
1473							reading the file. It is the second call in which you invoke the method
1474							C that actually returns a bitvector from the bits read from
1475							the file. Each call to C in this manner spits out a new bit
1476							vector.
1477
1478							=back
1479
1480							=head1 METHODS
1481
1482							=head3 close_file_handle()
1483
1484							=over 4
1485
1486							When you construct bitvectors by block scanning a disk file, after you are done, you
1487							can call this method to close the file handle that was created to read the file:
1488
1489							$bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1490							## your code to read bit blocks for constructing bitvectors goes here
1491							$bv->close_file_handle();
1492
1493							The constructor call in the first statement opens a file handle for reading the bits.
1494							It is this file handle that is closed by calling C,
1495
1496							=back
1497
1498							=head3 count_bits()
1499
1500							=over 4
1501
1502							$bv = Algorithm::BitVector->new( intVal => 45, size => 16 );
1503							print $bv->count_bits(); # 4
1504
1505							This method returns an integer value which is the number of bits set to 1 in the
1506							bitvector on which the method is invoked.
1507
1508							=back
1509
1510							=head3 count_bits_sparse()
1511
1512							=over 4
1513
1514							Say you have a bitvector with two million bits:
1515
1516							$bv = Algorithm::BitVector->new( size => 2000000 );
1517
1518							and you happen to set its individual bits by
1519
1520							$bv->set_bit(345234, 1);
1521							$bv->set_bit(233, 1);
1522							$bv->set_bit(243, 1);
1523							$bv->set_bit(18, 1);
1524							$bv->set_bit(785, 1);
1525
1526							The following call returns the number of bits set in the bitvector:
1527
1528							print $bv->count_bits_sparse(); # 5
1529
1530							For very long bitvectors, as in the example here, this method will work much faster
1531							than the C method. However, for dense bitvectors, I expect
1532							C to work faster.
1533
1534							=back
1535
1536							=head3 deep_copy()
1537
1538							=over 4
1539
1540							$bv_copy = $bv->deep_copy();
1541
1542							Subsequently, any alterations to the bitvectors pointed to by either C<$bv> or
1543							C<$bv_copy> will not affect the other.
1544
1545							=back
1546
1547							=head3 divide_into_two()
1548
1549							=over 4
1550
1551							($bv1, $bv2) = $bv->divide_into_two(); # say $bv = 0000000000101101
1552							print "$bv1\n"; # 00000000
1553							print "$bv2\n"; # 00101101
1554
1555							Divides an even sized bitvector into two bitvectors, each of size half of the
1556							bitvector on which this method is invoked. Throws an exception when invoked on a
1557							bitvector that is not even sized.
1558
1559							=back
1560
1561							=head3 gcd()
1562
1563							=over 4
1564
1565							This method uses the Euclid's algorithm to return the Greatest Common Divisor of the
1566							integer values represented by the two bitvectors. The following example shows a call
1567							to C returning the GCD of the integer values of the bitvectors C<$bv1> and
1568							C<$bv2>.
1569
1570							$bv1 = Algorithm::BitVector->new( bitstring => '01100110' ); # 102
1571							$bv2 = Algorithm::BitVector->new( bitstring => '011010' ); # 26
1572							$gcd = $bv1->gcd( $bv2 ); # 10
1573							print int($gcd); # 2
1574
1575							The result returned by C is a bitvector.
1576
1577							=back
1578
1579							=head3 gen_random_bits()
1580
1581							=over 4
1582
1583							$bv = Algorithm::BitVector->new( intVal => 0 );
1584							$bv = $bv->gen_random_bits(16); # 1100111001010101
1585
1586							The call to C returns a bitvector whose bits are randomly
1587							generated. The number of bits in the returned bitvector equals the argument integer.
1588
1589							=back
1590
1591							=head3 get_bit()
1592
1593							=over 4
1594
1595							This method gives you array-like access to the individual bits of a bitvector.
1596
1597							$bv = Algorithm::BitVector->new( bitstring => '10111' );
1598							print $bv->get_bit(0); # 1 (the first bit)
1599							print $bv->get_bit(1); # 0
1600							print $bv->get_bit(4); # 1 (the last bit)
1601
1602							Negative values for the index scan a bitvector from right to left, with the C<-1>
1603							index standing for the last (meaning the right-most) bit in the vector:
1604
1605							print $bv->get_bit(-1); # 1 (the last bit)
1606							print $bv->get_bit(-2); # 1
1607							print $bv->get_bit(-5); # 1 (the first bit)
1608
1609							The C can also return a slice of a bitvector if the argument to the
1610							method is an anonymous array of the index range you desire, as in the second
1611							statement below:
1612
1613							$bv = Algorithm::BitVector->new( bitstring => "10111011");
1614							my $arr = $bv->get_bit( [3..7] );
1615							print "@$arr\n"; # 1 1 0 1 1
1616
1617							In this example, we want C to return all bits at positions indexed 3
1618							through 7, both ends inclusive. Note that the slice is returned as an array of bits.
1619
1620
1621							=back
1622
1623							=head3 get_hex_string_from_bitvector()
1624
1625							=over 4
1626
1627							Assuming that length of your bitvector is a multiple of 4, this methods returns a
1628							hex representation of the bit pattern:
1629
1630							$bv = Algorithm::BitVector->new(bitstring => "0110100001100101011011000110110001101111");
1631							print $bv->get_hex_string_from_bitvector(); # 68656c6c6f
1632
1633							The hex representation is returned in the form if a string of hex characters. This
1634							method throws an exception if the size of the bitvector is not a multiple of 4.
1635
1636							=back
1637
1638							=head3 get_text_string_from_bitvector()
1639
1640							=over 4
1641
1642							$bv = Algorithm::BitVector->new( textstring => "hello" );
1643							print "$bv\n"; # 0110100001100101011011000110110001101111
1644							print $bv->get_text_from_bitvector(); # hello
1645
1646							The method returns a string of ASCII characters by converting successive 8-bit slices
1647							of the bitvector into an ASCII character. It throws an exception if the size of the
1648							bit pattern is not a multiple of 8. Calling this method to create a text-based print
1649							representation of a bit vector makes sense only if you don't expect to see any
1650							unprintable characters in the successive 8-bit slices of the bitvector. Let's say
1651							you have created a bitvector from a text string using the appropriate constructor
1652							call. Subsequently, you encrypted this text string. Next, you or someone else
1653							decrypts the encrypted bit stream. Since what comes out at the decryption end must
1654							be the original text string, it would make sense to invoke this method to recover the
1655							original text.
1656
1657							=back
1658
1659							=head3 gf_divide_by_modulus()
1660
1661							=over 4
1662
1663							This method is for modular division in the Galois Field C. You must specify
1664							the modulus polynomial through a bit pattern and also the value of the integer C:
1665
1666							$mod = Algorithm::BitVector->new( bitstring => '100011011' ); # AES modulus
1667							$n = 8;
1668							$a = Algorithm::BitVector->new( bitstring => '11100010110001' );
1669							($quotient, $remainder) = $a->gf_divide_by_modulus($mod, $n);
1670							print "$quotient\n"; # 00000000111010
1671							print "$remainder\n"; # 10001111
1672
1673							What this example illustrates is dividing the bitvector C<$a> by the modulus bit
1674							vector C<$mod>. For a more general division of one bitvector C<$a> by another bit
1675							vector C<$b>, you would carry out a multiplication of C<$a> by the MI of C<$b>, where
1676							MI stands for "multiplicative inverse" as returned by a call to the method
1677							C. A call to C returns two bitvectors, one for the
1678							quotient and the other for the remainder.
1679
1680							=back
1681
1682							=head3 gf_MI()
1683
1684							=over 4
1685
1686							This method returns the multiplicative inverse of a bit pattern in the Galois Field
1687							C. You must specify both the modulus polynomial through its bit pattern and
1688							the value of C:
1689
1690							$modulus = Algorithm::BitVector->new( bitstring => '100011011' ); # AES modulus
1691							$n = 8;
1692							$a = Algorithm::BitVector->new( bitstring => '00110011' );
1693							print $a->gf_MI($modulus, $n); # 01101100
1694
1695							Note that the multiplicative inverse is returned as a bitvector.
1696
1697							=back
1698
1699							=head3 gf_multiply()
1700
1701							=over 4
1702
1703							This method returns a product of two bit patterns in the Galois Field C field.
1704							That is, given any two polynomials with their coefficients drawn from the 0 and 1
1705							values in C, this method returns the product polynomial.
1706
1707							$a = Algorithm::BitVector->new( bitstring => '0110001' );
1708							$b = Algorithm::BitVector->new( bitstring => '0110' );
1709							print $a->gf_multiply($b); #00010100110
1710
1711							As you would expect, in general, the bit pattern returned by this method will be
1712							longer than the lengths of the two operand bitvectors. The result returned by the
1713							method is in the form of a bitvector.
1714
1715							=back
1716
1717							=head3 gf_multiply_modular()
1718
1719							=over 4
1720
1721							This method carries out modular multiplication in the Galois Field C. You
1722							must supply it the bitvector for the modulus polynomial and the value of C.
1723
1724							$modulus = Algorithm::BitVector->new( bitstring => '100011011' ); # AES modulus
1725							$n = 8;
1726							$a = Algorithm::BitVector->new( bitstring => '0110001' );
1727							$b = Algorithm::BitVector->new( bitstring => '0110' );
1728							print $a->gf_multiply_modular($b, $modulus, $n); # 10100110
1729
1730							This example returns the product of the bit patterns C<$a> and C<$b> modulo the bit
1731							pattern C<$modulus> in C. The result returned by the method is in the form
1732							of a bitvector.
1733
1734							=back
1735
1736							=head3 hamming_distance()
1737
1738							=over 4
1739
1740							Hamming distance is commonly used to measure dissimilarity between two bitvectors of
1741							the same size.
1742
1743							$bv1 = Algorithm::BitVector->new( bitstring => '11111111' );
1744							$bv2 = Algorithm::BitVector->new( bitstring => '00101011' );
1745							print $bv1->hamming_distance( $bv2 ); # 4
1746
1747							This distance returns the number of bit positions in which two the bit patterns
1748							differ. The method throws an exception if the two bitvectors are not of the same
1749							length. The value returned is an integer.
1750
1751							=back
1752
1753							=head3 int_value()
1754
1755							=over 4
1756
1757							You can find the integer value of a bitvector by
1758
1759							$bv = Algorithm::BitVector->new( intVal => 5678 );
1760							print $bv3->int_value(); # 5678
1761
1762							or, even more simply by
1763
1764							print int($bv); # 5678
1765
1766							which works on account of the overloading of the C<0+> operator.
1767
1768							=back
1769
1770							=head3 is_power_of_2()
1771
1772							=over 4
1773
1774							You can use this predicate to test if the integer value of a bitvector is a power of
1775							2:
1776
1777							$bv = Algorithm::BitVector->new( bitstring => '10000000001110' );
1778							print int($bv); # 826
1779							print $bv->is_power_of_2(); # 0
1780
1781							The predicate returns 1 for true and 0 for false.
1782
1783							=back
1784
1785							=head3 is_power_of_2_sparse()
1786
1787							=over 4
1788
1789							This does the same thing as the C method but in a way that makes it
1790							faster for large bitvectors with very few bits set.
1791
1792							$bv = Algorithm::BitVector->new( size => 2000000 );
1793							$bv->set_bit(345234, 1);
1794							print $bv->is_power_of_2_sparse(); # 1
1795
1796							=back
1797
1798							=head3 jaccard_distance()
1799
1800							=over 4
1801
1802							The Jaccard distance between two bitvectors is 1 minus the Jaccard similarity
1803							coefficient:
1804
1805							$bv1 = Algorithm::BitVector->new( bitstring => '11111111' );
1806							$bv2 = Algorithm::BitVector->new( bitstring => '10101011' );
1807							print $bv1->jaccard_distance( $bv2 ); # 0.375
1808
1809							The value returned by the method is a floating point number between 0 and 1.
1810
1811							=back
1812
1813							=head3 jaccard_similarity()
1814
1815							=over 4
1816
1817							This method returns the Jaccard similarity coefficient between the two bitvectors
1818							pointed to by C<$bv1> and C<$bv2>:
1819
1820							$bv1 = Algorithm::BitVector->new( bitstring => '11111111' );
1821							$bv2 = Algorithm::BitVector->new( bitstring => '10101011' );
1822							print $bv1->jaccard_similarity( $bv2 ); # 0.675
1823
1824							The value returned by the method is a floating point number between 0 and 1.
1825
1826							=back
1827
1828							=head3 length()
1829
1830							=over 4
1831
1832							This method returns the total number of bits in a bitvector:
1833
1834							$bv = Algorithm::BitVector->new( intVal => 5678 );
1835							print $bv; # 1011000101110
1836							print $bv->length(); # 13
1837
1838							Note that what C returns is the total size of a bitvector, including any
1839							leading zeros.
1840
1841							=back
1842
1843							=head3 multiplicative_inverse()
1844
1845							=over 4
1846
1847							This method calculates the multiplicative inverse using normal integer arithmetic.
1848							For multiplicative inverses in a Galois Field C, use the method C
1849							described earlier in this API.
1850
1851							$modulus = Algorithm::BitVector->new( intVal => 32 );
1852							$bv = Algorithm::BitVector->new( intVal => 17 );
1853							$result = $bv->multiplicative_inverse( $modulus );
1854							if ($result) {
1855							print $result; # 10001
1856							} else {
1857							print "No multiplicative inverse in this case\n";
1858							}
1859
1860							What this example says is that the multiplicative inverse of 17 modulo 32 is 17. That
1861							is because 17 times 17 in modulo 32 arithmetic equals 1. When using this method, you
1862							must test the value returned for 0. If the returned value is 0, that means that the
1863							number corresponding to the bitvector on which this method is invoked does not
1864							possess a multiplicative inverse with respect to the modulus.
1865
1866							=back
1867
1868							=head3 next_set_bit()
1869
1870							=over 4
1871
1872							Starting from a given bit position, this method returns the index of the next bit
1873							that is set in a bitvector:
1874
1875							$bv = Algorithm::BitVector->new( bitstring => '00000000000001' );
1876							print $bv->next_set_bit(5); # 13
1877
1878							In this example, we are asking the method to return the index of the bit that is set
1879							after the bit position indexed 5.
1880
1881							=back
1882
1883							=head3 pad_from_left()
1884
1885							=over 4
1886
1887							You can pad a bitvector from the left with a designated number of zeros:
1888
1889							$bv = Algorithm::BitVector->new( bitstring => '101010' );
1890							print $bv->pad_from_left( 4 ); # 0000101010
1891
1892							The method returns the bitvector on which it is invoked. So you can think of it as
1893							an in-place extension of a bitvector (although, under the hood, the extension is
1894							carried out by giving a new longer C<_vector> attribute to the bitvector object).
1895
1896							=back
1897
1898							=head3 pad_from_right()
1899
1900							=over 4
1901
1902							You can pad a bitvector from the right with a designated number of zeros:
1903
1904							$bv = Algorithm::BitVector->new( bitstring => '101010' );
1905							print $bv->pad_from_right( 4 ); # 1010100000
1906
1907							The method returns the bitvector on which it is invoked. So you can think of it as
1908							an in-place extension of a bitvector (although, under the hood, the extension is
1909							carried out by giving a new longer C<_vector> attribute to the bitvector object).
1910
1911
1912							=back
1913
1914							=head3 permute()
1915
1916							=over 4
1917
1918							You can permute the bits in a bitvector with a permutation list as shown below:
1919
1920							$bv1 = Algorithm::BitVector->new( intVal => 203, size => 8 );
1921							print $bv1; # 11001011
1922							$bv2 = $bv1->permute( [3, 2, 1, 0, 7, 6, 5, 4] );
1923							print $bv2; # 00111101
1924
1925							The method returns a new bitvector with permuted bits.
1926
1927							=back
1928
1929							=head3 rank_of_bit_set_at_index()
1930
1931							=over 4
1932
1933							You can measure the "rank" of a bit that is set at a given index. Rank is the number
1934							of bits that are set up to the argument position, as in
1935
1936							$bv = Algorithm::BitVector->new( bitstring => '01010101011100' );
1937							print $bv->rank_of_bit_set_at_index( 10 ); # 6
1938
1939							The value 6 returned by this call to C is the number of
1940							bits set up to the position indexed 10 (including that position). The method throws
1941							an exception if there is no bit set at the argument position.
1942
1943							=back
1944
1945							=head3 read_bits_from_file()
1946
1947							=over 4
1948
1949							Constructing bitvectors from the contents of a disk file takes two steps: First you
1950							must make the call shown in the first statement below. The purpose of this call is to
1951							create a file handle that is associated with the variable C<$bv> in this case.
1952							Subsequent invocations of C on this variable will read
1953							blocks of C<$n> bits and return a bitvector for each block thus read. The variable
1954							C<$n> must be a multiple of 8 for this to work.
1955
1956							$bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1957							$bv1 = $bv->read_bits_from_file(64);
1958							$bv2 = $bv->read_bits_from_file(64);
1959							...
1960							...
1961							$bv->close_file_handle();
1962
1963							When reading a file as shown above, you can test the attribute C of the
1964							bitvector object in order to find out if there is more to be read in the file. The
1965							C loop shown below reads all of a file in 64-bit blocks.
1966
1967							$bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1968							while ($bv->{more_to_read}) {
1969							my $bv_read = $bv->read_bits_from_file( 64 );
1970							print "$bv_read\n";
1971							}
1972							$bv->close_file_handle();
1973
1974							The size of the last bitvector constructed from a file corresponds to how many bytes
1975							remain unread in the file at that point. It is your responsibility to zero-pad the
1976							last bitvector appropriately if, say, you are doing block encryption of the whole
1977							file.
1978
1979							=back
1980
1981							=head3 reset()
1982
1983							=over 4
1984
1985							You can reset a previously constructed bitvector all either all 1's or all 0's by
1986							calling this method:
1987
1988							$bv = Algorithm::BitVector->new( intVal => 203, size => 8 );
1989							print $bv; # 11001011
1990							$bv->reset(1);
1991							print $bv; # 11111111
1992							$bv->reset(0);
1993							print $bv; # 00000000
1994
1995							What the method accomplishes can be thought of as in-place resetting of the bits. The
1996							method does not return anything.
1997
1998							=back
1999
2000							=head3 reverse()
2001
2002							=over 4
2003
2004							Given a bitvector, you can construct a bitvector with all the bits reversed, in the
2005							sense that what was left-to-right earlier now becomes right-to-left, as in
2006
2007							$bv = Algorithm::BitVector->new( bitstring => '01100001' );
2008							print $bv->reverse(); # 10000110
2009
2010							A call to this method returns a new bitvector whose bits are in reverse order in
2011							relation to the bits in the bitvector on which the method is called.
2012
2013							=back
2014
2015							=head3 runs()
2016
2017							=over 4
2018
2019							This method returns an array of runs of 1's and 0's in a bitvector:
2020
2021							$bv = Algorithm::BitVector->new( bitlist => [1,1,1,0,0,1] );
2022							my @bvruns = $bv->runs();
2023							print "@bvruns\n"; # 111 00 1
2024
2025							Each element of the array that is returned by C is a string of either 1's or
2026							0's.
2027
2028							=back
2029
2030							=head3 set_bit()
2031
2032							=over 4
2033
2034							With array-like indexing, you can use this method to set the individual bits of a
2035							previously constructed bitvector. Both positive and negative values are allowed for
2036							the bit position index. The method takes two explicit arguments, the first for the
2037							position of the bit you want to set and the second for the value of the bit.
2038
2039							$bv = Algorithm::BitVector->new( bitstring => '1111' );
2040							$bv->set_bit(0,0); # set the first bit to 0
2041							$bv->set_bit(1,0); # set the next bit to 0
2042							print $bv; # 0011
2043							$bv->set_bit(-1,0); # set the last bit to 0
2044							$bv->set_bit(-2,0); # set the bit before the last bit to 0
2045							print $bv; # 0000
2046
2047							=back
2048
2049							=head3 set_value()
2050
2051							=over 4
2052
2053							This method can be used to change the bit pattern associated with a previously
2054							constructed bitvector:
2055
2056							$bv = Algorithm::BitVector->new( intVal => 7, size => 16 );
2057							print $bv; # 0000000000000111
2058							$bv->set_value( intVal => 45 );
2059							print $bv; # 101101
2060
2061							You can think of this method as carrying out an in-place resetting of the bit array
2062							in a bitvector. The method does not return anything.
2063
2064							=back
2065
2066							=head3 shift_left()
2067
2068							=over 4
2069
2070							If you want to shift a bitvector non-circularly to the left, this is the method to
2071							call:
2072
2073							$bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2074							$bv->shift_left(3);
2075							print $bv; # 001000
2076							$bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2077							$bv->shift_left(3)->shift_right(3);
2078							print $bv; # 000001
2079
2080							As the bitvector is shifted non-circularly to the left, the exposed bit positions on
2081							the right are filled with zeros. Note that the method returns the bitvector object
2082							on which it is invoked. That is the reason why the chained invocation of the method
2083							in the fifth statement above works.
2084
2085							=back
2086
2087							=head3 shift_right()
2088
2089							=over 4
2090
2091							If you want to shift a bitvector non-circularly to the right, this is the method to
2092							call:
2093
2094							$bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2095							$bv->shift_right(3);
2096							print $bv; # 000111
2097							$bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2098							$bv->shift_right(3)->shift_right(2);
2099							print $bv; # 000001
2100
2101							As the bitvector is shifted non-circularly to the right, the exposed bit positions on
2102							the left are filled with zeros. Note that the method returns the bitvector object
2103							on which it is invoked. That is the reason why the chained invocation of the method
2104							in the fifth statement above works.
2105
2106							=back
2107
2108							=head3 test_for_primality()
2109
2110							=over 4
2111
2112							If the integer value of a bitvector is small (meaning smaller than C<< 0x7f_ff_ff_ff >>),
2113							you can use this method to test it for its primality through the Miller-Rabin
2114							probabilistic test:
2115
2116							$p = 7417;
2117							$bv = Algorithm::BitVector->new( intVal => $p );
2118							$check = $bv->test_for_primality();
2119							print "The primality test for $p: $check\n";
2120							# The primality test for 7417: is a prime with probability 0.99993896484375
2121
2122							The method returns one of two strings: If the primality test succeeds, the method
2123							returns a string like "C". And if the test fails,
2124							the method returns the string "C".
2125
2126							=back
2127
2128							=head3 unpermute()
2129
2130							=over 4
2131
2132							This method reverses the permutation carried out by a call to the C method
2133							as shown below:
2134
2135							$bv1 = Algorithm::BitVector->new( intVal => 203, size => 8 );
2136							print $bv1; # 11001011
2137							$bv2 = $bv1->permute( [3, 2, 1, 0, 7, 6, 5, 4] );
2138							print $bv2; # 00111101
2139							$bv3 = $bv2->unpermute( [3, 2, 1, 0, 7, 6, 5, 4] );
2140							print $bv3; # 11001011
2141
2142							The method returns a new bitvector with unpermuted bits. Also note that the method
2143							throws an exception if the permutation list is not as long as the size of the
2144							bitvector on which the method is invoked.
2145
2146							=back
2147
2148							=head3 write_to_file()
2149
2150							=over 4
2151
2152							This method writes the bitvectors in your program to a disk file:
2153
2154							$bv1 = Algorithm::BitVector->new( bitstring => '00001010' );
2155							open my $FILEOUT, ">test.txt";
2156							$bv1->write_to_file( $FILEOUT );
2157							close $FILEOUT;
2158
2159							The size of a bitvector must be a multiple of 8 for this write method to work. If
2160							this condition is not met, the method will throw an exception.
2161
2162							B When writing an internally generated bitvector out
2163							to a disk file, it may be important to open the file in the binary mode, since
2164							otherwise the bit pattern `00001010' ('\n') in your bitstring will be written out as
2165							0000110100001010 ('\r\n') which is the line break on Windows machines.
2166
2167							=back
2168
2169							=head1 REQUIRED
2170
2171							This module imports the following modules:
2172
2173							Math::BigInt
2174							List::Util
2175							Math::Random
2176							Fcntl
2177
2178
2179							=head1 THE C DIRECTORY
2180
2181							The C directory contains the following script that invokes all of the
2182							functionality of this module:
2183
2184							BitVectorDemo.pl
2185
2186							In case there is any doubt about how exactly to invoke a method or how to use an
2187							operator, please look at the usage in this script.
2188
2189							=head1 EXPORT
2190
2191							None by design.
2192
2193							=head1 BUGS
2194
2195							Please notify the author if you encounter any bugs. When sending email, please place
2196							the string 'BitVector' in the subject line.
2197
2198							=head1 INSTALLATION
2199
2200							Download the archive from CPAN in any directory of your choice. Unpack the archive
2201							with a command that on a Linux machine would look like:
2202
2203							tar zxvf Algorithm-BitVector-1.23.tar.gz
2204
2205							This will create an installation directory for you whose name will be
2206							C. Enter this directory and execute the following commands
2207							for a standard install of the module if you have root privileges:
2208
2209							perl Makefile.PL
2210							make
2211							make test
2212							sudo make install
2213
2214							If you do not have root privileges, you can carry out a non-standard install the
2215							module in any directory of your choice by:
2216
2217							perl Makefile.PL prefix=/some/other/directory/
2218							make
2219							make test
2220							make install
2221
2222							With a non-standard install, you may also have to set your PERL5LIB environment
2223							variable so that this module can find the required other modules. How you do that
2224							would depend on what platform you are working on. In order to install this module in
2225							a Linux machine on which I use tcsh for the shell, I set the PERL5LIB environment
2226							variable by
2227
2228							setenv PERL5LIB /some/other/directory/lib64/perl5/:/some/other/directory/share/perl5/
2229
2230							If I used bash, I'd need to declare:
2231
2232							export PERL5LIB=/some/other/directory/lib64/perl5/:/some/other/directory/share/perl5/
2233
2234
2235							=head1 THANKS
2236
2237							The bug in the primality test function, whose fix resulted in Version 1.21, was
2238							reported by Dana Jacobsen in a bug report filed at C. Thanks Dana!
2239
2240							The restriction on the Perl version was removed on Slaven Rezic's recommendation. He
2241							says the module runs fine with Perl 5.8.9. Thanks Slaven!
2242
2243							=head1 AUTHOR
2244
2245							Avinash Kak, kak@purdue.edu
2246
2247							If you send email, please place the string "BitVector" in your subject line to get past
2248							my spam filter.
2249
2250							=head1 COPYRIGHT
2251
2252							This library is free software; you can redistribute it and/or modify it under the
2253							same terms as Perl itself.
2254
2255							Copyright 2015 Avinash Kak
2256
2257							=cut
2258