File Coverage

blib/lib/Algorithm/BitVector.pm

Criterion	Covered	Total	%
statement	114	664	17.1
branch	32	218	14.6
condition	20	155	12.9
subroutine	17	75	22.6
pod	37	39	94.8
total	220	1151	19.1

line	stmt	bran	cond	sub	pod	time	code
1							package Algorithm::BitVector;
2
3							#!/usr/bin/perl -w
4
5							#------------------------------------------------------------------------------------
6							# Copyright (c) 2015 Avinash Kak. All rights reserved. This program is free
7							# software. You may modify and/or distribute it under the same terms as Perl itself.
8							# This copyright notice must remain attached to the file.
9							#
10							# Algorithm::BitVector is a Perl module for creating a memory efficient packed
11							# representation of bit arrays and for logical and numerical operations on such
12							# arrays.
13							# -----------------------------------------------------------------------------------
14
15							#use 5.10.0;
16	1			1		14393	use strict;
	1					2
	1					33
17	1			1		4	use warnings;
	1					1
	1					37
18	1			1		8	use Carp;
	1					6
	1					90
19	1			1		5	use List::Util qw(pairmap max reduce any);
	1					1
	1					127
20	1			1		1008	use Math::BigInt;
	1					21583
	1					5
21	1			1		12808	use Math::Random;
	1					5305
	1					114
22	1			1		10	use Fcntl 'SEEK_CUR';
	1					3
	1					79
23
24							our $VERSION = '1.24';
25
26	1					8	use overload '+' => '_join',
27							'""' => '_str',
28							'0+' => '_int',
29							'~' => '_invert',
30							'\|' => '_or',
31							'&' => '_and',
32							'^' => '_xor',
33							'<=>' => '_compare',
34							'<<' => '_lshift',
35							'>>' => '_rshift',
36							'<>' => '_iter',
37	1			1		4	'fallback' => 1;
	1					2
38
39							sub _readblock {
40	0			0		0	my $blocksize = shift;
41	0					0	my $bitvector = shift;
42	0					0	my $block;
43							my $more_to_read;
44	0					0	my $i = 0;
45	0					0	my $byte_as_bits;
46	0					0	my $bitstring = '';
47	0					0	while ( $i < $blocksize / 8 ) {
48	0					0	$i++;
49	0					0	my $num_bytes_read = sysread($bitvector->{FILEIN}, my $byte, 1);
50	0	0				0	unless ($num_bytes_read) {
51	0	0				0	if (length($bitstring) < $blocksize) {
52	0					0	$bitvector->{more_to_read} = 0;
53	0					0	$more_to_read = 0;
54							}
55	0					0	return $bitstring;
56							} else {
57	0					0	my $bits_as_string = sprintf "%vb", $byte;
58	0					0	$bits_as_string = '0' x (8 - length($bits_as_string)) . $bits_as_string;
59	0					0	$bitstring .= $bits_as_string;
60							}
61							}
62	0					0	my $file_pos = tell $bitvector->{FILEIN};
63							# peek at the next byte; moves file position only if a
64							# byte is read
65	0					0	my $num_bytes_read = sysread($bitvector->{FILEIN}, my $next_byte, 1);
66	0	0				0	if ($num_bytes_read) {
67							# pretend we never read the byte
68	0					0	sysseek $bitvector->{FILEIN}, $file_pos - 1, SEEK_CUR;
69							} else {
70	0					0	$bitvector->{more_to_read} = 0;
71							}
72	0					0	return $bitstring;
73							}
74
75							# Constructor:
76							sub new {
77	3			3	0	19	my ($class, %args) = @_;
78	3					9	my @params = keys %args;
79	3	50				8	croak "\nYou have used a wrong name for a keyword argument " .
80							"--- perhaps a misspelling\n"
81							if _check_for_illegal_params(@params) == 0;
82	3					16	my $self = {
83							filename => $args{filename},
84							size => $args{size},
85							intVal => $args{intVal},
86							bitlist => $args{bitlist},
87							bitstring => $args{bitstring},
88							hexstring => $args{hexstring},
89							textstring => $args{textstring},
90							};
91	3					6	bless $self, $class;
92	3	50				76	if ( $self->{filename} ) {
		100
		100
		50
		50
		50
		0
93	0	0	0			0	die "When using the `filename' option in the constructor call, you cannot use any " .
			0
			0
			0
			0
94							"other option at the same time: $!"
95							if $self->{intVal} or $self->{size} or $self->{bitlist}
96							or $self->{bitstring} or $self->{hexstring} or $self->{textstring};
97	0	0				0	open $self->{FILEIN}, "< $self->{filename}"
98							or die "unable to open file $self->{filename}: $!";
99	0					0	$self->{more_to_read} = 1;
100	0					0	return $self;
101							} elsif ( defined $self->{intVal} ) {
102	1	50	33			21	die "When using the `intVal' option in the constructor call, you CANNOT use `filename', " .
			33
			33
			33
103							"`bitlist', 'bitstring', `hexstring', or `textstring' options: $!"
104							if $self->{filename} or $self->{bitlist} or $self->{bitstring}
105							or $self->{hexstring} or $self->{textstring};
106	1	50	33			7	if ($self->{intVal} == 0 && ! defined $self->{size}) {
107	0					0	$self->{bitlist} = [0];
108	0					0	$self->{size} = 1;
109							} else {
110	1					2	my @bitarray;
111	1	50				5	if (ref($self->{intVal}) eq 'Math::BigInt') {
112	0					0	my $bitlist_str = $self->{intVal}->as_bin();
113	0					0	@bitarray = split //, $bitlist_str;
114							# Now get rid of the `0b' prefix on the string returned by as_bin():
115	0					0	shift @bitarray;
116	0					0	shift @bitarray;
117							} else {
118	1					11	my $bitlist_str = sprintf "%b", $self->{intVal};
119	1	50				4	if (defined $self->{size}) {
120	0	0				0	die "The value specified for size must be at least as large as for the " .
121							"smallest bitvector possible for the intVal integer: $!"
122							if $self->{size} < length $bitlist_str;
123	0					0	my $n = $self->{size} - length $bitlist_str;
124	0					0	my $extended_bitlist_str = '0' x $n . $bitlist_str;
125	0					0	@bitarray = split //, $extended_bitlist_str;
126							} else {
127	1					7	@bitarray = split //, $bitlist_str;
128							}
129							}
130	1					3	$self->{bitlist} = \@bitarray;
131	1					2	$self->{size} = scalar @{$self->{bitlist}};
	1					12
132							}
133							} elsif (defined $self->{size}) {
134	1	50	33			32	die "When using the `size' option in the constructor call, you CANNOT use `filename', " .
			33
			33
			33
			33
135							"`bitlist', 'bitstring', `hexstring', or `textstring' options: $!"
136							if $self->{filename} or $self->{intVal} or $self->{bitlist}
137							or $self->{bitstring} or $self->{hexstring} or $self->{textstring};
138	1					4	my $bitstring = "0" x $self->{size};
139	1					8	my @bitlist_from_bitstring = split '', $bitstring;
140	1					3	$self->{bitlist} = \@bitlist_from_bitstring;
141							} elsif ($self->{bitlist}) {
142	0	0	0			0	die "When using the `bitlist' option in the constructor call, you cannot use any " .
			0
			0
			0
			0
143							"other option at the same time: $!"
144							if $self->{filename} or $self->{intVal} or $self->{size}
145							or $self->{bitstring} or $self->{hexstring} or $self->{textstring};
146	0					0	$self->{size} = scalar @{$self->{bitlist}};
	0					0
147							} elsif (defined $self->{bitstring}) {
148	0	0	0			0	die "When using the `bitstring' option in the constructor call, you cannot use any " .
			0
			0
			0
			0
149							"other option at the same time: $!"
150							if $self->{filename} or $self->{intVal} or $self->{size}
151							or $self->{bitlist} or $self->{hexstring} or $self->{textstring};
152	0					0	my @bitlist_from_bitstring = split //, $self->{bitstring};
153	0					0	$self->{bitlist} = \@bitlist_from_bitstring;
154	0					0	$self->{size} = scalar @{$self->{bitlist}};
	0					0
155							} elsif (defined $self->{textstring}) {
156	1	50	33			16	die "When using the `textstring' option in the constructor call, you cannot use any " .
			33
			33
			33
			33
157							"other option at the same time: $!"
158							if $self->{filename} or $self->{intVal} or $self->{size}
159							or $self->{bitlist} or $self->{hexstring} or $self->{bitstring};
160	5	50				17	my $bitstring_from_text = join '', map {length($_) == 8 ? $_
	5					9
161	1					7	: ('0' x (8 - length($_))) . $_} map {sprintf "%b", $_}
162							map ord, split //, $self->{textstring};
163	1					10	my @bitlist_from_text = split //, $bitstring_from_text;
164	1					3	$self->{bitlist} = \@bitlist_from_text;
165	1					2	$self->{size} = scalar @{$self->{bitlist}};
	1					3
166							} elsif (defined $self->{hexstring}) {
167	0	0	0			0	die "When using the `hexstring' option in the constructor call, you cannot use any " .
			0
			0
			0
			0
168							"other option at the same time: $!"
169							if $self->{filename} or $self->{intVal} or $self->{size}
170							or $self->{bitlist} or $self->{textstring} or $self->{bitstring};
171	0	0				0	my $bitstring_from_hex = join '', map {length($_) == 4 ? $_
	0					0
172	0					0	: ('0' x (4 - length($_))) . $_} map {sprintf "%b", $_}
173	0					0	map {hex $_} split //, $self->{hexstring};
174	0					0	my @bitlist_from_hex = split //, $bitstring_from_hex;
175	0					0	$self->{bitlist} = \@bitlist_from_hex;
176	0					0	$self->{size} = scalar @{$self->{bitlist}};
	0					0
177							}
178	3					3	my $shorts_needed = int( (@{$self->{bitlist}} + 15) / 16 );
	3					10
179	3					8	@{$self->{_vector}} = map {unpack "n", pack("n", 0)} 0 .. $shorts_needed-1;
	3					8
	5					12
180	3					31	my @interleaved = (0..$self->{size}-1, @{$self->{bitlist}})
	60					66
181	3					7	[map {$_,$_+$self->{size}} (0 .. $self->{size}-1)];
182	3			60		37	pairmap {$self->set_bit($a,$b)} @interleaved;
	60					80
183	3					11	$self->{bitlist} = undef;
184	3					21	return $self;
185							}
186
187							## Set the bit at the designated position to the value shown
188							sub set_bit {
189	60			60	1	48	my $self = shift;
190	60					48	my $posn = shift;
191	60					37	my $val = shift;
192	60	50	66			146	die "incorrect value for a bit" unless $val == 0 or $val == 1;
193	60	50	33			190	die "index range error" if ($posn >= $self->{size}) or ($posn < - $self->{size});
194	60	50				73	$posn = $self->{size} + $posn if $posn < 0;
195	60					61	my $block_index = int($posn / 16);
196	60					46	my $shift = $posn & 0xF;
197	60					56	my $cv = $self->{_vector}->[$block_index];
198	60	100				120	if ( ( ( $cv >> $shift ) & 1 ) != $val) {
199	25					62	$self->{_vector}->[$block_index] = $cv ^ (1 << $shift);
200							}
201							}
202
203							## Get the bit from the designated position. This method can also return a slice of a
204							## bitvector. However, note that the slice is returned as a list of the bits in the
205							## index range you specified. You can easily convert the list of bits returned into a
206							## bitvector in your own code.
207							sub get_bit {
208	65			65	1	38	my $self = shift;
209	65					45	my $pos = shift;
210	65	100				81	unless (ref($pos) eq "ARRAY") {
211	60	50	33			166	die "index range error" if ($pos >= $self->{size}) or ($pos < - $self->{size});
212	60	50				77	$pos = $self->{size} + $pos if $pos < 0;
213	60					124	return ( $self->{_vector}->[int($pos/16)] >> ($pos&15) ) & 1;
214							}
215	5					15	my @slice = map $self->get_bit($_), (@$pos)[0..@$pos-1];
216	5					33	return \@slice;
217							}
218
219							## Overloading of the string conversion operator. Return the string representation
220							## of a bitvector.
221							sub _str {
222	1			1		16	my $self = shift;
223	1					6	return join '', map $self->get_bit($_), 0 .. $self->{size}-1;
224							}
225
226							## Overloading of the `+' operator. Concatenate the argument bitvectors. Return the
227							## concatenated bitvector as a new BitVector instance.
228							sub _join {
229	0			0		0	my ($bv1, $bv2) = @_;
230	0	0	0			0	die "Abort: The concatenation operator invoked with either undefined " .
231							"or wrong types of operands: $!"
232							unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
233							UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
234	0					0	my $allbits = $bv1->_str() . $bv2->_str();
235	0					0	return Algorithm::BitVector->new( bitstring => $allbits );
236							}
237
238							sub _compare {
239	0			0		0	my ($bv1, $bv2) = @_;
240	0	0	0			0	die "Abort: The comparison operator invoked with either undefined " .
241							"or wrong types of operands: $!"
242							unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
243							UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
244	0					0	my $bigint1 = Math::BigInt->from_bin( "$bv1" );
245	0					0	my $bigint2 = Math::BigInt->from_bin( "$bv2" );
246	0					0	return $bigint1->bcmp($bigint2);
247							}
248
249							sub deep_copy {
250	0			0	1	0	my $self = shift;
251	0					0	my $result_bv = Algorithm::BitVector->new( size => $self->{size} );
252	0					0	foreach my $i (0..@{$result_bv->{_vector}}-1) {
	0					0
253	0					0	$result_bv->{_vector}->[$i] = $self->{_vector}->[$i];
254							}
255	0					0	return $result_bv;
256							}
257
258							## Invert the bits in the bitvector on which the method is invoked
259							## and return the result as a new bitvector.
260							sub _invert {
261	0			0		0	my $self = shift;
262	0	0				0	die "Abort: The operator '~' for bit inversion invoked on an object that is " .
263							"not of type Algorithm::BitVector"
264							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
265	0					0	my $result_bv = $self->deep_copy();
266	0					0	foreach my $i (0..@{$result_bv->{_vector}}-1) {
	0					0
267							# The unary `~' operator may assume a 32-bit wide field:
268	0					0	$result_bv->{_vector}->[$i] = ~ $result_bv->{_vector}->[$i] & 0x0000FFFF;
269							}
270	0					0	return $result_bv;
271							}
272
273							## Take a bitwise 'OR' of two bitvectors. Return the result as a new bitvector. If
274							## the two bitvectors are not of the same size, pad the shorter one with zeros from
275							## the left.
276							sub _or {
277	0			0		0	my ($bv1, $bv2) = @_;
278	0	0	0			0	die "Abort: The `\|' operator invoked with either undefined " .
279							"or wrong types of operands: $!"
280							unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
281							UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
282	0					0	my ($bv3, $bv4);
283	0	0				0	if ( $bv1->{size} < $bv2->{size} ) {
		0
284	0					0	$bv3 = $bv1->_resize_pad_from_left($bv2->{size} - $bv1->{size});
285	0					0	$bv4 = $bv2;
286							} elsif ( $bv1->{size} > $bv2->{size} ) {
287	0					0	$bv3 = $bv1;
288	0					0	$bv4 = $bv2->_resize_pad_from_left($bv1->{size} - $bv2->{size});
289							} else {
290	0					0	$bv3 = $bv1;
291	0					0	$bv4 = $bv2;
292							}
293	0					0	my $result_bv = Algorithm::BitVector->new( size => $bv3->{size} );
294	0					0	foreach my $i (0..@{$result_bv->{_vector}}-1) {
	0					0
295							# The binary `\|' operator may assume 32-bit wide fields:
296	0					0	$result_bv->{_vector}->[$i] = ($bv3->{_vector}->[$i] \| $bv4->{_vector}->[$i] ) & 0x0000FFFF;
297							}
298	0					0	return $result_bv;
299							}
300
301							## Resize a bitvector by padding with n 0's from the left. Return the result as a
302							## new bitvector.
303							sub _resize_pad_from_left {
304	0			0		0	my $self = shift;
305	0					0	my $n = shift;
306	0					0	my $new_str = '0' x $n . "$self";
307	0					0	return Algorithm::BitVector->new( bitstring => $new_str );
308							}
309
310							## Take a bitwise 'AND' of the bitvector on which the method is invoked with
311							## the argument bitvector. Return the result as a new bitvector. If the two
312							## bitvectors are not of the same size, pad the shorter one with zeros from the
313							## left.
314							sub _and {
315	0			0		0	my ($bv1, $bv2) = @_;
316	0	0	0			0	die "Abort: The concatenation operator invoked with either undefined " .
317							"or wrong types of operands: $!"
318							unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
319							UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
320	0					0	my ($bv3, $bv4);
321	0	0				0	if ( $bv1->{size} < $bv2->{size} ) {
		0
322	0					0	$bv3 = $bv1->_resize_pad_from_left($bv2->{size} - $bv1->{size});
323	0					0	$bv4 = $bv2;
324							} elsif ( $bv1->{size} > $bv2->{size} ) {
325	0					0	$bv3 = $bv1;
326	0					0	$bv4 = $bv2->_resize_pad_from_left($bv1->{size} - $bv2->{size});
327							} else {
328	0					0	$bv3 = $bv1;
329	0					0	$bv4 = $bv2;
330							}
331	0					0	my $result_bv = Algorithm::BitVector->new( size => $bv3->{size} );
332	0					0	foreach my $i (0..@{$result_bv->{_vector}}-1) {
	0					0
333							# The binary bitwise `&' operator may assume 32-bit wide fields:
334	0					0	$result_bv->{_vector}->[$i] = ($bv3->{_vector}->[$i] & $bv4->{_vector}->[$i] ) & 0x0000FFFF;
335							}
336	0					0	return $result_bv;
337							}
338
339							## Take a bitwise 'XOR' of the bitvector on which the method is invoked with
340							## the argument bitvector. Return the result as a new bitvector. If the two
341							## bitvectors are not of the same size, pad the shorter one with zeros from the
342							## left.
343							sub _xor {
344	0			0		0	my ($bv1, $bv2) = @_;
345	0	0	0			0	die "Abort: The xor operator invoked with either undefined " .
346							"or wrong types of operands: $!"
347							unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
348							UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
349	0					0	my ($bv3, $bv4);
350	0	0				0	if ( $bv1->{size} < $bv2->{size} ) {
		0
351	0					0	$bv3 = $bv1->_resize_pad_from_left($bv2->{size} - $bv1->{size});
352	0					0	$bv4 = $bv2;
353							} elsif ( $bv1->{size} > $bv2->{size} ) {
354	0					0	$bv3 = $bv1;
355	0					0	$bv4 = $bv2->_resize_pad_from_left($bv1->{size} - $bv2->{size});
356							} else {
357	0					0	$bv3 = $bv1;
358	0					0	$bv4 = $bv2;
359							}
360	0					0	my $result_bv = Algorithm::BitVector->new( size => $bv3->{size} );
361	0					0	foreach my $i (0..@{$result_bv->{_vector}}-1) {
	0					0
362							# The binary bitwise `^' operator may assume 32-bit wide fields:
363	0					0	$result_bv->{_vector}->[$i] = ($bv3->{_vector}->[$i] ^ $bv4->{_vector}->[$i] ) & 0x0000FFFF;
364							}
365	0					0	return $result_bv;
366							}
367
368							# For the overloading of the iterator '<>' operator:
369							sub _iter {
370	0			0		0	my $self = shift;
371	0					0	my $pos = 0;
372	0	0				0	$self->{_iterator} = BitVecIterator->new($self, $pos) unless $self->{_iter_called};
373	0					0	&{$self->{_iterator}->next()};
	0					0
374							}
375
376							{
377							# This inner class needed for implementing iterator overloading:
378							package BitVecIterator;
379							sub new {
380	0			0		0	my $self = [ $_[1], $_[2] ];
381	0					0	bless $self, $_[0];
382	0					0	$_[1]->{_iter_called} = 1;
383	0					0	return $self;
384							}
385							sub next {
386	0			0		0	my $self = shift;
387	0					0	my $bitvec = $self->[0];
388							# The anonymous subroutine that follows is a closure over the variables
389							# incorporated in it:
390							return sub {
391	0	0		0		0	if ($self->[1] >= $bitvec->{size}) {
392	0					0	delete $bitvec->{_iter_called};
393	0					0	return;
394							}
395	0					0	my $bit = $bitvec->get_bit($self->[1]);
396	0					0	$self->[1] = $self->[1] + 1;
397	0					0	return $bit;
398							}
399	0					0	}
400							}
401
402							# for the overloading of the numification operator:
403							sub _int {
404	1			1		7	my $self = shift;
405	1					4	return $self->int_value();
406							}
407
408							## Divides an even-sized bitvector into two and returns the two halves as a
409							## list of two bitvectors.
410							sub divide_into_two {
411	0			0	1	0	my $self = shift;
412	0	0				0	die "Abort: The divide_into_two() method invoked on an object that is " .
413							"not of type Algorithm::BitVector"
414							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
415	0	0				0	die "The bitvector to be divided must have even number of bits: $!"
416							if $self->{size} % 2;
417	0					0	my @outlist1 = ();
418	0					0	foreach my $i (0..$self->{size} / 2 - 1) {
419	0					0	push @outlist1, $self->get_bit($i);
420							}
421	0					0	my @outlist2 = ();
422	0					0	foreach my $i ( ($self->{size} / 2) .. ($self->{size} - 1) ) {
423	0					0	push @outlist2, $self->get_bit($i);
424							}
425	0					0	return Algorithm::BitVector->new( bitlist => \@outlist1 ),
426							Algorithm::BitVector->new( bitlist => \@outlist2 );
427							}
428
429							## Permute a bitvector according to the indices shown in the second argument list.
430							## Return the permuted bitvector as a new bitvector.
431							sub permute {
432	0			0	1	0	my $self = shift;
433	0	0				0	die "Abort: The permute() method invoked on an object that is " .
434							"not of type Algorithm::BitVector"
435							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
436	0					0	my $permute_list = shift;
437	0	0				0	die "Bad permutation index in your permutation list"
438							if max(@$permute_list) > $self->{size} - 1;
439	0					0	my @outlist = ();
440	0					0	foreach my $index (@$permute_list) {
441	0					0	push @outlist, $self->get_bit($index);
442							}
443	0					0	return Algorithm::BitVector->new( bitlist => \@outlist );
444							}
445
446							## Unpermute the bitvector according to the permutation list supplied as the
447							## second argument. If you first permute a bitvector by using permute() and
448							## then unpermute() it using the same permutation list, you will get back the
449							## original bitvector.
450							sub unpermute {
451	0			0	1	0	my $self = shift;
452	0	0				0	die "Abort: The unpermute() method invoked on an object that is " .
453							"not of type Algorithm::BitVector"
454							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
455	0					0	my $permute_list = shift;
456	0	0				0	die "Bad permutation index in your permutation list: $!"
457							if max(@$permute_list) > $self->{size} - 1;
458	0	0				0	die "Size of the permute list for unpermuting not consistent with the size of the bet vector:$!"
459							unless $self->{size} == @$permute_list;
460	0					0	my $out_bv = Algorithm::BitVector->new( size => $self->{size} );
461	0					0	foreach my $i (0..@$permute_list-1) {
462	0					0	$out_bv->set_bit( $permute_list->[$i], $self->get_bit($i) );
463							}
464	0					0	return $out_bv;
465							}
466
467							## Write the bitvector to the file object file_out. (A file object is returned
468							## by a call to open()). Since all file I/O is byte oriented, the bitvector must
469							## be multiple of 8 bits. Each byte treated as MSB first (0th index).
470							sub write_to_file {
471	0			0	1	0	my $self = shift;
472	0	0				0	die "Abort: The write_to_file() method invoked on an object that is " .
473							"not of type Algorithm::BitVector"
474							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
475	0					0	my $file_out = shift;
476	0					0	my $err_str = "Only a bitvector whose length is a multiple of 8 can " .
477							"be written to a file. Use the padding functions to satisfy " .
478							"this constraint.";
479	0	0				0	$self->{FILEOUT} = $file_out unless $self->{FILEOUT};
480	0	0				0	die $err_str if $self->{size} % 8;
481	0					0	for my $byte (0..$self->{size}/8 - 1){
482	0					0	my $value = 0;
483	0					0	foreach my $bit (0..7) {
484	0					0	$value += $self->get_bit( $byte*8+(7 - $bit) ) << $bit;
485							}
486	0					0	syswrite $file_out, chr($value), 1;
487							}
488							}
489
490							## For closing a file object that was used for reading the bits into one or more
491							## BitVector objects.
492							sub close_file_handle {
493	0			0	1	0	my $self = shift;
494	0	0				0	die "Abort: The close_file_handle() method invoked on an object that is " .
495							"not of type Algorithm::BitVector"
496							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
497	0	0				0	die "No open file currently associated with the file handle: $!"
498							unless $self->{FILEIN};
499	0					0	close $self->{FILEIN};
500							}
501
502							## Return the integer value of a bitvector. If the original integer from which the
503							## bitvector was constructed is a Math::BigInt object, then return the string
504							## representation of the integer value.
505							sub int_value {
506	1			1	1	1	my $self = shift;
507	1	50				6	die "Abort: The int() method invoked on an object that is " .
508							"not of type Algorithm::BitVector"
509							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
510	1					1	my $int_value;
511	1	50	33			5	if (defined($self->{intVal}) && (ref($self->{intVal}) eq 'Math::BigInt')) {
512	0					0	$int_value = $self->{intVal}->bstr();
513							} else {
514	1					2	$int_value = 0;
515	1					4	foreach my $i (0..$self->{size}-1) {
516	4					8	$int_value += $self->get_bit($i) * ( 2 ** ( $self->{size} - $i - 1 ) );
517							}
518							}
519	1					6	return $int_value;
520							}
521
522							## Return the text string formed by dividing the bitvector into bytes from the
523							## left and replacing each byte by its ASCII character (this is a useful thing
524							## to do only if the length of the vector is an integral multiple of 8 and every
525							## byte in your bitvector has a print representation)
526							sub get_text_from_bitvector {
527	1			1	0	7	my $self = shift;
528	1	50				6	die "Abort: The get_text_from_bitvector() method invoked on an object that is " .
529							"not of type Algorithm::BitVector"
530							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
531	1	50				9	die "\nThe bitvector for get_text_from_bitvector() must be an integral multiple of 8 bits: $!"
532							if $self->{size} % 8;
533	1					2	my $ascii = '';
534	1					4	for (my $i=0; $i < $self->{size}; $i += 8) {
535	5					4	$ascii .= chr oct "0b". join '', @{$self->get_bit([$i..$i+7])};
	5					15
536							}
537	1					6	return $ascii;
538							}
539
540							## Return a string of hex digits by scanning the bits from the left and
541							## replacing each sequence of 4 bits by its corresponding hex digit (this is a
542							## useful thing to do only if the length of the vector is an integral multiple
543							## of 4)
544							sub get_hex_string_from_bitvector {
545	0			0	1	0	my $self = shift;
546	0	0				0	die "Abort: The get_hex_from_bitvector() method invoked on an object that is " .
547							"not of type Algorithm::BitVector"
548							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
549	0	0				0	die "\nThe bitvector for get_hex_from_bitvector() must be a multiple of 4 bits: $!"
550							if $self->{size} % 4;
551	0					0	my $hex = '';
552	0					0	for (my $i=0; $i < $self->{size}; $i += 4) {
553	0					0	$hex .= sprintf "%x", oct "0b". join '', @{$self->get_bit([$i..$i+3])};
	0					0
554							}
555	0					0	return $hex;
556							}
557
558							## Read blocksize bits from a disk file and return a BitVector object containing
559							## the bits. If the file contains fewer bits than blocksize, construct the
560							## BitVector object from however many bits there are in the file. If the file
561							## contains zero bits, return a BitVector object of size attribute set to 0.
562							sub read_bits_from_file {
563	0			0	1	0	my $self = shift;
564	0	0				0	die "Abort: The read_bits_from_file() method invoked on an object that is " .
565							"not of type Algorithm::BitVector"
566							unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
567	0					0	my $blocksize = shift;
568	0					0	my $error_str = "You need to first construct a BitVector object with a filename as argument";
569	0	0				0	die "$error_str" unless $self->{filename};
570	0	0				0	die "block size must be a multiple of 8" if $blocksize % 8;
571	0					0	my $bitstr = _readblock( $blocksize, $self );
572	0	0				0	if (length $bitstr == 0) {
573	0					0	return Algorithm::BitVector->new( size => 0 );
574	0					0	print "file has no bits\n";
575							} else {
576	0					0	return Algorithm::BitVector->new( bitstring => $bitstr );
577							}
578							}
579
580							## For an in-place left circular shift by n bit positions
581							sub _lshift {
582	0			0		0	my $self = shift;
583	0					0	my $n = shift;
584	0	0				0	die "Circular shift of an empty vector makes no sense" unless $self->{size};
585	0	0				0	return $self >> abs($n) if $n < 0;
586	0					0	foreach my $i (0..$n-1) {
587	0					0	$self->_circular_rotate_left_by_one();
588							}
589	0					0	return $self;
590							}
591
592							## For an in-place right circular shift by n bit positions
593							sub _rshift {
594	0			0		0	my $self = shift;
595	0					0	my $n = shift;
596	0	0				0	die "Circular shift of an empty vector makes no sense" unless $self->{size};
597	0	0				0	return $self << abs($n) if $n < 0;
598	0					0	foreach my $i (0..$n-1) {
599	0					0	$self->_circular_rotate_right_by_one();
600							}
601	0					0	return $self;
602							}
603
604							## For a one-bit in-place left circular shift
605							sub _circular_rotate_left_by_one {
606	0			0		0	my $self = shift;
607	0					0	my $max_index = int( ($self->{size} - 1) / 16 );
608	0					0	my $left_most_bit = $self->{_vector}->[0] & 1;
609	0					0	$self->{_vector}->[0] = $self->{_vector}->[0] >> 1;
610	0					0	foreach my $i (1 .. $max_index) {
611	0					0	my $left_bit = $self->{_vector}->[$i] & 1;
612	0					0	$self->{_vector}->[$i] = $self->{_vector}->[$i] >> 1;
613	0					0	$self->{_vector}->[$i-1] \|= $left_bit << 15;
614							}
615	0					0	$self->set_bit($self->{size} - 1, $left_most_bit);
616							}
617
618							## For a one-bit in-place right circular shift
619							sub _circular_rotate_right_by_one {
620	0			0		0	my $self = shift;
621	0					0	my $max_index = int( ($self->{size} - 1) / 16 );
622	0					0	my $right_most_bit = $self->get_bit( $self->{size} - 1);
623	0					0	$self->{_vector}->[$max_index] &= ~0x8000;
624	0					0	$self->{_vector}->[$max_index] = $self->{_vector}->[$max_index] << 1;
625	0					0	for (my $i=$max_index-1; $i > -1; $i -= 1) {
626	0					0	my $right_bit = $self->{_vector}->[$i] & 0x8000;
627	0					0	$self->{_vector}->[$i] &= ~0x8000;
628	0					0	$self->{_vector}->[$i] = $self->{_vector}->[$i] << 1;
629	0					0	$self->{_vector}->[$i+1] \|= $right_bit >> 15;
630							}
631	0					0	$self->set_bit(0, $right_most_bit);
632							}
633
634							## Pad a bitvector with n zeros from the left
635							sub pad_from_left {
636	0			0	1	0	my $self = shift;
637	0					0	my $n = shift;
638	0					0	my $new_str = ('0' x $n) . "$self";
639	0					0	my $new_bitvec = Algorithm::BitVector->new( bitstring => $new_str );
640	0					0	$self->{size} = length $new_str;
641	0					0	$self->{_vector} = $new_bitvec->{_vector};
642	0					0	return $self;
643							}
644
645							## Pad a bitvector with n zeros from the right
646							sub pad_from_right {
647	0			0	1	0	my $self = shift;
648	0					0	my $n = shift;
649	0					0	my $new_str = "$self" . ('0' x $n);
650	0					0	my $new_bitvec = Algorithm::BitVector->new( bitstring => $new_str );
651	0					0	$self->{size} = length $new_str;
652	0					0	$self->{_vector} = $new_bitvec->{_vector};
653	0					0	return $self;
654							}
655
656							## Resets a previously created BitVector to either all zeros or all ones
657							## depending on the argument val.
658							sub reset {
659	0			0	1	0	my $self = shift;
660	0					0	my $val = shift;
661	0	0	0			0	die "Incorrect argument to reset(): $!" unless ($val == 0) or ($val == 1);
662	0					0	my $reset_bv = Algorithm::BitVector->new( bitstring => ("$val" x $self->{size}) );
663	0					0	$self->{_vector} = $reset_bv->{_vector};
664							}
665
666							## Return the number of bits set in a BitVector instance.
667							sub count_bits {
668	0			0	1	0	my $self = shift;
669	0			0		0	return reduce {$a + $b} split //, "$self";
	0					0
670							}
671
672							## Changes the bit pattern associated with a previously constructed BitVector
673							## instance. The allowable modes for changing the internally stored bit pattern
674							## are the same as for the constructor.
675							sub set_value {
676	0			0	1	0	my $self = shift;
677	0					0	my $reset_bv = Algorithm::BitVector->new( @_ );
678	0					0	$self->{_vector} = $reset_bv->{_vector};
679	0					0	$self->{size} = $reset_bv->{size};
680							}
681
682
683							## This method for counting the set bits is much faster for sparse bitvectors.
684							## Note, however, that count_bits() may work much faster for dense-packed
685							## bitvectors.
686							sub count_bits_sparse {
687	0			0	1	0	my $self = shift;
688	0					0	my $num = 0;
689	0					0	foreach my $intval (@{$self->{_vector}}) {
	0					0
690	0	0				0	next if $intval == 0;
691	0					0	my ($c, $iv) = (0, $intval);
692	0					0	while ($iv > 0) {
693	0					0	$iv = $iv & ($iv - 1);
694	0					0	$c++;
695							}
696	0					0	$num += $c;
697							}
698	0					0	return $num;
699							}
700
701							## Computes the Jaccard similarity coefficient between two bitvectors
702							sub jaccard_similarity {
703	0			0	1	0	my $self = shift;
704	0					0	my $other = shift;
705	0	0	0			0	die "Jaccard called on two zero vectors --- NOT ALLOWED: $!"
706							if (int($self) == 0) && (int($other) == 0);
707	0	0				0	die "Jaccard called on vectors of unequal size --- NOT ALLOWED: $!"
708							if $self->{size} != $other->{size};
709	0					0	my $intersection = $self & $other;
710	0					0	my $union = $self \| $other;
711	0					0	return $intersection->count_bits_sparse() / $union->count_bits_sparse();
712							}
713
714							## Computes the Jaccard distance between two bitvectors
715							sub jaccard_distance {
716	0			0	1	0	my $self = shift;
717	0					0	my $other = shift;
718	0	0				0	die "Jaccard called on vectors of unequal size --- NOT ALLOWED: $!"
719							if $self->{size} != $other->{size};
720	0					0	return 1 - $self->jaccard_similarity( $other );
721							}
722
723							## Computes the Hamming distance between two bitvectors
724							sub hamming_distance {
725	0			0	1	0	my $self = shift;
726	0					0	my $other = shift;
727	0	0				0	die "hamming_distance() called on vectors of unequal size --- NOT ALLOWED: $!"
728							if $self->{size} != $other->{size};
729	0					0	my $diff = $self ^ $other;
730	0					0	return $diff->count_bits_sparse();
731							}
732
733							## This method calculates the position of the next set bit at or after the current
734							## position index. It returns -1 if there is no next set bit. Logic based on the
735							## contributions by Jason Allum and John Gleeson to the Python version of this
736							## module.
737							sub next_set_bit {
738	0			0	1	0	my $self = shift;
739	0		0			0	my $from_index = shift \|\| 0;
740	0	0				0	die "The from_index must be nonnegative: $!" unless $from_index >= 0;
741	0					0	my $i = $from_index;
742	0					0	my $v = $self->{_vector};
743	0					0	my $l = scalar @$v;
744	0					0	my $o = $i >> 4;
745	0					0	my $s = $i & 0x0F;
746	0					0	$i = $o << 4;
747	0					0	while ($o < $l) {
748	0					0	my $h = $v->[$o];
749	0	0				0	if ($h) {
750	0					0	$i += $s;
751	0					0	my $m = 1 << $s;
752	0					0	while ($m != (1 << 0x10)) {
753	0	0				0	return $i if $h & $m;
754	0					0	$m <<= 1;
755	0					0	$i += 1;
756							}
757							} else {
758	0					0	$i += 0x10;
759							}
760	0					0	$s = 0;
761	0					0	$o += 1;
762							}
763	0					0	return -1;
764							}
765
766							## For a bit that is set at the argument 'position', this method returns how many
767							## bits are set to the left of that bit. For example, in the bit pattern
768							## 000101100100, a call to this method with position set to 9 will return 4.
769							sub rank_of_bit_set_at_index {
770	0			0	1	0	my $self = shift;
771	0					0	my $position = shift;
772	0	0				0	die "The bitvector has no bits set at the position in the call to rank_of_bit_set_at_index(): $!"
773							unless $self->get_bit($position);
774	0					0	my $bv_slice = Algorithm::BitVector->new( bitlist => $self->get_bit([0..$position]) );
775	0					0	return $bv_slice->count_bits();
776							}
777
778							## Determines whether the integer value of a bitvector is a power of 2.
779							sub is_power_of_2 {
780	0			0	1	0	my $self = shift;
781	0	0				0	return 0 if int($self) == 0;
782	0					0	my $bv = $self & Algorithm::BitVector->new( intVal => int($self) - 1);
783	0	0				0	return 1 if int($bv) == 0;
784	0					0	return 0;
785							}
786
787							## Faster version of is_power_of2() for sparse bitvectors
788							sub is_power_of_2_sparse {
789	0			0	1	0	my $self = shift;
790	0	0				0	return 1 if $self->count_bits_sparse() == 1;
791	0					0	return 0;
792							}
793
794							## Returns a new bitvector by reversing the bits in the bitvector on which the
795							## method is invoked.
796							sub reverse {
797	0			0	1	0	my $self = shift;
798	0					0	my @reverseList = ();
799	0					0	for (my $i=1; $i < $self->{size} + 1; $i++) {
800	0					0	push @reverseList, $self->get_bit( -$i );
801							}
802	0					0	return Algorithm::BitVector->new( bitlist => \@reverseList );
803							}
804
805							## Using Euclid's Algorithm, returns the greatest common divisor of the integer
806							## value of the bitvector on which the method is invoked and the integer value
807							## of the argument bitvector.
808							sub gcd {
809	0			0	1	0	my $self = shift;
810	0					0	my $other = shift;
811	0					0	my ($a,$b) = (int($self), int($other));
812	0	0				0	($a,$b) = ($b,$a) if $a < $b;
813	0					0	while ($b != 0) {
814	0					0	($a, $b) = ($b, $a % $b);
815							}
816	0					0	return Algorithm::BitVector->new( intVal => $a );
817							}
818
819							## Calculates the multiplicative inverse of the bitvector on which the method is
820							## invoked modulo the bitvector that is supplied as the argument. Code based on
821							## Extended Euclid's Algorithm.
822							sub multiplicative_inverse {
823	0			0	1	0	my $self = int( shift );
824	0					0	my $modulus = int( shift );
825	0					0	my ($mod, $num) = ($modulus, $self);
826	0					0	my ($x, $x_old) = (0, 1);
827	0					0	my ($y, $y_old) = (1, 0);
828	0					0	while ($mod) {
829	0					0	my $quotient = int($num / $mod);
830	0					0	($num, $mod) = ($mod, $num % $mod);
831	0					0	($x, $x_old) = ($x_old - $x * $quotient, $x);
832	0					0	($y, $y_old) = ($y_old - $y * $quotient, $y);
833							}
834	0	0				0	if ($num != 1) {
835	0					0	return 0;
836							} else {
837	0					0	my $MI = ($x_old + $modulus) % $modulus;
838	0					0	return Algorithm::BitVector->new( intVal => $MI );
839							}
840							}
841
842							## For a one-bit in-place left non-circular shift. Note that the bitvector size
843							## does not change. The leftmost bit that moves past the first element of the
844							## bitvector is discarded and rightmost bit of the returned vector is set to zero.
845							sub _shift_left_by_one {
846	0			0		0	my $self = shift;
847	0					0	my $size = @{$self->{_vector}};
	0					0
848	0					0	my @left_most_bits = map {$_ & 1} @{$self->{_vector}};
	0					0
	0					0
849	0					0	push @left_most_bits, $left_most_bits[0];
850	0					0	shift @left_most_bits;
851	0					0	@{$self->{_vector}} = map {$_ >> 1} @{$self->{_vector}};
	0					0
	0					0
	0					0
852	0					0	my @a = @{$self->{_vector}};
	0					0
853	0					0	my @b = map {$_ << 15} @left_most_bits;
	0					0
854	0					0	my @interleaved = ( @a, @b )[ map { $_, $_ + @a } ( 0 .. $#a ) ];
	0					0
855	0			0		0	@{$self->{_vector}} = pairmap {$a \| $b} @interleaved;
	0					0
	0					0
856	0					0	$self->set_bit(-1,0);
857							}
858
859							## For an in-place left non-circular shift by n bit positions. The bits shifted
860							## out at the left are discarded. The new bit positions on the right are filled with
861							## zeros
862							sub shift_left {
863	0			0	1	0	my $self = shift;
864	0					0	my $n = shift;
865	0					0	foreach my $i (0..$n-1) {
866	0					0	$self->_shift_left_by_one();
867							}
868	0					0	return $self;
869							}
870
871							## For a one-bit in-place right non-circular shift. Note that bitvector size does
872							## not change. The rightmost bit that moves past the last element of the bitvector
873							## on the right is discarded and the leftmost bit of the returned vector is set to
874							## zero.
875							sub _shift_right_by_one {
876	0			0		0	my $self = shift;
877	0					0	my $size = @{$self->{_vector}};
	0					0
878	0					0	my @right_most_bits = map {$_ & 0x8000} @{$self->{_vector}};
	0					0
	0					0
879	0					0	@{$self->{_vector}} = map { $_ & ~0x8000 } @{$self->{_vector}};
	0					0
	0					0
	0					0
880	0					0	unshift @right_most_bits, 0;
881	0					0	pop @right_most_bits;
882	0					0	@{$self->{_vector}} = map {$_ << 1} @{$self->{_vector}};
	0					0
	0					0
	0					0
883	0					0	my @a = @{$self->{_vector}};
	0					0
884	0					0	my @b = map {$_ >> 15} @right_most_bits;
	0					0
885	0					0	my @interleaved = ( @a, @b )[ map { $_, $_ + @a } ( 0 .. $#a ) ];
	0					0
886	0			0		0	@{$self->{_vector}} = pairmap {$a \| $b} @interleaved;
	0					0
	0					0
887	0					0	$self->set_bit(0,0);
888							}
889
890							## For an in-place right non-circular shift by n bit positions. The n bits that
891							## move past the last element of the bitvector on the right are discarded and
892							## leftmost new n bit positions filled with zeros.
893							sub shift_right {
894	0			0	1	0	my $self = shift;
895	0					0	my $n = shift;
896	0					0	foreach my $i (0..$n-1) {
897	0					0	$self->_shift_right_by_one();
898							}
899	0					0	return $self;
900							}
901
902							## In the set of polynomials defined over GF(2), multiplies the bitvector on which
903							## the method is invoked with the argument bitvector. Returns the product
904							## bitvector. Note that this method carries out straight polynomial multiplication
905							## as opposed to modulo polynomial multiplication. As a result, the highest power of
906							## the result polynomial will ALWAYS be greater than the highest powers of the
907							## argument polynomials for non-trivial multiplications.
908							sub gf_multiply {
909	0			0	1	0	my $self = shift;
910	0					0	my $b = shift;
911	0					0	my $a = $self->deep_copy();
912	0					0	my $b_copy = $b->deep_copy();
913	0					0	my $a_highest_power = $a->{size} - $a->next_set_bit(0) - 1;
914	0					0	my $b_highest_power = $b->{size} - $b_copy->next_set_bit(0) - 1;
915	0					0	my $result = Algorithm::BitVector->new( size => $a->{size} + $b_copy->{size} );
916	0					0	$a->pad_from_left( $result->{size} - $a->{size} );
917	0					0	$b_copy->pad_from_left( $result->{size} - $b_copy->{size} );
918	0					0	my @b_list = split //, "$b_copy";
919	0					0	my @enum = (0..@b_list-1, @b_list) [map {$_, $_+ @b_list} (0 .. @b_list - 1)];
	0					0
920	0					0	for (my $i=0; $i < @enum-1; $i = $i + 2) {
921	0	0				0	if ($enum[$i+1]) {
922	0					0	my $power = $b_copy->{size} - $enum[$i] - 1;
923	0					0	my $a_copy = $a->deep_copy();
924	0					0	$a_copy->shift_left( $power );
925	0					0	$result ^= $a_copy;
926							}
927							}
928	0					0	return $result;
929							}
930
931							## Carries out modular division of a bitvector by the modulus bitvector mod in
932							## GF(2^n) finite field. Returns both the quotient and the remainder.
933							sub gf_divide_by_modulus {
934	0			0	1	0	my $num = shift;
935	0					0	my $mod = shift;
936	0					0	my $n = shift;
937	0	0				0	die "Modulus bit pattern too long" if $mod->{size} > $n + 1;
938	0					0	my $quotient = Algorithm::BitVector->new( intVal => 0, size => $num->{size} );
939	0					0	my $remainder = $num->deep_copy();
940	0					0	for (my $i=0; $i < $num->{size}; $i++) {
941	0					0	my $mod_highest_power = $mod->{size} - $mod->next_set_bit(0) - 1;
942	0					0	my $remainder_highest_power;
943	0	0				0	if ($remainder->next_set_bit(0) == -1) {
944	0					0	$remainder_highest_power = 0;
945							} else {
946	0					0	$remainder_highest_power = $remainder->{size} - $remainder->next_set_bit(0) - 1;
947							}
948	0	0	0			0	if (($remainder_highest_power < $mod_highest_power) or (int($remainder) == 0)) {
949	0					0	last;
950							} else {
951	0					0	my $exponent_shift = $remainder_highest_power - $mod_highest_power;
952	0					0	$quotient->set_bit($quotient->{size} - $exponent_shift - 1, 1);
953	0					0	my $quotient_mod_product = $mod->deep_copy();
954	0					0	$quotient_mod_product->pad_from_left($remainder->{size} - $mod->{size});
955	0					0	$quotient_mod_product->shift_left($exponent_shift);
956	0					0	$remainder ^= $quotient_mod_product;
957							}
958							}
959	0	0				0	if ($remainder->{size} > $n) {
960	0					0	$remainder = Algorithm::BitVector->new(
961							bitlist => $remainder->get_bit([$remainder->{size}-$n .. $remainder->{size}-1]));
962							}
963	0					0	return ($quotient, $remainder);
964							}
965
966							## Multiplies a bitvector with the bitvector b in GF(2^n) finite field with the
967							## modulus bit pattern set to mod
968							sub gf_multiply_modular {
969	0			0	1	0	my $self = shift;
970	0					0	my $b = shift;
971	0					0	my $mod = shift;
972	0					0	my $n = shift;
973	0					0	my $a_copy = $self->deep_copy();
974	0					0	my $b_copy = $b->deep_copy();
975	0					0	my $product = $a_copy->gf_multiply($b_copy);
976	0					0	my ($quotient, $remainder) = $product->gf_divide_by_modulus($mod, $n);
977	0					0	return $remainder
978							}
979
980							## Returns the multiplicative inverse of a vector in the GF(2^n) finite field
981							## with the modulus polynomial set to mod
982							sub gf_MI {
983	0			0	1	0	my $num = shift;
984	0					0	my $mod = shift;
985	0					0	my $n = shift;
986	0					0	my ($NUM, $MOD) = ($num->deep_copy(), $mod->deep_copy());
987	0					0	my $x = Algorithm::BitVector->new( size => $mod->{size} );
988	0					0	my $x_old = Algorithm::BitVector->new( intVal => 1, size => $mod->{size} );
989	0					0	my $y = Algorithm::BitVector->new( intVal => 1, size => $mod->{size} );
990	0					0	my $y_old = Algorithm::BitVector->new( size => $mod->{size} );
991	0					0	my ($quotient, $remainder);
992	0					0	while (int($mod)) {
993	0					0	($quotient, $remainder) = $num->gf_divide_by_modulus($mod, $n);
994	0					0	($num, $mod) = ($mod, $remainder);
995	0					0	($x, $x_old) = ($x_old ^ $quotient->gf_multiply($x), $x);
996	0					0	($y, $y_old) = ($y_old ^ $quotient->gf_multiply($y), $y);
997							}
998	0	0				0	if (int($num) != 1) {
999	0					0	return "NO MI. However, the GCD of $NUM and $MOD is $num\n";
1000							} else {
1001	0					0	my $z = $x_old ^ $MOD;
1002	0					0	($quotient, $remainder) = $z->gf_divide_by_modulus($MOD, $n);
1003	0					0	return $remainder;
1004							}
1005							}
1006
1007							## Returns a list of the consecutive runs of 1's and 0's in the bitvector.
1008							## Each run is either a string of all 1's or a string of all 0's.
1009							sub runs {
1010	0			0	1	0	my $self = shift;
1011	0	0				0	die "An empty vector has no runs" if $self->{size} == 0;
1012	0					0	my @allruns = ();
1013	0					0	my $run = '';
1014	0					0	my $previous_bit = $self->get_bit(0);
1015	0	0				0	if ($previous_bit == 0) {
1016	0					0	$run = '0';
1017							} else {
1018	0					0	$run = '1';
1019							}
1020	0					0	my @bitlist = split //, "$self";
1021	0					0	shift @bitlist;
1022	0					0	foreach my $bit (@bitlist) {
1023	0	0	0			0	if (($bit == 0) && ($previous_bit == 0)) {
		0	0
		0	0
1024	0					0	$run .= '0';
1025							} elsif (($bit == 1) && ($previous_bit == 0)) {
1026	0					0	push @allruns, $run;
1027	0					0	$run = '1';
1028							} elsif (($bit == 0) && ($previous_bit == 1)) {
1029	0					0	push @allruns, $run;
1030	0					0	$run = '0';
1031							} else {
1032	0					0	$run .= '1';
1033							}
1034	0					0	$previous_bit = $bit;
1035							}
1036	0					0	push @allruns, $run;
1037							return @allruns
1038	0					0	}
1039
1040							## Check if the integer value of the bitvector is a prime through the Miller-Rabin
1041							## probabilistic test of primality. If not found to be a composite, estimate the
1042							## probability of the bitvector being a prime using this test.
1043							sub test_for_primality {
1044	0			0	1	0	my $p = int(shift);
1045	0	0				0	die "\nThe primality test method test_for_primality() is intended for only " .
1046							"small integers --- integers that are relatively small in relation to " .
1047							"the largest integer that can fit Perl's 4-byte int representation:$!"
1048							if $p > 0x7f_ff_ff_ff;
1049	0	0				0	return "is NOT a prime" if $p == 1;
1050	0					0	my @probes = (2,3,5,7,11,13,17);
1051	0					0	foreach my $a (@probes) {
1052	0	0				0	return "is a prime" if $a == $p;
1053							}
1054	0	0		0		0	return "is NOT a prime" if any {$p % $_ == 0} @probes;
	0					0
1055	0					0	my ($k, $q) = (0, $p-1);
1056	0					0	while (! ($q & 1)) {
1057	0					0	$q >>= 1;
1058	0					0	$k++;
1059							}
1060	0					0	foreach my $a (@probes) {
1061	0					0	my $a_raised_to_q = _powmod_small_ints($a, int($q), $p);
1062	0	0	0			0	next if $a_raised_to_q == 1 or $a_raised_to_q == $p - 1;
1063	0					0	my $a_raised_to_jq = $a_raised_to_q;
1064	0					0	my $primeflag = 0;
1065	0					0	foreach my $j (0..$k-1) {
1066	0					0	$a_raised_to_jq = _powmod_small_ints($a_raised_to_jq, 2, $p);
1067	0	0				0	if ($a_raised_to_jq == $p-1) {
1068	0					0	$primeflag = 1;
1069	0					0	last;
1070							}
1071							}
1072	0	0				0	return "is NOT a prime" unless $primeflag;
1073							}
1074	0					0	my $probability_of_prime = 1 - 1.0/(4 ** @probes);
1075	0					0	return "is a prime with probability $probability_of_prime";
1076							}
1077
1078							## This routine is for modular exponentiation of small integers, meaning the
1079							## integers that can be accommodated (before and after exponentiation) in the native
1080							## 4-byte int representation. For larger integers, a call to this function should
1081							## be replaced by a call to modular exponentiation of the Math::BigInt module.
1082							sub _powmod_small_ints {
1083	0			0		0	my $base = shift;
1084	0					0	my $exponent = shift;
1085	0					0	my $mod = int(shift);
1086	0			0		0	warn "This is just a warning: The modular exponentiation method is not meant for very large numbers (IMPORTANT: This is just a very rough check on the size of the numbers involved)"
1087	0	0				0	if any {$_ > 0x0f_ff_ff_ff} (int($base), int($exponent), int($mod));
1088	0					0	my $result = 1;
1089	0					0	my $a = int($base);
1090	0					0	my $b = $exponent;
1091	0					0	while (int($b) > 0) {
1092	0	0				0	$result = ($result * $a) % $mod if int($b) & 1;
1093	0					0	$b = $b >> 1;
1094	0					0	$a = ($a * $a) % $mod;
1095							}
1096	0					0	return $result;
1097							}
1098
1099							## This method is for a generating a bit pattern of a given size with random bits.
1100							sub gen_random_bits {
1101	0			0	1	0	my $self = shift;
1102	0					0	my $width = shift;
1103	0					0	my $candidate_bits = "0b" . join '', Math::Random::random_uniform_integer($width, 0, 1);
1104	0					0	my $candidate = oct $candidate_bits;
1105	0					0	$candidate \|= 1;
1106	0					0	$candidate \|= (1 << $width-1);
1107	0					0	$candidate \|= (2 << $width-3);
1108	0					0	return Algorithm::BitVector->new( intVal => $candidate );
1109							}
1110
1111							sub length {
1112	0			0	1	0	my $self = shift;
1113	0					0	return $self->{size};
1114							}
1115
1116							sub _check_for_illegal_params {
1117	3			3		4	my @params = @_;
1118	3					7	my @legal_params = qw / filename
1119							size
1120							intVal
1121							bitlist
1122							bitstring
1123							hexstring
1124							textstring
1125							/;
1126	3					4	my $found_match_flag;
1127	3					5	foreach my $param (@params) {
1128	3					4	foreach my $legal (@legal_params) {
1129	12					10	$found_match_flag = 0;
1130	12	100				20	if ($param eq $legal) {
1131	3					5	$found_match_flag = 1;
1132	3					4	last;
1133							}
1134							}
1135	3	50				10	last if $found_match_flag == 0;
1136							}
1137	3					10	return $found_match_flag;
1138							}
1139
1140							1;
1141
1142
1143							=pod
1144
1145							=head1 NAME
1146
1147							Algorithm::BitVector --- A memory efficient packed representation of arbitrary sized
1148							bit arrays and for logical and arithmetic operations on such arrays.
1149
1150							=head1 SYNOPSIS
1151
1152							use Algorithm::BitVector;
1153
1154							# Constructing a given sized bitvector of all zeros:
1155							$bv = Algorithm::BitVector->new( size => 7 );
1156							print "$bv\n"; # 0000000
1157
1158							# Constructing a bitvector whose integer value is specified:
1159							$bv = Algorithm::BitVector->new( intVal => 123456 );
1160							print "$bv\n"; # 11110001001000000
1161							print int($bv); # 123456
1162
1163							# Constructing a bitvector from a very large integer:
1164							use Math::BigInt;
1165							$x = Math::BigInt->new('12345678901234567890123456789012345678901234567890');
1166							$bv = Algorithm::BitVector->new( intVal => $x );
1167
1168							# Constructing a bitvector from a given bit string:
1169							$bv = Algorithm::BitVector->new( bitstring => '00110011' );
1170
1171							# Constructing a bitvector from an ASCII text string:
1172							$bv = Algorithm::BitVector->new( textstring => "hello\njello" );
1173
1174							# Constructing a bitvector from a hex string:
1175							$bv = Algorithm::BitVector->new( hexstring => "68656c6c6f" );
1176
1177							# Constructing a bitvector from a bit list passed as an anonymous array:
1178							$bv = Algorithm::BitVector->new( bitlist => [1, 1, 0, 1] );
1179
1180							# Constructing a bitvector from the contents of a disk file:
1181							$bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1182							$bv1 = $bv->read_bits_from_file(64); # bitvector from the first 64 bits
1183							# and so on
1184
1185
1186							=head1 CHANGES
1187
1188							Version 1.24 includes in the C file the minimum version restriction on
1189							the C module that is imported.
1190
1191							Version 1.23 mentions the required modules in the C file but with no
1192							minimum version numbers. Additionally, the documentation associated with the methods
1193							was significantly upgraded in this version.
1194
1195							Version 1.22 removes the Perl version restriction from the module and the
1196							C files and the C restrictions from the C file.
1197
1198							Version 1.21 fixes a bug in the code for the Miller-Rabin primality test function
1199							C. This version also places a hard limit on the size of the
1200							integers that are allowed to be tested for primality.
1201
1202							Version 1.2 fixes an important bug in creating bitvectors from the contents of a disk
1203							file. This version also includes corrections for some of the documentation errors
1204							discovered.
1205
1206							Version 1.1 incorporates additional type checking on the operands for the overloaded
1207							operators. Also fixed some minor documentation formatting issues.
1208
1209							=head1 DESCRIPTION
1210
1211							My main motivation for creating this module was to provide the students at Purdue and
1212							elsewhere with a Perl class whose API is the same as that of my Python based
1213							C module that appears to have become popular for prototyping algorithms
1214							for cryptography and hash functions.
1215
1216							This module stores the bits of a bitvector in 16-bit unsigned shorts. As you can see
1217							in the constructor code for C, after resolving the arguments with which the
1218							constructor is called, the very first thing the constructor does is to figure out how
1219							many of those 2-byte shorts it needs for the bits. That does not imply that the size
1220							of a bit array that is stored in a bitvector must be a multiple of 16. B
1221							can be of any size whatsoever.> The C class keeps track of the
1222							number of bits through its C instance variable.
1223
1224							Note that, except for one case, the constructor must be called with a single keyword
1225							argument, which determines how the bitvector will be constructed. The single
1226							exception to this rule is for the keyword argument C which you would normally
1227							use for constructing a bitvector from an integer. The additional option you can
1228							supply with C is C. When both C and C are specified in a
1229							constructor call, you get a bitvector of the specified size provided the value
1230							supplied for C is larger than what it takes to accommodate the bits for the
1231							C integer.
1232
1233							In addition to constructing bitvectors from integers, this module can also construct
1234							bitvectors from bit strings, from ASCII text strings, from hex strings, from a list
1235							of bits, and from the contents of a file. With regards to constructing bitvectors
1236							from integers, the module can construct very large bitvectors from very large
1237							integers stored as C objects.
1238
1239							=head1 OVERLOADED OPERATORS
1240
1241							The following C declaration in the module gives the list of the
1242							overloaded operators. Since C is set to 1, several other operators become
1243							overloaded by autogeneration from those shown below. For example, overloading of the
1244							3-way numerical comparison operator C<< <=> >> automatically overloads the C<< < >>,
1245							C<< <= >>, C<< > >>, C<< >= >>, C<< == >>, and C<< != >> operators.
1246
1247							use overload '+' => '_add',
1248							'""' => '_str',
1249							'0+' => '_int',
1250							'~' => '_invert',
1251							'\|' => '_or',
1252							'&' => '_and',
1253							'^' => '_xor',
1254							'<=>' => '_compare',
1255							'<<' => '_lshift',
1256							'>>' => '_rshift',
1257							'<>' => '_iter',
1258							'fallback' => 1;
1259
1260							It is B to remember that the overloadings for the C<<< << >>> and C<<< >>
1261							>>> operators are for B left and right shifts (their usual meaning as
1262							bitwise operators is for non-circular shifts). This was done because the
1263							applications for which this module is intended is more likely to use circular shifts
1264							of bit fields than non-circular shifts. You can still carry out non-circular shifts
1265							by calling the methods C and C as illustrated elsewhere
1266							in this documentation.
1267
1268							Another B thing to bear in mind is the overloading of the C<+> operator.
1269							It is B addition. On the other hand, it is a concatenation of the two operand
1270							bitvectors. This was done to keep the usage of this operator the same as in the
1271							Python version of this module.
1272
1273							By virtue of how the operators are overloaded, you can make the calls listed in the
1274							rest of this section. To illustrate these calls, I will use the following two bit
1275							vectors:
1276
1277							$bv1 = Algorithm::BitVector->new( bitstring => "111000" );
1278							$bv2 = Algorithm::BitVector->new( bitstring => "000101000" );
1279
1280							These two bitvectors are intentionally of different lengths to illustrate what role
1281							the size differences play in how the various operators work.
1282
1283							=over 4
1284
1285							=item B
1286
1287							$bv3 = $bv1 + $bv2; # 111000000101000
1288
1289							The concatenation of two bitvectors is returned as a new bitvector. This is made
1290							possible by the overload definition for the C<+> operator.
1291
1292							Note that the following also works:
1293
1294							print $bv1 . "hello"; # 111000hello
1295
1296							In this case, Perl implicitly converts the left operand of the `.' operator into a
1297							string (which is made possible by the overloading for the stringification operator in
1298							this module) and then returns the result as a string.
1299
1300							=item B
1301
1302							print "$bv1"; # 111000
1303
1304							This is made possible for the overload definition for the C<""> operator.
1305
1306							=item B
1307
1308							print int($bv1); # 56
1309
1310							This is made possible by the overload definition for the C<0+> operator.
1311
1312							=item B
1313
1314							$bv3 = ~ $bv1;
1315							print $bv3; # 000111
1316
1317							This is made possible by the overload definition for the C<~> operator. The original
1318							bitvector on which this unary operator is invoked remains unchanged.
1319
1320							=item B
1321
1322							$bv3 = $bv1 \| $bv2; # 000111000
1323
1324							When two bitvectors are of unequal length (as is the case here), the shorter vector
1325							is zero-padded on the left to equalize their lengths before the application of the
1326							logical operation. If this auto-padding property is not something you want, you
1327							should check the lengths of the argument bitvectors in your own script before
1328							invoking this operator. The result of the logical OR operation is returned as a new
1329							bitvector. The two operand bitvectors remain unchanged.
1330
1331							=item B
1332
1333							$bv3 = $bv1 & $bv2; # 000101000
1334
1335							When two bitvectors are of unequal length (as is the case here), the shorter vector
1336							is zero-padded on the left to equalize their lengths before the application of the
1337							logical operation. If this auto-padding property is not something you want, you
1338							should check the lengths of the argument bitvectors in your own script before
1339							invoking this operator. The result of the logical AND operation is returned as a new
1340							bitvector. The two operand bitvectors remain unchanged.
1341
1342
1343							=item B
1344
1345							$bv3 = $bv1 ^ $bv2; # 000010000
1346
1347							When two bitvectors are of unequal length (as is the case here), the shorter vector
1348							is zero-padded on the left to equalize their lengths before the application of the
1349							logical operation. If this auto-padding property is not something you want, you
1350							should check the lengths of the argument bitvectors in your own script before
1351							invoking this operator. The result of the logical XOR operation is returned as a new
1352							bitvector. The two operand bitvectors remain unchanged.
1353
1354
1355							=item B
1356
1357							$bv1 < $bv2 ? print "yes\n" : print "no\n"; # no
1358
1359							$bv1 > $bv2 ? print "yes\n" : print "no\n"; # yes
1360
1361							$bv1 <= $bv2 ? print "yes\n" : print "no\n"; # no
1362
1363							$bv1 >= $bv2 ? print "yes\n" : print "no\n"; # yes
1364
1365							$bv1 == $bv2 ? print "yes\n" : print "no\n"; # no
1366
1367							$bv1 != $bv2 ? print "yes\n" : print "no\n"; # yes
1368
1369							The overload definitions for all these operators are autogenerated from the overload
1370							definition for the 3-way numerical comparison operator 'C<< <=> >>'. B
1371							bitvectors are compared on the basis of their integer values.> That is, C<$bv1> is
1372							less than C<$bv2> if C is less than C.
1373
1374							=item B
1375
1376							$n = 3;
1377
1378							$bv1 << $n; # $bv1 is now 000111
1379							$bv1 >> $n; # $bv1 is now 111000
1380
1381							Since Perl does not expect these two operators to be invoked in a void context, such
1382							statements in your code will elicit a warning from Perl to that effect. If these
1383							warnings annoy you, you can turn them off by surrounding such statements with C
1384							warnings "void";> and C directives. The other option is to invoke
1385							such statements in the following manner:
1386
1387							$bv1 = $bv1 << $n;
1388							$bv2 = $bv1 >> $n;
1389
1390							That works because the overload definitions for these bit shift operators return the
1391							bitvector object on which they are invoked. As it turns out, this also allows for
1392							chained invocation of these operators, as in
1393
1394							$bv1 = $bv1 << 3 << 1 >> 2; # 100011
1395
1396							$bv1 = $bv1 << 2 >> 1 >> 3; # 111000
1397
1398							=item B
1399
1400							while (<$bv1>) {
1401							print "$_ ";
1402							} # 1 1 1 0 0 0
1403
1404							This is made possible by the overload definition for the C<< <> >> operator. The
1405							C class includes an inner class C for this
1406							purpose.
1407
1408							=back
1409
1410
1411							=head1 CONSTRUCTING BITVECTORS
1412
1413							=over 4
1414
1415							=item B
1416
1417							$bv = Algorithm::BitVector->new( size => 0 );
1418
1419							print "$bv\n"; # no output
1420
1421							=item B
1422
1423							$bv = Algorithm::BitVector->new( size => 13 ); # 0000000000000
1424
1425							=item B
1426
1427							$bv = Algorithm::BitVector->new( intVal => 5006 ); # 1001110001110
1428
1429							The module returns the smallest possible bitvector that would accommodate the
1430							integer value provided with the C option.
1431
1432							=item B
1433
1434							As mentioned, with the C option, you get the smallest possible bitvector
1435							that can be generated from that integer. If you want a I bitvector, you can
1436							also supply the C option as shown below:
1437
1438							$bv = Algorithm::BitVector->new( intVal => 5006, size => 16 ); # 0001001110001110
1439
1440							If the value supplied for the C option in such a call is not larger than the
1441							smallest bit array that represents the C value, the constructor will throw an
1442							exception.
1443
1444							=item B
1445
1446							use Math::BigInt;
1447							$x = Math::BigInt->new('12345678901234567890123456789012345678901234567890');
1448							$bv = Algorithm::BitVector->new( intVal => $x );
1449							#1000011100100111111101100011011010011010101011111000001111001010000\
1450							#10101000000100110011101000111101011111000110001111111000110010110110\
1451							#01110001111110000101011010010
1452
1453							=item B
1454
1455							$bv = Algorithm::BitVector->new( bitstring => '00110011' ); # 00110011
1456
1457							=item B
1458
1459							$bv = Algorithm::BitVector->new( textstring => "hello\n" );
1460							# 011010000110010101101100011011000110111100001010
1461
1462							=item B
1463
1464							$bv = Algorithm::BitVector->new( hexstring => "68656c6c6f" );
1465							# 0110100001100101011011000110110001101111
1466
1467							=item B
1468
1469							$bv = Algorithm::BitVector->new( bitlist => [1, 1, 0, 1] ); # 1101
1470
1471							=item B
1472
1473							$bv = Algorithm::BitVector->new( filename => 'testinput1.txt' );
1474
1475							print "$bv\n"; # Nothing to show yet
1476
1477							$bv1 = $bv->read_bits_from_file(64); # Now you have a bitvector from the
1478							# first 64 bits
1479
1480							Note that it takes two calls to create bitvectors from the contents of a file. The
1481							first merely creates an empty bitvector just to set the necessary file handle for
1482							reading the file. It is the second call in which you invoke the method
1483							C that actually returns a bitvector from the bits read from
1484							the file. Each call to C in this manner spits out a new bit
1485							vector.
1486
1487							=back
1488
1489							=head1 METHODS
1490
1491							=head3 close_file_handle()
1492
1493							=over 4
1494
1495							When you construct bitvectors by block scanning a disk file, after you are done, you
1496							can call this method to close the file handle that was created to read the file:
1497
1498							$bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1499							## your code to read bit blocks for constructing bitvectors goes here
1500							$bv->close_file_handle();
1501
1502							The constructor call in the first statement opens a file handle for reading the bits.
1503							It is this file handle that is closed by calling C,
1504
1505							=back
1506
1507							=head3 count_bits()
1508
1509							=over 4
1510
1511							$bv = Algorithm::BitVector->new( intVal => 45, size => 16 );
1512							print $bv->count_bits(); # 4
1513
1514							This method returns an integer value which is the number of bits set to 1 in the
1515							bitvector on which the method is invoked.
1516
1517							=back
1518
1519							=head3 count_bits_sparse()
1520
1521							=over 4
1522
1523							Say you have a bitvector with two million bits:
1524
1525							$bv = Algorithm::BitVector->new( size => 2000000 );
1526
1527							and you happen to set its individual bits by
1528
1529							$bv->set_bit(345234, 1);
1530							$bv->set_bit(233, 1);
1531							$bv->set_bit(243, 1);
1532							$bv->set_bit(18, 1);
1533							$bv->set_bit(785, 1);
1534
1535							The following call returns the number of bits set in the bitvector:
1536
1537							print $bv->count_bits_sparse(); # 5
1538
1539							For very long bitvectors, as in the example here, this method will work much faster
1540							than the C method. However, for dense bitvectors, I expect
1541							C to work faster.
1542
1543							=back
1544
1545							=head3 deep_copy()
1546
1547							=over 4
1548
1549							$bv_copy = $bv->deep_copy();
1550
1551							Subsequently, any alterations to the bitvectors pointed to by either C<$bv> or
1552							C<$bv_copy> will not affect the other.
1553
1554							=back
1555
1556							=head3 divide_into_two()
1557
1558							=over 4
1559
1560							($bv1, $bv2) = $bv->divide_into_two(); # say $bv = 0000000000101101
1561							print "$bv1\n"; # 00000000
1562							print "$bv2\n"; # 00101101
1563
1564							Divides an even sized bitvector into two bitvectors, each of size half of the
1565							bitvector on which this method is invoked. Throws an exception when invoked on a
1566							bitvector that is not even sized.
1567
1568							=back
1569
1570							=head3 gcd()
1571
1572							=over 4
1573
1574							This method uses the Euclid's algorithm to return the Greatest Common Divisor of the
1575							integer values represented by the two bitvectors. The following example shows a call
1576							to C returning the GCD of the integer values of the bitvectors C<$bv1> and
1577							C<$bv2>.
1578
1579							$bv1 = Algorithm::BitVector->new( bitstring => '01100110' ); # 102
1580							$bv2 = Algorithm::BitVector->new( bitstring => '011010' ); # 26
1581							$gcd = $bv1->gcd( $bv2 ); # 10
1582							print int($gcd); # 2
1583
1584							The result returned by C is a bitvector.
1585
1586							=back
1587
1588							=head3 gen_random_bits()
1589
1590							=over 4
1591
1592							$bv = Algorithm::BitVector->new( intVal => 0 );
1593							$bv = $bv->gen_random_bits(16); # 1100111001010101
1594
1595							The call to C returns a bitvector whose bits are randomly
1596							generated. The number of bits in the returned bitvector equals the argument integer.
1597
1598							=back
1599
1600							=head3 get_bit()
1601
1602							=over 4
1603
1604							This method gives you array-like access to the individual bits of a bitvector.
1605
1606							$bv = Algorithm::BitVector->new( bitstring => '10111' );
1607							print $bv->get_bit(0); # 1 (the first bit)
1608							print $bv->get_bit(1); # 0
1609							print $bv->get_bit(4); # 1 (the last bit)
1610
1611							Negative values for the index scan a bitvector from right to left, with the C<-1>
1612							index standing for the last (meaning the right-most) bit in the vector:
1613
1614							print $bv->get_bit(-1); # 1 (the last bit)
1615							print $bv->get_bit(-2); # 1
1616							print $bv->get_bit(-5); # 1 (the first bit)
1617
1618							The C can also return a slice of a bitvector if the argument to the
1619							method is an anonymous array of the index range you desire, as in the second
1620							statement below:
1621
1622							$bv = Algorithm::BitVector->new( bitstring => "10111011");
1623							my $arr = $bv->get_bit( [3..7] );
1624							print "@$arr\n"; # 1 1 0 1 1
1625
1626							In this example, we want C to return all bits at positions indexed 3
1627							through 7, both ends inclusive. Note that the slice is returned as an array of bits.
1628
1629
1630							=back
1631
1632							=head3 get_hex_string_from_bitvector()
1633
1634							=over 4
1635
1636							Assuming that length of your bitvector is a multiple of 4, this methods returns a
1637							hex representation of the bit pattern:
1638
1639							$bv = Algorithm::BitVector->new(bitstring => "0110100001100101011011000110110001101111");
1640							print $bv->get_hex_string_from_bitvector(); # 68656c6c6f
1641
1642							The hex representation is returned in the form if a string of hex characters. This
1643							method throws an exception if the size of the bitvector is not a multiple of 4.
1644
1645							=back
1646
1647							=head3 get_text_string_from_bitvector()
1648
1649							=over 4
1650
1651							$bv = Algorithm::BitVector->new( textstring => "hello" );
1652							print "$bv\n"; # 0110100001100101011011000110110001101111
1653							print $bv->get_text_from_bitvector(); # hello
1654
1655							The method returns a string of ASCII characters by converting successive 8-bit slices
1656							of the bitvector into an ASCII character. It throws an exception if the size of the
1657							bit pattern is not a multiple of 8. Calling this method to create a text-based print
1658							representation of a bit vector makes sense only if you don't expect to see any
1659							unprintable characters in the successive 8-bit slices of the bitvector. Let's say
1660							you have created a bitvector from a text string using the appropriate constructor
1661							call. Subsequently, you encrypted this text string. Next, you or someone else
1662							decrypts the encrypted bit stream. Since what comes out at the decryption end must
1663							be the original text string, it would make sense to invoke this method to recover the
1664							original text.
1665
1666							=back
1667
1668							=head3 gf_divide_by_modulus()
1669
1670							=over 4
1671
1672							This method is for modular division in the Galois Field C. You must specify
1673							the modulus polynomial through a bit pattern and also the value of the integer C:
1674
1675							$mod = Algorithm::BitVector->new( bitstring => '100011011' ); # AES modulus
1676							$n = 8;
1677							$a = Algorithm::BitVector->new( bitstring => '11100010110001' );
1678							($quotient, $remainder) = $a->gf_divide_by_modulus($mod, $n);
1679							print "$quotient\n"; # 00000000111010
1680							print "$remainder\n"; # 10001111
1681
1682							What this example illustrates is dividing the bitvector C<$a> by the modulus bit
1683							vector C<$mod>. For a more general division of one bitvector C<$a> by another bit
1684							vector C<$b>, you would carry out a multiplication of C<$a> by the MI of C<$b>, where
1685							MI stands for "multiplicative inverse" as returned by a call to the method
1686							C. A call to C returns two bitvectors, one for the
1687							quotient and the other for the remainder.
1688
1689							=back
1690
1691							=head3 gf_MI()
1692
1693							=over 4
1694
1695							This method returns the multiplicative inverse of a bit pattern in the Galois Field
1696							C. You must specify both the modulus polynomial through its bit pattern and
1697							the value of C:
1698
1699							$modulus = Algorithm::BitVector->new( bitstring => '100011011' ); # AES modulus
1700							$n = 8;
1701							$a = Algorithm::BitVector->new( bitstring => '00110011' );
1702							print $a->gf_MI($modulus, $n); # 01101100
1703
1704							Note that the multiplicative inverse is returned as a bitvector.
1705
1706							=back
1707
1708							=head3 gf_multiply()
1709
1710							=over 4
1711
1712							This method returns a product of two bit patterns in the Galois Field C field.
1713							That is, given any two polynomials with their coefficients drawn from the 0 and 1
1714							values in C, this method returns the product polynomial.
1715
1716							$a = Algorithm::BitVector->new( bitstring => '0110001' );
1717							$b = Algorithm::BitVector->new( bitstring => '0110' );
1718							print $a->gf_multiply($b); #00010100110
1719
1720							As you would expect, in general, the bit pattern returned by this method will be
1721							longer than the lengths of the two operand bitvectors. The result returned by the
1722							method is in the form of a bitvector.
1723
1724							=back
1725
1726							=head3 gf_multiply_modular()
1727
1728							=over 4
1729
1730							This method carries out modular multiplication in the Galois Field C. You
1731							must supply it the bitvector for the modulus polynomial and the value of C.
1732
1733							$modulus = Algorithm::BitVector->new( bitstring => '100011011' ); # AES modulus
1734							$n = 8;
1735							$a = Algorithm::BitVector->new( bitstring => '0110001' );
1736							$b = Algorithm::BitVector->new( bitstring => '0110' );
1737							print $a->gf_multiply_modular($b, $modulus, $n); # 10100110
1738
1739							This example returns the product of the bit patterns C<$a> and C<$b> modulo the bit
1740							pattern C<$modulus> in C. The result returned by the method is in the form
1741							of a bitvector.
1742
1743							=back
1744
1745							=head3 hamming_distance()
1746
1747							=over 4
1748
1749							Hamming distance is commonly used to measure dissimilarity between two bitvectors of
1750							the same size.
1751
1752							$bv1 = Algorithm::BitVector->new( bitstring => '11111111' );
1753							$bv2 = Algorithm::BitVector->new( bitstring => '00101011' );
1754							print $bv1->hamming_distance( $bv2 ); # 4
1755
1756							This distance returns the number of bit positions in which two the bit patterns
1757							differ. The method throws an exception if the two bitvectors are not of the same
1758							length. The value returned is an integer.
1759
1760							=back
1761
1762							=head3 int_value()
1763
1764							=over 4
1765
1766							You can find the integer value of a bitvector by
1767
1768							$bv = Algorithm::BitVector->new( intVal => 5678 );
1769							print $bv3->int_value(); # 5678
1770
1771							or, even more simply by
1772
1773							print int($bv); # 5678
1774
1775							which works on account of the overloading of the C<0+> operator.
1776
1777							=back
1778
1779							=head3 is_power_of_2()
1780
1781							=over 4
1782
1783							You can use this predicate to test if the integer value of a bitvector is a power of
1784							2:
1785
1786							$bv = Algorithm::BitVector->new( bitstring => '10000000001110' );
1787							print int($bv); # 826
1788							print $bv->is_power_of_2(); # 0
1789
1790							The predicate returns 1 for true and 0 for false.
1791
1792							=back
1793
1794							=head3 is_power_of_2_sparse()
1795
1796							=over 4
1797
1798							This does the same thing as the C method but in a way that makes it
1799							faster for large bitvectors with very few bits set.
1800
1801							$bv = Algorithm::BitVector->new( size => 2000000 );
1802							$bv->set_bit(345234, 1);
1803							print $bv->is_power_of_2_sparse(); # 1
1804
1805							=back
1806
1807							=head3 jaccard_distance()
1808
1809							=over 4
1810
1811							The Jaccard distance between two bitvectors is 1 minus the Jaccard similarity
1812							coefficient:
1813
1814							$bv1 = Algorithm::BitVector->new( bitstring => '11111111' );
1815							$bv2 = Algorithm::BitVector->new( bitstring => '10101011' );
1816							print $bv1->jaccard_distance( $bv2 ); # 0.375
1817
1818							The value returned by the method is a floating point number between 0 and 1.
1819
1820							=back
1821
1822							=head3 jaccard_similarity()
1823
1824							=over 4
1825
1826							This method returns the Jaccard similarity coefficient between the two bitvectors
1827							pointed to by C<$bv1> and C<$bv2>:
1828
1829							$bv1 = Algorithm::BitVector->new( bitstring => '11111111' );
1830							$bv2 = Algorithm::BitVector->new( bitstring => '10101011' );
1831							print $bv1->jaccard_similarity( $bv2 ); # 0.675
1832
1833							The value returned by the method is a floating point number between 0 and 1.
1834
1835							=back
1836
1837							=head3 length()
1838
1839							=over 4
1840
1841							This method returns the total number of bits in a bitvector:
1842
1843							$bv = Algorithm::BitVector->new( intVal => 5678 );
1844							print $bv; # 1011000101110
1845							print $bv->length(); # 13
1846
1847							Note that what C returns is the total size of a bitvector, including any
1848							leading zeros.
1849
1850							=back
1851
1852							=head3 multiplicative_inverse()
1853
1854							=over 4
1855
1856							This method calculates the multiplicative inverse using normal integer arithmetic.
1857							For multiplicative inverses in a Galois Field C, use the method C
1858							described earlier in this API.
1859
1860							$modulus = Algorithm::BitVector->new( intVal => 32 );
1861							$bv = Algorithm::BitVector->new( intVal => 17 );
1862							$result = $bv->multiplicative_inverse( $modulus );
1863							if ($result) {
1864							print $result; # 10001
1865							} else {
1866							print "No multiplicative inverse in this case\n";
1867							}
1868
1869							What this example says is that the multiplicative inverse of 17 modulo 32 is 17. That
1870							is because 17 times 17 in modulo 32 arithmetic equals 1. When using this method, you
1871							must test the value returned for 0. If the returned value is 0, that means that the
1872							number corresponding to the bitvector on which this method is invoked does not
1873							possess a multiplicative inverse with respect to the modulus.
1874
1875							=back
1876
1877							=head3 next_set_bit()
1878
1879							=over 4
1880
1881							Starting from a given bit position, this method returns the index of the next bit
1882							that is set in a bitvector:
1883
1884							$bv = Algorithm::BitVector->new( bitstring => '00000000000001' );
1885							print $bv->next_set_bit(5); # 13
1886
1887							In this example, we are asking the method to return the index of the bit that is set
1888							after the bit position indexed 5.
1889
1890							=back
1891
1892							=head3 pad_from_left()
1893
1894							=over 4
1895
1896							You can pad a bitvector from the left with a designated number of zeros:
1897
1898							$bv = Algorithm::BitVector->new( bitstring => '101010' );
1899							print $bv->pad_from_left( 4 ); # 0000101010
1900
1901							The method returns the bitvector on which it is invoked. So you can think of it as
1902							an in-place extension of a bitvector (although, under the hood, the extension is
1903							carried out by giving a new longer C<_vector> attribute to the bitvector object).
1904
1905							=back
1906
1907							=head3 pad_from_right()
1908
1909							=over 4
1910
1911							You can pad a bitvector from the right with a designated number of zeros:
1912
1913							$bv = Algorithm::BitVector->new( bitstring => '101010' );
1914							print $bv->pad_from_right( 4 ); # 1010100000
1915
1916							The method returns the bitvector on which it is invoked. So you can think of it as
1917							an in-place extension of a bitvector (although, under the hood, the extension is
1918							carried out by giving a new longer C<_vector> attribute to the bitvector object).
1919
1920
1921							=back
1922
1923							=head3 permute()
1924
1925							=over 4
1926
1927							You can permute the bits in a bitvector with a permutation list as shown below:
1928
1929							$bv1 = Algorithm::BitVector->new( intVal => 203, size => 8 );
1930							print $bv1; # 11001011
1931							$bv2 = $bv1->permute( [3, 2, 1, 0, 7, 6, 5, 4] );
1932							print $bv2; # 00111101
1933
1934							The method returns a new bitvector with permuted bits.
1935
1936							=back
1937
1938							=head3 rank_of_bit_set_at_index()
1939
1940							=over 4
1941
1942							You can measure the "rank" of a bit that is set at a given index. Rank is the number
1943							of bits that are set up to the argument position, as in
1944
1945							$bv = Algorithm::BitVector->new( bitstring => '01010101011100' );
1946							print $bv->rank_of_bit_set_at_index( 10 ); # 6
1947
1948							The value 6 returned by this call to C is the number of
1949							bits set up to the position indexed 10 (including that position). The method throws
1950							an exception if there is no bit set at the argument position.
1951
1952							=back
1953
1954							=head3 read_bits_from_file()
1955
1956							=over 4
1957
1958							Constructing bitvectors from the contents of a disk file takes two steps: First you
1959							must make the call shown in the first statement below. The purpose of this call is to
1960							create a file handle that is associated with the variable C<$bv> in this case.
1961							Subsequent invocations of C on this variable will read
1962							blocks of C<$n> bits and return a bitvector for each block thus read. The variable
1963							C<$n> must be a multiple of 8 for this to work.
1964
1965							$bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1966							$bv1 = $bv->read_bits_from_file(64);
1967							$bv2 = $bv->read_bits_from_file(64);
1968							...
1969							...
1970							$bv->close_file_handle();
1971
1972							When reading a file as shown above, you can test the attribute C of the
1973							bitvector object in order to find out if there is more to be read in the file. The
1974							C loop shown below reads all of a file in 64-bit blocks.
1975
1976							$bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1977							while ($bv->{more_to_read}) {
1978							my $bv_read = $bv->read_bits_from_file( 64 );
1979							print "$bv_read\n";
1980							}
1981							$bv->close_file_handle();
1982
1983							The size of the last bitvector constructed from a file corresponds to how many bytes
1984							remain unread in the file at that point. It is your responsibility to zero-pad the
1985							last bitvector appropriately if, say, you are doing block encryption of the whole
1986							file.
1987
1988							=back
1989
1990							=head3 reset()
1991
1992							=over 4
1993
1994							You can reset a previously constructed bitvector all either all 1's or all 0's by
1995							calling this method:
1996
1997							$bv = Algorithm::BitVector->new( intVal => 203, size => 8 );
1998							print $bv; # 11001011
1999							$bv->reset(1);
2000							print $bv; # 11111111
2001							$bv->reset(0);
2002							print $bv; # 00000000
2003
2004							What the method accomplishes can be thought of as in-place resetting of the bits. The
2005							method does not return anything.
2006
2007							=back
2008
2009							=head3 reverse()
2010
2011							=over 4
2012
2013							Given a bitvector, you can construct a bitvector with all the bits reversed, in the
2014							sense that what was left-to-right earlier now becomes right-to-left, as in
2015
2016							$bv = Algorithm::BitVector->new( bitstring => '01100001' );
2017							print $bv->reverse(); # 10000110
2018
2019							A call to this method returns a new bitvector whose bits are in reverse order in
2020							relation to the bits in the bitvector on which the method is called.
2021
2022							=back
2023
2024							=head3 runs()
2025
2026							=over 4
2027
2028							This method returns an array of runs of 1's and 0's in a bitvector:
2029
2030							$bv = Algorithm::BitVector->new( bitlist => [1,1,1,0,0,1] );
2031							my @bvruns = $bv->runs();
2032							print "@bvruns\n"; # 111 00 1
2033
2034							Each element of the array that is returned by C is a string of either 1's or
2035							0's.
2036
2037							=back
2038
2039							=head3 set_bit()
2040
2041							=over 4
2042
2043							With array-like indexing, you can use this method to set the individual bits of a
2044							previously constructed bitvector. Both positive and negative values are allowed for
2045							the bit position index. The method takes two explicit arguments, the first for the
2046							position of the bit you want to set and the second for the value of the bit.
2047
2048							$bv = Algorithm::BitVector->new( bitstring => '1111' );
2049							$bv->set_bit(0,0); # set the first bit to 0
2050							$bv->set_bit(1,0); # set the next bit to 0
2051							print $bv; # 0011
2052							$bv->set_bit(-1,0); # set the last bit to 0
2053							$bv->set_bit(-2,0); # set the bit before the last bit to 0
2054							print $bv; # 0000
2055
2056							=back
2057
2058							=head3 set_value()
2059
2060							=over 4
2061
2062							This method can be used to change the bit pattern associated with a previously
2063							constructed bitvector:
2064
2065							$bv = Algorithm::BitVector->new( intVal => 7, size => 16 );
2066							print $bv; # 0000000000000111
2067							$bv->set_value( intVal => 45 );
2068							print $bv; # 101101
2069
2070							You can think of this method as carrying out an in-place resetting of the bit array
2071							in a bitvector. The method does not return anything.
2072
2073							=back
2074
2075							=head3 shift_left()
2076
2077							=over 4
2078
2079							If you want to shift a bitvector non-circularly to the left, this is the method to
2080							call:
2081
2082							$bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2083							$bv->shift_left(3);
2084							print $bv; # 001000
2085							$bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2086							$bv->shift_left(3)->shift_right(3);
2087							print $bv; # 000001
2088
2089							As the bitvector is shifted non-circularly to the left, the exposed bit positions on
2090							the right are filled with zeros. Note that the method returns the bitvector object
2091							on which it is invoked. That is the reason why the chained invocation of the method
2092							in the fifth statement above works.
2093
2094							=back
2095
2096							=head3 shift_right()
2097
2098							=over 4
2099
2100							If you want to shift a bitvector non-circularly to the right, this is the method to
2101							call:
2102
2103							$bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2104							$bv->shift_right(3);
2105							print $bv; # 000111
2106							$bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2107							$bv->shift_right(3)->shift_right(2);
2108							print $bv; # 000001
2109
2110							As the bitvector is shifted non-circularly to the right, the exposed bit positions on
2111							the left are filled with zeros. Note that the method returns the bitvector object
2112							on which it is invoked. That is the reason why the chained invocation of the method
2113							in the fifth statement above works.
2114
2115							=back
2116
2117							=head3 test_for_primality()
2118
2119							=over 4
2120
2121							If the integer value of a bitvector is small (meaning smaller than C<< 0x7f_ff_ff_ff >>),
2122							you can use this method to test it for its primality through the Miller-Rabin
2123							probabilistic test:
2124
2125							$p = 7417;
2126							$bv = Algorithm::BitVector->new( intVal => $p );
2127							$check = $bv->test_for_primality();
2128							print "The primality test for $p: $check\n";
2129							# The primality test for 7417: is a prime with probability 0.99993896484375
2130
2131							The method returns one of two strings: If the primality test succeeds, the method
2132							returns a string like "C". And if the test fails,
2133							the method returns the string "C".
2134
2135							=back
2136
2137							=head3 unpermute()
2138
2139							=over 4
2140
2141							This method reverses the permutation carried out by a call to the C method
2142							as shown below:
2143
2144							$bv1 = Algorithm::BitVector->new( intVal => 203, size => 8 );
2145							print $bv1; # 11001011
2146							$bv2 = $bv1->permute( [3, 2, 1, 0, 7, 6, 5, 4] );
2147							print $bv2; # 00111101
2148							$bv3 = $bv2->unpermute( [3, 2, 1, 0, 7, 6, 5, 4] );
2149							print $bv3; # 11001011
2150
2151							The method returns a new bitvector with unpermuted bits. Also note that the method
2152							throws an exception if the permutation list is not as long as the size of the
2153							bitvector on which the method is invoked.
2154
2155							=back
2156
2157							=head3 write_to_file()
2158
2159							=over 4
2160
2161							This method writes the bitvectors in your program to a disk file:
2162
2163							$bv1 = Algorithm::BitVector->new( bitstring => '00001010' );
2164							open my $FILEOUT, ">test.txt";
2165							$bv1->write_to_file( $FILEOUT );
2166							close $FILEOUT;
2167
2168							The size of a bitvector must be a multiple of 8 for this write method to work. If
2169							this condition is not met, the method will throw an exception.
2170
2171							B When writing an internally generated bitvector out
2172							to a disk file, it may be important to open the file in the binary mode, since
2173							otherwise the bit pattern `00001010' ('\n') in your bitstring will be written out as
2174							0000110100001010 ('\r\n') which is the line break on Windows machines.
2175
2176							=back
2177
2178							=head1 REQUIRED
2179
2180							This module imports the following modules:
2181
2182							Math::BigInt
2183							List::Util
2184							Math::Random
2185							Fcntl
2186
2187
2188							=head1 THE C DIRECTORY
2189
2190							The C directory contains the following script that invokes all of the
2191							functionality of this module:
2192
2193							BitVectorDemo.pl
2194
2195							In case there is any doubt about how exactly to invoke a method or how to use an
2196							operator, please look at the usage in this script.
2197
2198							=head1 EXPORT
2199
2200							None by design.
2201
2202							=head1 BUGS
2203
2204							Please notify the author if you encounter any bugs. When sending email, please place
2205							the string 'BitVector' in the subject line.
2206
2207							=head1 INSTALLATION
2208
2209							Download the archive from CPAN in any directory of your choice. Unpack the archive
2210							with a command that on a Linux machine would look like:
2211
2212							tar zxvf Algorithm-BitVector-1.24.tar.gz
2213
2214							This will create an installation directory for you whose name will be
2215							C. Enter this directory and execute the following commands
2216							for a standard install of the module if you have root privileges:
2217
2218							perl Makefile.PL
2219							make
2220							make test
2221							sudo make install
2222
2223							If you do not have root privileges, you can carry out a non-standard install the
2224							module in any directory of your choice by:
2225
2226							perl Makefile.PL prefix=/some/other/directory/
2227							make
2228							make test
2229							make install
2230
2231							With a non-standard install, you may also have to set your PERL5LIB environment
2232							variable so that this module can find the required other modules. How you do that
2233							would depend on what platform you are working on. In order to install this module in
2234							a Linux machine on which I use tcsh for the shell, I set the PERL5LIB environment
2235							variable by
2236
2237							setenv PERL5LIB /some/other/directory/lib64/perl5/:/some/other/directory/share/perl5/
2238
2239							If I used bash, I'd need to declare:
2240
2241							export PERL5LIB=/some/other/directory/lib64/perl5/:/some/other/directory/share/perl5/
2242
2243
2244							=head1 THANKS
2245
2246							The bug in the primality test function, whose fix resulted in Version 1.21, was
2247							reported by Dana Jacobsen in a bug report filed at C. Thanks Dana!
2248
2249							The restriction on the Perl version was removed on Slaven Rezic's recommendation. He
2250							says the module runs fine with Perl 5.8.9. Thanks Slaven!
2251
2252							=head1 AUTHOR
2253
2254							Avinash Kak, kak@purdue.edu
2255
2256							If you send email, please place the string "BitVector" in your subject line to get past
2257							my spam filter.
2258
2259							=head1 COPYRIGHT
2260
2261							This library is free software; you can redistribute it and/or modify it under the
2262							same terms as Perl itself.
2263
2264							Copyright 2015 Avinash Kak
2265
2266							=cut
2267