File Coverage

blib/lib/Algorithm/BitVector.pm
Criterion Covered Total %
statement 114 664 17.1
branch 32 218 14.6
condition 20 155 12.9
subroutine 17 75 22.6
pod 37 39 94.8
total 220 1151 19.1


line stmt bran cond sub pod time code
1             package Algorithm::BitVector;
2              
3             #!/usr/bin/perl -w
4              
5             #------------------------------------------------------------------------------------
6             # Copyright (c) 2015 Avinash Kak. All rights reserved. This program is free
7             # software. You may modify and/or distribute it under the same terms as Perl itself.
8             # This copyright notice must remain attached to the file.
9             #
10             # Algorithm::BitVector is a Perl module for creating a memory efficient packed
11             # representation of bit arrays and for logical and numerical operations on such
12             # arrays.
13             # -----------------------------------------------------------------------------------
14              
15             #use 5.10.0;
16 1     1   14393 use strict;
  1         2  
  1         33  
17 1     1   4 use warnings;
  1         1  
  1         37  
18 1     1   8 use Carp;
  1         6  
  1         90  
19 1     1   5 use List::Util qw(pairmap max reduce any);
  1         1  
  1         127  
20 1     1   1008 use Math::BigInt;
  1         21583  
  1         5  
21 1     1   12808 use Math::Random;
  1         5305  
  1         114  
22 1     1   10 use Fcntl 'SEEK_CUR';
  1         3  
  1         79  
23              
24             our $VERSION = '1.24';
25              
26 1         8 use overload '+' => '_join',
27             '""' => '_str',
28             '0+' => '_int',
29             '~' => '_invert',
30             '|' => '_or',
31             '&' => '_and',
32             '^' => '_xor',
33             '<=>' => '_compare',
34             '<<' => '_lshift',
35             '>>' => '_rshift',
36             '<>' => '_iter',
37 1     1   4 'fallback' => 1;
  1         2  
38              
39             sub _readblock {
40 0     0   0 my $blocksize = shift;
41 0         0 my $bitvector = shift;
42 0         0 my $block;
43             my $more_to_read;
44 0         0 my $i = 0;
45 0         0 my $byte_as_bits;
46 0         0 my $bitstring = '';
47 0         0 while ( $i < $blocksize / 8 ) {
48 0         0 $i++;
49 0         0 my $num_bytes_read = sysread($bitvector->{FILEIN}, my $byte, 1);
50 0 0       0 unless ($num_bytes_read) {
51 0 0       0 if (length($bitstring) < $blocksize) {
52 0         0 $bitvector->{more_to_read} = 0;
53 0         0 $more_to_read = 0;
54             }
55 0         0 return $bitstring;
56             } else {
57 0         0 my $bits_as_string = sprintf "%vb", $byte;
58 0         0 $bits_as_string = '0' x (8 - length($bits_as_string)) . $bits_as_string;
59 0         0 $bitstring .= $bits_as_string;
60             }
61             }
62 0         0 my $file_pos = tell $bitvector->{FILEIN};
63             # peek at the next byte; moves file position only if a
64             # byte is read
65 0         0 my $num_bytes_read = sysread($bitvector->{FILEIN}, my $next_byte, 1);
66 0 0       0 if ($num_bytes_read) {
67             # pretend we never read the byte
68 0         0 sysseek $bitvector->{FILEIN}, $file_pos - 1, SEEK_CUR;
69             } else {
70 0         0 $bitvector->{more_to_read} = 0;
71             }
72 0         0 return $bitstring;
73             }
74              
75             # Constructor:
76             sub new {
77 3     3 0 19 my ($class, %args) = @_;
78 3         9 my @params = keys %args;
79 3 50       8 croak "\nYou have used a wrong name for a keyword argument " .
80             "--- perhaps a misspelling\n"
81             if _check_for_illegal_params(@params) == 0;
82 3         16 my $self = {
83             filename => $args{filename},
84             size => $args{size},
85             intVal => $args{intVal},
86             bitlist => $args{bitlist},
87             bitstring => $args{bitstring},
88             hexstring => $args{hexstring},
89             textstring => $args{textstring},
90             };
91 3         6 bless $self, $class;
92 3 50       76 if ( $self->{filename} ) {
    100          
    100          
    50          
    50          
    50          
    0          
93 0 0 0     0 die "When using the `filename' option in the constructor call, you cannot use any " .
      0        
      0        
      0        
      0        
94             "other option at the same time: $!"
95             if $self->{intVal} or $self->{size} or $self->{bitlist}
96             or $self->{bitstring} or $self->{hexstring} or $self->{textstring};
97 0 0       0 open $self->{FILEIN}, "< $self->{filename}"
98             or die "unable to open file $self->{filename}: $!";
99 0         0 $self->{more_to_read} = 1;
100 0         0 return $self;
101             } elsif ( defined $self->{intVal} ) {
102 1 50 33     21 die "When using the `intVal' option in the constructor call, you CANNOT use `filename', " .
      33        
      33        
      33        
103             "`bitlist', 'bitstring', `hexstring', or `textstring' options: $!"
104             if $self->{filename} or $self->{bitlist} or $self->{bitstring}
105             or $self->{hexstring} or $self->{textstring};
106 1 50 33     7 if ($self->{intVal} == 0 && ! defined $self->{size}) {
107 0         0 $self->{bitlist} = [0];
108 0         0 $self->{size} = 1;
109             } else {
110 1         2 my @bitarray;
111 1 50       5 if (ref($self->{intVal}) eq 'Math::BigInt') {
112 0         0 my $bitlist_str = $self->{intVal}->as_bin();
113 0         0 @bitarray = split //, $bitlist_str;
114             # Now get rid of the `0b' prefix on the string returned by as_bin():
115 0         0 shift @bitarray;
116 0         0 shift @bitarray;
117             } else {
118 1         11 my $bitlist_str = sprintf "%b", $self->{intVal};
119 1 50       4 if (defined $self->{size}) {
120 0 0       0 die "The value specified for size must be at least as large as for the " .
121             "smallest bitvector possible for the intVal integer: $!"
122             if $self->{size} < length $bitlist_str;
123 0         0 my $n = $self->{size} - length $bitlist_str;
124 0         0 my $extended_bitlist_str = '0' x $n . $bitlist_str;
125 0         0 @bitarray = split //, $extended_bitlist_str;
126             } else {
127 1         7 @bitarray = split //, $bitlist_str;
128             }
129             }
130 1         3 $self->{bitlist} = \@bitarray;
131 1         2 $self->{size} = scalar @{$self->{bitlist}};
  1         12  
132             }
133             } elsif (defined $self->{size}) {
134 1 50 33     32 die "When using the `size' option in the constructor call, you CANNOT use `filename', " .
      33        
      33        
      33        
      33        
135             "`bitlist', 'bitstring', `hexstring', or `textstring' options: $!"
136             if $self->{filename} or $self->{intVal} or $self->{bitlist}
137             or $self->{bitstring} or $self->{hexstring} or $self->{textstring};
138 1         4 my $bitstring = "0" x $self->{size};
139 1         8 my @bitlist_from_bitstring = split '', $bitstring;
140 1         3 $self->{bitlist} = \@bitlist_from_bitstring;
141             } elsif ($self->{bitlist}) {
142 0 0 0     0 die "When using the `bitlist' option in the constructor call, you cannot use any " .
      0        
      0        
      0        
      0        
143             "other option at the same time: $!"
144             if $self->{filename} or $self->{intVal} or $self->{size}
145             or $self->{bitstring} or $self->{hexstring} or $self->{textstring};
146 0         0 $self->{size} = scalar @{$self->{bitlist}};
  0         0  
147             } elsif (defined $self->{bitstring}) {
148 0 0 0     0 die "When using the `bitstring' option in the constructor call, you cannot use any " .
      0        
      0        
      0        
      0        
149             "other option at the same time: $!"
150             if $self->{filename} or $self->{intVal} or $self->{size}
151             or $self->{bitlist} or $self->{hexstring} or $self->{textstring};
152 0         0 my @bitlist_from_bitstring = split //, $self->{bitstring};
153 0         0 $self->{bitlist} = \@bitlist_from_bitstring;
154 0         0 $self->{size} = scalar @{$self->{bitlist}};
  0         0  
155             } elsif (defined $self->{textstring}) {
156 1 50 33     16 die "When using the `textstring' option in the constructor call, you cannot use any " .
      33        
      33        
      33        
      33        
157             "other option at the same time: $!"
158             if $self->{filename} or $self->{intVal} or $self->{size}
159             or $self->{bitlist} or $self->{hexstring} or $self->{bitstring};
160 5 50       17 my $bitstring_from_text = join '', map {length($_) == 8 ? $_
  5         9  
161 1         7 : ('0' x (8 - length($_))) . $_} map {sprintf "%b", $_}
162             map ord, split //, $self->{textstring};
163 1         10 my @bitlist_from_text = split //, $bitstring_from_text;
164 1         3 $self->{bitlist} = \@bitlist_from_text;
165 1         2 $self->{size} = scalar @{$self->{bitlist}};
  1         3  
166             } elsif (defined $self->{hexstring}) {
167 0 0 0     0 die "When using the `hexstring' option in the constructor call, you cannot use any " .
      0        
      0        
      0        
      0        
168             "other option at the same time: $!"
169             if $self->{filename} or $self->{intVal} or $self->{size}
170             or $self->{bitlist} or $self->{textstring} or $self->{bitstring};
171 0 0       0 my $bitstring_from_hex = join '', map {length($_) == 4 ? $_
  0         0  
172 0         0 : ('0' x (4 - length($_))) . $_} map {sprintf "%b", $_}
173 0         0 map {hex $_} split //, $self->{hexstring};
174 0         0 my @bitlist_from_hex = split //, $bitstring_from_hex;
175 0         0 $self->{bitlist} = \@bitlist_from_hex;
176 0         0 $self->{size} = scalar @{$self->{bitlist}};
  0         0  
177             }
178 3         3 my $shorts_needed = int( (@{$self->{bitlist}} + 15) / 16 );
  3         10  
179 3         8 @{$self->{_vector}} = map {unpack "n", pack("n", 0)} 0 .. $shorts_needed-1;
  3         8  
  5         12  
180 3         31 my @interleaved = (0..$self->{size}-1, @{$self->{bitlist}})
  60         66  
181 3         7 [map {$_,$_+$self->{size}} (0 .. $self->{size}-1)];
182 3     60   37 pairmap {$self->set_bit($a,$b)} @interleaved;
  60         80  
183 3         11 $self->{bitlist} = undef;
184 3         21 return $self;
185             }
186              
187             ## Set the bit at the designated position to the value shown
188             sub set_bit {
189 60     60 1 48 my $self = shift;
190 60         48 my $posn = shift;
191 60         37 my $val = shift;
192 60 50 66     146 die "incorrect value for a bit" unless $val == 0 or $val == 1;
193 60 50 33     190 die "index range error" if ($posn >= $self->{size}) or ($posn < - $self->{size});
194 60 50       73 $posn = $self->{size} + $posn if $posn < 0;
195 60         61 my $block_index = int($posn / 16);
196 60         46 my $shift = $posn & 0xF;
197 60         56 my $cv = $self->{_vector}->[$block_index];
198 60 100       120 if ( ( ( $cv >> $shift ) & 1 ) != $val) {
199 25         62 $self->{_vector}->[$block_index] = $cv ^ (1 << $shift);
200             }
201             }
202              
203             ## Get the bit from the designated position. This method can also return a slice of a
204             ## bitvector. However, note that the slice is returned as a list of the bits in the
205             ## index range you specified. You can easily convert the list of bits returned into a
206             ## bitvector in your own code.
207             sub get_bit {
208 65     65 1 38 my $self = shift;
209 65         45 my $pos = shift;
210 65 100       81 unless (ref($pos) eq "ARRAY") {
211 60 50 33     166 die "index range error" if ($pos >= $self->{size}) or ($pos < - $self->{size});
212 60 50       77 $pos = $self->{size} + $pos if $pos < 0;
213 60         124 return ( $self->{_vector}->[int($pos/16)] >> ($pos&15) ) & 1;
214             }
215 5         15 my @slice = map $self->get_bit($_), (@$pos)[0..@$pos-1];
216 5         33 return \@slice;
217             }
218              
219             ## Overloading of the string conversion operator. Return the string representation
220             ## of a bitvector.
221             sub _str {
222 1     1   16 my $self = shift;
223 1         6 return join '', map $self->get_bit($_), 0 .. $self->{size}-1;
224             }
225              
226             ## Overloading of the `+' operator. Concatenate the argument bitvectors. Return the
227             ## concatenated bitvector as a new BitVector instance.
228             sub _join {
229 0     0   0 my ($bv1, $bv2) = @_;
230 0 0 0     0 die "Abort: The concatenation operator invoked with either undefined " .
231             "or wrong types of operands: $!"
232             unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
233             UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
234 0         0 my $allbits = $bv1->_str() . $bv2->_str();
235 0         0 return Algorithm::BitVector->new( bitstring => $allbits );
236             }
237              
238             sub _compare {
239 0     0   0 my ($bv1, $bv2) = @_;
240 0 0 0     0 die "Abort: The comparison operator invoked with either undefined " .
241             "or wrong types of operands: $!"
242             unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
243             UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
244 0         0 my $bigint1 = Math::BigInt->from_bin( "$bv1" );
245 0         0 my $bigint2 = Math::BigInt->from_bin( "$bv2" );
246 0         0 return $bigint1->bcmp($bigint2);
247             }
248              
249             sub deep_copy {
250 0     0 1 0 my $self = shift;
251 0         0 my $result_bv = Algorithm::BitVector->new( size => $self->{size} );
252 0         0 foreach my $i (0..@{$result_bv->{_vector}}-1) {
  0         0  
253 0         0 $result_bv->{_vector}->[$i] = $self->{_vector}->[$i];
254             }
255 0         0 return $result_bv;
256             }
257              
258             ## Invert the bits in the bitvector on which the method is invoked
259             ## and return the result as a new bitvector.
260             sub _invert {
261 0     0   0 my $self = shift;
262 0 0       0 die "Abort: The operator '~' for bit inversion invoked on an object that is " .
263             "not of type Algorithm::BitVector"
264             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
265 0         0 my $result_bv = $self->deep_copy();
266 0         0 foreach my $i (0..@{$result_bv->{_vector}}-1) {
  0         0  
267             # The unary `~' operator may assume a 32-bit wide field:
268 0         0 $result_bv->{_vector}->[$i] = ~ $result_bv->{_vector}->[$i] & 0x0000FFFF;
269             }
270 0         0 return $result_bv;
271             }
272              
273             ## Take a bitwise 'OR' of two bitvectors. Return the result as a new bitvector. If
274             ## the two bitvectors are not of the same size, pad the shorter one with zeros from
275             ## the left.
276             sub _or {
277 0     0   0 my ($bv1, $bv2) = @_;
278 0 0 0     0 die "Abort: The `|' operator invoked with either undefined " .
279             "or wrong types of operands: $!"
280             unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
281             UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
282 0         0 my ($bv3, $bv4);
283 0 0       0 if ( $bv1->{size} < $bv2->{size} ) {
    0          
284 0         0 $bv3 = $bv1->_resize_pad_from_left($bv2->{size} - $bv1->{size});
285 0         0 $bv4 = $bv2;
286             } elsif ( $bv1->{size} > $bv2->{size} ) {
287 0         0 $bv3 = $bv1;
288 0         0 $bv4 = $bv2->_resize_pad_from_left($bv1->{size} - $bv2->{size});
289             } else {
290 0         0 $bv3 = $bv1;
291 0         0 $bv4 = $bv2;
292             }
293 0         0 my $result_bv = Algorithm::BitVector->new( size => $bv3->{size} );
294 0         0 foreach my $i (0..@{$result_bv->{_vector}}-1) {
  0         0  
295             # The binary `|' operator may assume 32-bit wide fields:
296 0         0 $result_bv->{_vector}->[$i] = ($bv3->{_vector}->[$i] | $bv4->{_vector}->[$i] ) & 0x0000FFFF;
297             }
298 0         0 return $result_bv;
299             }
300              
301             ## Resize a bitvector by padding with n 0's from the left. Return the result as a
302             ## new bitvector.
303             sub _resize_pad_from_left {
304 0     0   0 my $self = shift;
305 0         0 my $n = shift;
306 0         0 my $new_str = '0' x $n . "$self";
307 0         0 return Algorithm::BitVector->new( bitstring => $new_str );
308             }
309              
310             ## Take a bitwise 'AND' of the bitvector on which the method is invoked with
311             ## the argument bitvector. Return the result as a new bitvector. If the two
312             ## bitvectors are not of the same size, pad the shorter one with zeros from the
313             ## left.
314             sub _and {
315 0     0   0 my ($bv1, $bv2) = @_;
316 0 0 0     0 die "Abort: The concatenation operator invoked with either undefined " .
317             "or wrong types of operands: $!"
318             unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
319             UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
320 0         0 my ($bv3, $bv4);
321 0 0       0 if ( $bv1->{size} < $bv2->{size} ) {
    0          
322 0         0 $bv3 = $bv1->_resize_pad_from_left($bv2->{size} - $bv1->{size});
323 0         0 $bv4 = $bv2;
324             } elsif ( $bv1->{size} > $bv2->{size} ) {
325 0         0 $bv3 = $bv1;
326 0         0 $bv4 = $bv2->_resize_pad_from_left($bv1->{size} - $bv2->{size});
327             } else {
328 0         0 $bv3 = $bv1;
329 0         0 $bv4 = $bv2;
330             }
331 0         0 my $result_bv = Algorithm::BitVector->new( size => $bv3->{size} );
332 0         0 foreach my $i (0..@{$result_bv->{_vector}}-1) {
  0         0  
333             # The binary bitwise `&' operator may assume 32-bit wide fields:
334 0         0 $result_bv->{_vector}->[$i] = ($bv3->{_vector}->[$i] & $bv4->{_vector}->[$i] ) & 0x0000FFFF;
335             }
336 0         0 return $result_bv;
337             }
338              
339             ## Take a bitwise 'XOR' of the bitvector on which the method is invoked with
340             ## the argument bitvector. Return the result as a new bitvector. If the two
341             ## bitvectors are not of the same size, pad the shorter one with zeros from the
342             ## left.
343             sub _xor {
344 0     0   0 my ($bv1, $bv2) = @_;
345 0 0 0     0 die "Abort: The xor operator invoked with either undefined " .
346             "or wrong types of operands: $!"
347             unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
348             UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
349 0         0 my ($bv3, $bv4);
350 0 0       0 if ( $bv1->{size} < $bv2->{size} ) {
    0          
351 0         0 $bv3 = $bv1->_resize_pad_from_left($bv2->{size} - $bv1->{size});
352 0         0 $bv4 = $bv2;
353             } elsif ( $bv1->{size} > $bv2->{size} ) {
354 0         0 $bv3 = $bv1;
355 0         0 $bv4 = $bv2->_resize_pad_from_left($bv1->{size} - $bv2->{size});
356             } else {
357 0         0 $bv3 = $bv1;
358 0         0 $bv4 = $bv2;
359             }
360 0         0 my $result_bv = Algorithm::BitVector->new( size => $bv3->{size} );
361 0         0 foreach my $i (0..@{$result_bv->{_vector}}-1) {
  0         0  
362             # The binary bitwise `^' operator may assume 32-bit wide fields:
363 0         0 $result_bv->{_vector}->[$i] = ($bv3->{_vector}->[$i] ^ $bv4->{_vector}->[$i] ) & 0x0000FFFF;
364             }
365 0         0 return $result_bv;
366             }
367              
368             # For the overloading of the iterator '<>' operator:
369             sub _iter {
370 0     0   0 my $self = shift;
371 0         0 my $pos = 0;
372 0 0       0 $self->{_iterator} = BitVecIterator->new($self, $pos) unless $self->{_iter_called};
373 0         0 &{$self->{_iterator}->next()};
  0         0  
374             }
375              
376             {
377             # This inner class needed for implementing iterator overloading:
378             package BitVecIterator;
379             sub new {
380 0     0   0 my $self = [ $_[1], $_[2] ];
381 0         0 bless $self, $_[0];
382 0         0 $_[1]->{_iter_called} = 1;
383 0         0 return $self;
384             }
385             sub next {
386 0     0   0 my $self = shift;
387 0         0 my $bitvec = $self->[0];
388             # The anonymous subroutine that follows is a closure over the variables
389             # incorporated in it:
390             return sub {
391 0 0   0   0 if ($self->[1] >= $bitvec->{size}) {
392 0         0 delete $bitvec->{_iter_called};
393 0         0 return;
394             }
395 0         0 my $bit = $bitvec->get_bit($self->[1]);
396 0         0 $self->[1] = $self->[1] + 1;
397 0         0 return $bit;
398             }
399 0         0 }
400             }
401              
402             # for the overloading of the numification operator:
403             sub _int {
404 1     1   7 my $self = shift;
405 1         4 return $self->int_value();
406             }
407              
408             ## Divides an even-sized bitvector into two and returns the two halves as a
409             ## list of two bitvectors.
410             sub divide_into_two {
411 0     0 1 0 my $self = shift;
412 0 0       0 die "Abort: The divide_into_two() method invoked on an object that is " .
413             "not of type Algorithm::BitVector"
414             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
415 0 0       0 die "The bitvector to be divided must have even number of bits: $!"
416             if $self->{size} % 2;
417 0         0 my @outlist1 = ();
418 0         0 foreach my $i (0..$self->{size} / 2 - 1) {
419 0         0 push @outlist1, $self->get_bit($i);
420             }
421 0         0 my @outlist2 = ();
422 0         0 foreach my $i ( ($self->{size} / 2) .. ($self->{size} - 1) ) {
423 0         0 push @outlist2, $self->get_bit($i);
424             }
425 0         0 return Algorithm::BitVector->new( bitlist => \@outlist1 ),
426             Algorithm::BitVector->new( bitlist => \@outlist2 );
427             }
428              
429             ## Permute a bitvector according to the indices shown in the second argument list.
430             ## Return the permuted bitvector as a new bitvector.
431             sub permute {
432 0     0 1 0 my $self = shift;
433 0 0       0 die "Abort: The permute() method invoked on an object that is " .
434             "not of type Algorithm::BitVector"
435             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
436 0         0 my $permute_list = shift;
437 0 0       0 die "Bad permutation index in your permutation list"
438             if max(@$permute_list) > $self->{size} - 1;
439 0         0 my @outlist = ();
440 0         0 foreach my $index (@$permute_list) {
441 0         0 push @outlist, $self->get_bit($index);
442             }
443 0         0 return Algorithm::BitVector->new( bitlist => \@outlist );
444             }
445              
446             ## Unpermute the bitvector according to the permutation list supplied as the
447             ## second argument. If you first permute a bitvector by using permute() and
448             ## then unpermute() it using the same permutation list, you will get back the
449             ## original bitvector.
450             sub unpermute {
451 0     0 1 0 my $self = shift;
452 0 0       0 die "Abort: The unpermute() method invoked on an object that is " .
453             "not of type Algorithm::BitVector"
454             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
455 0         0 my $permute_list = shift;
456 0 0       0 die "Bad permutation index in your permutation list: $!"
457             if max(@$permute_list) > $self->{size} - 1;
458 0 0       0 die "Size of the permute list for unpermuting not consistent with the size of the bet vector:$!"
459             unless $self->{size} == @$permute_list;
460 0         0 my $out_bv = Algorithm::BitVector->new( size => $self->{size} );
461 0         0 foreach my $i (0..@$permute_list-1) {
462 0         0 $out_bv->set_bit( $permute_list->[$i], $self->get_bit($i) );
463             }
464 0         0 return $out_bv;
465             }
466              
467             ## Write the bitvector to the file object file_out. (A file object is returned
468             ## by a call to open()). Since all file I/O is byte oriented, the bitvector must
469             ## be multiple of 8 bits. Each byte treated as MSB first (0th index).
470             sub write_to_file {
471 0     0 1 0 my $self = shift;
472 0 0       0 die "Abort: The write_to_file() method invoked on an object that is " .
473             "not of type Algorithm::BitVector"
474             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
475 0         0 my $file_out = shift;
476 0         0 my $err_str = "Only a bitvector whose length is a multiple of 8 can " .
477             "be written to a file. Use the padding functions to satisfy " .
478             "this constraint.";
479 0 0       0 $self->{FILEOUT} = $file_out unless $self->{FILEOUT};
480 0 0       0 die $err_str if $self->{size} % 8;
481 0         0 for my $byte (0..$self->{size}/8 - 1){
482 0         0 my $value = 0;
483 0         0 foreach my $bit (0..7) {
484 0         0 $value += $self->get_bit( $byte*8+(7 - $bit) ) << $bit;
485             }
486 0         0 syswrite $file_out, chr($value), 1;
487             }
488             }
489              
490             ## For closing a file object that was used for reading the bits into one or more
491             ## BitVector objects.
492             sub close_file_handle {
493 0     0 1 0 my $self = shift;
494 0 0       0 die "Abort: The close_file_handle() method invoked on an object that is " .
495             "not of type Algorithm::BitVector"
496             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
497 0 0       0 die "No open file currently associated with the file handle: $!"
498             unless $self->{FILEIN};
499 0         0 close $self->{FILEIN};
500             }
501              
502             ## Return the integer value of a bitvector. If the original integer from which the
503             ## bitvector was constructed is a Math::BigInt object, then return the string
504             ## representation of the integer value.
505             sub int_value {
506 1     1 1 1 my $self = shift;
507 1 50       6 die "Abort: The int() method invoked on an object that is " .
508             "not of type Algorithm::BitVector"
509             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
510 1         1 my $int_value;
511 1 50 33     5 if (defined($self->{intVal}) && (ref($self->{intVal}) eq 'Math::BigInt')) {
512 0         0 $int_value = $self->{intVal}->bstr();
513             } else {
514 1         2 $int_value = 0;
515 1         4 foreach my $i (0..$self->{size}-1) {
516 4         8 $int_value += $self->get_bit($i) * ( 2 ** ( $self->{size} - $i - 1 ) );
517             }
518             }
519 1         6 return $int_value;
520             }
521              
522             ## Return the text string formed by dividing the bitvector into bytes from the
523             ## left and replacing each byte by its ASCII character (this is a useful thing
524             ## to do only if the length of the vector is an integral multiple of 8 and every
525             ## byte in your bitvector has a print representation)
526             sub get_text_from_bitvector {
527 1     1 0 7 my $self = shift;
528 1 50       6 die "Abort: The get_text_from_bitvector() method invoked on an object that is " .
529             "not of type Algorithm::BitVector"
530             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
531 1 50       9 die "\nThe bitvector for get_text_from_bitvector() must be an integral multiple of 8 bits: $!"
532             if $self->{size} % 8;
533 1         2 my $ascii = '';
534 1         4 for (my $i=0; $i < $self->{size}; $i += 8) {
535 5         4 $ascii .= chr oct "0b". join '', @{$self->get_bit([$i..$i+7])};
  5         15  
536             }
537 1         6 return $ascii;
538             }
539              
540             ## Return a string of hex digits by scanning the bits from the left and
541             ## replacing each sequence of 4 bits by its corresponding hex digit (this is a
542             ## useful thing to do only if the length of the vector is an integral multiple
543             ## of 4)
544             sub get_hex_string_from_bitvector {
545 0     0 1 0 my $self = shift;
546 0 0       0 die "Abort: The get_hex_from_bitvector() method invoked on an object that is " .
547             "not of type Algorithm::BitVector"
548             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
549 0 0       0 die "\nThe bitvector for get_hex_from_bitvector() must be a multiple of 4 bits: $!"
550             if $self->{size} % 4;
551 0         0 my $hex = '';
552 0         0 for (my $i=0; $i < $self->{size}; $i += 4) {
553 0         0 $hex .= sprintf "%x", oct "0b". join '', @{$self->get_bit([$i..$i+3])};
  0         0  
554             }
555 0         0 return $hex;
556             }
557              
558             ## Read blocksize bits from a disk file and return a BitVector object containing
559             ## the bits. If the file contains fewer bits than blocksize, construct the
560             ## BitVector object from however many bits there are in the file. If the file
561             ## contains zero bits, return a BitVector object of size attribute set to 0.
562             sub read_bits_from_file {
563 0     0 1 0 my $self = shift;
564 0 0       0 die "Abort: The read_bits_from_file() method invoked on an object that is " .
565             "not of type Algorithm::BitVector"
566             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
567 0         0 my $blocksize = shift;
568 0         0 my $error_str = "You need to first construct a BitVector object with a filename as argument";
569 0 0       0 die "$error_str" unless $self->{filename};
570 0 0       0 die "block size must be a multiple of 8" if $blocksize % 8;
571 0         0 my $bitstr = _readblock( $blocksize, $self );
572 0 0       0 if (length $bitstr == 0) {
573 0         0 return Algorithm::BitVector->new( size => 0 );
574 0         0 print "file has no bits\n";
575             } else {
576 0         0 return Algorithm::BitVector->new( bitstring => $bitstr );
577             }
578             }
579              
580             ## For an in-place left circular shift by n bit positions
581             sub _lshift {
582 0     0   0 my $self = shift;
583 0         0 my $n = shift;
584 0 0       0 die "Circular shift of an empty vector makes no sense" unless $self->{size};
585 0 0       0 return $self >> abs($n) if $n < 0;
586 0         0 foreach my $i (0..$n-1) {
587 0         0 $self->_circular_rotate_left_by_one();
588             }
589 0         0 return $self;
590             }
591              
592             ## For an in-place right circular shift by n bit positions
593             sub _rshift {
594 0     0   0 my $self = shift;
595 0         0 my $n = shift;
596 0 0       0 die "Circular shift of an empty vector makes no sense" unless $self->{size};
597 0 0       0 return $self << abs($n) if $n < 0;
598 0         0 foreach my $i (0..$n-1) {
599 0         0 $self->_circular_rotate_right_by_one();
600             }
601 0         0 return $self;
602             }
603              
604             ## For a one-bit in-place left circular shift
605             sub _circular_rotate_left_by_one {
606 0     0   0 my $self = shift;
607 0         0 my $max_index = int( ($self->{size} - 1) / 16 );
608 0         0 my $left_most_bit = $self->{_vector}->[0] & 1;
609 0         0 $self->{_vector}->[0] = $self->{_vector}->[0] >> 1;
610 0         0 foreach my $i (1 .. $max_index) {
611 0         0 my $left_bit = $self->{_vector}->[$i] & 1;
612 0         0 $self->{_vector}->[$i] = $self->{_vector}->[$i] >> 1;
613 0         0 $self->{_vector}->[$i-1] |= $left_bit << 15;
614             }
615 0         0 $self->set_bit($self->{size} - 1, $left_most_bit);
616             }
617              
618             ## For a one-bit in-place right circular shift
619             sub _circular_rotate_right_by_one {
620 0     0   0 my $self = shift;
621 0         0 my $max_index = int( ($self->{size} - 1) / 16 );
622 0         0 my $right_most_bit = $self->get_bit( $self->{size} - 1);
623 0         0 $self->{_vector}->[$max_index] &= ~0x8000;
624 0         0 $self->{_vector}->[$max_index] = $self->{_vector}->[$max_index] << 1;
625 0         0 for (my $i=$max_index-1; $i > -1; $i -= 1) {
626 0         0 my $right_bit = $self->{_vector}->[$i] & 0x8000;
627 0         0 $self->{_vector}->[$i] &= ~0x8000;
628 0         0 $self->{_vector}->[$i] = $self->{_vector}->[$i] << 1;
629 0         0 $self->{_vector}->[$i+1] |= $right_bit >> 15;
630             }
631 0         0 $self->set_bit(0, $right_most_bit);
632             }
633              
634             ## Pad a bitvector with n zeros from the left
635             sub pad_from_left {
636 0     0 1 0 my $self = shift;
637 0         0 my $n = shift;
638 0         0 my $new_str = ('0' x $n) . "$self";
639 0         0 my $new_bitvec = Algorithm::BitVector->new( bitstring => $new_str );
640 0         0 $self->{size} = length $new_str;
641 0         0 $self->{_vector} = $new_bitvec->{_vector};
642 0         0 return $self;
643             }
644              
645             ## Pad a bitvector with n zeros from the right
646             sub pad_from_right {
647 0     0 1 0 my $self = shift;
648 0         0 my $n = shift;
649 0         0 my $new_str = "$self" . ('0' x $n);
650 0         0 my $new_bitvec = Algorithm::BitVector->new( bitstring => $new_str );
651 0         0 $self->{size} = length $new_str;
652 0         0 $self->{_vector} = $new_bitvec->{_vector};
653 0         0 return $self;
654             }
655              
656             ## Resets a previously created BitVector to either all zeros or all ones
657             ## depending on the argument val.
658             sub reset {
659 0     0 1 0 my $self = shift;
660 0         0 my $val = shift;
661 0 0 0     0 die "Incorrect argument to reset(): $!" unless ($val == 0) or ($val == 1);
662 0         0 my $reset_bv = Algorithm::BitVector->new( bitstring => ("$val" x $self->{size}) );
663 0         0 $self->{_vector} = $reset_bv->{_vector};
664             }
665              
666             ## Return the number of bits set in a BitVector instance.
667             sub count_bits {
668 0     0 1 0 my $self = shift;
669 0     0   0 return reduce {$a + $b} split //, "$self";
  0         0  
670             }
671              
672             ## Changes the bit pattern associated with a previously constructed BitVector
673             ## instance. The allowable modes for changing the internally stored bit pattern
674             ## are the same as for the constructor.
675             sub set_value {
676 0     0 1 0 my $self = shift;
677 0         0 my $reset_bv = Algorithm::BitVector->new( @_ );
678 0         0 $self->{_vector} = $reset_bv->{_vector};
679 0         0 $self->{size} = $reset_bv->{size};
680             }
681              
682              
683             ## This method for counting the set bits is much faster for sparse bitvectors.
684             ## Note, however, that count_bits() may work much faster for dense-packed
685             ## bitvectors.
686             sub count_bits_sparse {
687 0     0 1 0 my $self = shift;
688 0         0 my $num = 0;
689 0         0 foreach my $intval (@{$self->{_vector}}) {
  0         0  
690 0 0       0 next if $intval == 0;
691 0         0 my ($c, $iv) = (0, $intval);
692 0         0 while ($iv > 0) {
693 0         0 $iv = $iv & ($iv - 1);
694 0         0 $c++;
695             }
696 0         0 $num += $c;
697             }
698 0         0 return $num;
699             }
700              
701             ## Computes the Jaccard similarity coefficient between two bitvectors
702             sub jaccard_similarity {
703 0     0 1 0 my $self = shift;
704 0         0 my $other = shift;
705 0 0 0     0 die "Jaccard called on two zero vectors --- NOT ALLOWED: $!"
706             if (int($self) == 0) && (int($other) == 0);
707 0 0       0 die "Jaccard called on vectors of unequal size --- NOT ALLOWED: $!"
708             if $self->{size} != $other->{size};
709 0         0 my $intersection = $self & $other;
710 0         0 my $union = $self | $other;
711 0         0 return $intersection->count_bits_sparse() / $union->count_bits_sparse();
712             }
713              
714             ## Computes the Jaccard distance between two bitvectors
715             sub jaccard_distance {
716 0     0 1 0 my $self = shift;
717 0         0 my $other = shift;
718 0 0       0 die "Jaccard called on vectors of unequal size --- NOT ALLOWED: $!"
719             if $self->{size} != $other->{size};
720 0         0 return 1 - $self->jaccard_similarity( $other );
721             }
722              
723             ## Computes the Hamming distance between two bitvectors
724             sub hamming_distance {
725 0     0 1 0 my $self = shift;
726 0         0 my $other = shift;
727 0 0       0 die "hamming_distance() called on vectors of unequal size --- NOT ALLOWED: $!"
728             if $self->{size} != $other->{size};
729 0         0 my $diff = $self ^ $other;
730 0         0 return $diff->count_bits_sparse();
731             }
732              
733             ## This method calculates the position of the next set bit at or after the current
734             ## position index. It returns -1 if there is no next set bit. Logic based on the
735             ## contributions by Jason Allum and John Gleeson to the Python version of this
736             ## module.
737             sub next_set_bit {
738 0     0 1 0 my $self = shift;
739 0   0     0 my $from_index = shift || 0;
740 0 0       0 die "The from_index must be nonnegative: $!" unless $from_index >= 0;
741 0         0 my $i = $from_index;
742 0         0 my $v = $self->{_vector};
743 0         0 my $l = scalar @$v;
744 0         0 my $o = $i >> 4;
745 0         0 my $s = $i & 0x0F;
746 0         0 $i = $o << 4;
747 0         0 while ($o < $l) {
748 0         0 my $h = $v->[$o];
749 0 0       0 if ($h) {
750 0         0 $i += $s;
751 0         0 my $m = 1 << $s;
752 0         0 while ($m != (1 << 0x10)) {
753 0 0       0 return $i if $h & $m;
754 0         0 $m <<= 1;
755 0         0 $i += 1;
756             }
757             } else {
758 0         0 $i += 0x10;
759             }
760 0         0 $s = 0;
761 0         0 $o += 1;
762             }
763 0         0 return -1;
764             }
765              
766             ## For a bit that is set at the argument 'position', this method returns how many
767             ## bits are set to the left of that bit. For example, in the bit pattern
768             ## 000101100100, a call to this method with position set to 9 will return 4.
769             sub rank_of_bit_set_at_index {
770 0     0 1 0 my $self = shift;
771 0         0 my $position = shift;
772 0 0       0 die "The bitvector has no bits set at the position in the call to rank_of_bit_set_at_index(): $!"
773             unless $self->get_bit($position);
774 0         0 my $bv_slice = Algorithm::BitVector->new( bitlist => $self->get_bit([0..$position]) );
775 0         0 return $bv_slice->count_bits();
776             }
777              
778             ## Determines whether the integer value of a bitvector is a power of 2.
779             sub is_power_of_2 {
780 0     0 1 0 my $self = shift;
781 0 0       0 return 0 if int($self) == 0;
782 0         0 my $bv = $self & Algorithm::BitVector->new( intVal => int($self) - 1);
783 0 0       0 return 1 if int($bv) == 0;
784 0         0 return 0;
785             }
786              
787             ## Faster version of is_power_of2() for sparse bitvectors
788             sub is_power_of_2_sparse {
789 0     0 1 0 my $self = shift;
790 0 0       0 return 1 if $self->count_bits_sparse() == 1;
791 0         0 return 0;
792             }
793              
794             ## Returns a new bitvector by reversing the bits in the bitvector on which the
795             ## method is invoked.
796             sub reverse {
797 0     0 1 0 my $self = shift;
798 0         0 my @reverseList = ();
799 0         0 for (my $i=1; $i < $self->{size} + 1; $i++) {
800 0         0 push @reverseList, $self->get_bit( -$i );
801             }
802 0         0 return Algorithm::BitVector->new( bitlist => \@reverseList );
803             }
804              
805             ## Using Euclid's Algorithm, returns the greatest common divisor of the integer
806             ## value of the bitvector on which the method is invoked and the integer value
807             ## of the argument bitvector.
808             sub gcd {
809 0     0 1 0 my $self = shift;
810 0         0 my $other = shift;
811 0         0 my ($a,$b) = (int($self), int($other));
812 0 0       0 ($a,$b) = ($b,$a) if $a < $b;
813 0         0 while ($b != 0) {
814 0         0 ($a, $b) = ($b, $a % $b);
815             }
816 0         0 return Algorithm::BitVector->new( intVal => $a );
817             }
818              
819             ## Calculates the multiplicative inverse of the bitvector on which the method is
820             ## invoked modulo the bitvector that is supplied as the argument. Code based on
821             ## Extended Euclid's Algorithm.
822             sub multiplicative_inverse {
823 0     0 1 0 my $self = int( shift );
824 0         0 my $modulus = int( shift );
825 0         0 my ($mod, $num) = ($modulus, $self);
826 0         0 my ($x, $x_old) = (0, 1);
827 0         0 my ($y, $y_old) = (1, 0);
828 0         0 while ($mod) {
829 0         0 my $quotient = int($num / $mod);
830 0         0 ($num, $mod) = ($mod, $num % $mod);
831 0         0 ($x, $x_old) = ($x_old - $x * $quotient, $x);
832 0         0 ($y, $y_old) = ($y_old - $y * $quotient, $y);
833             }
834 0 0       0 if ($num != 1) {
835 0         0 return 0;
836             } else {
837 0         0 my $MI = ($x_old + $modulus) % $modulus;
838 0         0 return Algorithm::BitVector->new( intVal => $MI );
839             }
840             }
841              
842             ## For a one-bit in-place left non-circular shift. Note that the bitvector size
843             ## does not change. The leftmost bit that moves past the first element of the
844             ## bitvector is discarded and rightmost bit of the returned vector is set to zero.
845             sub _shift_left_by_one {
846 0     0   0 my $self = shift;
847 0         0 my $size = @{$self->{_vector}};
  0         0  
848 0         0 my @left_most_bits = map {$_ & 1} @{$self->{_vector}};
  0         0  
  0         0  
849 0         0 push @left_most_bits, $left_most_bits[0];
850 0         0 shift @left_most_bits;
851 0         0 @{$self->{_vector}} = map {$_ >> 1} @{$self->{_vector}};
  0         0  
  0         0  
  0         0  
852 0         0 my @a = @{$self->{_vector}};
  0         0  
853 0         0 my @b = map {$_ << 15} @left_most_bits;
  0         0  
854 0         0 my @interleaved = ( @a, @b )[ map { $_, $_ + @a } ( 0 .. $#a ) ];
  0         0  
855 0     0   0 @{$self->{_vector}} = pairmap {$a | $b} @interleaved;
  0         0  
  0         0  
856 0         0 $self->set_bit(-1,0);
857             }
858              
859             ## For an in-place left non-circular shift by n bit positions. The bits shifted
860             ## out at the left are discarded. The new bit positions on the right are filled with
861             ## zeros
862             sub shift_left {
863 0     0 1 0 my $self = shift;
864 0         0 my $n = shift;
865 0         0 foreach my $i (0..$n-1) {
866 0         0 $self->_shift_left_by_one();
867             }
868 0         0 return $self;
869             }
870              
871             ## For a one-bit in-place right non-circular shift. Note that bitvector size does
872             ## not change. The rightmost bit that moves past the last element of the bitvector
873             ## on the right is discarded and the leftmost bit of the returned vector is set to
874             ## zero.
875             sub _shift_right_by_one {
876 0     0   0 my $self = shift;
877 0         0 my $size = @{$self->{_vector}};
  0         0  
878 0         0 my @right_most_bits = map {$_ & 0x8000} @{$self->{_vector}};
  0         0  
  0         0  
879 0         0 @{$self->{_vector}} = map { $_ & ~0x8000 } @{$self->{_vector}};
  0         0  
  0         0  
  0         0  
880 0         0 unshift @right_most_bits, 0;
881 0         0 pop @right_most_bits;
882 0         0 @{$self->{_vector}} = map {$_ << 1} @{$self->{_vector}};
  0         0  
  0         0  
  0         0  
883 0         0 my @a = @{$self->{_vector}};
  0         0  
884 0         0 my @b = map {$_ >> 15} @right_most_bits;
  0         0  
885 0         0 my @interleaved = ( @a, @b )[ map { $_, $_ + @a } ( 0 .. $#a ) ];
  0         0  
886 0     0   0 @{$self->{_vector}} = pairmap {$a | $b} @interleaved;
  0         0  
  0         0  
887 0         0 $self->set_bit(0,0);
888             }
889              
890             ## For an in-place right non-circular shift by n bit positions. The n bits that
891             ## move past the last element of the bitvector on the right are discarded and
892             ## leftmost new n bit positions filled with zeros.
893             sub shift_right {
894 0     0 1 0 my $self = shift;
895 0         0 my $n = shift;
896 0         0 foreach my $i (0..$n-1) {
897 0         0 $self->_shift_right_by_one();
898             }
899 0         0 return $self;
900             }
901              
902             ## In the set of polynomials defined over GF(2), multiplies the bitvector on which
903             ## the method is invoked with the argument bitvector. Returns the product
904             ## bitvector. Note that this method carries out straight polynomial multiplication
905             ## as opposed to modulo polynomial multiplication. As a result, the highest power of
906             ## the result polynomial will ALWAYS be greater than the highest powers of the
907             ## argument polynomials for non-trivial multiplications.
908             sub gf_multiply {
909 0     0 1 0 my $self = shift;
910 0         0 my $b = shift;
911 0         0 my $a = $self->deep_copy();
912 0         0 my $b_copy = $b->deep_copy();
913 0         0 my $a_highest_power = $a->{size} - $a->next_set_bit(0) - 1;
914 0         0 my $b_highest_power = $b->{size} - $b_copy->next_set_bit(0) - 1;
915 0         0 my $result = Algorithm::BitVector->new( size => $a->{size} + $b_copy->{size} );
916 0         0 $a->pad_from_left( $result->{size} - $a->{size} );
917 0         0 $b_copy->pad_from_left( $result->{size} - $b_copy->{size} );
918 0         0 my @b_list = split //, "$b_copy";
919 0         0 my @enum = (0..@b_list-1, @b_list) [map {$_, $_+ @b_list} (0 .. @b_list - 1)];
  0         0  
920 0         0 for (my $i=0; $i < @enum-1; $i = $i + 2) {
921 0 0       0 if ($enum[$i+1]) {
922 0         0 my $power = $b_copy->{size} - $enum[$i] - 1;
923 0         0 my $a_copy = $a->deep_copy();
924 0         0 $a_copy->shift_left( $power );
925 0         0 $result ^= $a_copy;
926             }
927             }
928 0         0 return $result;
929             }
930              
931             ## Carries out modular division of a bitvector by the modulus bitvector mod in
932             ## GF(2^n) finite field. Returns both the quotient and the remainder.
933             sub gf_divide_by_modulus {
934 0     0 1 0 my $num = shift;
935 0         0 my $mod = shift;
936 0         0 my $n = shift;
937 0 0       0 die "Modulus bit pattern too long" if $mod->{size} > $n + 1;
938 0         0 my $quotient = Algorithm::BitVector->new( intVal => 0, size => $num->{size} );
939 0         0 my $remainder = $num->deep_copy();
940 0         0 for (my $i=0; $i < $num->{size}; $i++) {
941 0         0 my $mod_highest_power = $mod->{size} - $mod->next_set_bit(0) - 1;
942 0         0 my $remainder_highest_power;
943 0 0       0 if ($remainder->next_set_bit(0) == -1) {
944 0         0 $remainder_highest_power = 0;
945             } else {
946 0         0 $remainder_highest_power = $remainder->{size} - $remainder->next_set_bit(0) - 1;
947             }
948 0 0 0     0 if (($remainder_highest_power < $mod_highest_power) or (int($remainder) == 0)) {
949 0         0 last;
950             } else {
951 0         0 my $exponent_shift = $remainder_highest_power - $mod_highest_power;
952 0         0 $quotient->set_bit($quotient->{size} - $exponent_shift - 1, 1);
953 0         0 my $quotient_mod_product = $mod->deep_copy();
954 0         0 $quotient_mod_product->pad_from_left($remainder->{size} - $mod->{size});
955 0         0 $quotient_mod_product->shift_left($exponent_shift);
956 0         0 $remainder ^= $quotient_mod_product;
957             }
958             }
959 0 0       0 if ($remainder->{size} > $n) {
960 0         0 $remainder = Algorithm::BitVector->new(
961             bitlist => $remainder->get_bit([$remainder->{size}-$n .. $remainder->{size}-1]));
962             }
963 0         0 return ($quotient, $remainder);
964             }
965              
966             ## Multiplies a bitvector with the bitvector b in GF(2^n) finite field with the
967             ## modulus bit pattern set to mod
968             sub gf_multiply_modular {
969 0     0 1 0 my $self = shift;
970 0         0 my $b = shift;
971 0         0 my $mod = shift;
972 0         0 my $n = shift;
973 0         0 my $a_copy = $self->deep_copy();
974 0         0 my $b_copy = $b->deep_copy();
975 0         0 my $product = $a_copy->gf_multiply($b_copy);
976 0         0 my ($quotient, $remainder) = $product->gf_divide_by_modulus($mod, $n);
977 0         0 return $remainder
978             }
979              
980             ## Returns the multiplicative inverse of a vector in the GF(2^n) finite field
981             ## with the modulus polynomial set to mod
982             sub gf_MI {
983 0     0 1 0 my $num = shift;
984 0         0 my $mod = shift;
985 0         0 my $n = shift;
986 0         0 my ($NUM, $MOD) = ($num->deep_copy(), $mod->deep_copy());
987 0         0 my $x = Algorithm::BitVector->new( size => $mod->{size} );
988 0         0 my $x_old = Algorithm::BitVector->new( intVal => 1, size => $mod->{size} );
989 0         0 my $y = Algorithm::BitVector->new( intVal => 1, size => $mod->{size} );
990 0         0 my $y_old = Algorithm::BitVector->new( size => $mod->{size} );
991 0         0 my ($quotient, $remainder);
992 0         0 while (int($mod)) {
993 0         0 ($quotient, $remainder) = $num->gf_divide_by_modulus($mod, $n);
994 0         0 ($num, $mod) = ($mod, $remainder);
995 0         0 ($x, $x_old) = ($x_old ^ $quotient->gf_multiply($x), $x);
996 0         0 ($y, $y_old) = ($y_old ^ $quotient->gf_multiply($y), $y);
997             }
998 0 0       0 if (int($num) != 1) {
999 0         0 return "NO MI. However, the GCD of $NUM and $MOD is $num\n";
1000             } else {
1001 0         0 my $z = $x_old ^ $MOD;
1002 0         0 ($quotient, $remainder) = $z->gf_divide_by_modulus($MOD, $n);
1003 0         0 return $remainder;
1004             }
1005             }
1006              
1007             ## Returns a list of the consecutive runs of 1's and 0's in the bitvector.
1008             ## Each run is either a string of all 1's or a string of all 0's.
1009             sub runs {
1010 0     0 1 0 my $self = shift;
1011 0 0       0 die "An empty vector has no runs" if $self->{size} == 0;
1012 0         0 my @allruns = ();
1013 0         0 my $run = '';
1014 0         0 my $previous_bit = $self->get_bit(0);
1015 0 0       0 if ($previous_bit == 0) {
1016 0         0 $run = '0';
1017             } else {
1018 0         0 $run = '1';
1019             }
1020 0         0 my @bitlist = split //, "$self";
1021 0         0 shift @bitlist;
1022 0         0 foreach my $bit (@bitlist) {
1023 0 0 0     0 if (($bit == 0) && ($previous_bit == 0)) {
    0 0        
    0 0        
1024 0         0 $run .= '0';
1025             } elsif (($bit == 1) && ($previous_bit == 0)) {
1026 0         0 push @allruns, $run;
1027 0         0 $run = '1';
1028             } elsif (($bit == 0) && ($previous_bit == 1)) {
1029 0         0 push @allruns, $run;
1030 0         0 $run = '0';
1031             } else {
1032 0         0 $run .= '1';
1033             }
1034 0         0 $previous_bit = $bit;
1035             }
1036 0         0 push @allruns, $run;
1037             return @allruns
1038 0         0 }
1039              
1040             ## Check if the integer value of the bitvector is a prime through the Miller-Rabin
1041             ## probabilistic test of primality. If not found to be a composite, estimate the
1042             ## probability of the bitvector being a prime using this test.
1043             sub test_for_primality {
1044 0     0 1 0 my $p = int(shift);
1045 0 0       0 die "\nThe primality test method test_for_primality() is intended for only " .
1046             "small integers --- integers that are relatively small in relation to " .
1047             "the largest integer that can fit Perl's 4-byte int representation:$!"
1048             if $p > 0x7f_ff_ff_ff;
1049 0 0       0 return "is NOT a prime" if $p == 1;
1050 0         0 my @probes = (2,3,5,7,11,13,17);
1051 0         0 foreach my $a (@probes) {
1052 0 0       0 return "is a prime" if $a == $p;
1053             }
1054 0 0   0   0 return "is NOT a prime" if any {$p % $_ == 0} @probes;
  0         0  
1055 0         0 my ($k, $q) = (0, $p-1);
1056 0         0 while (! ($q & 1)) {
1057 0         0 $q >>= 1;
1058 0         0 $k++;
1059             }
1060 0         0 foreach my $a (@probes) {
1061 0         0 my $a_raised_to_q = _powmod_small_ints($a, int($q), $p);
1062 0 0 0     0 next if $a_raised_to_q == 1 or $a_raised_to_q == $p - 1;
1063 0         0 my $a_raised_to_jq = $a_raised_to_q;
1064 0         0 my $primeflag = 0;
1065 0         0 foreach my $j (0..$k-1) {
1066 0         0 $a_raised_to_jq = _powmod_small_ints($a_raised_to_jq, 2, $p);
1067 0 0       0 if ($a_raised_to_jq == $p-1) {
1068 0         0 $primeflag = 1;
1069 0         0 last;
1070             }
1071             }
1072 0 0       0 return "is NOT a prime" unless $primeflag;
1073             }
1074 0         0 my $probability_of_prime = 1 - 1.0/(4 ** @probes);
1075 0         0 return "is a prime with probability $probability_of_prime";
1076             }
1077              
1078             ## This routine is for modular exponentiation of small integers, meaning the
1079             ## integers that can be accommodated (before and after exponentiation) in the native
1080             ## 4-byte int representation. For larger integers, a call to this function should
1081             ## be replaced by a call to modular exponentiation of the Math::BigInt module.
1082             sub _powmod_small_ints {
1083 0     0   0 my $base = shift;
1084 0         0 my $exponent = shift;
1085 0         0 my $mod = int(shift);
1086 0     0   0 warn "This is just a warning: The modular exponentiation method is not meant for very large numbers (IMPORTANT: This is just a very rough check on the size of the numbers involved)"
1087 0 0       0 if any {$_ > 0x0f_ff_ff_ff} (int($base), int($exponent), int($mod));
1088 0         0 my $result = 1;
1089 0         0 my $a = int($base);
1090 0         0 my $b = $exponent;
1091 0         0 while (int($b) > 0) {
1092 0 0       0 $result = ($result * $a) % $mod if int($b) & 1;
1093 0         0 $b = $b >> 1;
1094 0         0 $a = ($a * $a) % $mod;
1095             }
1096 0         0 return $result;
1097             }
1098              
1099             ## This method is for a generating a bit pattern of a given size with random bits.
1100             sub gen_random_bits {
1101 0     0 1 0 my $self = shift;
1102 0         0 my $width = shift;
1103 0         0 my $candidate_bits = "0b" . join '', Math::Random::random_uniform_integer($width, 0, 1);
1104 0         0 my $candidate = oct $candidate_bits;
1105 0         0 $candidate |= 1;
1106 0         0 $candidate |= (1 << $width-1);
1107 0         0 $candidate |= (2 << $width-3);
1108 0         0 return Algorithm::BitVector->new( intVal => $candidate );
1109             }
1110              
1111             sub length {
1112 0     0 1 0 my $self = shift;
1113 0         0 return $self->{size};
1114             }
1115              
1116             sub _check_for_illegal_params {
1117 3     3   4 my @params = @_;
1118 3         7 my @legal_params = qw / filename
1119             size
1120             intVal
1121             bitlist
1122             bitstring
1123             hexstring
1124             textstring
1125             /;
1126 3         4 my $found_match_flag;
1127 3         5 foreach my $param (@params) {
1128 3         4 foreach my $legal (@legal_params) {
1129 12         10 $found_match_flag = 0;
1130 12 100       20 if ($param eq $legal) {
1131 3         5 $found_match_flag = 1;
1132 3         4 last;
1133             }
1134             }
1135 3 50       10 last if $found_match_flag == 0;
1136             }
1137 3         10 return $found_match_flag;
1138             }
1139              
1140             1;
1141              
1142              
1143             =pod
1144              
1145             =head1 NAME
1146              
1147             Algorithm::BitVector --- A memory efficient packed representation of arbitrary sized
1148             bit arrays and for logical and arithmetic operations on such arrays.
1149              
1150             =head1 SYNOPSIS
1151              
1152             use Algorithm::BitVector;
1153              
1154             # Constructing a given sized bitvector of all zeros:
1155             $bv = Algorithm::BitVector->new( size => 7 );
1156             print "$bv\n"; # 0000000
1157              
1158             # Constructing a bitvector whose integer value is specified:
1159             $bv = Algorithm::BitVector->new( intVal => 123456 );
1160             print "$bv\n"; # 11110001001000000
1161             print int($bv); # 123456
1162              
1163             # Constructing a bitvector from a very large integer:
1164             use Math::BigInt;
1165             $x = Math::BigInt->new('12345678901234567890123456789012345678901234567890');
1166             $bv = Algorithm::BitVector->new( intVal => $x );
1167            
1168             # Constructing a bitvector from a given bit string:
1169             $bv = Algorithm::BitVector->new( bitstring => '00110011' );
1170            
1171             # Constructing a bitvector from an ASCII text string:
1172             $bv = Algorithm::BitVector->new( textstring => "hello\njello" );
1173              
1174             # Constructing a bitvector from a hex string:
1175             $bv = Algorithm::BitVector->new( hexstring => "68656c6c6f" );
1176              
1177             # Constructing a bitvector from a bit list passed as an anonymous array:
1178             $bv = Algorithm::BitVector->new( bitlist => [1, 1, 0, 1] );
1179              
1180             # Constructing a bitvector from the contents of a disk file:
1181             $bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1182             $bv1 = $bv->read_bits_from_file(64); # bitvector from the first 64 bits
1183             # and so on
1184              
1185              
1186             =head1 CHANGES
1187              
1188             Version 1.24 includes in the C file the minimum version restriction on
1189             the C module that is imported.
1190              
1191             Version 1.23 mentions the required modules in the C file but with no
1192             minimum version numbers. Additionally, the documentation associated with the methods
1193             was significantly upgraded in this version.
1194              
1195             Version 1.22 removes the Perl version restriction from the module and the
1196             C files and the C restrictions from the C file.
1197              
1198             Version 1.21 fixes a bug in the code for the Miller-Rabin primality test function
1199             C. This version also places a hard limit on the size of the
1200             integers that are allowed to be tested for primality.
1201              
1202             Version 1.2 fixes an important bug in creating bitvectors from the contents of a disk
1203             file. This version also includes corrections for some of the documentation errors
1204             discovered.
1205              
1206             Version 1.1 incorporates additional type checking on the operands for the overloaded
1207             operators. Also fixed some minor documentation formatting issues.
1208              
1209             =head1 DESCRIPTION
1210              
1211             My main motivation for creating this module was to provide the students at Purdue and
1212             elsewhere with a Perl class whose API is the same as that of my Python based
1213             C module that appears to have become popular for prototyping algorithms
1214             for cryptography and hash functions.
1215              
1216             This module stores the bits of a bitvector in 16-bit unsigned shorts. As you can see
1217             in the constructor code for C, after resolving the arguments with which the
1218             constructor is called, the very first thing the constructor does is to figure out how
1219             many of those 2-byte shorts it needs for the bits. That does not imply that the size
1220             of a bit array that is stored in a bitvector must be a multiple of 16. B
1221             can be of any size whatsoever.> The C class keeps track of the
1222             number of bits through its C instance variable.
1223              
1224             Note that, except for one case, the constructor must be called with a single keyword
1225             argument, which determines how the bitvector will be constructed. The single
1226             exception to this rule is for the keyword argument C which you would normally
1227             use for constructing a bitvector from an integer. The additional option you can
1228             supply with C is C. When both C and C are specified in a
1229             constructor call, you get a bitvector of the specified size provided the value
1230             supplied for C is larger than what it takes to accommodate the bits for the
1231             C integer.
1232              
1233             In addition to constructing bitvectors from integers, this module can also construct
1234             bitvectors from bit strings, from ASCII text strings, from hex strings, from a list
1235             of bits, and from the contents of a file. With regards to constructing bitvectors
1236             from integers, the module can construct very large bitvectors from very large
1237             integers stored as C objects.
1238              
1239             =head1 OVERLOADED OPERATORS
1240              
1241             The following C declaration in the module gives the list of the
1242             overloaded operators. Since C is set to 1, several other operators become
1243             overloaded by autogeneration from those shown below. For example, overloading of the
1244             3-way numerical comparison operator C<< <=> >> automatically overloads the C<< < >>,
1245             C<< <= >>, C<< > >>, C<< >= >>, C<< == >>, and C<< != >> operators.
1246              
1247             use overload '+' => '_add',
1248             '""' => '_str',
1249             '0+' => '_int',
1250             '~' => '_invert',
1251             '|' => '_or',
1252             '&' => '_and',
1253             '^' => '_xor',
1254             '<=>' => '_compare',
1255             '<<' => '_lshift',
1256             '>>' => '_rshift',
1257             '<>' => '_iter',
1258             'fallback' => 1;
1259              
1260             It is B to remember that the overloadings for the C<<< << >>> and C<<< >>
1261             >>> operators are for B left and right shifts (their usual meaning as
1262             bitwise operators is for non-circular shifts). This was done because the
1263             applications for which this module is intended is more likely to use circular shifts
1264             of bit fields than non-circular shifts. You can still carry out non-circular shifts
1265             by calling the methods C and C as illustrated elsewhere
1266             in this documentation.
1267              
1268             Another B thing to bear in mind is the overloading of the C<+> operator.
1269             It is B addition. On the other hand, it is a concatenation of the two operand
1270             bitvectors. This was done to keep the usage of this operator the same as in the
1271             Python version of this module.
1272              
1273             By virtue of how the operators are overloaded, you can make the calls listed in the
1274             rest of this section. To illustrate these calls, I will use the following two bit
1275             vectors:
1276              
1277             $bv1 = Algorithm::BitVector->new( bitstring => "111000" );
1278             $bv2 = Algorithm::BitVector->new( bitstring => "000101000" );
1279              
1280             These two bitvectors are intentionally of different lengths to illustrate what role
1281             the size differences play in how the various operators work.
1282              
1283             =over 4
1284              
1285             =item B
1286              
1287             $bv3 = $bv1 + $bv2; # 111000000101000
1288              
1289             The concatenation of two bitvectors is returned as a new bitvector. This is made
1290             possible by the overload definition for the C<+> operator.
1291              
1292             Note that the following also works:
1293              
1294             print $bv1 . "hello"; # 111000hello
1295              
1296             In this case, Perl implicitly converts the left operand of the `.' operator into a
1297             string (which is made possible by the overloading for the stringification operator in
1298             this module) and then returns the result as a string.
1299              
1300             =item B
1301              
1302             print "$bv1"; # 111000
1303              
1304             This is made possible for the overload definition for the C<""> operator.
1305              
1306             =item B
1307              
1308             print int($bv1); # 56
1309              
1310             This is made possible by the overload definition for the C<0+> operator.
1311              
1312             =item B
1313              
1314             $bv3 = ~ $bv1;
1315             print $bv3; # 000111
1316              
1317             This is made possible by the overload definition for the C<~> operator. The original
1318             bitvector on which this unary operator is invoked remains unchanged.
1319              
1320             =item B
1321              
1322             $bv3 = $bv1 | $bv2; # 000111000
1323              
1324             When two bitvectors are of unequal length (as is the case here), the shorter vector
1325             is zero-padded on the left to equalize their lengths before the application of the
1326             logical operation. If this auto-padding property is not something you want, you
1327             should check the lengths of the argument bitvectors in your own script before
1328             invoking this operator. The result of the logical OR operation is returned as a new
1329             bitvector. The two operand bitvectors remain unchanged.
1330              
1331             =item B
1332              
1333             $bv3 = $bv1 & $bv2; # 000101000
1334              
1335             When two bitvectors are of unequal length (as is the case here), the shorter vector
1336             is zero-padded on the left to equalize their lengths before the application of the
1337             logical operation. If this auto-padding property is not something you want, you
1338             should check the lengths of the argument bitvectors in your own script before
1339             invoking this operator. The result of the logical AND operation is returned as a new
1340             bitvector. The two operand bitvectors remain unchanged.
1341              
1342              
1343             =item B
1344              
1345             $bv3 = $bv1 ^ $bv2; # 000010000
1346              
1347             When two bitvectors are of unequal length (as is the case here), the shorter vector
1348             is zero-padded on the left to equalize their lengths before the application of the
1349             logical operation. If this auto-padding property is not something you want, you
1350             should check the lengths of the argument bitvectors in your own script before
1351             invoking this operator. The result of the logical XOR operation is returned as a new
1352             bitvector. The two operand bitvectors remain unchanged.
1353              
1354              
1355             =item B
1356              
1357             $bv1 < $bv2 ? print "yes\n" : print "no\n"; # no
1358              
1359             $bv1 > $bv2 ? print "yes\n" : print "no\n"; # yes
1360              
1361             $bv1 <= $bv2 ? print "yes\n" : print "no\n"; # no
1362              
1363             $bv1 >= $bv2 ? print "yes\n" : print "no\n"; # yes
1364              
1365             $bv1 == $bv2 ? print "yes\n" : print "no\n"; # no
1366              
1367             $bv1 != $bv2 ? print "yes\n" : print "no\n"; # yes
1368              
1369             The overload definitions for all these operators are autogenerated from the overload
1370             definition for the 3-way numerical comparison operator 'C<< <=> >>'. B
1371             bitvectors are compared on the basis of their integer values.> That is, C<$bv1> is
1372             less than C<$bv2> if C is less than C.
1373              
1374             =item B
1375              
1376             $n = 3;
1377              
1378             $bv1 << $n; # $bv1 is now 000111
1379             $bv1 >> $n; # $bv1 is now 111000
1380              
1381             Since Perl does not expect these two operators to be invoked in a void context, such
1382             statements in your code will elicit a warning from Perl to that effect. If these
1383             warnings annoy you, you can turn them off by surrounding such statements with C
1384             warnings "void";> and C directives. The other option is to invoke
1385             such statements in the following manner:
1386              
1387             $bv1 = $bv1 << $n;
1388             $bv2 = $bv1 >> $n;
1389              
1390             That works because the overload definitions for these bit shift operators return the
1391             bitvector object on which they are invoked. As it turns out, this also allows for
1392             chained invocation of these operators, as in
1393              
1394             $bv1 = $bv1 << 3 << 1 >> 2; # 100011
1395              
1396             $bv1 = $bv1 << 2 >> 1 >> 3; # 111000
1397              
1398             =item B
1399              
1400             while (<$bv1>) {
1401             print "$_ ";
1402             } # 1 1 1 0 0 0
1403              
1404             This is made possible by the overload definition for the C<< <> >> operator. The
1405             C class includes an inner class C for this
1406             purpose.
1407              
1408             =back
1409              
1410              
1411             =head1 CONSTRUCTING BITVECTORS
1412              
1413             =over 4
1414              
1415             =item B
1416              
1417             $bv = Algorithm::BitVector->new( size => 0 );
1418              
1419             print "$bv\n"; # no output
1420              
1421             =item B
1422              
1423             $bv = Algorithm::BitVector->new( size => 13 ); # 0000000000000
1424              
1425             =item B
1426              
1427             $bv = Algorithm::BitVector->new( intVal => 5006 ); # 1001110001110
1428              
1429             The module returns the smallest possible bitvector that would accommodate the
1430             integer value provided with the C option.
1431              
1432             =item B
1433              
1434             As mentioned, with the C option, you get the smallest possible bitvector
1435             that can be generated from that integer. If you want a I bitvector, you can
1436             also supply the C option as shown below:
1437              
1438             $bv = Algorithm::BitVector->new( intVal => 5006, size => 16 ); # 0001001110001110
1439              
1440             If the value supplied for the C option in such a call is not larger than the
1441             smallest bit array that represents the C value, the constructor will throw an
1442             exception.
1443              
1444             =item B
1445              
1446             use Math::BigInt;
1447             $x = Math::BigInt->new('12345678901234567890123456789012345678901234567890');
1448             $bv = Algorithm::BitVector->new( intVal => $x );
1449             #1000011100100111111101100011011010011010101011111000001111001010000\
1450             #10101000000100110011101000111101011111000110001111111000110010110110\
1451             #01110001111110000101011010010
1452              
1453             =item B
1454              
1455             $bv = Algorithm::BitVector->new( bitstring => '00110011' ); # 00110011
1456              
1457             =item B
1458              
1459             $bv = Algorithm::BitVector->new( textstring => "hello\n" );
1460             # 011010000110010101101100011011000110111100001010
1461              
1462             =item B
1463              
1464             $bv = Algorithm::BitVector->new( hexstring => "68656c6c6f" );
1465             # 0110100001100101011011000110110001101111
1466              
1467             =item B
1468              
1469             $bv = Algorithm::BitVector->new( bitlist => [1, 1, 0, 1] ); # 1101
1470              
1471             =item B
1472              
1473             $bv = Algorithm::BitVector->new( filename => 'testinput1.txt' );
1474              
1475             print "$bv\n"; # Nothing to show yet
1476              
1477             $bv1 = $bv->read_bits_from_file(64); # Now you have a bitvector from the
1478             # first 64 bits
1479              
1480             Note that it takes two calls to create bitvectors from the contents of a file. The
1481             first merely creates an empty bitvector just to set the necessary file handle for
1482             reading the file. It is the second call in which you invoke the method
1483             C that actually returns a bitvector from the bits read from
1484             the file. Each call to C in this manner spits out a new bit
1485             vector.
1486              
1487             =back
1488              
1489             =head1 METHODS
1490              
1491             =head3 close_file_handle()
1492              
1493             =over 4
1494              
1495             When you construct bitvectors by block scanning a disk file, after you are done, you
1496             can call this method to close the file handle that was created to read the file:
1497              
1498             $bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1499             ## your code to read bit blocks for constructing bitvectors goes here
1500             $bv->close_file_handle();
1501              
1502             The constructor call in the first statement opens a file handle for reading the bits.
1503             It is this file handle that is closed by calling C,
1504              
1505             =back
1506              
1507             =head3 count_bits()
1508              
1509             =over 4
1510              
1511             $bv = Algorithm::BitVector->new( intVal => 45, size => 16 );
1512             print $bv->count_bits(); # 4
1513              
1514             This method returns an integer value which is the number of bits set to 1 in the
1515             bitvector on which the method is invoked.
1516              
1517             =back
1518              
1519             =head3 count_bits_sparse()
1520              
1521             =over 4
1522              
1523             Say you have a bitvector with two million bits:
1524              
1525             $bv = Algorithm::BitVector->new( size => 2000000 );
1526              
1527             and you happen to set its individual bits by
1528              
1529             $bv->set_bit(345234, 1);
1530             $bv->set_bit(233, 1);
1531             $bv->set_bit(243, 1);
1532             $bv->set_bit(18, 1);
1533             $bv->set_bit(785, 1);
1534              
1535             The following call returns the number of bits set in the bitvector:
1536              
1537             print $bv->count_bits_sparse(); # 5
1538              
1539             For very long bitvectors, as in the example here, this method will work much faster
1540             than the C method. However, for dense bitvectors, I expect
1541             C to work faster.
1542              
1543             =back
1544              
1545             =head3 deep_copy()
1546              
1547             =over 4
1548              
1549             $bv_copy = $bv->deep_copy();
1550              
1551             Subsequently, any alterations to the bitvectors pointed to by either C<$bv> or
1552             C<$bv_copy> will not affect the other.
1553              
1554             =back
1555              
1556             =head3 divide_into_two()
1557              
1558             =over 4
1559              
1560             ($bv1, $bv2) = $bv->divide_into_two(); # say $bv = 0000000000101101
1561             print "$bv1\n"; # 00000000
1562             print "$bv2\n"; # 00101101
1563              
1564             Divides an even sized bitvector into two bitvectors, each of size half of the
1565             bitvector on which this method is invoked. Throws an exception when invoked on a
1566             bitvector that is not even sized.
1567              
1568             =back
1569              
1570             =head3 gcd()
1571              
1572             =over 4
1573              
1574             This method uses the Euclid's algorithm to return the Greatest Common Divisor of the
1575             integer values represented by the two bitvectors. The following example shows a call
1576             to C returning the GCD of the integer values of the bitvectors C<$bv1> and
1577             C<$bv2>.
1578              
1579             $bv1 = Algorithm::BitVector->new( bitstring => '01100110' ); # 102
1580             $bv2 = Algorithm::BitVector->new( bitstring => '011010' ); # 26
1581             $gcd = $bv1->gcd( $bv2 ); # 10
1582             print int($gcd); # 2
1583              
1584             The result returned by C is a bitvector.
1585              
1586             =back
1587              
1588             =head3 gen_random_bits()
1589              
1590             =over 4
1591              
1592             $bv = Algorithm::BitVector->new( intVal => 0 );
1593             $bv = $bv->gen_random_bits(16); # 1100111001010101
1594              
1595             The call to C returns a bitvector whose bits are randomly
1596             generated. The number of bits in the returned bitvector equals the argument integer.
1597              
1598             =back
1599              
1600             =head3 get_bit()
1601              
1602             =over 4
1603              
1604             This method gives you array-like access to the individual bits of a bitvector.
1605              
1606             $bv = Algorithm::BitVector->new( bitstring => '10111' );
1607             print $bv->get_bit(0); # 1 (the first bit)
1608             print $bv->get_bit(1); # 0
1609             print $bv->get_bit(4); # 1 (the last bit)
1610              
1611             Negative values for the index scan a bitvector from right to left, with the C<-1>
1612             index standing for the last (meaning the right-most) bit in the vector:
1613              
1614             print $bv->get_bit(-1); # 1 (the last bit)
1615             print $bv->get_bit(-2); # 1
1616             print $bv->get_bit(-5); # 1 (the first bit)
1617              
1618             The C can also return a slice of a bitvector if the argument to the
1619             method is an anonymous array of the index range you desire, as in the second
1620             statement below:
1621              
1622             $bv = Algorithm::BitVector->new( bitstring => "10111011");
1623             my $arr = $bv->get_bit( [3..7] );
1624             print "@$arr\n"; # 1 1 0 1 1
1625              
1626             In this example, we want C to return all bits at positions indexed 3
1627             through 7, both ends inclusive. Note that the slice is returned as an array of bits.
1628              
1629              
1630             =back
1631              
1632             =head3 get_hex_string_from_bitvector()
1633              
1634             =over 4
1635              
1636             Assuming that length of your bitvector is a multiple of 4, this methods returns a
1637             hex representation of the bit pattern:
1638              
1639             $bv = Algorithm::BitVector->new(bitstring => "0110100001100101011011000110110001101111");
1640             print $bv->get_hex_string_from_bitvector(); # 68656c6c6f
1641              
1642             The hex representation is returned in the form if a string of hex characters. This
1643             method throws an exception if the size of the bitvector is not a multiple of 4.
1644              
1645             =back
1646              
1647             =head3 get_text_string_from_bitvector()
1648              
1649             =over 4
1650              
1651             $bv = Algorithm::BitVector->new( textstring => "hello" );
1652             print "$bv\n"; # 0110100001100101011011000110110001101111
1653             print $bv->get_text_from_bitvector(); # hello
1654              
1655             The method returns a string of ASCII characters by converting successive 8-bit slices
1656             of the bitvector into an ASCII character. It throws an exception if the size of the
1657             bit pattern is not a multiple of 8. Calling this method to create a text-based print
1658             representation of a bit vector makes sense only if you don't expect to see any
1659             unprintable characters in the successive 8-bit slices of the bitvector. Let's say
1660             you have created a bitvector from a text string using the appropriate constructor
1661             call. Subsequently, you encrypted this text string. Next, you or someone else
1662             decrypts the encrypted bit stream. Since what comes out at the decryption end must
1663             be the original text string, it would make sense to invoke this method to recover the
1664             original text.
1665              
1666             =back
1667              
1668             =head3 gf_divide_by_modulus()
1669              
1670             =over 4
1671              
1672             This method is for modular division in the Galois Field C. You must specify
1673             the modulus polynomial through a bit pattern and also the value of the integer C:
1674              
1675             $mod = Algorithm::BitVector->new( bitstring => '100011011' ); # AES modulus
1676             $n = 8;
1677             $a = Algorithm::BitVector->new( bitstring => '11100010110001' );
1678             ($quotient, $remainder) = $a->gf_divide_by_modulus($mod, $n);
1679             print "$quotient\n"; # 00000000111010
1680             print "$remainder\n"; # 10001111
1681              
1682             What this example illustrates is dividing the bitvector C<$a> by the modulus bit
1683             vector C<$mod>. For a more general division of one bitvector C<$a> by another bit
1684             vector C<$b>, you would carry out a multiplication of C<$a> by the MI of C<$b>, where
1685             MI stands for "multiplicative inverse" as returned by a call to the method
1686             C. A call to C returns two bitvectors, one for the
1687             quotient and the other for the remainder.
1688              
1689             =back
1690              
1691             =head3 gf_MI()
1692              
1693             =over 4
1694              
1695             This method returns the multiplicative inverse of a bit pattern in the Galois Field
1696             C. You must specify both the modulus polynomial through its bit pattern and
1697             the value of C:
1698              
1699             $modulus = Algorithm::BitVector->new( bitstring => '100011011' ); # AES modulus
1700             $n = 8;
1701             $a = Algorithm::BitVector->new( bitstring => '00110011' );
1702             print $a->gf_MI($modulus, $n); # 01101100
1703              
1704             Note that the multiplicative inverse is returned as a bitvector.
1705              
1706             =back
1707              
1708             =head3 gf_multiply()
1709              
1710             =over 4
1711              
1712             This method returns a product of two bit patterns in the Galois Field C field.
1713             That is, given any two polynomials with their coefficients drawn from the 0 and 1
1714             values in C, this method returns the product polynomial.
1715              
1716             $a = Algorithm::BitVector->new( bitstring => '0110001' );
1717             $b = Algorithm::BitVector->new( bitstring => '0110' );
1718             print $a->gf_multiply($b); #00010100110
1719              
1720             As you would expect, in general, the bit pattern returned by this method will be
1721             longer than the lengths of the two operand bitvectors. The result returned by the
1722             method is in the form of a bitvector.
1723              
1724             =back
1725              
1726             =head3 gf_multiply_modular()
1727              
1728             =over 4
1729              
1730             This method carries out modular multiplication in the Galois Field C. You
1731             must supply it the bitvector for the modulus polynomial and the value of C.
1732            
1733             $modulus = Algorithm::BitVector->new( bitstring => '100011011' ); # AES modulus
1734             $n = 8;
1735             $a = Algorithm::BitVector->new( bitstring => '0110001' );
1736             $b = Algorithm::BitVector->new( bitstring => '0110' );
1737             print $a->gf_multiply_modular($b, $modulus, $n); # 10100110
1738              
1739             This example returns the product of the bit patterns C<$a> and C<$b> modulo the bit
1740             pattern C<$modulus> in C. The result returned by the method is in the form
1741             of a bitvector.
1742              
1743             =back
1744              
1745             =head3 hamming_distance()
1746              
1747             =over 4
1748              
1749             Hamming distance is commonly used to measure dissimilarity between two bitvectors of
1750             the same size.
1751              
1752             $bv1 = Algorithm::BitVector->new( bitstring => '11111111' );
1753             $bv2 = Algorithm::BitVector->new( bitstring => '00101011' );
1754             print $bv1->hamming_distance( $bv2 ); # 4
1755              
1756             This distance returns the number of bit positions in which two the bit patterns
1757             differ. The method throws an exception if the two bitvectors are not of the same
1758             length. The value returned is an integer.
1759              
1760             =back
1761              
1762             =head3 int_value()
1763              
1764             =over 4
1765              
1766             You can find the integer value of a bitvector by
1767              
1768             $bv = Algorithm::BitVector->new( intVal => 5678 );
1769             print $bv3->int_value(); # 5678
1770              
1771             or, even more simply by
1772              
1773             print int($bv); # 5678
1774              
1775             which works on account of the overloading of the C<0+> operator.
1776              
1777             =back
1778              
1779             =head3 is_power_of_2()
1780              
1781             =over 4
1782              
1783             You can use this predicate to test if the integer value of a bitvector is a power of
1784             2:
1785              
1786             $bv = Algorithm::BitVector->new( bitstring => '10000000001110' );
1787             print int($bv); # 826
1788             print $bv->is_power_of_2(); # 0
1789              
1790             The predicate returns 1 for true and 0 for false.
1791              
1792             =back
1793              
1794             =head3 is_power_of_2_sparse()
1795              
1796             =over 4
1797              
1798             This does the same thing as the C method but in a way that makes it
1799             faster for large bitvectors with very few bits set.
1800              
1801             $bv = Algorithm::BitVector->new( size => 2000000 );
1802             $bv->set_bit(345234, 1);
1803             print $bv->is_power_of_2_sparse(); # 1
1804              
1805             =back
1806              
1807             =head3 jaccard_distance()
1808              
1809             =over 4
1810              
1811             The Jaccard distance between two bitvectors is 1 minus the Jaccard similarity
1812             coefficient:
1813              
1814             $bv1 = Algorithm::BitVector->new( bitstring => '11111111' );
1815             $bv2 = Algorithm::BitVector->new( bitstring => '10101011' );
1816             print $bv1->jaccard_distance( $bv2 ); # 0.375
1817              
1818             The value returned by the method is a floating point number between 0 and 1.
1819              
1820             =back
1821              
1822             =head3 jaccard_similarity()
1823              
1824             =over 4
1825              
1826             This method returns the Jaccard similarity coefficient between the two bitvectors
1827             pointed to by C<$bv1> and C<$bv2>:
1828              
1829             $bv1 = Algorithm::BitVector->new( bitstring => '11111111' );
1830             $bv2 = Algorithm::BitVector->new( bitstring => '10101011' );
1831             print $bv1->jaccard_similarity( $bv2 ); # 0.675
1832              
1833             The value returned by the method is a floating point number between 0 and 1.
1834              
1835             =back
1836              
1837             =head3 length()
1838              
1839             =over 4
1840              
1841             This method returns the total number of bits in a bitvector:
1842              
1843             $bv = Algorithm::BitVector->new( intVal => 5678 );
1844             print $bv; # 1011000101110
1845             print $bv->length(); # 13
1846              
1847             Note that what C returns is the total size of a bitvector, including any
1848             leading zeros.
1849              
1850             =back
1851              
1852             =head3 multiplicative_inverse()
1853              
1854             =over 4
1855              
1856             This method calculates the multiplicative inverse using normal integer arithmetic.
1857             For multiplicative inverses in a Galois Field C, use the method C
1858             described earlier in this API.
1859              
1860             $modulus = Algorithm::BitVector->new( intVal => 32 );
1861             $bv = Algorithm::BitVector->new( intVal => 17 );
1862             $result = $bv->multiplicative_inverse( $modulus );
1863             if ($result) {
1864             print $result; # 10001
1865             } else {
1866             print "No multiplicative inverse in this case\n";
1867             }
1868              
1869             What this example says is that the multiplicative inverse of 17 modulo 32 is 17. That
1870             is because 17 times 17 in modulo 32 arithmetic equals 1. When using this method, you
1871             must test the value returned for 0. If the returned value is 0, that means that the
1872             number corresponding to the bitvector on which this method is invoked does not
1873             possess a multiplicative inverse with respect to the modulus.
1874              
1875             =back
1876              
1877             =head3 next_set_bit()
1878              
1879             =over 4
1880              
1881             Starting from a given bit position, this method returns the index of the next bit
1882             that is set in a bitvector:
1883              
1884             $bv = Algorithm::BitVector->new( bitstring => '00000000000001' );
1885             print $bv->next_set_bit(5); # 13
1886              
1887             In this example, we are asking the method to return the index of the bit that is set
1888             after the bit position indexed 5.
1889              
1890             =back
1891              
1892             =head3 pad_from_left()
1893              
1894             =over 4
1895              
1896             You can pad a bitvector from the left with a designated number of zeros:
1897              
1898             $bv = Algorithm::BitVector->new( bitstring => '101010' );
1899             print $bv->pad_from_left( 4 ); # 0000101010
1900              
1901             The method returns the bitvector on which it is invoked. So you can think of it as
1902             an in-place extension of a bitvector (although, under the hood, the extension is
1903             carried out by giving a new longer C<_vector> attribute to the bitvector object).
1904              
1905             =back
1906              
1907             =head3 pad_from_right()
1908              
1909             =over 4
1910              
1911             You can pad a bitvector from the right with a designated number of zeros:
1912              
1913             $bv = Algorithm::BitVector->new( bitstring => '101010' );
1914             print $bv->pad_from_right( 4 ); # 1010100000
1915              
1916             The method returns the bitvector on which it is invoked. So you can think of it as
1917             an in-place extension of a bitvector (although, under the hood, the extension is
1918             carried out by giving a new longer C<_vector> attribute to the bitvector object).
1919              
1920              
1921             =back
1922              
1923             =head3 permute()
1924              
1925             =over 4
1926              
1927             You can permute the bits in a bitvector with a permutation list as shown below:
1928              
1929             $bv1 = Algorithm::BitVector->new( intVal => 203, size => 8 );
1930             print $bv1; # 11001011
1931             $bv2 = $bv1->permute( [3, 2, 1, 0, 7, 6, 5, 4] );
1932             print $bv2; # 00111101
1933              
1934             The method returns a new bitvector with permuted bits.
1935              
1936             =back
1937              
1938             =head3 rank_of_bit_set_at_index()
1939              
1940             =over 4
1941              
1942             You can measure the "rank" of a bit that is set at a given index. Rank is the number
1943             of bits that are set up to the argument position, as in
1944              
1945             $bv = Algorithm::BitVector->new( bitstring => '01010101011100' );
1946             print $bv->rank_of_bit_set_at_index( 10 ); # 6
1947              
1948             The value 6 returned by this call to C is the number of
1949             bits set up to the position indexed 10 (including that position). The method throws
1950             an exception if there is no bit set at the argument position.
1951              
1952             =back
1953              
1954             =head3 read_bits_from_file()
1955              
1956             =over 4
1957              
1958             Constructing bitvectors from the contents of a disk file takes two steps: First you
1959             must make the call shown in the first statement below. The purpose of this call is to
1960             create a file handle that is associated with the variable C<$bv> in this case.
1961             Subsequent invocations of C on this variable will read
1962             blocks of C<$n> bits and return a bitvector for each block thus read. The variable
1963             C<$n> must be a multiple of 8 for this to work.
1964              
1965             $bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1966             $bv1 = $bv->read_bits_from_file(64);
1967             $bv2 = $bv->read_bits_from_file(64);
1968             ...
1969             ...
1970             $bv->close_file_handle();
1971              
1972             When reading a file as shown above, you can test the attribute C of the
1973             bitvector object in order to find out if there is more to be read in the file. The
1974             C loop shown below reads all of a file in 64-bit blocks.
1975              
1976             $bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1977             while ($bv->{more_to_read}) {
1978             my $bv_read = $bv->read_bits_from_file( 64 );
1979             print "$bv_read\n";
1980             }
1981             $bv->close_file_handle();
1982              
1983             The size of the last bitvector constructed from a file corresponds to how many bytes
1984             remain unread in the file at that point. It is your responsibility to zero-pad the
1985             last bitvector appropriately if, say, you are doing block encryption of the whole
1986             file.
1987              
1988             =back
1989              
1990             =head3 reset()
1991              
1992             =over 4
1993              
1994             You can reset a previously constructed bitvector all either all 1's or all 0's by
1995             calling this method:
1996              
1997             $bv = Algorithm::BitVector->new( intVal => 203, size => 8 );
1998             print $bv; # 11001011
1999             $bv->reset(1);
2000             print $bv; # 11111111
2001             $bv->reset(0);
2002             print $bv; # 00000000
2003              
2004             What the method accomplishes can be thought of as in-place resetting of the bits. The
2005             method does not return anything.
2006              
2007             =back
2008              
2009             =head3 reverse()
2010              
2011             =over 4
2012              
2013             Given a bitvector, you can construct a bitvector with all the bits reversed, in the
2014             sense that what was left-to-right earlier now becomes right-to-left, as in
2015              
2016             $bv = Algorithm::BitVector->new( bitstring => '01100001' );
2017             print $bv->reverse(); # 10000110
2018              
2019             A call to this method returns a new bitvector whose bits are in reverse order in
2020             relation to the bits in the bitvector on which the method is called.
2021              
2022             =back
2023              
2024             =head3 runs()
2025              
2026             =over 4
2027              
2028             This method returns an array of runs of 1's and 0's in a bitvector:
2029              
2030             $bv = Algorithm::BitVector->new( bitlist => [1,1,1,0,0,1] );
2031             my @bvruns = $bv->runs();
2032             print "@bvruns\n"; # 111 00 1
2033              
2034             Each element of the array that is returned by C is a string of either 1's or
2035             0's.
2036              
2037             =back
2038              
2039             =head3 set_bit()
2040              
2041             =over 4
2042              
2043             With array-like indexing, you can use this method to set the individual bits of a
2044             previously constructed bitvector. Both positive and negative values are allowed for
2045             the bit position index. The method takes two explicit arguments, the first for the
2046             position of the bit you want to set and the second for the value of the bit.
2047              
2048             $bv = Algorithm::BitVector->new( bitstring => '1111' );
2049             $bv->set_bit(0,0); # set the first bit to 0
2050             $bv->set_bit(1,0); # set the next bit to 0
2051             print $bv; # 0011
2052             $bv->set_bit(-1,0); # set the last bit to 0
2053             $bv->set_bit(-2,0); # set the bit before the last bit to 0
2054             print $bv; # 0000
2055              
2056             =back
2057              
2058             =head3 set_value()
2059              
2060             =over 4
2061              
2062             This method can be used to change the bit pattern associated with a previously
2063             constructed bitvector:
2064              
2065             $bv = Algorithm::BitVector->new( intVal => 7, size => 16 );
2066             print $bv; # 0000000000000111
2067             $bv->set_value( intVal => 45 );
2068             print $bv; # 101101
2069              
2070             You can think of this method as carrying out an in-place resetting of the bit array
2071             in a bitvector. The method does not return anything.
2072              
2073             =back
2074              
2075             =head3 shift_left()
2076              
2077             =over 4
2078              
2079             If you want to shift a bitvector non-circularly to the left, this is the method to
2080             call:
2081              
2082             $bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2083             $bv->shift_left(3);
2084             print $bv; # 001000
2085             $bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2086             $bv->shift_left(3)->shift_right(3);
2087             print $bv; # 000001
2088              
2089             As the bitvector is shifted non-circularly to the left, the exposed bit positions on
2090             the right are filled with zeros. Note that the method returns the bitvector object
2091             on which it is invoked. That is the reason why the chained invocation of the method
2092             in the fifth statement above works.
2093              
2094             =back
2095              
2096             =head3 shift_right()
2097              
2098             =over 4
2099              
2100             If you want to shift a bitvector non-circularly to the right, this is the method to
2101             call:
2102              
2103             $bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2104             $bv->shift_right(3);
2105             print $bv; # 000111
2106             $bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2107             $bv->shift_right(3)->shift_right(2);
2108             print $bv; # 000001
2109              
2110             As the bitvector is shifted non-circularly to the right, the exposed bit positions on
2111             the left are filled with zeros. Note that the method returns the bitvector object
2112             on which it is invoked. That is the reason why the chained invocation of the method
2113             in the fifth statement above works.
2114              
2115             =back
2116              
2117             =head3 test_for_primality()
2118              
2119             =over 4
2120              
2121             If the integer value of a bitvector is small (meaning smaller than C<< 0x7f_ff_ff_ff >>),
2122             you can use this method to test it for its primality through the Miller-Rabin
2123             probabilistic test:
2124              
2125             $p = 7417;
2126             $bv = Algorithm::BitVector->new( intVal => $p );
2127             $check = $bv->test_for_primality();
2128             print "The primality test for $p: $check\n";
2129             # The primality test for 7417: is a prime with probability 0.99993896484375
2130              
2131             The method returns one of two strings: If the primality test succeeds, the method
2132             returns a string like "C". And if the test fails,
2133             the method returns the string "C".
2134              
2135             =back
2136              
2137             =head3 unpermute()
2138              
2139             =over 4
2140              
2141             This method reverses the permutation carried out by a call to the C method
2142             as shown below:
2143              
2144             $bv1 = Algorithm::BitVector->new( intVal => 203, size => 8 );
2145             print $bv1; # 11001011
2146             $bv2 = $bv1->permute( [3, 2, 1, 0, 7, 6, 5, 4] );
2147             print $bv2; # 00111101
2148             $bv3 = $bv2->unpermute( [3, 2, 1, 0, 7, 6, 5, 4] );
2149             print $bv3; # 11001011
2150              
2151             The method returns a new bitvector with unpermuted bits. Also note that the method
2152             throws an exception if the permutation list is not as long as the size of the
2153             bitvector on which the method is invoked.
2154              
2155             =back
2156              
2157             =head3 write_to_file()
2158              
2159             =over 4
2160              
2161             This method writes the bitvectors in your program to a disk file:
2162              
2163             $bv1 = Algorithm::BitVector->new( bitstring => '00001010' );
2164             open my $FILEOUT, ">test.txt";
2165             $bv1->write_to_file( $FILEOUT );
2166             close $FILEOUT;
2167              
2168             The size of a bitvector must be a multiple of 8 for this write method to work. If
2169             this condition is not met, the method will throw an exception.
2170              
2171             B When writing an internally generated bitvector out
2172             to a disk file, it may be important to open the file in the binary mode, since
2173             otherwise the bit pattern `00001010' ('\n') in your bitstring will be written out as
2174             0000110100001010 ('\r\n') which is the line break on Windows machines.
2175              
2176             =back
2177              
2178             =head1 REQUIRED
2179              
2180             This module imports the following modules:
2181              
2182             Math::BigInt
2183             List::Util
2184             Math::Random
2185             Fcntl
2186              
2187              
2188             =head1 THE C DIRECTORY
2189              
2190             The C directory contains the following script that invokes all of the
2191             functionality of this module:
2192              
2193             BitVectorDemo.pl
2194              
2195             In case there is any doubt about how exactly to invoke a method or how to use an
2196             operator, please look at the usage in this script.
2197              
2198             =head1 EXPORT
2199              
2200             None by design.
2201              
2202             =head1 BUGS
2203              
2204             Please notify the author if you encounter any bugs. When sending email, please place
2205             the string 'BitVector' in the subject line.
2206              
2207             =head1 INSTALLATION
2208              
2209             Download the archive from CPAN in any directory of your choice. Unpack the archive
2210             with a command that on a Linux machine would look like:
2211              
2212             tar zxvf Algorithm-BitVector-1.24.tar.gz
2213              
2214             This will create an installation directory for you whose name will be
2215             C. Enter this directory and execute the following commands
2216             for a standard install of the module if you have root privileges:
2217              
2218             perl Makefile.PL
2219             make
2220             make test
2221             sudo make install
2222              
2223             If you do not have root privileges, you can carry out a non-standard install the
2224             module in any directory of your choice by:
2225              
2226             perl Makefile.PL prefix=/some/other/directory/
2227             make
2228             make test
2229             make install
2230              
2231             With a non-standard install, you may also have to set your PERL5LIB environment
2232             variable so that this module can find the required other modules. How you do that
2233             would depend on what platform you are working on. In order to install this module in
2234             a Linux machine on which I use tcsh for the shell, I set the PERL5LIB environment
2235             variable by
2236              
2237             setenv PERL5LIB /some/other/directory/lib64/perl5/:/some/other/directory/share/perl5/
2238              
2239             If I used bash, I'd need to declare:
2240              
2241             export PERL5LIB=/some/other/directory/lib64/perl5/:/some/other/directory/share/perl5/
2242              
2243              
2244             =head1 THANKS
2245              
2246             The bug in the primality test function, whose fix resulted in Version 1.21, was
2247             reported by Dana Jacobsen in a bug report filed at C. Thanks Dana!
2248              
2249             The restriction on the Perl version was removed on Slaven Rezic's recommendation. He
2250             says the module runs fine with Perl 5.8.9. Thanks Slaven!
2251              
2252             =head1 AUTHOR
2253              
2254             Avinash Kak, kak@purdue.edu
2255              
2256             If you send email, please place the string "BitVector" in your subject line to get past
2257             my spam filter.
2258              
2259             =head1 COPYRIGHT
2260              
2261             This library is free software; you can redistribute it and/or modify it under the
2262             same terms as Perl itself.
2263              
2264             Copyright 2015 Avinash Kak
2265              
2266             =cut
2267