File Coverage

blib/lib/Algorithm/BitVector.pm
Criterion Covered Total %
statement 114 664 17.1
branch 32 218 14.6
condition 20 155 12.9
subroutine 17 75 22.6
pod 37 39 94.8
total 220 1151 19.1


line stmt bran cond sub pod time code
1             package Algorithm::BitVector;
2              
3             #!/usr/bin/perl -w
4              
5             #------------------------------------------------------------------------------------
6             # Copyright (c) 2015 Avinash Kak. All rights reserved. This program is free
7             # software. You may modify and/or distribute it under the same terms as Perl itself.
8             # This copyright notice must remain attached to the file.
9             #
10             # Algorithm::BitVector is a Perl module for creating a memory efficient packed
11             # representation of bit arrays and for logical and numerical operations on such
12             # arrays.
13             # -----------------------------------------------------------------------------------
14              
15             #use 5.10.0;
16 1     1   14782 use strict;
  1         4  
  1         43  
17 1     1   7 use warnings;
  1         1  
  1         41  
18 1     1   6 use Carp;
  1         7  
  1         104  
19 1     1   6 use List::Util qw(pairmap max reduce any);
  1         1  
  1         141  
20 1     1   2949792 use Math::BigInt;
  1         18325  
  1         3  
21 1     1   606888 use Math::Random;
  1         4961  
  1         95  
22 1     1   5 use Fcntl 'SEEK_CUR';
  1         2  
  1         67  
23              
24             our $VERSION = '1.23';
25              
26 1         7 use overload '+' => '_join',
27             '""' => '_str',
28             '0+' => '_int',
29             '~' => '_invert',
30             '|' => '_or',
31             '&' => '_and',
32             '^' => '_xor',
33             '<=>' => '_compare',
34             '<<' => '_lshift',
35             '>>' => '_rshift',
36             '<>' => '_iter',
37 1     1   4 'fallback' => 1;
  1         2  
38              
39             sub _readblock {
40 0     0   0 my $blocksize = shift;
41 0         0 my $bitvector = shift;
42 0         0 my $block;
43             my $more_to_read;
44 0         0 my $i = 0;
45 0         0 my $byte_as_bits;
46 0         0 my $bitstring = '';
47 0         0 while ( $i < $blocksize / 8 ) {
48 0         0 $i++;
49 0         0 my $num_bytes_read = sysread($bitvector->{FILEIN}, my $byte, 1);
50 0 0       0 unless ($num_bytes_read) {
51 0 0       0 if (length($bitstring) < $blocksize) {
52 0         0 $bitvector->{more_to_read} = 0;
53 0         0 $more_to_read = 0;
54             }
55 0         0 return $bitstring;
56             } else {
57 0         0 my $bits_as_string = sprintf "%vb", $byte;
58 0         0 $bits_as_string = '0' x (8 - length($bits_as_string)) . $bits_as_string;
59 0         0 $bitstring .= $bits_as_string;
60             }
61             }
62 0         0 my $file_pos = tell $bitvector->{FILEIN};
63             # peek at the next byte; moves file position only if a
64             # byte is read
65 0         0 my $num_bytes_read = sysread($bitvector->{FILEIN}, my $next_byte, 1);
66 0 0       0 if ($num_bytes_read) {
67             # pretend we never read the byte
68 0         0 sysseek $bitvector->{FILEIN}, $file_pos - 1, SEEK_CUR;
69             } else {
70 0         0 $bitvector->{more_to_read} = 0;
71             }
72 0         0 return $bitstring;
73             }
74              
75             # Constructor:
76             sub new {
77 3     3 0 20 my ($class, %args) = @_;
78 3         7 my @params = keys %args;
79 3 50       8 croak "\nYou have used a wrong name for a keyword argument " .
80             "--- perhaps a misspelling\n"
81             if _check_for_illegal_params(@params) == 0;
82 3         18 my $self = {
83             filename => $args{filename},
84             size => $args{size},
85             intVal => $args{intVal},
86             bitlist => $args{bitlist},
87             bitstring => $args{bitstring},
88             hexstring => $args{hexstring},
89             textstring => $args{textstring},
90             };
91 3         6 bless $self, $class;
92 3 50       69 if ( $self->{filename} ) {
    100          
    100          
    50          
    50          
    50          
    0          
93 0 0 0     0 die "When using the `filename' option in the constructor call, you cannot use any " .
      0        
      0        
      0        
      0        
94             "other option at the same time: $!"
95             if $self->{intVal} or $self->{size} or $self->{bitlist}
96             or $self->{bitstring} or $self->{hexstring} or $self->{textstring};
97 0 0       0 open $self->{FILEIN}, "< $self->{filename}"
98             or die "unable to open file $self->{filename}: $!";
99 0         0 $self->{more_to_read} = 1;
100 0         0 return $self;
101             } elsif ( defined $self->{intVal} ) {
102 1 50 33     16 die "When using the `intVal' option in the constructor call, you CANNOT use `filename', " .
      33        
      33        
      33        
103             "`bitlist', 'bitstring', `hexstring', or `textstring' options: $!"
104             if $self->{filename} or $self->{bitlist} or $self->{bitstring}
105             or $self->{hexstring} or $self->{textstring};
106 1 50 33     4 if ($self->{intVal} == 0 && ! defined $self->{size}) {
107 0         0 $self->{bitlist} = [0];
108 0         0 $self->{size} = 1;
109             } else {
110 1         2 my @bitarray;
111 1 50       3 if (ref($self->{intVal}) eq 'Math::BigInt') {
112 0         0 my $bitlist_str = $self->{intVal}->as_bin();
113 0         0 @bitarray = split //, $bitlist_str;
114             # Now get rid of the `0b' prefix on the string returned by as_bin():
115 0         0 shift @bitarray;
116 0         0 shift @bitarray;
117             } else {
118 1         7 my $bitlist_str = sprintf "%b", $self->{intVal};
119 1 50       3 if (defined $self->{size}) {
120 0 0       0 die "The value specified for size must be at least as large as for the " .
121             "smallest bitvector possible for the intVal integer: $!"
122             if $self->{size} < length $bitlist_str;
123 0         0 my $n = $self->{size} - length $bitlist_str;
124 0         0 my $extended_bitlist_str = '0' x $n . $bitlist_str;
125 0         0 @bitarray = split //, $extended_bitlist_str;
126             } else {
127 1         5 @bitarray = split //, $bitlist_str;
128             }
129             }
130 1         3 $self->{bitlist} = \@bitarray;
131 1         1 $self->{size} = scalar @{$self->{bitlist}};
  1         20  
132             }
133             } elsif (defined $self->{size}) {
134 1 50 33     25 die "When using the `size' option in the constructor call, you CANNOT use `filename', " .
      33        
      33        
      33        
      33        
135             "`bitlist', 'bitstring', `hexstring', or `textstring' options: $!"
136             if $self->{filename} or $self->{intVal} or $self->{bitlist}
137             or $self->{bitstring} or $self->{hexstring} or $self->{textstring};
138 1         4 my $bitstring = "0" x $self->{size};
139 1         8 my @bitlist_from_bitstring = split '', $bitstring;
140 1         3 $self->{bitlist} = \@bitlist_from_bitstring;
141             } elsif ($self->{bitlist}) {
142 0 0 0     0 die "When using the `bitlist' option in the constructor call, you cannot use any " .
      0        
      0        
      0        
      0        
143             "other option at the same time: $!"
144             if $self->{filename} or $self->{intVal} or $self->{size}
145             or $self->{bitstring} or $self->{hexstring} or $self->{textstring};
146 0         0 $self->{size} = scalar @{$self->{bitlist}};
  0         0  
147             } elsif (defined $self->{bitstring}) {
148 0 0 0     0 die "When using the `bitstring' option in the constructor call, you cannot use any " .
      0        
      0        
      0        
      0        
149             "other option at the same time: $!"
150             if $self->{filename} or $self->{intVal} or $self->{size}
151             or $self->{bitlist} or $self->{hexstring} or $self->{textstring};
152 0         0 my @bitlist_from_bitstring = split //, $self->{bitstring};
153 0         0 $self->{bitlist} = \@bitlist_from_bitstring;
154 0         0 $self->{size} = scalar @{$self->{bitlist}};
  0         0  
155             } elsif (defined $self->{textstring}) {
156 1 50 33     19 die "When using the `textstring' option in the constructor call, you cannot use any " .
      33        
      33        
      33        
      33        
157             "other option at the same time: $!"
158             if $self->{filename} or $self->{intVal} or $self->{size}
159             or $self->{bitlist} or $self->{hexstring} or $self->{bitstring};
160 5 50       12 my $bitstring_from_text = join '', map {length($_) == 8 ? $_
  5         17  
161 1         6 : ('0' x (8 - length($_))) . $_} map {sprintf "%b", $_}
162             map ord, split //, $self->{textstring};
163 1         10 my @bitlist_from_text = split //, $bitstring_from_text;
164 1         3 $self->{bitlist} = \@bitlist_from_text;
165 1         1 $self->{size} = scalar @{$self->{bitlist}};
  1         3  
166             } elsif (defined $self->{hexstring}) {
167 0 0 0     0 die "When using the `hexstring' option in the constructor call, you cannot use any " .
      0        
      0        
      0        
      0        
168             "other option at the same time: $!"
169             if $self->{filename} or $self->{intVal} or $self->{size}
170             or $self->{bitlist} or $self->{textstring} or $self->{bitstring};
171 0 0       0 my $bitstring_from_hex = join '', map {length($_) == 4 ? $_
  0         0  
172 0         0 : ('0' x (4 - length($_))) . $_} map {sprintf "%b", $_}
173 0         0 map {hex $_} split //, $self->{hexstring};
174 0         0 my @bitlist_from_hex = split //, $bitstring_from_hex;
175 0         0 $self->{bitlist} = \@bitlist_from_hex;
176 0         0 $self->{size} = scalar @{$self->{bitlist}};
  0         0  
177             }
178 3         3 my $shorts_needed = int( (@{$self->{bitlist}} + 15) / 16 );
  3         11  
179 3         9 @{$self->{_vector}} = map {unpack "n", pack("n", 0)} 0 .. $shorts_needed-1;
  3         9  
  5         12  
180 3         23 my @interleaved = (0..$self->{size}-1, @{$self->{bitlist}})
  60         73  
181 3         8 [map {$_,$_+$self->{size}} (0 .. $self->{size}-1)];
182 3     60   35 pairmap {$self->set_bit($a,$b)} @interleaved;
  60         74  
183 3         11 $self->{bitlist} = undef;
184 3         16 return $self;
185             }
186              
187             ## Set the bit at the designated position to the value shown
188             sub set_bit {
189 60     60 1 52 my $self = shift;
190 60         39 my $posn = shift;
191 60         47 my $val = shift;
192 60 50 66     200 die "incorrect value for a bit" unless $val == 0 or $val == 1;
193 60 50 33     363 die "index range error" if ($posn >= $self->{size}) or ($posn < - $self->{size});
194 60 50       75 $posn = $self->{size} + $posn if $posn < 0;
195 60         75 my $block_index = int($posn / 16);
196 60         44 my $shift = $posn & 0xF;
197 60         45 my $cv = $self->{_vector}->[$block_index];
198 60 100       122 if ( ( ( $cv >> $shift ) & 1 ) != $val) {
199 25         57 $self->{_vector}->[$block_index] = $cv ^ (1 << $shift);
200             }
201             }
202              
203             ## Get the bit from the designated position. This method can also return a slice of a
204             ## bitvector. However, note that the slice is returned as a list of the bits in the
205             ## index range you specified. You can easily convert the list of bits returned into a
206             ## bitvector in your own code.
207             sub get_bit {
208 65     65 1 47 my $self = shift;
209 65         49 my $pos = shift;
210 65 100       91 unless (ref($pos) eq "ARRAY") {
211 60 50 33     182 die "index range error" if ($pos >= $self->{size}) or ($pos < - $self->{size});
212 60 50       77 $pos = $self->{size} + $pos if $pos < 0;
213 60         131 return ( $self->{_vector}->[int($pos/16)] >> ($pos&15) ) & 1;
214             }
215 5         15 my @slice = map $self->get_bit($_), (@$pos)[0..@$pos-1];
216 5         28 return \@slice;
217             }
218              
219             ## Overloading of the string conversion operator. Return the string representation
220             ## of a bitvector.
221             sub _str {
222 1     1   10 my $self = shift;
223 1         4 return join '', map $self->get_bit($_), 0 .. $self->{size}-1;
224             }
225              
226             ## Overloading of the `+' operator. Concatenate the argument bitvectors. Return the
227             ## concatenated bitvector as a new BitVector instance.
228             sub _join {
229 0     0   0 my ($bv1, $bv2) = @_;
230 0 0 0     0 die "Abort: The concatenation operator invoked with either undefined " .
231             "or wrong types of operands: $!"
232             unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
233             UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
234 0         0 my $allbits = $bv1->_str() . $bv2->_str();
235 0         0 return Algorithm::BitVector->new( bitstring => $allbits );
236             }
237              
238             sub _compare {
239 0     0   0 my ($bv1, $bv2) = @_;
240 0 0 0     0 die "Abort: The comparison operator invoked with either undefined " .
241             "or wrong types of operands: $!"
242             unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
243             UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
244 0         0 my $bigint1 = Math::BigInt->from_bin( "$bv1" );
245 0         0 my $bigint2 = Math::BigInt->from_bin( "$bv2" );
246 0         0 return $bigint1->bcmp($bigint2);
247             }
248              
249             sub deep_copy {
250 0     0 1 0 my $self = shift;
251 0         0 my $result_bv = Algorithm::BitVector->new( size => $self->{size} );
252 0         0 foreach my $i (0..@{$result_bv->{_vector}}-1) {
  0         0  
253 0         0 $result_bv->{_vector}->[$i] = $self->{_vector}->[$i];
254             }
255 0         0 return $result_bv;
256             }
257              
258             ## Invert the bits in the bitvector on which the method is invoked
259             ## and return the result as a new bitvector.
260             sub _invert {
261 0     0   0 my $self = shift;
262 0 0       0 die "Abort: The operator '~' for bit inversion invoked on an object that is " .
263             "not of type Algorithm::BitVector"
264             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
265 0         0 my $result_bv = $self->deep_copy();
266 0         0 foreach my $i (0..@{$result_bv->{_vector}}-1) {
  0         0  
267             # The unary `~' operator may assume a 32-bit wide field:
268 0         0 $result_bv->{_vector}->[$i] = ~ $result_bv->{_vector}->[$i] & 0x0000FFFF;
269             }
270 0         0 return $result_bv;
271             }
272              
273             ## Take a bitwise 'OR' of two bitvectors. Return the result as a new bitvector. If
274             ## the two bitvectors are not of the same size, pad the shorter one with zeros from
275             ## the left.
276             sub _or {
277 0     0   0 my ($bv1, $bv2) = @_;
278 0 0 0     0 die "Abort: The `|' operator invoked with either undefined " .
279             "or wrong types of operands: $!"
280             unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
281             UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
282 0         0 my ($bv3, $bv4);
283 0 0       0 if ( $bv1->{size} < $bv2->{size} ) {
    0          
284 0         0 $bv3 = $bv1->_resize_pad_from_left($bv2->{size} - $bv1->{size});
285 0         0 $bv4 = $bv2;
286             } elsif ( $bv1->{size} > $bv2->{size} ) {
287 0         0 $bv3 = $bv1;
288 0         0 $bv4 = $bv2->_resize_pad_from_left($bv1->{size} - $bv2->{size});
289             } else {
290 0         0 $bv3 = $bv1;
291 0         0 $bv4 = $bv2;
292             }
293 0         0 my $result_bv = Algorithm::BitVector->new( size => $bv3->{size} );
294 0         0 foreach my $i (0..@{$result_bv->{_vector}}-1) {
  0         0  
295             # The binary `|' operator may assume 32-bit wide fields:
296 0         0 $result_bv->{_vector}->[$i] = ($bv3->{_vector}->[$i] | $bv4->{_vector}->[$i] ) & 0x0000FFFF;
297             }
298 0         0 return $result_bv;
299             }
300              
301             ## Resize a bitvector by padding with n 0's from the left. Return the result as a
302             ## new bitvector.
303             sub _resize_pad_from_left {
304 0     0   0 my $self = shift;
305 0         0 my $n = shift;
306 0         0 my $new_str = '0' x $n . "$self";
307 0         0 return Algorithm::BitVector->new( bitstring => $new_str );
308             }
309              
310             ## Take a bitwise 'AND' of the bitvector on which the method is invoked with
311             ## the argument bitvector. Return the result as a new bitvector. If the two
312             ## bitvectors are not of the same size, pad the shorter one with zeros from the
313             ## left.
314             sub _and {
315 0     0   0 my ($bv1, $bv2) = @_;
316 0 0 0     0 die "Abort: The concatenation operator invoked with either undefined " .
317             "or wrong types of operands: $!"
318             unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
319             UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
320 0         0 my ($bv3, $bv4);
321 0 0       0 if ( $bv1->{size} < $bv2->{size} ) {
    0          
322 0         0 $bv3 = $bv1->_resize_pad_from_left($bv2->{size} - $bv1->{size});
323 0         0 $bv4 = $bv2;
324             } elsif ( $bv1->{size} > $bv2->{size} ) {
325 0         0 $bv3 = $bv1;
326 0         0 $bv4 = $bv2->_resize_pad_from_left($bv1->{size} - $bv2->{size});
327             } else {
328 0         0 $bv3 = $bv1;
329 0         0 $bv4 = $bv2;
330             }
331 0         0 my $result_bv = Algorithm::BitVector->new( size => $bv3->{size} );
332 0         0 foreach my $i (0..@{$result_bv->{_vector}}-1) {
  0         0  
333             # The binary bitwise `&' operator may assume 32-bit wide fields:
334 0         0 $result_bv->{_vector}->[$i] = ($bv3->{_vector}->[$i] & $bv4->{_vector}->[$i] ) & 0x0000FFFF;
335             }
336 0         0 return $result_bv;
337             }
338              
339             ## Take a bitwise 'XOR' of the bitvector on which the method is invoked with
340             ## the argument bitvector. Return the result as a new bitvector. If the two
341             ## bitvectors are not of the same size, pad the shorter one with zeros from the
342             ## left.
343             sub _xor {
344 0     0   0 my ($bv1, $bv2) = @_;
345 0 0 0     0 die "Abort: The xor operator invoked with either undefined " .
346             "or wrong types of operands: $!"
347             unless UNIVERSAL::isa( $bv1, 'Algorithm::BitVector') and
348             UNIVERSAL::isa( $bv2, 'Algorithm::BitVector');
349 0         0 my ($bv3, $bv4);
350 0 0       0 if ( $bv1->{size} < $bv2->{size} ) {
    0          
351 0         0 $bv3 = $bv1->_resize_pad_from_left($bv2->{size} - $bv1->{size});
352 0         0 $bv4 = $bv2;
353             } elsif ( $bv1->{size} > $bv2->{size} ) {
354 0         0 $bv3 = $bv1;
355 0         0 $bv4 = $bv2->_resize_pad_from_left($bv1->{size} - $bv2->{size});
356             } else {
357 0         0 $bv3 = $bv1;
358 0         0 $bv4 = $bv2;
359             }
360 0         0 my $result_bv = Algorithm::BitVector->new( size => $bv3->{size} );
361 0         0 foreach my $i (0..@{$result_bv->{_vector}}-1) {
  0         0  
362             # The binary bitwise `^' operator may assume 32-bit wide fields:
363 0         0 $result_bv->{_vector}->[$i] = ($bv3->{_vector}->[$i] ^ $bv4->{_vector}->[$i] ) & 0x0000FFFF;
364             }
365 0         0 return $result_bv;
366             }
367              
368             # For the overloading of the iterator '<>' operator:
369             sub _iter {
370 0     0   0 my $self = shift;
371 0         0 my $pos = 0;
372 0 0       0 $self->{_iterator} = BitVecIterator->new($self, $pos) unless $self->{_iter_called};
373 0         0 &{$self->{_iterator}->next()};
  0         0  
374             }
375              
376             {
377             # This inner class needed for implementing iterator overloading:
378             package BitVecIterator;
379             sub new {
380 0     0   0 my $self = [ $_[1], $_[2] ];
381 0         0 bless $self, $_[0];
382 0         0 $_[1]->{_iter_called} = 1;
383 0         0 return $self;
384             }
385             sub next {
386 0     0   0 my $self = shift;
387 0         0 my $bitvec = $self->[0];
388             # The anonymous subroutine that follows is a closure over the variables
389             # incorporated in it:
390             return sub {
391 0 0   0   0 if ($self->[1] >= $bitvec->{size}) {
392 0         0 delete $bitvec->{_iter_called};
393 0         0 return;
394             }
395 0         0 my $bit = $bitvec->get_bit($self->[1]);
396 0         0 $self->[1] = $self->[1] + 1;
397 0         0 return $bit;
398             }
399 0         0 }
400             }
401              
402             # for the overloading of the numification operator:
403             sub _int {
404 1     1   6 my $self = shift;
405 1         4 return $self->int_value();
406             }
407              
408             ## Divides an even-sized bitvector into two and returns the two halves as a
409             ## list of two bitvectors.
410             sub divide_into_two {
411 0     0 1 0 my $self = shift;
412 0 0       0 die "Abort: The divide_into_two() method invoked on an object that is " .
413             "not of type Algorithm::BitVector"
414             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
415 0 0       0 die "The bitvector to be divided must have even number of bits: $!"
416             if $self->{size} % 2;
417 0         0 my @outlist1 = ();
418 0         0 foreach my $i (0..$self->{size} / 2 - 1) {
419 0         0 push @outlist1, $self->get_bit($i);
420             }
421 0         0 my @outlist2 = ();
422 0         0 foreach my $i ( ($self->{size} / 2) .. ($self->{size} - 1) ) {
423 0         0 push @outlist2, $self->get_bit($i);
424             }
425 0         0 return Algorithm::BitVector->new( bitlist => \@outlist1 ),
426             Algorithm::BitVector->new( bitlist => \@outlist2 );
427             }
428              
429             ## Permute a bitvector according to the indices shown in the second argument list.
430             ## Return the permuted bitvector as a new bitvector.
431             sub permute {
432 0     0 1 0 my $self = shift;
433 0 0       0 die "Abort: The permute() method invoked on an object that is " .
434             "not of type Algorithm::BitVector"
435             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
436 0         0 my $permute_list = shift;
437 0 0       0 die "Bad permutation index in your permutation list"
438             if max(@$permute_list) > $self->{size} - 1;
439 0         0 my @outlist = ();
440 0         0 foreach my $index (@$permute_list) {
441 0         0 push @outlist, $self->get_bit($index);
442             }
443 0         0 return Algorithm::BitVector->new( bitlist => \@outlist );
444             }
445              
446             ## Unpermute the bitvector according to the permutation list supplied as the
447             ## second argument. If you first permute a bitvector by using permute() and
448             ## then unpermute() it using the same permutation list, you will get back the
449             ## original bitvector.
450             sub unpermute {
451 0     0 1 0 my $self = shift;
452 0 0       0 die "Abort: The unpermute() method invoked on an object that is " .
453             "not of type Algorithm::BitVector"
454             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
455 0         0 my $permute_list = shift;
456 0 0       0 die "Bad permutation index in your permutation list: $!"
457             if max(@$permute_list) > $self->{size} - 1;
458 0 0       0 die "Size of the permute list for unpermuting not consistent with the size of the bet vector:$!"
459             unless $self->{size} == @$permute_list;
460 0         0 my $out_bv = Algorithm::BitVector->new( size => $self->{size} );
461 0         0 foreach my $i (0..@$permute_list-1) {
462 0         0 $out_bv->set_bit( $permute_list->[$i], $self->get_bit($i) );
463             }
464 0         0 return $out_bv;
465             }
466              
467             ## Write the bitvector to the file object file_out. (A file object is returned
468             ## by a call to open()). Since all file I/O is byte oriented, the bitvector must
469             ## be multiple of 8 bits. Each byte treated as MSB first (0th index).
470             sub write_to_file {
471 0     0 1 0 my $self = shift;
472 0 0       0 die "Abort: The write_to_file() method invoked on an object that is " .
473             "not of type Algorithm::BitVector"
474             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
475 0         0 my $file_out = shift;
476 0         0 my $err_str = "Only a bitvector whose length is a multiple of 8 can " .
477             "be written to a file. Use the padding functions to satisfy " .
478             "this constraint.";
479 0 0       0 $self->{FILEOUT} = $file_out unless $self->{FILEOUT};
480 0 0       0 die $err_str if $self->{size} % 8;
481 0         0 for my $byte (0..$self->{size}/8 - 1){
482 0         0 my $value = 0;
483 0         0 foreach my $bit (0..7) {
484 0         0 $value += $self->get_bit( $byte*8+(7 - $bit) ) << $bit;
485             }
486 0         0 syswrite $file_out, chr($value), 1;
487             }
488             }
489              
490             ## For closing a file object that was used for reading the bits into one or more
491             ## BitVector objects.
492             sub close_file_handle {
493 0     0 1 0 my $self = shift;
494 0 0       0 die "Abort: The close_file_handle() method invoked on an object that is " .
495             "not of type Algorithm::BitVector"
496             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
497 0 0       0 die "No open file currently associated with the file handle: $!"
498             unless $self->{FILEIN};
499 0         0 close $self->{FILEIN};
500             }
501              
502             ## Return the integer value of a bitvector. If the original integer from which the
503             ## bitvector was constructed is a Math::BigInt object, then return the string
504             ## representation of the integer value.
505             sub int_value {
506 1     1 1 1 my $self = shift;
507 1 50       6 die "Abort: The int() method invoked on an object that is " .
508             "not of type Algorithm::BitVector"
509             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
510 1         2 my $int_value;
511 1 50 33     7 if (defined($self->{intVal}) && (ref($self->{intVal}) eq 'Math::BigInt')) {
512 0         0 $int_value = $self->{intVal}->bstr();
513             } else {
514 1         1 $int_value = 0;
515 1         4 foreach my $i (0..$self->{size}-1) {
516 4         7 $int_value += $self->get_bit($i) * ( 2 ** ( $self->{size} - $i - 1 ) );
517             }
518             }
519 1         7 return $int_value;
520             }
521              
522             ## Return the text string formed by dividing the bitvector into bytes from the
523             ## left and replacing each byte by its ASCII character (this is a useful thing
524             ## to do only if the length of the vector is an integral multiple of 8 and every
525             ## byte in your bitvector has a print representation)
526             sub get_text_from_bitvector {
527 1     1 0 7 my $self = shift;
528 1 50       6 die "Abort: The get_text_from_bitvector() method invoked on an object that is " .
529             "not of type Algorithm::BitVector"
530             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
531 1 50       10 die "\nThe bitvector for get_text_from_bitvector() must be an integral multiple of 8 bits: $!"
532             if $self->{size} % 8;
533 1         2 my $ascii = '';
534 1         5 for (my $i=0; $i < $self->{size}; $i += 8) {
535 5         4 $ascii .= chr oct "0b". join '', @{$self->get_bit([$i..$i+7])};
  5         15  
536             }
537 1         3 return $ascii;
538             }
539              
540             ## Return a string of hex digits by scanning the bits from the left and
541             ## replacing each sequence of 4 bits by its corresponding hex digit (this is a
542             ## useful thing to do only if the length of the vector is an integral multiple
543             ## of 4)
544             sub get_hex_string_from_bitvector {
545 0     0 1 0 my $self = shift;
546 0 0       0 die "Abort: The get_hex_from_bitvector() method invoked on an object that is " .
547             "not of type Algorithm::BitVector"
548             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
549 0 0       0 die "\nThe bitvector for get_hex_from_bitvector() must be a multiple of 4 bits: $!"
550             if $self->{size} % 4;
551 0         0 my $hex = '';
552 0         0 for (my $i=0; $i < $self->{size}; $i += 4) {
553 0         0 $hex .= sprintf "%x", oct "0b". join '', @{$self->get_bit([$i..$i+3])};
  0         0  
554             }
555 0         0 return $hex;
556             }
557              
558             ## Read blocksize bits from a disk file and return a BitVector object containing
559             ## the bits. If the file contains fewer bits than blocksize, construct the
560             ## BitVector object from however many bits there are in the file. If the file
561             ## contains zero bits, return a BitVector object of size attribute set to 0.
562             sub read_bits_from_file {
563 0     0 1 0 my $self = shift;
564 0 0       0 die "Abort: The read_bits_from_file() method invoked on an object that is " .
565             "not of type Algorithm::BitVector"
566             unless UNIVERSAL::isa( $self, 'Algorithm::BitVector');
567 0         0 my $blocksize = shift;
568 0         0 my $error_str = "You need to first construct a BitVector object with a filename as argument";
569 0 0       0 die "$error_str" unless $self->{filename};
570 0 0       0 die "block size must be a multiple of 8" if $blocksize % 8;
571 0         0 my $bitstr = _readblock( $blocksize, $self );
572 0 0       0 if (length $bitstr == 0) {
573 0         0 return Algorithm::BitVector->new( size => 0 );
574 0         0 print "file has no bits\n";
575             } else {
576 0         0 return Algorithm::BitVector->new( bitstring => $bitstr );
577             }
578             }
579              
580             ## For an in-place left circular shift by n bit positions
581             sub _lshift {
582 0     0   0 my $self = shift;
583 0         0 my $n = shift;
584 0 0       0 die "Circular shift of an empty vector makes no sense" unless $self->{size};
585 0 0       0 return $self >> abs($n) if $n < 0;
586 0         0 foreach my $i (0..$n-1) {
587 0         0 $self->_circular_rotate_left_by_one();
588             }
589 0         0 return $self;
590             }
591              
592             ## For an in-place right circular shift by n bit positions
593             sub _rshift {
594 0     0   0 my $self = shift;
595 0         0 my $n = shift;
596 0 0       0 die "Circular shift of an empty vector makes no sense" unless $self->{size};
597 0 0       0 return $self << abs($n) if $n < 0;
598 0         0 foreach my $i (0..$n-1) {
599 0         0 $self->_circular_rotate_right_by_one();
600             }
601 0         0 return $self;
602             }
603              
604             ## For a one-bit in-place left circular shift
605             sub _circular_rotate_left_by_one {
606 0     0   0 my $self = shift;
607 0         0 my $max_index = int( ($self->{size} - 1) / 16 );
608 0         0 my $left_most_bit = $self->{_vector}->[0] & 1;
609 0         0 $self->{_vector}->[0] = $self->{_vector}->[0] >> 1;
610 0         0 foreach my $i (1 .. $max_index) {
611 0         0 my $left_bit = $self->{_vector}->[$i] & 1;
612 0         0 $self->{_vector}->[$i] = $self->{_vector}->[$i] >> 1;
613 0         0 $self->{_vector}->[$i-1] |= $left_bit << 15;
614             }
615 0         0 $self->set_bit($self->{size} - 1, $left_most_bit);
616             }
617              
618             ## For a one-bit in-place right circular shift
619             sub _circular_rotate_right_by_one {
620 0     0   0 my $self = shift;
621 0         0 my $max_index = int( ($self->{size} - 1) / 16 );
622 0         0 my $right_most_bit = $self->get_bit( $self->{size} - 1);
623 0         0 $self->{_vector}->[$max_index] &= ~0x8000;
624 0         0 $self->{_vector}->[$max_index] = $self->{_vector}->[$max_index] << 1;
625 0         0 for (my $i=$max_index-1; $i > -1; $i -= 1) {
626 0         0 my $right_bit = $self->{_vector}->[$i] & 0x8000;
627 0         0 $self->{_vector}->[$i] &= ~0x8000;
628 0         0 $self->{_vector}->[$i] = $self->{_vector}->[$i] << 1;
629 0         0 $self->{_vector}->[$i+1] |= $right_bit >> 15;
630             }
631 0         0 $self->set_bit(0, $right_most_bit);
632             }
633              
634             ## Pad a bitvector with n zeros from the left
635             sub pad_from_left {
636 0     0 1 0 my $self = shift;
637 0         0 my $n = shift;
638 0         0 my $new_str = ('0' x $n) . "$self";
639 0         0 my $new_bitvec = Algorithm::BitVector->new( bitstring => $new_str );
640 0         0 $self->{size} = length $new_str;
641 0         0 $self->{_vector} = $new_bitvec->{_vector};
642 0         0 return $self;
643             }
644              
645             ## Pad a bitvector with n zeros from the right
646             sub pad_from_right {
647 0     0 1 0 my $self = shift;
648 0         0 my $n = shift;
649 0         0 my $new_str = "$self" . ('0' x $n);
650 0         0 my $new_bitvec = Algorithm::BitVector->new( bitstring => $new_str );
651 0         0 $self->{size} = length $new_str;
652 0         0 $self->{_vector} = $new_bitvec->{_vector};
653 0         0 return $self;
654             }
655              
656             ## Resets a previously created BitVector to either all zeros or all ones
657             ## depending on the argument val.
658             sub reset {
659 0     0 1 0 my $self = shift;
660 0         0 my $val = shift;
661 0 0 0     0 die "Incorrect argument to reset(): $!" unless ($val == 0) or ($val == 1);
662 0         0 my $reset_bv = Algorithm::BitVector->new( bitstring => ("$val" x $self->{size}) );
663 0         0 $self->{_vector} = $reset_bv->{_vector};
664             }
665              
666             ## Return the number of bits set in a BitVector instance.
667             sub count_bits {
668 0     0 1 0 my $self = shift;
669 0     0   0 return reduce {$a + $b} split //, "$self";
  0         0  
670             }
671              
672             ## Changes the bit pattern associated with a previously constructed BitVector
673             ## instance. The allowable modes for changing the internally stored bit pattern
674             ## are the same as for the constructor.
675             sub set_value {
676 0     0 1 0 my $self = shift;
677 0         0 my $reset_bv = Algorithm::BitVector->new( @_ );
678 0         0 $self->{_vector} = $reset_bv->{_vector};
679 0         0 $self->{size} = $reset_bv->{size};
680             }
681              
682              
683             ## This method for counting the set bits is much faster for sparse bitvectors.
684             ## Note, however, that count_bits() may work much faster for dense-packed
685             ## bitvectors.
686             sub count_bits_sparse {
687 0     0 1 0 my $self = shift;
688 0         0 my $num = 0;
689 0         0 foreach my $intval (@{$self->{_vector}}) {
  0         0  
690 0 0       0 next if $intval == 0;
691 0         0 my ($c, $iv) = (0, $intval);
692 0         0 while ($iv > 0) {
693 0         0 $iv = $iv & ($iv - 1);
694 0         0 $c++;
695             }
696 0         0 $num += $c;
697             }
698 0         0 return $num;
699             }
700              
701             ## Computes the Jaccard similarity coefficient between two bitvectors
702             sub jaccard_similarity {
703 0     0 1 0 my $self = shift;
704 0         0 my $other = shift;
705 0 0 0     0 die "Jaccard called on two zero vectors --- NOT ALLOWED: $!"
706             if (int($self) == 0) && (int($other) == 0);
707 0 0       0 die "Jaccard called on vectors of unequal size --- NOT ALLOWED: $!"
708             if $self->{size} != $other->{size};
709 0         0 my $intersection = $self & $other;
710 0         0 my $union = $self | $other;
711 0         0 return $intersection->count_bits_sparse() / $union->count_bits_sparse();
712             }
713              
714             ## Computes the Jaccard distance between two bitvectors
715             sub jaccard_distance {
716 0     0 1 0 my $self = shift;
717 0         0 my $other = shift;
718 0 0       0 die "Jaccard called on vectors of unequal size --- NOT ALLOWED: $!"
719             if $self->{size} != $other->{size};
720 0         0 return 1 - $self->jaccard_similarity( $other );
721             }
722              
723             ## Computes the Hamming distance between two bitvectors
724             sub hamming_distance {
725 0     0 1 0 my $self = shift;
726 0         0 my $other = shift;
727 0 0       0 die "hamming_distance() called on vectors of unequal size --- NOT ALLOWED: $!"
728             if $self->{size} != $other->{size};
729 0         0 my $diff = $self ^ $other;
730 0         0 return $diff->count_bits_sparse();
731             }
732              
733             ## This method calculates the position of the next set bit at or after the current
734             ## position index. It returns -1 if there is no next set bit. Logic based on the
735             ## contributions by Jason Allum and John Gleeson to the Python version of this
736             ## module.
737             sub next_set_bit {
738 0     0 1 0 my $self = shift;
739 0   0     0 my $from_index = shift || 0;
740 0 0       0 die "The from_index must be nonnegative: $!" unless $from_index >= 0;
741 0         0 my $i = $from_index;
742 0         0 my $v = $self->{_vector};
743 0         0 my $l = scalar @$v;
744 0         0 my $o = $i >> 4;
745 0         0 my $s = $i & 0x0F;
746 0         0 $i = $o << 4;
747 0         0 while ($o < $l) {
748 0         0 my $h = $v->[$o];
749 0 0       0 if ($h) {
750 0         0 $i += $s;
751 0         0 my $m = 1 << $s;
752 0         0 while ($m != (1 << 0x10)) {
753 0 0       0 return $i if $h & $m;
754 0         0 $m <<= 1;
755 0         0 $i += 1;
756             }
757             } else {
758 0         0 $i += 0x10;
759             }
760 0         0 $s = 0;
761 0         0 $o += 1;
762             }
763 0         0 return -1;
764             }
765              
766             ## For a bit that is set at the argument 'position', this method returns how many
767             ## bits are set to the left of that bit. For example, in the bit pattern
768             ## 000101100100, a call to this method with position set to 9 will return 4.
769             sub rank_of_bit_set_at_index {
770 0     0 1 0 my $self = shift;
771 0         0 my $position = shift;
772 0 0       0 die "The bitvector has no bits set at the position in the call to rank_of_bit_set_at_index(): $!"
773             unless $self->get_bit($position);
774 0         0 my $bv_slice = Algorithm::BitVector->new( bitlist => $self->get_bit([0..$position]) );
775 0         0 return $bv_slice->count_bits();
776             }
777              
778             ## Determines whether the integer value of a bitvector is a power of 2.
779             sub is_power_of_2 {
780 0     0 1 0 my $self = shift;
781 0 0       0 return 0 if int($self) == 0;
782 0         0 my $bv = $self & Algorithm::BitVector->new( intVal => int($self) - 1);
783 0 0       0 return 1 if int($bv) == 0;
784 0         0 return 0;
785             }
786              
787             ## Faster version of is_power_of2() for sparse bitvectors
788             sub is_power_of_2_sparse {
789 0     0 1 0 my $self = shift;
790 0 0       0 return 1 if $self->count_bits_sparse() == 1;
791 0         0 return 0;
792             }
793              
794             ## Returns a new bitvector by reversing the bits in the bitvector on which the
795             ## method is invoked.
796             sub reverse {
797 0     0 1 0 my $self = shift;
798 0         0 my @reverseList = ();
799 0         0 for (my $i=1; $i < $self->{size} + 1; $i++) {
800 0         0 push @reverseList, $self->get_bit( -$i );
801             }
802 0         0 return Algorithm::BitVector->new( bitlist => \@reverseList );
803             }
804              
805             ## Using Euclid's Algorithm, returns the greatest common divisor of the integer
806             ## value of the bitvector on which the method is invoked and the integer value
807             ## of the argument bitvector.
808             sub gcd {
809 0     0 1 0 my $self = shift;
810 0         0 my $other = shift;
811 0         0 my ($a,$b) = (int($self), int($other));
812 0 0       0 ($a,$b) = ($b,$a) if $a < $b;
813 0         0 while ($b != 0) {
814 0         0 ($a, $b) = ($b, $a % $b);
815             }
816 0         0 return Algorithm::BitVector->new( intVal => $a );
817             }
818              
819             ## Calculates the multiplicative inverse of the bitvector on which the method is
820             ## invoked modulo the bitvector that is supplied as the argument. Code based on
821             ## Extended Euclid's Algorithm.
822             sub multiplicative_inverse {
823 0     0 1 0 my $self = int( shift );
824 0         0 my $modulus = int( shift );
825 0         0 my ($mod, $num) = ($modulus, $self);
826 0         0 my ($x, $x_old) = (0, 1);
827 0         0 my ($y, $y_old) = (1, 0);
828 0         0 while ($mod) {
829 0         0 my $quotient = int($num / $mod);
830 0         0 ($num, $mod) = ($mod, $num % $mod);
831 0         0 ($x, $x_old) = ($x_old - $x * $quotient, $x);
832 0         0 ($y, $y_old) = ($y_old - $y * $quotient, $y);
833             }
834 0 0       0 if ($num != 1) {
835 0         0 return 0;
836             } else {
837 0         0 my $MI = ($x_old + $modulus) % $modulus;
838 0         0 return Algorithm::BitVector->new( intVal => $MI );
839             }
840             }
841              
842             ## For a one-bit in-place left non-circular shift. Note that the bitvector size
843             ## does not change. The leftmost bit that moves past the first element of the
844             ## bitvector is discarded and rightmost bit of the returned vector is set to zero.
845             sub _shift_left_by_one {
846 0     0   0 my $self = shift;
847 0         0 my $size = @{$self->{_vector}};
  0         0  
848 0         0 my @left_most_bits = map {$_ & 1} @{$self->{_vector}};
  0         0  
  0         0  
849 0         0 push @left_most_bits, $left_most_bits[0];
850 0         0 shift @left_most_bits;
851 0         0 @{$self->{_vector}} = map {$_ >> 1} @{$self->{_vector}};
  0         0  
  0         0  
  0         0  
852 0         0 my @a = @{$self->{_vector}};
  0         0  
853 0         0 my @b = map {$_ << 15} @left_most_bits;
  0         0  
854 0         0 my @interleaved = ( @a, @b )[ map { $_, $_ + @a } ( 0 .. $#a ) ];
  0         0  
855 0     0   0 @{$self->{_vector}} = pairmap {$a | $b} @interleaved;
  0         0  
  0         0  
856 0         0 $self->set_bit(-1,0);
857             }
858              
859             ## For an in-place left non-circular shift by n bit positions. The bits shifted
860             ## out at the left are discarded. The new bit positions on the right are filled with
861             ## zeros
862             sub shift_left {
863 0     0 1 0 my $self = shift;
864 0         0 my $n = shift;
865 0         0 foreach my $i (0..$n-1) {
866 0         0 $self->_shift_left_by_one();
867             }
868 0         0 return $self;
869             }
870              
871             ## For a one-bit in-place right non-circular shift. Note that bitvector size does
872             ## not change. The rightmost bit that moves past the last element of the bitvector
873             ## on the right is discarded and the leftmost bit of the returned vector is set to
874             ## zero.
875             sub _shift_right_by_one {
876 0     0   0 my $self = shift;
877 0         0 my $size = @{$self->{_vector}};
  0         0  
878 0         0 my @right_most_bits = map {$_ & 0x8000} @{$self->{_vector}};
  0         0  
  0         0  
879 0         0 @{$self->{_vector}} = map { $_ & ~0x8000 } @{$self->{_vector}};
  0         0  
  0         0  
  0         0  
880 0         0 unshift @right_most_bits, 0;
881 0         0 pop @right_most_bits;
882 0         0 @{$self->{_vector}} = map {$_ << 1} @{$self->{_vector}};
  0         0  
  0         0  
  0         0  
883 0         0 my @a = @{$self->{_vector}};
  0         0  
884 0         0 my @b = map {$_ >> 15} @right_most_bits;
  0         0  
885 0         0 my @interleaved = ( @a, @b )[ map { $_, $_ + @a } ( 0 .. $#a ) ];
  0         0  
886 0     0   0 @{$self->{_vector}} = pairmap {$a | $b} @interleaved;
  0         0  
  0         0  
887 0         0 $self->set_bit(0,0);
888             }
889              
890             ## For an in-place right non-circular shift by n bit positions. The n bits that
891             ## move past the last element of the bitvector on the right are discarded and
892             ## leftmost new n bit positions filled with zeros.
893             sub shift_right {
894 0     0 1 0 my $self = shift;
895 0         0 my $n = shift;
896 0         0 foreach my $i (0..$n-1) {
897 0         0 $self->_shift_right_by_one();
898             }
899 0         0 return $self;
900             }
901              
902             ## In the set of polynomials defined over GF(2), multiplies the bitvector on which
903             ## the method is invoked with the argument bitvector. Returns the product
904             ## bitvector. Note that this method carries out straight polynomial multiplication
905             ## as opposed to modulo polynomial multiplication. As a result, the highest power of
906             ## the result polynomial will ALWAYS be greater than the highest powers of the
907             ## argument polynomials for non-trivial multiplications.
908             sub gf_multiply {
909 0     0 1 0 my $self = shift;
910 0         0 my $b = shift;
911 0         0 my $a = $self->deep_copy();
912 0         0 my $b_copy = $b->deep_copy();
913 0         0 my $a_highest_power = $a->{size} - $a->next_set_bit(0) - 1;
914 0         0 my $b_highest_power = $b->{size} - $b_copy->next_set_bit(0) - 1;
915 0         0 my $result = Algorithm::BitVector->new( size => $a->{size} + $b_copy->{size} );
916 0         0 $a->pad_from_left( $result->{size} - $a->{size} );
917 0         0 $b_copy->pad_from_left( $result->{size} - $b_copy->{size} );
918 0         0 my @b_list = split //, "$b_copy";
919 0         0 my @enum = (0..@b_list-1, @b_list) [map {$_, $_+ @b_list} (0 .. @b_list - 1)];
  0         0  
920 0         0 for (my $i=0; $i < @enum-1; $i = $i + 2) {
921 0 0       0 if ($enum[$i+1]) {
922 0         0 my $power = $b_copy->{size} - $enum[$i] - 1;
923 0         0 my $a_copy = $a->deep_copy();
924 0         0 $a_copy->shift_left( $power );
925 0         0 $result ^= $a_copy;
926             }
927             }
928 0         0 return $result;
929             }
930              
931             ## Carries out modular division of a bitvector by the modulus bitvector mod in
932             ## GF(2^n) finite field. Returns both the quotient and the remainder.
933             sub gf_divide_by_modulus {
934 0     0 1 0 my $num = shift;
935 0         0 my $mod = shift;
936 0         0 my $n = shift;
937 0 0       0 die "Modulus bit pattern too long" if $mod->{size} > $n + 1;
938 0         0 my $quotient = Algorithm::BitVector->new( intVal => 0, size => $num->{size} );
939 0         0 my $remainder = $num->deep_copy();
940 0         0 for (my $i=0; $i < $num->{size}; $i++) {
941 0         0 my $mod_highest_power = $mod->{size} - $mod->next_set_bit(0) - 1;
942 0         0 my $remainder_highest_power;
943 0 0       0 if ($remainder->next_set_bit(0) == -1) {
944 0         0 $remainder_highest_power = 0;
945             } else {
946 0         0 $remainder_highest_power = $remainder->{size} - $remainder->next_set_bit(0) - 1;
947             }
948 0 0 0     0 if (($remainder_highest_power < $mod_highest_power) or (int($remainder) == 0)) {
949 0         0 last;
950             } else {
951 0         0 my $exponent_shift = $remainder_highest_power - $mod_highest_power;
952 0         0 $quotient->set_bit($quotient->{size} - $exponent_shift - 1, 1);
953 0         0 my $quotient_mod_product = $mod->deep_copy();
954 0         0 $quotient_mod_product->pad_from_left($remainder->{size} - $mod->{size});
955 0         0 $quotient_mod_product->shift_left($exponent_shift);
956 0         0 $remainder ^= $quotient_mod_product;
957             }
958             }
959 0 0       0 if ($remainder->{size} > $n) {
960 0         0 $remainder = Algorithm::BitVector->new(
961             bitlist => $remainder->get_bit([$remainder->{size}-$n .. $remainder->{size}-1]));
962             }
963 0         0 return ($quotient, $remainder);
964             }
965              
966             ## Multiplies a bitvector with the bitvector b in GF(2^n) finite field with the
967             ## modulus bit pattern set to mod
968             sub gf_multiply_modular {
969 0     0 1 0 my $self = shift;
970 0         0 my $b = shift;
971 0         0 my $mod = shift;
972 0         0 my $n = shift;
973 0         0 my $a_copy = $self->deep_copy();
974 0         0 my $b_copy = $b->deep_copy();
975 0         0 my $product = $a_copy->gf_multiply($b_copy);
976 0         0 my ($quotient, $remainder) = $product->gf_divide_by_modulus($mod, $n);
977 0         0 return $remainder
978             }
979              
980             ## Returns the multiplicative inverse of a vector in the GF(2^n) finite field
981             ## with the modulus polynomial set to mod
982             sub gf_MI {
983 0     0 1 0 my $num = shift;
984 0         0 my $mod = shift;
985 0         0 my $n = shift;
986 0         0 my ($NUM, $MOD) = ($num->deep_copy(), $mod->deep_copy());
987 0         0 my $x = Algorithm::BitVector->new( size => $mod->{size} );
988 0         0 my $x_old = Algorithm::BitVector->new( intVal => 1, size => $mod->{size} );
989 0         0 my $y = Algorithm::BitVector->new( intVal => 1, size => $mod->{size} );
990 0         0 my $y_old = Algorithm::BitVector->new( size => $mod->{size} );
991 0         0 my ($quotient, $remainder);
992 0         0 while (int($mod)) {
993 0         0 ($quotient, $remainder) = $num->gf_divide_by_modulus($mod, $n);
994 0         0 ($num, $mod) = ($mod, $remainder);
995 0         0 ($x, $x_old) = ($x_old ^ $quotient->gf_multiply($x), $x);
996 0         0 ($y, $y_old) = ($y_old ^ $quotient->gf_multiply($y), $y);
997             }
998 0 0       0 if (int($num) != 1) {
999 0         0 return "NO MI. However, the GCD of $NUM and $MOD is $num\n";
1000             } else {
1001 0         0 my $z = $x_old ^ $MOD;
1002 0         0 ($quotient, $remainder) = $z->gf_divide_by_modulus($MOD, $n);
1003 0         0 return $remainder;
1004             }
1005             }
1006              
1007             ## Returns a list of the consecutive runs of 1's and 0's in the bitvector.
1008             ## Each run is either a string of all 1's or a string of all 0's.
1009             sub runs {
1010 0     0 1 0 my $self = shift;
1011 0 0       0 die "An empty vector has no runs" if $self->{size} == 0;
1012 0         0 my @allruns = ();
1013 0         0 my $run = '';
1014 0         0 my $previous_bit = $self->get_bit(0);
1015 0 0       0 if ($previous_bit == 0) {
1016 0         0 $run = '0';
1017             } else {
1018 0         0 $run = '1';
1019             }
1020 0         0 my @bitlist = split //, "$self";
1021 0         0 shift @bitlist;
1022 0         0 foreach my $bit (@bitlist) {
1023 0 0 0     0 if (($bit == 0) && ($previous_bit == 0)) {
    0 0        
    0 0        
1024 0         0 $run .= '0';
1025             } elsif (($bit == 1) && ($previous_bit == 0)) {
1026 0         0 push @allruns, $run;
1027 0         0 $run = '1';
1028             } elsif (($bit == 0) && ($previous_bit == 1)) {
1029 0         0 push @allruns, $run;
1030 0         0 $run = '0';
1031             } else {
1032 0         0 $run .= '1';
1033             }
1034 0         0 $previous_bit = $bit;
1035             }
1036 0         0 push @allruns, $run;
1037             return @allruns
1038 0         0 }
1039              
1040             ## Check if the integer value of the bitvector is a prime through the Miller-Rabin
1041             ## probabilistic test of primality. If not found to be a composite, estimate the
1042             ## probability of the bitvector being a prime using this test.
1043             sub test_for_primality {
1044 0     0 1 0 my $p = int(shift);
1045 0 0       0 die "\nThe primality test method test_for_primality() is intended for only " .
1046             "small integers --- integers that are relatively small in relation to " .
1047             "the largest integer that can fit Perl's 4-byte int representation:$!"
1048             if $p > 0x7f_ff_ff_ff;
1049 0 0       0 return "is NOT a prime" if $p == 1;
1050 0         0 my @probes = (2,3,5,7,11,13,17);
1051 0         0 foreach my $a (@probes) {
1052 0 0       0 return "is a prime" if $a == $p;
1053             }
1054 0 0   0   0 return "is NOT a prime" if any {$p % $_ == 0} @probes;
  0         0  
1055 0         0 my ($k, $q) = (0, $p-1);
1056 0         0 while (! ($q & 1)) {
1057 0         0 $q >>= 1;
1058 0         0 $k++;
1059             }
1060 0         0 foreach my $a (@probes) {
1061 0         0 my $a_raised_to_q = _powmod_small_ints($a, int($q), $p);
1062 0 0 0     0 next if $a_raised_to_q == 1 or $a_raised_to_q == $p - 1;
1063 0         0 my $a_raised_to_jq = $a_raised_to_q;
1064 0         0 my $primeflag = 0;
1065 0         0 foreach my $j (0..$k-1) {
1066 0         0 $a_raised_to_jq = _powmod_small_ints($a_raised_to_jq, 2, $p);
1067 0 0       0 if ($a_raised_to_jq == $p-1) {
1068 0         0 $primeflag = 1;
1069 0         0 last;
1070             }
1071             }
1072 0 0       0 return "is NOT a prime" unless $primeflag;
1073             }
1074 0         0 my $probability_of_prime = 1 - 1.0/(4 ** @probes);
1075 0         0 return "is a prime with probability $probability_of_prime";
1076             }
1077              
1078             ## This routine is for modular exponentiation of small integers, meaning the
1079             ## integers that can be accommodated (before and after exponentiation) in the native
1080             ## 4-byte int representation. For larger integers, a call to this function should
1081             ## be replaced by a call to modular exponentiation of the Math::BigInt module.
1082             sub _powmod_small_ints {
1083 0     0   0 my $base = shift;
1084 0         0 my $exponent = shift;
1085 0         0 my $mod = int(shift);
1086 0     0   0 warn "This is just a warning: The modular exponentiation method is not meant for very large numbers (IMPORTANT: This is just a very rough check on the size of the numbers involved)"
1087 0 0       0 if any {$_ > 0x0f_ff_ff_ff} (int($base), int($exponent), int($mod));
1088 0         0 my $result = 1;
1089 0         0 my $a = int($base);
1090 0         0 my $b = $exponent;
1091 0         0 while (int($b) > 0) {
1092 0 0       0 $result = ($result * $a) % $mod if int($b) & 1;
1093 0         0 $b = $b >> 1;
1094 0         0 $a = ($a * $a) % $mod;
1095             }
1096 0         0 return $result;
1097             }
1098              
1099             ## This method is for a generating a bit pattern of a given size with random bits.
1100             sub gen_random_bits {
1101 0     0 1 0 my $self = shift;
1102 0         0 my $width = shift;
1103 0         0 my $candidate_bits = "0b" . join '', Math::Random::random_uniform_integer($width, 0, 1);
1104 0         0 my $candidate = oct $candidate_bits;
1105 0         0 $candidate |= 1;
1106 0         0 $candidate |= (1 << $width-1);
1107 0         0 $candidate |= (2 << $width-3);
1108 0         0 return Algorithm::BitVector->new( intVal => $candidate );
1109             }
1110              
1111             sub length {
1112 0     0 1 0 my $self = shift;
1113 0         0 return $self->{size};
1114             }
1115              
1116             sub _check_for_illegal_params {
1117 3     3   4 my @params = @_;
1118 3         6 my @legal_params = qw / filename
1119             size
1120             intVal
1121             bitlist
1122             bitstring
1123             hexstring
1124             textstring
1125             /;
1126 3         4 my $found_match_flag;
1127 3         5 foreach my $param (@params) {
1128 3         4 foreach my $legal (@legal_params) {
1129 12         10 $found_match_flag = 0;
1130 12 100       18 if ($param eq $legal) {
1131 3         3 $found_match_flag = 1;
1132 3         4 last;
1133             }
1134             }
1135 3 50       9 last if $found_match_flag == 0;
1136             }
1137 3         10 return $found_match_flag;
1138             }
1139              
1140             1;
1141              
1142              
1143             =pod
1144              
1145             =head1 NAME
1146              
1147             Algorithm::BitVector --- A memory efficient packed representation of arbitrary sized
1148             bit arrays and for logical and arithmetic operations on such arrays.
1149              
1150             =head1 SYNOPSIS
1151              
1152             use Algorithm::BitVector;
1153              
1154             # Constructing a given sized bitvector of all zeros:
1155             $bv = Algorithm::BitVector->new( size => 7 );
1156             print "$bv\n"; # 0000000
1157              
1158             # Constructing a bitvector whose integer value is specified:
1159             $bv = Algorithm::BitVector->new( intVal => 123456 );
1160             print "$bv\n"; # 11110001001000000
1161             print int($bv); # 123456
1162              
1163             # Constructing a bitvector from a very large integer:
1164             use Math::BigInt;
1165             $x = Math::BigInt->new('12345678901234567890123456789012345678901234567890');
1166             $bv = Algorithm::BitVector->new( intVal => $x );
1167            
1168             # Constructing a bitvector from a given bit string:
1169             $bv = Algorithm::BitVector->new( bitstring => '00110011' );
1170            
1171             # Constructing a bitvector from an ASCII text string:
1172             $bv = Algorithm::BitVector->new( textstring => "hello\njello" );
1173              
1174             # Constructing a bitvector from a hex string:
1175             $bv = Algorithm::BitVector->new( hexstring => "68656c6c6f" );
1176              
1177             # Constructing a bitvector from a bit list passed as an anonymous array:
1178             $bv = Algorithm::BitVector->new( bitlist => [1, 1, 0, 1] );
1179              
1180             # Constructing a bitvector from the contents of a disk file:
1181             $bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1182             $bv1 = $bv->read_bits_from_file(64); # bitvector from the first 64 bits
1183             # and so on
1184              
1185              
1186             =head1 CHANGES
1187              
1188             Version 1.23 mentions the required modules in the C file but with no
1189             minimum version numbers. Additionally, the documentation associated with the methods
1190             was significantly upgraded in this version.
1191              
1192             Version 1.22 removes the Perl version restriction from the module and the
1193             C files and the C restrictions from the C file.
1194              
1195             Version 1.21 fixes a bug in the code for the Miller-Rabin primality test function
1196             C. This version also places a hard limit on the size of the
1197             integers that are allowed to be tested for primality.
1198              
1199             Version 1.2 fixes an important bug in creating bitvectors from the contents of a disk
1200             file. This version also includes corrections for some of the documentation errors
1201             discovered.
1202              
1203             Version 1.1 incorporates additional type checking on the operands for the overloaded
1204             operators. Also fixed some minor documentation formatting issues.
1205              
1206             =head1 DESCRIPTION
1207              
1208             My main motivation for creating this module was to provide the students at Purdue and
1209             elsewhere with a Perl class whose API is the same as that of my Python based
1210             C module that appears to have become popular for prototyping algorithms
1211             for cryptography and hash functions.
1212              
1213             This module stores the bits of a bitvector in 16-bit unsigned shorts. As you can see
1214             in the constructor code for C, after resolving the arguments with which the
1215             constructor is called, the very first thing the constructor does is to figure out how
1216             many of those 2-byte shorts it needs for the bits. That does not imply that the size
1217             of a bit array that is stored in a bitvector must be a multiple of 16. B
1218             can be of any size whatsoever.> The C class keeps track of the
1219             number of bits through its C instance variable.
1220              
1221             Note that, except for one case, the constructor must be called with a single keyword
1222             argument, which determines how the bitvector will be constructed. The single
1223             exception to this rule is for the keyword argument C which you would normally
1224             use for constructing a bitvector from an integer. The additional option you can
1225             supply with C is C. When both C and C are specified in a
1226             constructor call, you get a bitvector of the specified size provided the value
1227             supplied for C is larger than what it takes to accommodate the bits for the
1228             C integer.
1229              
1230             In addition to constructing bitvectors from integers, this module can also construct
1231             bitvectors from bit strings, from ASCII text strings, from hex strings, from a list
1232             of bits, and from the contents of a file. With regards to constructing bitvectors
1233             from integers, the module can construct very large bitvectors from very large
1234             integers stored as C objects.
1235              
1236             =head1 OVERLOADED OPERATORS
1237              
1238             The following C declaration in the module gives the list of the
1239             overloaded operators. Since C is set to 1, several other operators become
1240             overloaded by autogeneration from those shown below. For example, overloading of the
1241             3-way numerical comparison operator C<< <=> >> automatically overloads the C<< < >>,
1242             C<< <= >>, C<< > >>, C<< >= >>, C<< == >>, and C<< != >> operators.
1243              
1244             use overload '+' => '_add',
1245             '""' => '_str',
1246             '0+' => '_int',
1247             '~' => '_invert',
1248             '|' => '_or',
1249             '&' => '_and',
1250             '^' => '_xor',
1251             '<=>' => '_compare',
1252             '<<' => '_lshift',
1253             '>>' => '_rshift',
1254             '<>' => '_iter',
1255             'fallback' => 1;
1256              
1257             It is B to remember that the overloadings for the C<<< << >>> and C<<< >>
1258             >>> operators are for B left and right shifts (their usual meaning as
1259             bitwise operators is for non-circular shifts). This was done because the
1260             applications for which this module is intended is more likely to use circular shifts
1261             of bit fields than non-circular shifts. You can still carry out non-circular shifts
1262             by calling the methods C and C as illustrated elsewhere
1263             in this documentation.
1264              
1265             Another B thing to bear in mind is the overloading of the C<+> operator.
1266             It is B addition. On the other hand, it is a concatenation of the two operand
1267             bitvectors. This was done to keep the usage of this operator the same as in the
1268             Python version of this module.
1269              
1270             By virtue of how the operators are overloaded, you can make the calls listed in the
1271             rest of this section. To illustrate these calls, I will use the following two bit
1272             vectors:
1273              
1274             $bv1 = Algorithm::BitVector->new( bitstring => "111000" );
1275             $bv2 = Algorithm::BitVector->new( bitstring => "000101000" );
1276              
1277             These two bitvectors are intentionally of different lengths to illustrate what role
1278             the size differences play in how the various operators work.
1279              
1280             =over 4
1281              
1282             =item B
1283              
1284             $bv3 = $bv1 + $bv2; # 111000000101000
1285              
1286             The concatenation of two bitvectors is returned as a new bitvector. This is made
1287             possible by the overload definition for the C<+> operator.
1288              
1289             Note that the following also works:
1290              
1291             print $bv1 . "hello"; # 111000hello
1292              
1293             In this case, Perl implicitly converts the left operand of the `.' operator into a
1294             string (which is made possible by the overloading for the stringification operator in
1295             this module) and then returns the result as a string.
1296              
1297             =item B
1298              
1299             print "$bv1"; # 111000
1300              
1301             This is made possible for the overload definition for the C<""> operator.
1302              
1303             =item B
1304              
1305             print int($bv1); # 56
1306              
1307             This is made possible by the overload definition for the C<0+> operator.
1308              
1309             =item B
1310              
1311             $bv3 = ~ $bv1;
1312             print $bv3; # 000111
1313              
1314             This is made possible by the overload definition for the C<~> operator.
1315              
1316             =item B
1317              
1318             $bv3 = $bv1 | $bv2; # 000111000
1319              
1320             When two bitvectors are of unequal length (as is the case here), the shorter vector
1321             is zero-padded on the left to equalize their lengths before the application of the
1322             logical operation. If this auto-padding property is not something you want, you
1323             should check the lengths of the argument bitvectors in your own script before
1324             invoking this operator.
1325              
1326             =item B
1327              
1328             $bv3 = $bv1 & $bv2; # 000101000
1329              
1330             When two bitvectors are of unequal length (as is the case here), the shorter vector
1331             is zero-padded on the left to equalize their lengths before the application of the
1332             logical operation. If this auto-padding property is not something you want, you
1333             should check the lengths of the argument bitvectors in your own script before
1334             invoking this operator.
1335              
1336             =item B
1337              
1338             $bv3 = $bv1 ^ $bv2; # 000010000
1339              
1340             When two bitvectors are of unequal length (as is the case here), the shorter vector
1341             is zero-padded on the left to equalize their lengths before the application of the
1342             logical operation. If this auto-padding property is not something you want, you
1343             should check the lengths of the argument bitvectors in your own script before
1344             invoking this operator.
1345              
1346             =item B
1347              
1348             $bv1 < $bv2 ? print "yes\n" : print "no\n"; # no
1349              
1350             $bv1 > $bv2 ? print "yes\n" : print "no\n"; # yes
1351              
1352             $bv1 <= $bv2 ? print "yes\n" : print "no\n"; # no
1353              
1354             $bv1 >= $bv2 ? print "yes\n" : print "no\n"; # yes
1355              
1356             $bv1 == $bv2 ? print "yes\n" : print "no\n"; # no
1357              
1358             $bv1 != $bv2 ? print "yes\n" : print "no\n"; # yes
1359              
1360             The overload definitions for all these operators are autogenerated from the overload
1361             definition for the 3-way numerical comparison operator C<< <=> >>. B
1362             compared on the basis of their integer values.> That is, C<$bv1> is less than C<$bv2>
1363             is C is less than C.
1364              
1365             =item B
1366              
1367             $n = 3;
1368              
1369             $bv1 << $n; # $bv1 is now 000111
1370             $bv1 >> $n; # $bv1 is now 111000
1371              
1372             Since Perl does not expect these two operators to be invoked in a void context, such
1373             statements in your code will elicit a warning from Perl to that effect. If these
1374             warnings annoy you, you can turn them off by surrounding such statements with C
1375             warnings "void";> and C directives. The other option is to invoke
1376             such statements in the following manner:
1377              
1378             $bv1 = $bv1 << $n;
1379             $bv2 = $bv1 >> $n;
1380              
1381             That works because the overload definitions for these bit shift operators return the
1382             bitvector object on which they are invoked. As it turns out, this also allows for
1383             chained invocation of these operators, as in
1384              
1385             $bv1 = $bv1 << 3 << 1 >> 2; # 100011
1386              
1387             $bv1 = $bv1 << 2 >> 1 >> 3; # 111000
1388              
1389             =item B
1390              
1391             while (<$bv1>) {
1392             print "$_ ";
1393             } # 1 1 1 0 0 0
1394              
1395             This is made possible by the overload definition for the C<< <> >> operator. The
1396             C class includes an inner class C for this
1397             purpose.
1398              
1399             =back
1400              
1401              
1402             =head1 CONSTRUCTING BITVECTORS
1403              
1404             =over 4
1405              
1406             =item B
1407              
1408             $bv = Algorithm::BitVector->new( size => 0 );
1409              
1410             print "$bv\n"; # no output
1411              
1412             =item B
1413              
1414             $bv = Algorithm::BitVector->new( size => 13 ); # 0000000000000
1415              
1416             =item B
1417              
1418             $bv = Algorithm::BitVector->new( intVal => 5006 ); # 1001110001110
1419              
1420             The module returns the smallest possible bitvector that would accommodate the
1421             integer value provided with the C option.
1422              
1423             =item B
1424              
1425             As mentioned, with the C option, you get the smallest possible bitvector
1426             that can be generated from that integer. If you want a I bitvector, you can
1427             also supply the C option as shown below:
1428              
1429             $bv = Algorithm::BitVector->new( intVal => 5006, size => 16 ); # 0001001110001110
1430              
1431             If the value supplied for the C option in such a call is not larger than the
1432             smallest bit array that represents the C value, the constructor will throw an
1433             exception.
1434              
1435             =item B
1436              
1437             use Math::BigInt;
1438             $x = Math::BigInt->new('12345678901234567890123456789012345678901234567890');
1439             $bv = Algorithm::BitVector->new( intVal => $x );
1440             #1000011100100111111101100011011010011010101011111000001111001010000\
1441             #10101000000100110011101000111101011111000110001111111000110010110110\
1442             #01110001111110000101011010010
1443              
1444             =item B
1445              
1446             $bv = Algorithm::BitVector->new( bitstring => '00110011' ); # 00110011
1447              
1448             =item B
1449              
1450             $bv = Algorithm::BitVector->new( textstring => "hello\n" );
1451             # 011010000110010101101100011011000110111100001010
1452              
1453             =item B
1454              
1455             $bv = Algorithm::BitVector->new( hexstring => "68656c6c6f" );
1456             # 0110100001100101011011000110110001101111
1457              
1458             =item B
1459              
1460             $bv = Algorithm::BitVector->new( bitlist => [1, 1, 0, 1] ); # 1101
1461              
1462             =item B
1463              
1464             $bv = Algorithm::BitVector->new( filename => 'testinput1.txt' );
1465              
1466             print "$bv\n"; # Nothing to show yet
1467              
1468             $bv1 = $bv->read_bits_from_file(64); # Now you have a bitvector from the
1469             # first 64 bits
1470              
1471             Note that it takes two calls to create bitvectors from the contents of a file. The
1472             first merely creates an empty bitvector just to set the necessary file handle for
1473             reading the file. It is the second call in which you invoke the method
1474             C that actually returns a bitvector from the bits read from
1475             the file. Each call to C in this manner spits out a new bit
1476             vector.
1477              
1478             =back
1479              
1480             =head1 METHODS
1481              
1482             =head3 close_file_handle()
1483              
1484             =over 4
1485              
1486             When you construct bitvectors by block scanning a disk file, after you are done, you
1487             can call this method to close the file handle that was created to read the file:
1488              
1489             $bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1490             ## your code to read bit blocks for constructing bitvectors goes here
1491             $bv->close_file_handle();
1492              
1493             The constructor call in the first statement opens a file handle for reading the bits.
1494             It is this file handle that is closed by calling C,
1495              
1496             =back
1497              
1498             =head3 count_bits()
1499              
1500             =over 4
1501              
1502             $bv = Algorithm::BitVector->new( intVal => 45, size => 16 );
1503             print $bv->count_bits(); # 4
1504              
1505             This method returns an integer value which is the number of bits set to 1 in the
1506             bitvector on which the method is invoked.
1507              
1508             =back
1509              
1510             =head3 count_bits_sparse()
1511              
1512             =over 4
1513              
1514             Say you have a bitvector with two million bits:
1515              
1516             $bv = Algorithm::BitVector->new( size => 2000000 );
1517              
1518             and you happen to set its individual bits by
1519              
1520             $bv->set_bit(345234, 1);
1521             $bv->set_bit(233, 1);
1522             $bv->set_bit(243, 1);
1523             $bv->set_bit(18, 1);
1524             $bv->set_bit(785, 1);
1525              
1526             The following call returns the number of bits set in the bitvector:
1527              
1528             print $bv->count_bits_sparse(); # 5
1529              
1530             For very long bitvectors, as in the example here, this method will work much faster
1531             than the C method. However, for dense bitvectors, I expect
1532             C to work faster.
1533              
1534             =back
1535              
1536             =head3 deep_copy()
1537              
1538             =over 4
1539              
1540             $bv_copy = $bv->deep_copy();
1541              
1542             Subsequently, any alterations to the bitvectors pointed to by either C<$bv> or
1543             C<$bv_copy> will not affect the other.
1544              
1545             =back
1546              
1547             =head3 divide_into_two()
1548              
1549             =over 4
1550              
1551             ($bv1, $bv2) = $bv->divide_into_two(); # say $bv = 0000000000101101
1552             print "$bv1\n"; # 00000000
1553             print "$bv2\n"; # 00101101
1554              
1555             Divides an even sized bitvector into two bitvectors, each of size half of the
1556             bitvector on which this method is invoked. Throws an exception when invoked on a
1557             bitvector that is not even sized.
1558              
1559             =back
1560              
1561             =head3 gcd()
1562              
1563             =over 4
1564              
1565             This method uses the Euclid's algorithm to return the Greatest Common Divisor of the
1566             integer values represented by the two bitvectors. The following example shows a call
1567             to C returning the GCD of the integer values of the bitvectors C<$bv1> and
1568             C<$bv2>.
1569              
1570             $bv1 = Algorithm::BitVector->new( bitstring => '01100110' ); # 102
1571             $bv2 = Algorithm::BitVector->new( bitstring => '011010' ); # 26
1572             $gcd = $bv1->gcd( $bv2 ); # 10
1573             print int($gcd); # 2
1574              
1575             The result returned by C is a bitvector.
1576              
1577             =back
1578              
1579             =head3 gen_random_bits()
1580              
1581             =over 4
1582              
1583             $bv = Algorithm::BitVector->new( intVal => 0 );
1584             $bv = $bv->gen_random_bits(16); # 1100111001010101
1585              
1586             The call to C returns a bitvector whose bits are randomly
1587             generated. The number of bits in the returned bitvector equals the argument integer.
1588              
1589             =back
1590              
1591             =head3 get_bit()
1592              
1593             =over 4
1594              
1595             This method gives you array-like access to the individual bits of a bitvector.
1596              
1597             $bv = Algorithm::BitVector->new( bitstring => '10111' );
1598             print $bv->get_bit(0); # 1 (the first bit)
1599             print $bv->get_bit(1); # 0
1600             print $bv->get_bit(4); # 1 (the last bit)
1601              
1602             Negative values for the index scan a bitvector from right to left, with the C<-1>
1603             index standing for the last (meaning the right-most) bit in the vector:
1604              
1605             print $bv->get_bit(-1); # 1 (the last bit)
1606             print $bv->get_bit(-2); # 1
1607             print $bv->get_bit(-5); # 1 (the first bit)
1608              
1609             The C can also return a slice of a bitvector if the argument to the
1610             method is an anonymous array of the index range you desire, as in the second
1611             statement below:
1612              
1613             $bv = Algorithm::BitVector->new( bitstring => "10111011");
1614             my $arr = $bv->get_bit( [3..7] );
1615             print "@$arr\n"; # 1 1 0 1 1
1616              
1617             In this example, we want C to return all bits at positions indexed 3
1618             through 7, both ends inclusive. Note that the slice is returned as an array of bits.
1619              
1620              
1621             =back
1622              
1623             =head3 get_hex_string_from_bitvector()
1624              
1625             =over 4
1626              
1627             Assuming that length of your bitvector is a multiple of 4, this methods returns a
1628             hex representation of the bit pattern:
1629              
1630             $bv = Algorithm::BitVector->new(bitstring => "0110100001100101011011000110110001101111");
1631             print $bv->get_hex_string_from_bitvector(); # 68656c6c6f
1632              
1633             The hex representation is returned in the form if a string of hex characters. This
1634             method throws an exception if the size of the bitvector is not a multiple of 4.
1635              
1636             =back
1637              
1638             =head3 get_text_string_from_bitvector()
1639              
1640             =over 4
1641              
1642             $bv = Algorithm::BitVector->new( textstring => "hello" );
1643             print "$bv\n"; # 0110100001100101011011000110110001101111
1644             print $bv->get_text_from_bitvector(); # hello
1645              
1646             The method returns a string of ASCII characters by converting successive 8-bit slices
1647             of the bitvector into an ASCII character. It throws an exception if the size of the
1648             bit pattern is not a multiple of 8. Calling this method to create a text-based print
1649             representation of a bit vector makes sense only if you don't expect to see any
1650             unprintable characters in the successive 8-bit slices of the bitvector. Let's say
1651             you have created a bitvector from a text string using the appropriate constructor
1652             call. Subsequently, you encrypted this text string. Next, you or someone else
1653             decrypts the encrypted bit stream. Since what comes out at the decryption end must
1654             be the original text string, it would make sense to invoke this method to recover the
1655             original text.
1656              
1657             =back
1658              
1659             =head3 gf_divide_by_modulus()
1660              
1661             =over 4
1662              
1663             This method is for modular division in the Galois Field C. You must specify
1664             the modulus polynomial through a bit pattern and also the value of the integer C:
1665              
1666             $mod = Algorithm::BitVector->new( bitstring => '100011011' ); # AES modulus
1667             $n = 8;
1668             $a = Algorithm::BitVector->new( bitstring => '11100010110001' );
1669             ($quotient, $remainder) = $a->gf_divide_by_modulus($mod, $n);
1670             print "$quotient\n"; # 00000000111010
1671             print "$remainder\n"; # 10001111
1672              
1673             What this example illustrates is dividing the bitvector C<$a> by the modulus bit
1674             vector C<$mod>. For a more general division of one bitvector C<$a> by another bit
1675             vector C<$b>, you would carry out a multiplication of C<$a> by the MI of C<$b>, where
1676             MI stands for "multiplicative inverse" as returned by a call to the method
1677             C. A call to C returns two bitvectors, one for the
1678             quotient and the other for the remainder.
1679              
1680             =back
1681              
1682             =head3 gf_MI()
1683              
1684             =over 4
1685              
1686             This method returns the multiplicative inverse of a bit pattern in the Galois Field
1687             C. You must specify both the modulus polynomial through its bit pattern and
1688             the value of C:
1689              
1690             $modulus = Algorithm::BitVector->new( bitstring => '100011011' ); # AES modulus
1691             $n = 8;
1692             $a = Algorithm::BitVector->new( bitstring => '00110011' );
1693             print $a->gf_MI($modulus, $n); # 01101100
1694              
1695             Note that the multiplicative inverse is returned as a bitvector.
1696              
1697             =back
1698              
1699             =head3 gf_multiply()
1700              
1701             =over 4
1702              
1703             This method returns a product of two bit patterns in the Galois Field C field.
1704             That is, given any two polynomials with their coefficients drawn from the 0 and 1
1705             values in C, this method returns the product polynomial.
1706              
1707             $a = Algorithm::BitVector->new( bitstring => '0110001' );
1708             $b = Algorithm::BitVector->new( bitstring => '0110' );
1709             print $a->gf_multiply($b); #00010100110
1710              
1711             As you would expect, in general, the bit pattern returned by this method will be
1712             longer than the lengths of the two operand bitvectors. The result returned by the
1713             method is in the form of a bitvector.
1714              
1715             =back
1716              
1717             =head3 gf_multiply_modular()
1718              
1719             =over 4
1720              
1721             This method carries out modular multiplication in the Galois Field C. You
1722             must supply it the bitvector for the modulus polynomial and the value of C.
1723            
1724             $modulus = Algorithm::BitVector->new( bitstring => '100011011' ); # AES modulus
1725             $n = 8;
1726             $a = Algorithm::BitVector->new( bitstring => '0110001' );
1727             $b = Algorithm::BitVector->new( bitstring => '0110' );
1728             print $a->gf_multiply_modular($b, $modulus, $n); # 10100110
1729              
1730             This example returns the product of the bit patterns C<$a> and C<$b> modulo the bit
1731             pattern C<$modulus> in C. The result returned by the method is in the form
1732             of a bitvector.
1733              
1734             =back
1735              
1736             =head3 hamming_distance()
1737              
1738             =over 4
1739              
1740             Hamming distance is commonly used to measure dissimilarity between two bitvectors of
1741             the same size.
1742              
1743             $bv1 = Algorithm::BitVector->new( bitstring => '11111111' );
1744             $bv2 = Algorithm::BitVector->new( bitstring => '00101011' );
1745             print $bv1->hamming_distance( $bv2 ); # 4
1746              
1747             This distance returns the number of bit positions in which two the bit patterns
1748             differ. The method throws an exception if the two bitvectors are not of the same
1749             length. The value returned is an integer.
1750              
1751             =back
1752              
1753             =head3 int_value()
1754              
1755             =over 4
1756              
1757             You can find the integer value of a bitvector by
1758              
1759             $bv = Algorithm::BitVector->new( intVal => 5678 );
1760             print $bv3->int_value(); # 5678
1761              
1762             or, even more simply by
1763              
1764             print int($bv); # 5678
1765              
1766             which works on account of the overloading of the C<0+> operator.
1767              
1768             =back
1769              
1770             =head3 is_power_of_2()
1771              
1772             =over 4
1773              
1774             You can use this predicate to test if the integer value of a bitvector is a power of
1775             2:
1776              
1777             $bv = Algorithm::BitVector->new( bitstring => '10000000001110' );
1778             print int($bv); # 826
1779             print $bv->is_power_of_2(); # 0
1780              
1781             The predicate returns 1 for true and 0 for false.
1782              
1783             =back
1784              
1785             =head3 is_power_of_2_sparse()
1786              
1787             =over 4
1788              
1789             This does the same thing as the C method but in a way that makes it
1790             faster for large bitvectors with very few bits set.
1791              
1792             $bv = Algorithm::BitVector->new( size => 2000000 );
1793             $bv->set_bit(345234, 1);
1794             print $bv->is_power_of_2_sparse(); # 1
1795              
1796             =back
1797              
1798             =head3 jaccard_distance()
1799              
1800             =over 4
1801              
1802             The Jaccard distance between two bitvectors is 1 minus the Jaccard similarity
1803             coefficient:
1804              
1805             $bv1 = Algorithm::BitVector->new( bitstring => '11111111' );
1806             $bv2 = Algorithm::BitVector->new( bitstring => '10101011' );
1807             print $bv1->jaccard_distance( $bv2 ); # 0.375
1808              
1809             The value returned by the method is a floating point number between 0 and 1.
1810              
1811             =back
1812              
1813             =head3 jaccard_similarity()
1814              
1815             =over 4
1816              
1817             This method returns the Jaccard similarity coefficient between the two bitvectors
1818             pointed to by C<$bv1> and C<$bv2>:
1819              
1820             $bv1 = Algorithm::BitVector->new( bitstring => '11111111' );
1821             $bv2 = Algorithm::BitVector->new( bitstring => '10101011' );
1822             print $bv1->jaccard_similarity( $bv2 ); # 0.675
1823              
1824             The value returned by the method is a floating point number between 0 and 1.
1825              
1826             =back
1827              
1828             =head3 length()
1829              
1830             =over 4
1831              
1832             This method returns the total number of bits in a bitvector:
1833              
1834             $bv = Algorithm::BitVector->new( intVal => 5678 );
1835             print $bv; # 1011000101110
1836             print $bv->length(); # 13
1837              
1838             Note that what C returns is the total size of a bitvector, including any
1839             leading zeros.
1840              
1841             =back
1842              
1843             =head3 multiplicative_inverse()
1844              
1845             =over 4
1846              
1847             This method calculates the multiplicative inverse using normal integer arithmetic.
1848             For multiplicative inverses in a Galois Field C, use the method C
1849             described earlier in this API.
1850              
1851             $modulus = Algorithm::BitVector->new( intVal => 32 );
1852             $bv = Algorithm::BitVector->new( intVal => 17 );
1853             $result = $bv->multiplicative_inverse( $modulus );
1854             if ($result) {
1855             print $result; # 10001
1856             } else {
1857             print "No multiplicative inverse in this case\n";
1858             }
1859              
1860             What this example says is that the multiplicative inverse of 17 modulo 32 is 17. That
1861             is because 17 times 17 in modulo 32 arithmetic equals 1. When using this method, you
1862             must test the value returned for 0. If the returned value is 0, that means that the
1863             number corresponding to the bitvector on which this method is invoked does not
1864             possess a multiplicative inverse with respect to the modulus.
1865              
1866             =back
1867              
1868             =head3 next_set_bit()
1869              
1870             =over 4
1871              
1872             Starting from a given bit position, this method returns the index of the next bit
1873             that is set in a bitvector:
1874              
1875             $bv = Algorithm::BitVector->new( bitstring => '00000000000001' );
1876             print $bv->next_set_bit(5); # 13
1877              
1878             In this example, we are asking the method to return the index of the bit that is set
1879             after the bit position indexed 5.
1880              
1881             =back
1882              
1883             =head3 pad_from_left()
1884              
1885             =over 4
1886              
1887             You can pad a bitvector from the left with a designated number of zeros:
1888              
1889             $bv = Algorithm::BitVector->new( bitstring => '101010' );
1890             print $bv->pad_from_left( 4 ); # 0000101010
1891              
1892             The method returns the bitvector on which it is invoked. So you can think of it as
1893             an in-place extension of a bitvector (although, under the hood, the extension is
1894             carried out by giving a new longer C<_vector> attribute to the bitvector object).
1895              
1896             =back
1897              
1898             =head3 pad_from_right()
1899              
1900             =over 4
1901              
1902             You can pad a bitvector from the right with a designated number of zeros:
1903              
1904             $bv = Algorithm::BitVector->new( bitstring => '101010' );
1905             print $bv->pad_from_right( 4 ); # 1010100000
1906              
1907             The method returns the bitvector on which it is invoked. So you can think of it as
1908             an in-place extension of a bitvector (although, under the hood, the extension is
1909             carried out by giving a new longer C<_vector> attribute to the bitvector object).
1910              
1911              
1912             =back
1913              
1914             =head3 permute()
1915              
1916             =over 4
1917              
1918             You can permute the bits in a bitvector with a permutation list as shown below:
1919              
1920             $bv1 = Algorithm::BitVector->new( intVal => 203, size => 8 );
1921             print $bv1; # 11001011
1922             $bv2 = $bv1->permute( [3, 2, 1, 0, 7, 6, 5, 4] );
1923             print $bv2; # 00111101
1924              
1925             The method returns a new bitvector with permuted bits.
1926              
1927             =back
1928              
1929             =head3 rank_of_bit_set_at_index()
1930              
1931             =over 4
1932              
1933             You can measure the "rank" of a bit that is set at a given index. Rank is the number
1934             of bits that are set up to the argument position, as in
1935              
1936             $bv = Algorithm::BitVector->new( bitstring => '01010101011100' );
1937             print $bv->rank_of_bit_set_at_index( 10 ); # 6
1938              
1939             The value 6 returned by this call to C is the number of
1940             bits set up to the position indexed 10 (including that position). The method throws
1941             an exception if there is no bit set at the argument position.
1942              
1943             =back
1944              
1945             =head3 read_bits_from_file()
1946              
1947             =over 4
1948              
1949             Constructing bitvectors from the contents of a disk file takes two steps: First you
1950             must make the call shown in the first statement below. The purpose of this call is to
1951             create a file handle that is associated with the variable C<$bv> in this case.
1952             Subsequent invocations of C on this variable will read
1953             blocks of C<$n> bits and return a bitvector for each block thus read. The variable
1954             C<$n> must be a multiple of 8 for this to work.
1955              
1956             $bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1957             $bv1 = $bv->read_bits_from_file(64);
1958             $bv2 = $bv->read_bits_from_file(64);
1959             ...
1960             ...
1961             $bv->close_file_handle();
1962              
1963             When reading a file as shown above, you can test the attribute C of the
1964             bitvector object in order to find out if there is more to be read in the file. The
1965             C loop shown below reads all of a file in 64-bit blocks.
1966              
1967             $bv = Algorithm::BitVector->new( filename => 'testinput.txt' );
1968             while ($bv->{more_to_read}) {
1969             my $bv_read = $bv->read_bits_from_file( 64 );
1970             print "$bv_read\n";
1971             }
1972             $bv->close_file_handle();
1973              
1974             The size of the last bitvector constructed from a file corresponds to how many bytes
1975             remain unread in the file at that point. It is your responsibility to zero-pad the
1976             last bitvector appropriately if, say, you are doing block encryption of the whole
1977             file.
1978              
1979             =back
1980              
1981             =head3 reset()
1982              
1983             =over 4
1984              
1985             You can reset a previously constructed bitvector all either all 1's or all 0's by
1986             calling this method:
1987              
1988             $bv = Algorithm::BitVector->new( intVal => 203, size => 8 );
1989             print $bv; # 11001011
1990             $bv->reset(1);
1991             print $bv; # 11111111
1992             $bv->reset(0);
1993             print $bv; # 00000000
1994              
1995             What the method accomplishes can be thought of as in-place resetting of the bits. The
1996             method does not return anything.
1997              
1998             =back
1999              
2000             =head3 reverse()
2001              
2002             =over 4
2003              
2004             Given a bitvector, you can construct a bitvector with all the bits reversed, in the
2005             sense that what was left-to-right earlier now becomes right-to-left, as in
2006              
2007             $bv = Algorithm::BitVector->new( bitstring => '01100001' );
2008             print $bv->reverse(); # 10000110
2009              
2010             A call to this method returns a new bitvector whose bits are in reverse order in
2011             relation to the bits in the bitvector on which the method is called.
2012              
2013             =back
2014              
2015             =head3 runs()
2016              
2017             =over 4
2018              
2019             This method returns an array of runs of 1's and 0's in a bitvector:
2020              
2021             $bv = Algorithm::BitVector->new( bitlist => [1,1,1,0,0,1] );
2022             my @bvruns = $bv->runs();
2023             print "@bvruns\n"; # 111 00 1
2024              
2025             Each element of the array that is returned by C is a string of either 1's or
2026             0's.
2027              
2028             =back
2029              
2030             =head3 set_bit()
2031              
2032             =over 4
2033              
2034             With array-like indexing, you can use this method to set the individual bits of a
2035             previously constructed bitvector. Both positive and negative values are allowed for
2036             the bit position index. The method takes two explicit arguments, the first for the
2037             position of the bit you want to set and the second for the value of the bit.
2038              
2039             $bv = Algorithm::BitVector->new( bitstring => '1111' );
2040             $bv->set_bit(0,0); # set the first bit to 0
2041             $bv->set_bit(1,0); # set the next bit to 0
2042             print $bv; # 0011
2043             $bv->set_bit(-1,0); # set the last bit to 0
2044             $bv->set_bit(-2,0); # set the bit before the last bit to 0
2045             print $bv; # 0000
2046              
2047             =back
2048              
2049             =head3 set_value()
2050              
2051             =over 4
2052              
2053             This method can be used to change the bit pattern associated with a previously
2054             constructed bitvector:
2055              
2056             $bv = Algorithm::BitVector->new( intVal => 7, size => 16 );
2057             print $bv; # 0000000000000111
2058             $bv->set_value( intVal => 45 );
2059             print $bv; # 101101
2060              
2061             You can think of this method as carrying out an in-place resetting of the bit array
2062             in a bitvector. The method does not return anything.
2063              
2064             =back
2065              
2066             =head3 shift_left()
2067              
2068             =over 4
2069              
2070             If you want to shift a bitvector non-circularly to the left, this is the method to
2071             call:
2072              
2073             $bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2074             $bv->shift_left(3);
2075             print $bv; # 001000
2076             $bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2077             $bv->shift_left(3)->shift_right(3);
2078             print $bv; # 000001
2079              
2080             As the bitvector is shifted non-circularly to the left, the exposed bit positions on
2081             the right are filled with zeros. Note that the method returns the bitvector object
2082             on which it is invoked. That is the reason why the chained invocation of the method
2083             in the fifth statement above works.
2084              
2085             =back
2086              
2087             =head3 shift_right()
2088              
2089             =over 4
2090              
2091             If you want to shift a bitvector non-circularly to the right, this is the method to
2092             call:
2093              
2094             $bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2095             $bv->shift_right(3);
2096             print $bv; # 000111
2097             $bv = Algorithm::BitVector->new( bitlist => [1,1, 1, 0, 0, 1] );
2098             $bv->shift_right(3)->shift_right(2);
2099             print $bv; # 000001
2100              
2101             As the bitvector is shifted non-circularly to the right, the exposed bit positions on
2102             the left are filled with zeros. Note that the method returns the bitvector object
2103             on which it is invoked. That is the reason why the chained invocation of the method
2104             in the fifth statement above works.
2105              
2106             =back
2107              
2108             =head3 test_for_primality()
2109              
2110             =over 4
2111              
2112             If the integer value of a bitvector is small (meaning smaller than C<< 0x7f_ff_ff_ff >>),
2113             you can use this method to test it for its primality through the Miller-Rabin
2114             probabilistic test:
2115              
2116             $p = 7417;
2117             $bv = Algorithm::BitVector->new( intVal => $p );
2118             $check = $bv->test_for_primality();
2119             print "The primality test for $p: $check\n";
2120             # The primality test for 7417: is a prime with probability 0.99993896484375
2121              
2122             The method returns one of two strings: If the primality test succeeds, the method
2123             returns a string like "C". And if the test fails,
2124             the method returns the string "C".
2125              
2126             =back
2127              
2128             =head3 unpermute()
2129              
2130             =over 4
2131              
2132             This method reverses the permutation carried out by a call to the C method
2133             as shown below:
2134              
2135             $bv1 = Algorithm::BitVector->new( intVal => 203, size => 8 );
2136             print $bv1; # 11001011
2137             $bv2 = $bv1->permute( [3, 2, 1, 0, 7, 6, 5, 4] );
2138             print $bv2; # 00111101
2139             $bv3 = $bv2->unpermute( [3, 2, 1, 0, 7, 6, 5, 4] );
2140             print $bv3; # 11001011
2141              
2142             The method returns a new bitvector with unpermuted bits. Also note that the method
2143             throws an exception if the permutation list is not as long as the size of the
2144             bitvector on which the method is invoked.
2145              
2146             =back
2147              
2148             =head3 write_to_file()
2149              
2150             =over 4
2151              
2152             This method writes the bitvectors in your program to a disk file:
2153              
2154             $bv1 = Algorithm::BitVector->new( bitstring => '00001010' );
2155             open my $FILEOUT, ">test.txt";
2156             $bv1->write_to_file( $FILEOUT );
2157             close $FILEOUT;
2158              
2159             The size of a bitvector must be a multiple of 8 for this write method to work. If
2160             this condition is not met, the method will throw an exception.
2161              
2162             B When writing an internally generated bitvector out
2163             to a disk file, it may be important to open the file in the binary mode, since
2164             otherwise the bit pattern `00001010' ('\n') in your bitstring will be written out as
2165             0000110100001010 ('\r\n') which is the line break on Windows machines.
2166              
2167             =back
2168              
2169             =head1 REQUIRED
2170              
2171             This module imports the following modules:
2172              
2173             Math::BigInt
2174             List::Util
2175             Math::Random
2176             Fcntl
2177              
2178              
2179             =head1 THE C DIRECTORY
2180              
2181             The C directory contains the following script that invokes all of the
2182             functionality of this module:
2183              
2184             BitVectorDemo.pl
2185              
2186             In case there is any doubt about how exactly to invoke a method or how to use an
2187             operator, please look at the usage in this script.
2188              
2189             =head1 EXPORT
2190              
2191             None by design.
2192              
2193             =head1 BUGS
2194              
2195             Please notify the author if you encounter any bugs. When sending email, please place
2196             the string 'BitVector' in the subject line.
2197              
2198             =head1 INSTALLATION
2199              
2200             Download the archive from CPAN in any directory of your choice. Unpack the archive
2201             with a command that on a Linux machine would look like:
2202              
2203             tar zxvf Algorithm-BitVector-1.23.tar.gz
2204              
2205             This will create an installation directory for you whose name will be
2206             C. Enter this directory and execute the following commands
2207             for a standard install of the module if you have root privileges:
2208              
2209             perl Makefile.PL
2210             make
2211             make test
2212             sudo make install
2213              
2214             If you do not have root privileges, you can carry out a non-standard install the
2215             module in any directory of your choice by:
2216              
2217             perl Makefile.PL prefix=/some/other/directory/
2218             make
2219             make test
2220             make install
2221              
2222             With a non-standard install, you may also have to set your PERL5LIB environment
2223             variable so that this module can find the required other modules. How you do that
2224             would depend on what platform you are working on. In order to install this module in
2225             a Linux machine on which I use tcsh for the shell, I set the PERL5LIB environment
2226             variable by
2227              
2228             setenv PERL5LIB /some/other/directory/lib64/perl5/:/some/other/directory/share/perl5/
2229              
2230             If I used bash, I'd need to declare:
2231              
2232             export PERL5LIB=/some/other/directory/lib64/perl5/:/some/other/directory/share/perl5/
2233              
2234              
2235             =head1 THANKS
2236              
2237             The bug in the primality test function, whose fix resulted in Version 1.21, was
2238             reported by Dana Jacobsen in a bug report filed at C. Thanks Dana!
2239              
2240             The restriction on the Perl version was removed on Slaven Rezic's recommendation. He
2241             says the module runs fine with Perl 5.8.9. Thanks Slaven!
2242              
2243             =head1 AUTHOR
2244              
2245             Avinash Kak, kak@purdue.edu
2246              
2247             If you send email, please place the string "BitVector" in your subject line to get past
2248             my spam filter.
2249              
2250             =head1 COPYRIGHT
2251              
2252             This library is free software; you can redistribute it and/or modify it under the
2253             same terms as Perl itself.
2254              
2255             Copyright 2015 Avinash Kak
2256              
2257             =cut
2258