line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
2
|
|
|
|
|
|
|
package Data::Bvec; |
3
|
|
|
|
|
|
|
|
4
|
13
|
|
|
13
|
|
334679
|
use 5.008003; |
|
13
|
|
|
|
|
55
|
|
|
13
|
|
|
|
|
517
|
|
5
|
13
|
|
|
13
|
|
221
|
use strict; |
|
13
|
|
|
|
|
28
|
|
|
13
|
|
|
|
|
428
|
|
6
|
13
|
|
|
13
|
|
78
|
use warnings; |
|
13
|
|
|
|
|
36
|
|
|
13
|
|
|
|
|
346
|
|
7
|
13
|
|
|
13
|
|
74
|
use Carp; |
|
13
|
|
|
|
|
23
|
|
|
13
|
|
|
|
|
1503
|
|
8
|
13
|
|
|
13
|
|
11571
|
use Math::Int2Base qw( int2base base2int ); |
|
13
|
|
|
|
|
11980
|
|
|
13
|
|
|
|
|
1623
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Data::Bvec - a module to manipulate integer arrays as bit vectors and |
15
|
|
|
|
|
|
|
"compressed bit strings" (using a simple RLE). |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 VERSION |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
VERSION: 1.01 |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=cut |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
our $VERSION; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
$VERSION = '1.01'; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=head1 SYNOPSIS |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
use Data::Bvec; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
my $bv = Data::Bvec::->new( nums=>[1,2,3] ); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
my $vec = $bv->get_bvec(); # 01110000 |
34
|
|
|
|
|
|
|
my $bstr = $bv->get_bstr(); # '-134' |
35
|
|
|
|
|
|
|
my $nums = $bv->get_nums(); # [1,2,3] |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
---- |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
use Data::Bvec qw( :all ); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
my $vec = num2bit( [1,2,3] ); # 01110000 |
42
|
|
|
|
|
|
|
set_bit( $vec, 4, 1 ); # 01111000 |
43
|
|
|
|
|
|
|
my $bstr = compress bit2str $vec; # '-143' |
44
|
|
|
|
|
|
|
my $nums = bit2num str2bit uncompress $bstr; # [1,2,3,4] |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head1 DESCRIPTION/DISCUSSION |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
This module encapsulates some simple routines for manipulating Perl bit |
49
|
|
|
|
|
|
|
vectors (putting values in; getting values out), but its main goal is |
50
|
|
|
|
|
|
|
to implement a simple run-length encoding scheme for bit vectors that |
51
|
|
|
|
|
|
|
compresses them into relatively human-readable and flat-file-storable |
52
|
|
|
|
|
|
|
strings. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
My use case was wanting to prototype a data indexing system, and I |
55
|
|
|
|
|
|
|
wanted to ease debugging by plopping the bitstrings in a flat file that |
56
|
|
|
|
|
|
|
I could examine directly. (Each bit in a vector represents a record in |
57
|
|
|
|
|
|
|
the database -- true or false whether the term is in that record in the |
58
|
|
|
|
|
|
|
field being indexed.) It has worked well enough that I haven't felt |
59
|
|
|
|
|
|
|
the need to change how the bitstrings are stored (just where they're |
60
|
|
|
|
|
|
|
stored). |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
The initial version of the module used a different set of base-62 |
63
|
|
|
|
|
|
|
digits. In writing Math::Int2Base, I decided to normalize all the |
64
|
|
|
|
|
|
|
bases from 2 to 62 to use 0-9,A-Z,a-z. It makes the numbers sort |
65
|
|
|
|
|
|
|
correctly (ascii-betically == numerically), and it let me say that A |
66
|
|
|
|
|
|
|
base-16 == A base-36 == A base-62. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
So now I'm rewriting this module to use those base conversion routines. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head1 EXPORTS |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Nothing is exported by default. The following may be exported |
73
|
|
|
|
|
|
|
individually; all of them may be exported using the C<:all> tag: |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
- set_bit |
76
|
|
|
|
|
|
|
- howmany |
77
|
|
|
|
|
|
|
- bit2str |
78
|
|
|
|
|
|
|
- str2bit |
79
|
|
|
|
|
|
|
- bit2num |
80
|
|
|
|
|
|
|
- num2bit |
81
|
|
|
|
|
|
|
- compress |
82
|
|
|
|
|
|
|
- uncompress |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Examples: |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
use Data::Bvec qw( :all ); |
87
|
|
|
|
|
|
|
use Data::Bvec qw( bit2str str2bit compress uncompress ); |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
However, if you only use the object methods, nothing would need to be |
90
|
|
|
|
|
|
|
exported. See below. |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=cut |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
our ( @ISA, @EXPORT_OK, %EXPORT_TAGS ); |
95
|
|
|
|
|
|
|
BEGIN { |
96
|
13
|
|
|
13
|
|
79
|
use Exporter; |
|
13
|
|
|
|
|
26
|
|
|
13
|
|
|
|
|
892
|
|
97
|
13
|
|
|
13
|
|
190
|
@ISA = qw( Exporter ); |
98
|
13
|
|
|
|
|
46
|
@EXPORT_OK = qw( |
99
|
|
|
|
|
|
|
set_bit howmany |
100
|
|
|
|
|
|
|
bit2str str2bit |
101
|
|
|
|
|
|
|
bit2num num2bit |
102
|
|
|
|
|
|
|
compress uncompress |
103
|
|
|
|
|
|
|
); |
104
|
13
|
|
|
|
|
25667
|
%EXPORT_TAGS = ( all => [ @EXPORT_OK ] ); |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head1 SUBROUTINES |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head2 set_bit( $vec, $num, $zero_or_one ) |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
This is a shallow wrapper around Perl's vec() that simply provides |
115
|
|
|
|
|
|
|
the third parameter (1) to that routine that says we're working with |
116
|
|
|
|
|
|
|
a bit vector. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
Normally returns $num, if you care. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head3 Parameters: |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head4 $vec |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
A Perl bit vector stored in the scalar. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=head4 $num |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
The number whose bit you want to target in the bit vector. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head4 $zero_or_one |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
The value you want to set the bit to: 0 or 1. If not defined, |
133
|
|
|
|
|
|
|
1 is assumed. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Examples: |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
my $vec = ""; # empty vector |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
set_bit $vec, 1, 1; # 01000000 |
141
|
|
|
|
|
|
|
set_bit $vec, 2, 1; # 01100000 |
142
|
|
|
|
|
|
|
set_bit $vec, 3; # 01110000 |
143
|
|
|
|
|
|
|
set_bit $vec, 1, 0; # 00110000 |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=cut |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
sub set_bit { |
148
|
50
|
100
|
|
50
|
1
|
26231
|
if( defined $_[2] ) { vec( $_[0], $_[1], 1 ) = $_[2] } |
|
41
|
|
|
|
|
144
|
|
149
|
9
|
|
|
|
|
34
|
else { vec( $_[0], $_[1], 1 ) = 1 } |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=head2 bit2str( $vec ) |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
This routine is a shallow wrapper around unpack() that unpacks a bit |
157
|
|
|
|
|
|
|
vector into a string of '0's and '1's, in preparation for compression. |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head3 Parameters: |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head4 $vec |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
A Perl bit vector. |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
Example: |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
my $vec = ""; |
168
|
|
|
|
|
|
|
set_bit $vec, 4, 1; # 00001000 |
169
|
|
|
|
|
|
|
my $str = bit2str $vec; # '00001000' |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=cut |
172
|
|
|
|
|
|
|
|
173
|
72
|
|
|
72
|
1
|
1913
|
sub bit2str { unpack "b*", $_[0] } |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head2 str2bit( $str ) |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
This routine is a shallow wrapper around pack() that packs a string |
180
|
|
|
|
|
|
|
of '0's and '1's (following uncompression) into a bit vector. |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=head3 Parameters: |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=head4 $str |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
A string of '0's and '1's, e.g., "00001000". |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
Example: |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
my $vec = str2bit '00001000'; |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=cut |
193
|
|
|
|
|
|
|
|
194
|
16
|
|
|
16
|
1
|
1082
|
sub str2bit { pack "b*", $_[0] } |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=head2 num2bit( \@integers ) |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
This routine accepts an array ref of integers and returns a bit |
201
|
|
|
|
|
|
|
vector with those integer's bits turned on. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=head3 Parameters: |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=head4 \@integers |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
A reference to an array of integers. |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
Examples: |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
my $vec = num2bit [1,2,3]; # 01110000 |
212
|
|
|
|
|
|
|
my $vec = num2bit [3,2,1]; # 01110000 |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
The second example is intended to make clear that the order of the |
215
|
|
|
|
|
|
|
integers in the array is not retained (for obvious reasons), and |
216
|
|
|
|
|
|
|
calling bit2num( $vec ) will always return the integers in ascending |
217
|
|
|
|
|
|
|
order (see bit2num() below). |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=cut |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub num2bit { |
222
|
9
|
|
|
9
|
1
|
1705
|
my $bvec = ""; |
223
|
9
|
|
|
|
|
17
|
vec( $bvec, $_, 1 ) = 1 for @{$_[0]}; |
|
9
|
|
|
|
|
95
|
|
224
|
9
|
|
|
|
|
42
|
$bvec; # returned |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=head2 bit2num( $vec, $beg, $cnt ) |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
This routine accepts a bit vector and returns an array of integers |
232
|
|
|
|
|
|
|
represented by the 1 bits. |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
The parameters $beg and $cnt are to support retrieving subsets of |
235
|
|
|
|
|
|
|
integers from a large vector -- in essence, to support "paging" through |
236
|
|
|
|
|
|
|
the set. |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
In scalar context, returns a reference to the array. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=head3 Parameters: |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=head4 $vec |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
A bit vector. |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=head4 $beg |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
The first integer (where the bit is 1) to return. Unlike array |
249
|
|
|
|
|
|
|
subscripts, the $beg positions start with 1, not 0. |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=head4 $cnt |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
The maximum number of integers (including the first) to return. |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
Examples: |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# 0----+----1----+----2----+----3- |
258
|
|
|
|
|
|
|
my $vec = str2bit '01110011110001111100001111110001'; |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
my $set1 = bit2num $vec, 1, 5; # [ 1, 2, 3, 6, 7 ] |
261
|
|
|
|
|
|
|
my $set2 = bit2num $vec, 6, 5; # [ 8, 9, 13, 14, 15 ] |
262
|
|
|
|
|
|
|
my $set3 = bit2num $vec, 11, 5; # [ 16, 17, 22, 23, 24 ] |
263
|
|
|
|
|
|
|
my $set4 = bit2num $vec, 16, 5; # [ 25, 26, 27, 31 ] |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=cut |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
sub bit2num { |
268
|
10
|
|
|
10
|
1
|
1159
|
my( $vec, $beg, $cnt ) = @_; |
269
|
|
|
|
|
|
|
|
270
|
10
|
|
|
|
|
13
|
my( @num, $count ); |
271
|
|
|
|
|
|
|
|
272
|
10
|
100
|
|
|
|
24
|
if( $beg ) { |
273
|
|
|
|
|
|
|
|
274
|
4
|
50
|
|
|
|
7
|
if( $cnt ) { |
275
|
4
|
|
|
|
|
7
|
my $end = $beg + $cnt - 1; |
276
|
4
|
|
|
|
|
11
|
for( my $i = 0; $i < 8 * length $vec; ++$i ) { |
277
|
128
|
100
|
100
|
|
|
558
|
if( vec $vec, $i, 1 and ++$count >= $beg and $count <= $end ) { |
|
|
|
100
|
|
|
|
|
278
|
19
|
|
|
|
|
114
|
push @num, $i } } |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
else { |
281
|
0
|
|
|
|
|
0
|
for( my $i = 0; $i < 8 * length $vec; ++$i ) { |
282
|
0
|
0
|
0
|
|
|
0
|
if( vec $vec, $i, 1 and ++$count >= $beg ) { |
283
|
0
|
|
|
|
|
0
|
push @num, $i } } |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
else { |
288
|
6
|
|
|
|
|
40
|
for( my $i = 0; $i < 8 * length $vec; ++$i ) { |
289
|
168
|
100
|
|
|
|
1071
|
push @num, $i if vec $vec, $i, 1 } |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
10
|
50
|
|
|
|
23
|
return @num if wantarray; |
293
|
10
|
|
|
|
|
32
|
return \@num; |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=head2 compress( $str ) |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
This routine takes a string of '0's and '1's and compresses it using a |
302
|
|
|
|
|
|
|
simple run-length encoding (RLE). It returns this "compressed bit |
303
|
|
|
|
|
|
|
string". |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
=head3 Parameters: |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=head4 $str |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
A string of '0's and '1's, e.g., "01110". |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
Note: the length of the string need not be a multiple of 8. |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
Example: |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
my $bstr; |
316
|
|
|
|
|
|
|
$bstr = compress '01110000'; # '-134' |
317
|
|
|
|
|
|
|
my $str = ('1'x100).('0'x30).('1'x6); |
318
|
|
|
|
|
|
|
$bstr = compress $str; # '+@1cU6' |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head3 Compression Scheme |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
The compression scheme counts the number of consecutive '0's and '1's |
323
|
|
|
|
|
|
|
and concatenates that count (in base-62) to the compressed bit string. |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
If the first bit is '0', the compressed bit string begins with '-'. If |
326
|
|
|
|
|
|
|
the first bit is '1', it begins with '+'. The digit following that |
327
|
|
|
|
|
|
|
represents that many of those bits. The next digit represents that |
328
|
|
|
|
|
|
|
many of the "other" bits, and so on. (A "digit" matches /[0-9A-Za-z]/.) |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
So in the first example, '-134' means 1 '0' bit, then 3 '1' bits, then |
331
|
|
|
|
|
|
|
4 '0' bits, i.e., '01110000'. |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
The second example includes a 2-digit number, 1c base-62 (100 decimal, |
334
|
|
|
|
|
|
|
as defined by Math::Int2base). |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Any multi-digit number is preceded by a non-digit: |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
'@' for a 2-digit number |
339
|
|
|
|
|
|
|
'#' for 3 digits |
340
|
|
|
|
|
|
|
'$' for 4 digits |
341
|
|
|
|
|
|
|
'%' for 5 digits, and |
342
|
|
|
|
|
|
|
'^' for 6 digits |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
(Mnemonic: look above the numbers on a qwerty keyboard. A 6-digit |
345
|
|
|
|
|
|
|
number will accommodate 32,590,299,105 consecutive bits. If you need |
346
|
|
|
|
|
|
|
more than that, let me know.) |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
So '+@1cU6' means 1c (100) '1' bits, then U (30) '0' bits, then 6 '1' |
349
|
|
|
|
|
|
|
bits. |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=cut |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
sub compress { |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
# 1st char '-' => 1st bit '0', '+' => '1' |
356
|
10
|
|
|
10
|
1
|
1194
|
my( $first_char, @a ); |
357
|
10
|
|
|
|
|
22
|
for( $_[0] ) { |
358
|
10
|
100
|
|
|
|
139
|
if( /^0/ ) { $first_char = '-'; @a = /(0*)(1*)/g } |
|
9
|
|
|
|
|
21
|
|
|
9
|
|
|
|
|
121
|
|
359
|
1
|
|
|
|
|
3
|
else { $first_char = '+'; @a = /(1*)(0*)/g } |
|
1
|
|
|
|
|
11
|
|
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
10
|
50
|
|
|
|
33
|
return '' unless @a; |
363
|
|
|
|
|
|
|
|
364
|
10
|
|
|
|
|
66
|
pop @a while $a[-1] eq ''; # remove trailing nulls |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# return compressed format |
367
|
65
|
|
|
|
|
181
|
$first_char . join( '', |
368
|
|
|
|
|
|
|
map { |
369
|
10
|
|
|
|
|
22
|
my $chars = int2base( length, 62 ); |
370
|
65
|
|
|
|
|
959
|
( undef,'','@','#','$','%','^' )[ length $chars ] . $chars; |
371
|
|
|
|
|
|
|
} @a ); |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=head2 uncompress( $bstr ) |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
This routine uncompresses a compressed bit string (which would have |
380
|
|
|
|
|
|
|
been compressed by the compress() routine above). |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
It returns a string of '0's and '1's. This string will (normally) then |
383
|
|
|
|
|
|
|
be converted to a bit vector using str2bit() above. |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=head3 Parameters: |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=head4 $bstr |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
A compressed bit string (see compress() above). |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
Example: |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
my $bstr = '-134'; |
394
|
|
|
|
|
|
|
my $str = uncompress $bstr; # '01110000' |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=cut |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
sub uncompress { |
399
|
6
|
|
|
6
|
1
|
20
|
my $compressed = shift; |
400
|
|
|
|
|
|
|
|
401
|
6
|
50
|
|
|
|
21
|
croak "Undefined" unless defined $compressed; |
402
|
6
|
|
|
|
|
11
|
my $ret = ''; |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
# examine first character to determine first bit's value |
405
|
6
|
50
|
|
|
|
21
|
my $bit = substr( $compressed, 0, 1 ) eq '+' ? '1' : '0'; |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# examine the rest of the characters, expand into 0's & 1's |
408
|
6
|
|
|
|
|
589
|
for( my $i = 1; $i < length $compressed; ++$i ) { |
409
|
|
|
|
|
|
|
|
410
|
32
|
|
|
|
|
52
|
my $char = substr( $compressed, $i, 1 ); |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
# multi-digit number? |
413
|
32
|
|
|
|
|
48
|
my $len = index '..@#$%^', $char; # @==2, #==3, etc. |
414
|
32
|
50
|
|
|
|
54
|
if( $len > 1 ) { |
415
|
0
|
|
|
|
|
0
|
$ret .= $bit x (base2int substr( $compressed, $i + 1, $len ), 62); |
416
|
0
|
|
|
|
|
0
|
$i += $len; |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
else { |
419
|
32
|
|
|
|
|
81
|
$ret .= $bit x (base2int $char, 62); |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
|
422
|
32
|
100
|
|
|
|
706
|
$bit = $bit ? '0' : '1'; # toggle between 0/1 |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
|
425
|
6
|
|
|
|
|
23
|
$ret; # returned |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=head2 howmany( $vec, $zero_or_one ) |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
This routine returns a count of the 0 or 1 bits in a bit vector. |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=head3 Parameters: |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=head4 $vec |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
A bit vector. |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=head4 $zero_or_one |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
The value you want a count of: 0 or 1. Defaults to 1 if not given. |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
Examples: |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
my $vec = str2bit '01010010'; |
447
|
|
|
|
|
|
|
my $ones_count = howmany $vec; # 3 |
448
|
|
|
|
|
|
|
my $zeros_count = howmany $vec, 0; # 5 |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
Note that howmany( $vec, 0 ) will include trailing zero bits. |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=cut |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
sub howmany { |
455
|
2
|
|
|
2
|
1
|
7
|
my( $bvec, $bitval ) = @_; |
456
|
|
|
|
|
|
|
|
457
|
2
|
100
|
|
|
|
6
|
$bitval = 1 unless defined $bitval; |
458
|
|
|
|
|
|
|
|
459
|
2
|
|
|
|
|
6
|
my $setbits = unpack "%32b*", $bvec; |
460
|
2
|
100
|
|
|
|
6
|
return $setbits if $bitval; |
461
|
1
|
|
|
|
|
5
|
return 8 * length( $bvec ) - $setbits; # includes trailing 0's |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=head1 METHODS |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=head2 new() |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
This constructs a Data::Bvec object. Each object represents a single |
471
|
|
|
|
|
|
|
array of integers stored either as a bit vector, a compressed bit |
472
|
|
|
|
|
|
|
string, or an array. |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=head3 Parameters: |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
All parameters to new() are named. |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=head4 bvec=>$bit_vector |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
Stores a Perl bit vector in the object. |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
my $vec = str2bit '01110011110001111100001111110001'; |
483
|
|
|
|
|
|
|
my $bv = Data::Bvec::->new( bvec => $vec ); |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=head4 bstr=>$compressed_bit_string |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
Stores a compressed bit string in the object. |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
my $bstr = compress bit2str $vec; |
490
|
|
|
|
|
|
|
my $bv = Data::Bvec::->new( bstr => $bstr ); |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=head4 nums=>\@integers |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
Stores an array of integers in the object. The order of the array is |
495
|
|
|
|
|
|
|
retained when stored. |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
my $nums = bit2num $vec; |
498
|
|
|
|
|
|
|
my $bv = Data::Bvec::->new( nums => $nums ); |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=head4 bvec2nums=>$bit_vector |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Accepts a bit vector and stores it as an array of integers (as |
503
|
|
|
|
|
|
|
$self->{nums}). |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
my $bv = Data::Bvec::->new( bvec2nums => $vec ); |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
=head4 nums2bvec=>\@integers |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
Accepts an array of integers and stores it as a bit vector (as |
510
|
|
|
|
|
|
|
$self->{bvec}). The order of the array is not retained. |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
my $bv = Data::Bvec::->new( nums2bvec => $nums ); |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=head4 bvec2bstr=>$bit_vector |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
Accepts a bit vector and stores it as a compressed bit string (as |
517
|
|
|
|
|
|
|
$self->{bstr}). |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
my $bv = Data::Bvec::->new( bvec2bstr => $vec ); |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=head4 bstr2bvec=>$compressed_bit_string |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
Accepts a compressed bit string and stores it as a bit vector (as |
524
|
|
|
|
|
|
|
$self->{bvec}). |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
my $bv = Data::Bvec::->new( bstr2bvec => $bstr ); |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=head4 bstr2nums=>$compressed_bit_string |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
Accepts a compressed bit string and stores it as an array of integers |
531
|
|
|
|
|
|
|
(as $self->{nums}). |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
my $bv = Data::Bvec::->new( bstr2nums => $bstr ); |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=head4 nums2bstr=>\@integers |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Accepts an array of integers and stores it as a compressed bit string |
538
|
|
|
|
|
|
|
(as $self->{bstr}). The order of the array is not retained. |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
my $bv = Data::Bvec::->new( nums2bstr => $nums ); |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
=cut |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
sub new { |
545
|
13
|
|
|
13
|
1
|
931
|
my $class = shift; |
546
|
|
|
|
|
|
|
|
547
|
13
|
50
|
|
|
|
46
|
die "Too many parameters" if @_ > 2; |
548
|
13
|
|
|
|
|
43
|
my %parm = @_; |
549
|
13
|
|
|
|
|
23
|
my $self = {}; |
550
|
|
|
|
|
|
|
|
551
|
13
|
100
|
|
|
|
43
|
$self->{bvec} = $parm{bvec} if defined $parm{bvec}; |
552
|
13
|
100
|
|
|
|
51
|
$self->{bstr} = $parm{bstr} if defined $parm{bstr}; |
553
|
13
|
100
|
|
|
|
44
|
$self->{nums} = $parm{nums} if defined $parm{nums}; |
554
|
|
|
|
|
|
|
|
555
|
13
|
100
|
|
|
|
49
|
$self->{nums} = bit2num $parm{bvec2nums} if defined $parm{bvec2nums}; |
556
|
13
|
100
|
|
|
|
40
|
$self->{bvec} = num2bit $parm{nums2bvec} if defined $parm{nums2bvec}; |
557
|
|
|
|
|
|
|
|
558
|
13
|
100
|
|
|
|
41
|
$self->{bstr} = compress bit2str $parm{bvec2bstr} if defined $parm{bvec2bstr}; |
559
|
13
|
100
|
|
|
|
48
|
$self->{bvec} = str2bit uncompress $parm{bstr2bvec} if defined $parm{bstr2bvec}; |
560
|
|
|
|
|
|
|
|
561
|
13
|
100
|
|
|
|
38
|
$self->{nums} = bit2num str2bit uncompress $parm{bstr2nums} if defined $parm{bstr2nums}; |
562
|
13
|
100
|
|
|
|
41
|
$self->{bstr} = compress bit2str num2bit $parm{nums2bstr} if defined $parm{nums2bstr}; |
563
|
|
|
|
|
|
|
|
564
|
13
|
|
|
|
|
63
|
bless $self, $class; |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
=head2 get_bvec() |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
This routine takes no parameters. It returns a bit vector, regardless |
572
|
|
|
|
|
|
|
how the integers are stored. The object is not changed. |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
my $vec = $bv->get_bvec(); |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
=cut |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
sub get_bvec { |
579
|
5
|
|
|
5
|
1
|
26
|
my $self = shift; |
580
|
|
|
|
|
|
|
|
581
|
5
|
100
|
|
|
|
46
|
return $self->{bvec} if defined $self->{bvec}; |
582
|
2
|
50
|
|
|
|
7
|
return str2bit uncompress $self->{bstr} if defined $self->{bstr}; |
583
|
2
|
50
|
|
|
|
13
|
return num2bit $self->{nums} if defined $self->{nums}; |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
} |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
=head2 get_bstr() |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
This routine takes no parameters. It returns a compressed bit string, |
592
|
|
|
|
|
|
|
regardless how the integers are stored. The object is not changed. |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
my $bstr = $bv->get_bstr(); |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
=cut |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
sub get_bstr { |
599
|
5
|
|
|
5
|
1
|
24
|
my $self = shift; |
600
|
|
|
|
|
|
|
|
601
|
5
|
100
|
|
|
|
33
|
return $self->{bstr} if defined $self->{bstr}; |
602
|
2
|
50
|
|
|
|
7
|
return compress bit2str $self->{bvec} if defined $self->{bvec}; |
603
|
2
|
50
|
|
|
|
13
|
return compress bit2str num2bit $self->{nums} if defined $self->{nums}; |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
} |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
=head2 get_nums( $beg, $cnt ) |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
This routine returns an array of integers, regardless how the integers |
612
|
|
|
|
|
|
|
are stored. The object is not changed. |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
Note that if the integers are stored as 'nums' (an array), get_nums() |
615
|
|
|
|
|
|
|
will return them in the same order as the array. If they are stored |
616
|
|
|
|
|
|
|
another way, they will be returned in ascending order. |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
my @integers = $bv->get_nums(); # list returned in list context |
619
|
|
|
|
|
|
|
my $ints = $bv->get_nums(); # aref returned in scalar context |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
=head3 Parameters: |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=head4 $beg |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
The first integer to return. Unlike array subscripts, the $beg |
626
|
|
|
|
|
|
|
positions start with 1, not 0. If no $beg is given, 1 is assumed. |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=head4 $cnt |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
The maximum number of integers (including the first) to return. |
631
|
|
|
|
|
|
|
If no $cnt is given, the rest of the integers are returned. |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=cut |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
sub get_nums { |
636
|
6
|
|
|
6
|
1
|
26
|
my $self = shift; |
637
|
6
|
|
50
|
|
|
70
|
my $beg = shift||1; |
638
|
6
|
|
50
|
|
|
37
|
my $cnt = shift||''; |
639
|
|
|
|
|
|
|
|
640
|
6
|
|
|
|
|
8
|
my @num; |
641
|
|
|
|
|
|
|
|
642
|
6
|
50
|
|
|
|
27
|
if( defined $self->{nums} ) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
643
|
6
|
50
|
|
|
|
33
|
if ( $cnt ) { @num = @{$self->{nums}}[--$beg..$beg+$cnt-1] } |
|
0
|
50
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
644
|
6
|
|
|
|
|
11
|
elsif( $beg == 1 ) { @num = @{$self->{nums}} } |
|
6
|
|
|
|
|
28
|
|
645
|
0
|
|
|
|
|
0
|
else { @num = @{$self->{nums}}[--$beg..$#{$self->{nums}}] } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
646
|
|
|
|
|
|
|
} |
647
|
|
|
|
|
|
|
elsif( defined $self->{bvec} ) { |
648
|
0
|
|
|
|
|
0
|
@num = bit2num $self->{bvec}, $beg, $cnt; |
649
|
|
|
|
|
|
|
} |
650
|
|
|
|
|
|
|
elsif( defined $self->{bstr} ) { |
651
|
0
|
|
|
|
|
0
|
@num = bit2num str2bit( uncompress $self->{bstr} ), $beg, $cnt; |
652
|
|
|
|
|
|
|
} |
653
|
|
|
|
|
|
|
|
654
|
6
|
100
|
|
|
|
54
|
return @num if wantarray; |
655
|
2
|
|
|
|
|
11
|
return \@num; |
656
|
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
1; # return true |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
__END__ |