line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::BitStream::Code::Comma; |
2
|
28
|
|
|
28
|
|
37900
|
use strict; |
|
28
|
|
|
|
|
64
|
|
|
28
|
|
|
|
|
1453
|
|
3
|
28
|
|
|
28
|
|
192
|
use warnings; |
|
28
|
|
|
|
|
179
|
|
|
28
|
|
|
|
|
1766
|
|
4
|
|
|
|
|
|
|
BEGIN { |
5
|
28
|
|
|
28
|
|
92
|
$Data::BitStream::Code::Comma::AUTHORITY = 'cpan:DANAJ'; |
6
|
28
|
|
|
|
|
2599
|
$Data::BitStream::Code::Comma::VERSION = '0.08'; |
7
|
|
|
|
|
|
|
} |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $CODEINFO = { package => __PACKAGE__, |
10
|
|
|
|
|
|
|
name => 'Comma', |
11
|
|
|
|
|
|
|
universal => 1, |
12
|
|
|
|
|
|
|
params => 1, |
13
|
|
|
|
|
|
|
encodesub => sub {shift->put_comma(@_)}, |
14
|
|
|
|
|
|
|
decodesub => sub {shift->get_comma(@_)}, }; |
15
|
|
|
|
|
|
|
|
16
|
28
|
|
|
28
|
|
269
|
use Moo::Role; |
|
28
|
|
|
|
|
78
|
|
|
28
|
|
|
|
|
708
|
|
17
|
|
|
|
|
|
|
requires qw(read write); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
sub put_comma { |
20
|
1632
|
|
|
1632
|
1
|
26704
|
my $self = shift; |
21
|
1632
|
50
|
|
|
|
5480
|
$self->error_stream_mode('write') unless $self->writing; |
22
|
1632
|
|
|
|
|
2636
|
my $bits = shift; |
23
|
1632
|
50
|
33
|
|
|
9248
|
$self->error_code('param', 'bits must be in range 1-16') unless $bits >= 1 && $bits <= 16; |
24
|
|
|
|
|
|
|
|
25
|
1632
|
50
|
|
|
|
9722
|
return $self->put_unary(@_) if $bits == 1; |
26
|
1632
|
|
|
|
|
2934
|
my $comma = ~(~0 << $bits); # 1 x $bits is the terminator |
27
|
1632
|
|
|
|
|
3077
|
my $base = 2**$bits - 1; # The base of the digits we're writing |
28
|
|
|
|
|
|
|
|
29
|
1632
|
|
|
|
|
3106
|
foreach my $val (@_) { |
30
|
4686
|
100
|
100
|
|
|
22862
|
$self->error_code('zeroval') unless defined $val and $val >= 0; |
31
|
|
|
|
|
|
|
|
32
|
4682
|
100
|
|
|
|
12668
|
if ($val == 0) { $self->write( $bits, $comma ); next; } # c |
|
150
|
|
|
|
|
535
|
|
|
150
|
|
|
|
|
339
|
|
33
|
|
|
|
|
|
|
|
34
|
4532
|
|
|
|
|
7116
|
my $v = $val; |
35
|
4532
|
|
|
|
|
8287
|
my @stack = ($comma); |
36
|
4532
|
|
|
|
|
12000
|
while ($v > 0) { |
37
|
17345
|
|
|
|
|
23980
|
push @stack, $v % $base; |
38
|
17345
|
|
|
|
|
38018
|
$v = int($v / $base); |
39
|
|
|
|
|
|
|
} |
40
|
|
|
|
|
|
|
# Write the stack. Simple way: |
41
|
|
|
|
|
|
|
# $self->write($bits, pop @stack) while @stack; |
42
|
4532
|
|
|
|
|
6029
|
my $cword = 0; |
43
|
4532
|
|
|
|
|
5013
|
my $cbits = 0; |
44
|
4532
|
|
|
|
|
10854
|
while (@stack) { |
45
|
21877
|
|
|
|
|
34839
|
$cword = ($cword << $bits) | pop @stack; |
46
|
21877
|
|
|
|
|
54174
|
$cbits += $bits; |
47
|
21877
|
50
|
|
|
|
60768
|
if (($cbits + $bits) > 32) { |
48
|
0
|
|
|
|
|
0
|
$self->write($cbits, $cword); |
49
|
0
|
|
|
|
|
0
|
$cword = 0; |
50
|
0
|
|
|
|
|
0
|
$cbits = 0; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
} |
53
|
4532
|
50
|
|
|
|
20201
|
$self->write($cbits, $cword) if $cbits; |
54
|
|
|
|
|
|
|
} |
55
|
1628
|
|
|
|
|
7568
|
1; |
56
|
|
|
|
|
|
|
} |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub get_comma { |
59
|
1673
|
|
|
1673
|
1
|
25616
|
my $self = shift; |
60
|
1673
|
50
|
|
|
|
7757
|
$self->error_stream_mode('read') if $self->writing; |
61
|
1673
|
|
|
|
|
2401
|
my $bits = shift; |
62
|
1673
|
50
|
33
|
|
|
9987
|
$self->error_code('param', 'bits must be in range 1-16') unless $bits >= 1 && $bits <= 16; |
63
|
|
|
|
|
|
|
|
64
|
1673
|
50
|
|
|
|
4109
|
return $self->get_unary(@_) if $bits == 1; |
65
|
1673
|
|
|
|
|
2809
|
my $comma = ~(~0 << $bits); # 1 x $bits is the terminator |
66
|
1673
|
|
|
|
|
9905
|
my $base = 2**$bits - 1; # The base of the digits we're writing |
67
|
|
|
|
|
|
|
|
68
|
1673
|
|
|
|
|
2138
|
my $count = shift; |
69
|
1673
|
100
|
|
|
|
3859
|
if (!defined $count) { $count = 1; } |
|
1631
|
50
|
|
|
|
2478
|
|
|
|
0
|
|
|
|
|
|
70
|
42
|
|
|
|
|
73
|
elsif ($count < 0) { $count = ~0; } # Get everything |
71
|
0
|
|
|
|
|
0
|
elsif ($count == 0) { return; } |
72
|
|
|
|
|
|
|
|
73
|
1673
|
|
|
|
|
2126
|
my @vals; |
74
|
1673
|
|
|
|
|
5636
|
$self->code_pos_start('Comma'); |
75
|
1673
|
|
|
|
|
61608
|
while ($count-- > 0) { |
76
|
4769
|
|
|
|
|
15668
|
$self->code_pos_set; |
77
|
4769
|
|
|
|
|
624026
|
my $tval = $self->read($bits); |
78
|
4769
|
100
|
|
|
|
13495
|
last unless defined $tval; |
79
|
|
|
|
|
|
|
|
80
|
4725
|
|
|
|
|
6089
|
my $val = 0; |
81
|
4725
|
|
|
|
|
11073
|
while ($tval != $comma) { |
82
|
18053
|
|
|
|
|
23970
|
$val = $base * $val + $tval; |
83
|
18053
|
|
|
|
|
52454
|
$tval = $self->read($bits); |
84
|
18050
|
50
|
|
|
|
61112
|
$self->error_off_stream unless defined $tval; |
85
|
|
|
|
|
|
|
} |
86
|
4722
|
|
|
|
|
15276
|
push @vals, $val; |
87
|
|
|
|
|
|
|
} |
88
|
1670
|
|
|
|
|
7118
|
$self->code_pos_end; |
89
|
1670
|
100
|
|
|
|
60457
|
wantarray ? @vals : $vals[-1]; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
28
|
|
|
28
|
|
29101
|
no Moo::Role; |
|
28
|
|
|
|
|
85
|
|
|
28
|
|
|
|
|
192
|
|
93
|
|
|
|
|
|
|
1; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# ABSTRACT: A Role implementing Comma codes |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=pod |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head1 NAME |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Data::BitStream::Code::Comma - A Role implementing Comma codes |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head1 VERSION |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
version 0.08 |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head1 DESCRIPTION |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
A role written for L that provides get and set methods for |
110
|
|
|
|
|
|
|
Comma codes. The role applies to a stream object. |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Comma codes are described in many sources. The codes are written in C-bit |
113
|
|
|
|
|
|
|
chunks, where a chunk consisting of all 1 bits indicates the end of the code. |
114
|
|
|
|
|
|
|
The number to be encoded is stored in base C<2^k-1>. The case of 1-bit comma |
115
|
|
|
|
|
|
|
codes degenerates into unary codes. The most common comma code in current use |
116
|
|
|
|
|
|
|
is the ternary comma code which uses 2-bit chunks and stores the number in |
117
|
|
|
|
|
|
|
base 3 (hence why it is called ternary comma). Example for ternary comma: |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
value code binary bits |
120
|
|
|
|
|
|
|
0 c 11 2 |
121
|
|
|
|
|
|
|
1 1c 0111 4 |
122
|
|
|
|
|
|
|
2 2c 1011 4 |
123
|
|
|
|
|
|
|
3 10c 010011 6 |
124
|
|
|
|
|
|
|
4 11c 010111 6 |
125
|
|
|
|
|
|
|
.. 8 22c 101011 6 |
126
|
|
|
|
|
|
|
9 100c 01000011 8 |
127
|
|
|
|
|
|
|
.. 64 2101c 1001000111 10 |
128
|
|
|
|
|
|
|
.. 10000 111201101c 01010110000101000111 20 |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Comma codes using larger chunks compact larger numbers better, but the |
131
|
|
|
|
|
|
|
terminator also grows. This means smaller values take more bits to encode, |
132
|
|
|
|
|
|
|
and all codes have many wasted bits after the information. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
Also note that skipping the leading C<0>s for all codes results in a large |
135
|
|
|
|
|
|
|
waste of space. For instance, the codes C<0xc>, C<0xxc>, C<0xxxc>, etc. are |
136
|
|
|
|
|
|
|
all not used, even though they are uniquely decodable. Note that Fenwick's |
137
|
|
|
|
|
|
|
table 6 (p6) shows C<0c> being used, but no other leading zero. This is not |
138
|
|
|
|
|
|
|
the case in Sayood's table 3.19 (p71) where no entry has a leading zero. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
These codes are a special case of the block-based taboo codes (Pigeon 2001). |
141
|
|
|
|
|
|
|
The taboo codes fully utilize all the bits. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head1 METHODS |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=head2 Provided Object Methods |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=over 4 |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=item B< put_comma($bits, $value) > |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=item B< put_comma($bits, @values) > |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Insert one or more values as Comma codes using C<$bits> bits. Returns 1. |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=item B< get_comma($bits) > |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item B< get_comma($bits, $count) > |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Decode one or more Comma codes from the stream. If count is omitted, |
160
|
|
|
|
|
|
|
one value will be read. If count is negative, values will be read until |
161
|
|
|
|
|
|
|
the end of the stream is reached. In scalar context it returns the last |
162
|
|
|
|
|
|
|
code read; in array context it returns an array of all codes read. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=back |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head2 Parameters |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
The parameter C must be an integer between 1 and 16. This indicates |
169
|
|
|
|
|
|
|
the number of bits used per chunk. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
If C is 1, then unary coding is used. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Ternary comma coding is the special case of comma coding with C. |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Byte coding is the special case of comma coding with C. |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head2 Required Methods |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=over 4 |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=item B< read > |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=item B< write > |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
These methods are required for the role. |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=back |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=head1 SEE ALSO |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=over 4 |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=item Peter Fenwick, "Punctured Elias Codes for variable-length coding of the integers", Technical Report 137, Department of Computer Science, University of Auckland, December 1996. |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=item Peter Fenwick, "Ziv-Lempel encoding with multi-bit flags", Proc. Data Compression Conference (IEEE DCC), Snowbird, Utah, pp 138-147, March 1993. |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=item Khalid Sayood (editor), "Lossless Compression Handbook", 2003. |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=back |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=head1 AUTHORS |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
Dana Jacobsen |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=head1 COPYRIGHT |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
Copyright 2012 by Dana Jacobsen |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=cut |