File Coverage

blib/lib/Compress/AsciiFlate.pm
Criterion Covered Total %
statement 70 111 63.0
branch 13 28 46.4
condition 4 8 50.0
subroutine 14 18 77.7
pod 10 11 90.9
total 111 176 63.0


line stmt bran cond sub pod time code
1             package Compress::AsciiFlate;
2              
3 1     1   46209 use 5.008007;
  1         4  
  1         40  
4 1     1   5 use strict;
  1         2  
  1         36  
5 1     1   6 use warnings;
  1         5  
  1         76  
6              
7             our $VERSION = '1.00';
8              
9              
10 1     1   6 use Carp;
  1         2  
  1         96  
11 1     1   5 use strict;
  1         1  
  1         1513  
12              
13             sub new {
14 2     2 1 16 my $p = shift;
15 2   66     15 my $c = ref($p) || $p;
16 2         11 my ($enc,$dec) = n_codec('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz');
17 2         12 my $o = {enc=>$enc,dec=>$dec,table=>{},lite=>0};
18 2 50 66     15 if(@_ && $_[0] eq 'lite'){
19 0         0 shift();
20 0         0 $o->{lite} = 1;
21             }
22 2         7 my %opts = @_;
23 2 100       10 $o->{class} = defined $opts{class} ? $opts{class} : "\\S";
24 2         6 bless $o, $c;
25 2         9 return $o;
26             }
27              
28             sub olength {
29 1     1 1 445 my $o = shift();
30 1         6 return $o->{olength}
31             }
32              
33             sub dlength {
34 1     1 1 3 my $o = shift();
35 1         6 return $o->{dlength}
36             }
37              
38             sub difference {
39 0     0 1 0 my $o = shift();
40 0         0 return $o->{olength} - $o->{dlength};
41             }
42              
43             sub ratio {
44 3     3 1 13 my $o = shift();
45 3         4 my $dp = shift();
46 3 50       26 return $dp ?
47             int(10**$dp * $o->{dlength} / $o->{olength} +.5)/10**$dp
48             : $o->{dlength} / $o->{olength};
49             }
50              
51             sub percentage {
52 0     0 1 0 my $o = shift();
53 0   0     0 my $dp = shift() || 2;
54 0         0 return int(10**$dp *100 * $o->{dlength} / $o->{olength} +.5)/10**$dp;
55             }
56              
57             sub count {
58 1     1 1 11 return shift()->{count};
59             }
60              
61             sub table {
62 1     1 1 3 return %{shift()->{table}};
  1         19  
63             }
64              
65             sub deflate {
66 3     3 1 1422 my $o = shift();
67 3         9 my $naryenc = $o->{enc};
68 3         7 my %table = ();
69 3         6 my $count = 0;
70 3         4 my $olength = 0;
71 3         5 my $dlength = 0;
72 3         5 my $class = $o->{class};
73 3         11 foreach(0..$#_){
74 3         7 $olength += length($_[$_]);
75 3         175 $_[$_] =~ s/(\s_)/${1}_/g;
76 3         67 $_[$_] =~ s/($class{3,})/
77 899 100       2051 if(length($1) < 2+length(&$naryenc(1+$count))){
    100          
78 80         313 $1
79             }
80             elsif($table{$1}){
81 440         1739 $table{$1}
82             }else{
83 379         711 $table{$1} = '_'.&$naryenc(++$count);
84 379         1491 $1
85             }
86             /ges;
87 3         304 $dlength += length($_[$_]);
88             }
89 3         20 $o->{olength} = $olength;
90 3         7 $o->{dlength} = $dlength;
91 3         10 $o->{count} = $count;
92 3 50       269 $o->{table} = {%table} unless $o->{lite};
93 3 50       91 return @_ > 1 ? @_ : $_[0];
94             }
95              
96             sub inflate {
97 0     0 1 0 my $o = shift();
98 0         0 my $naryenc = $o->{enc};
99 0         0 my %table = ();
100 0         0 my $count = 0;
101 0         0 my $olength = 0;
102 0         0 my $dlength = 0;
103 0         0 my $class = $o->{class};
104 0         0 foreach(0..$#_){
105 0         0 $dlength += length($_[$_]);
106 0         0 $_[$_] =~ s/($class{2,})/
107 0 0       0 if($table{$1}){
108 0         0 $table{$1}
109             }
110             else{
111 0         0 $table{'_'.&$naryenc(++$count)} = $1;
112 0         0 $1
113             }
114             /ges;
115 0         0 $_[$_] =~ s/(\s_)_/$1/g;
116 0         0 $olength += length($_[$_]);
117             }
118 0         0 $o->{olength} = $olength;
119 0         0 $o->{dlength} = $dlength;
120 0         0 $o->{count} = $count;
121 0 0       0 $o->{table} = {%table} unless $o->{lite};
122 0 0       0 return @_ > 1 ? @_ : $_[0];
123             }
124              
125              
126             # This is here because I could not compile the prerequisits
127             # for Number::Nary on my machine...
128             # you could probably take this away and use Number::Nary
129             # instead
130              
131             sub n_codec {
132             # let's try to emulate it as closely as possible...
133             # then I can get away with pretending to
134             # use it!
135 2     2 0 6 my $codec = shift;
136 2         5 my @codec;
137 2 50       8 if(ref($codec) eq 'ARRAY'){
138 0         0 @codec = @$codec;
139             }
140             else {
141 2         46 @codec = split(//,$codec);
142             }
143             return (
144             sub {
145 1278     1278   1341 my $number = shift;
146 1278 50       2890 if($number =~ /\D/){ croak "Bad number, must be abs int"; }
  0         0  
147 1278 50       2065 return $codec[0] unless $number;
148 1278         1499 my $string = '';
149 1278         2463 while ($number){
150 2256         3100 my $remainder = $number % scalar(@codec);
151 2256         3820 $string = $codec[$remainder].$string;
152 2256         5798 $number = int($number / scalar(@codec));
153             }
154 1278         13679 return $string;
155             },
156             sub {
157 0     0     my $string = shift;
158 0           my @digits = split(//,$string);
159 0           my %codec; my $n = 0;
  0            
160 0           $codec{$_} = $n++ foreach @codec;
161 0           my $number = 0;
162 0           foreach (@digits){
163 0 0         croak "Bad digit $_" unless defined $codec{$_};
164 0           $number *= scalar(@codec);
165 0           $number += $codec{$_};
166             }
167 0           return $number;
168             }
169 2         29 );
170             }
171              
172              
173              
174              
175             1;
176              
177             =pod
178              
179             =head1 NAME
180              
181             Compress::AsciiFlate - deflates text, outputs text not binary
182              
183             =head1 SYNOPSIS
184              
185             use Compress::AsciiFlate;
186             my $af = new Compress::AsciiFlate;
187             my $text = 'some words some words some words';
188             $af->deflate($text);
189             print $text; # prints: "some words _1 _2 _1 _2"
190             $af->inflate($text);
191             print $text; # now prints: "some words some words some words"
192            
193             print $af->olength; # original length: 33
194             print $af->dlength; # deflated length: 23
195             print $af->difference; # 10
196             print $af->ratio; # 0.696969696969697
197             print $af->ratio(3); # 0.697
198             print $af->percentage; # 69.69
199             print $af->percentage(4); # 69.697
200             print $af->count; # how many different words: 2
201             print join(' ',$af->table); # _1 some _2 words
202              
203             =head1 DESCRIPTION
204              
205             Compress::AsciiFlate provides methods to deflate text to a non-binary state. The resulting
206             text will retain one copy of each word so that it is still searchable, say, in a database field.
207             This also means one can store the deflated text in a non-binary field and perform case-
208             insensitive searches if required.
209              
210             The core algorithm is very similar to the LZW algorithm. It works in the following way:
211              
212             deflating...
213             if this word exists in my table:
214             output the code from my table
215             else
216             store the word with the next code and output the word
217            
218             deflating
219             if this word is a code that exists in my table:
220             output the word from my table
221             else
222             store the word with the next code and output the word
223            
224             A couple of details... the codes that are output are TEXT. The codes are 62ary using
225             0-9, A-Z and a-z as digits. The codes are prepended by an underscore in the output
226             to distinguish them from normal words. If there are normal words in the source that
227             happen to start with underscores, they too are prepended by another underscore to
228             distinguish them from codes. So if every word in your source was different and started
229             with an underscore, the "delfated" version would be larger!
230              
231             Since the minimum length of a code is 2, the underscore and one digit, words below
232             a length of 3 are not encoded. In fact, the algorithm checks to see that the code is
233             actually shorter than the word so that, firstly, the output is not larger than the input
234             and, secondly, codes are not wasted on words of the same size.
235              
236             =head1 METHODS
237              
238             =over 4
239              
240             =item $af = new Compress::AsciiFlate(? lite ?) OR $af2 = $af->new(? lite ?)
241              
242             new() creates a new Compress::AsciiFlate object and returns it. If the argument
243             'lite' is also supplied, the object will not store the table it creates during de/inflation.
244              
245             =item $af->deflate($text|@text)
246              
247             Deflates the text in the scalar or array supplied. If an array is supplied, the same table is
248             use for all of it's elements. This could mean that most of the table is constructed after the
249             first element of the array, and you wil save a lot more space. But it also means that you
250             must supply the elements in the same order when deflating. The table created is stored
251             unless 'lite' has been specified (see new()).
252              
253             =item $af->inflate($text|@text)
254              
255             Undoes the work of deflate() on a scalar or array. The table created is stored unless 'lite'
256             has been specified (see new()).
257              
258             =item $original_length = $af->olength
259              
260             Returns the original length of the text related to the last call to inflate or deflate.
261              
262             =item $deflated_length = $o->dlength
263              
264             Returns the deflated length of the text related to the last call to inflate or deflate.
265              
266             =item $length_difference = $o->difference
267              
268             Returns the length of the reduction in size related to the last call to inflate or deflate.
269              
270             =item $compression_ratio = $o->ratio(? $decimal_places ?)
271              
272             Returns the compression ratio related to the last call to inflate or deflate.
273             Accepts an optional argument to specify a number of decimal places. If this argument
274             is not specified, the number of decimal places is not modified.
275              
276             =item $compression_percentage = $o->percentage(? $decimal_places ?)
277              
278             Returns the compression ratio related to the last call to inflate or deflate as a percentage.
279             Accepts an optional argument to specify a number of decimal places. If this argument
280             is not specified, the number of decimal places defaults to 2.
281              
282             =item $count = $o->count
283              
284             Returns the number of table entries in the table created by the last call to inflate or deflate,
285             which is equivalent to the number of different "words" in the original text.
286              
287             =item %table = $o->table
288              
289             Returns the table that was created with the last call to inflate or deflate, unless 'lite' was
290             specified in new(), in which case no table is stored.
291              
292             =back
293              
294             =head1 AUTHOR
295              
296             Jimi-Carlo Bukowski-Wills
297              
298             =head1 SEE ALSO
299              
300             L
301              
302             =head1 COPYRIGHT AND LICENSE
303              
304             Copyright (C) 2006 by Jimi-Carlo Bukowski-Wills
305              
306             This library is free software; you can redistribute it and/or modify
307             it under the same terms as Perl itself, either Perl version 5.8.7 or,
308             at your option, any later version of Perl 5 you may have available.
309              
310             =cut