| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Compress::LZW::Compressor; | 
| 2 |  |  |  |  |  |  | { | 
| 3 |  |  |  |  |  |  | $Compress::LZW::Compressor::VERSION = '0.03'; | 
| 4 |  |  |  |  |  |  | } | 
| 5 |  |  |  |  |  |  | # ABSTRACT: Scaling LZW compressor class | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  |  | 
| 9 | 4 |  |  | 4 |  | 71727 | use Compress::LZW qw(:const); | 
|  | 4 |  |  |  |  | 11 |  | 
|  | 4 |  |  |  |  | 931 |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 | 4 |  |  | 4 |  | 6575 | use Types::Standard qw( Bool Int ); | 
|  | 4 |  |  |  |  | 242916 |  | 
|  | 4 |  |  |  |  | 62 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 | 4 |  |  | 4 |  | 18055 | use Moo; | 
|  | 4 |  |  |  |  | 57378 |  | 
|  | 4 |  |  |  |  | 27 |  | 
| 14 | 4 |  |  | 4 |  | 7502 | use namespace::clean; | 
|  | 4 |  |  |  |  | 45612 |  | 
|  | 4 |  |  |  |  | 33 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | has block_mode => ( | 
| 18 |  |  |  |  |  |  | is      => 'ro', | 
| 19 |  |  |  |  |  |  | default => 1, | 
| 20 |  |  |  |  |  |  | isa     => Bool, | 
| 21 |  |  |  |  |  |  | ); | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | has lsb_first => ( | 
| 25 |  |  |  |  |  |  | is      => 'ro', | 
| 26 |  |  |  |  |  |  | default => \&Compress::LZW::_detect_lsb_first, | 
| 27 |  |  |  |  |  |  | isa     => Bool, | 
| 28 |  |  |  |  |  |  | ); | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | has max_code_size => ( # max bits | 
| 32 |  |  |  |  |  |  | is      => 'ro', | 
| 33 |  |  |  |  |  |  | default => 16, | 
| 34 |  |  |  |  |  |  | isa     => Type::Tiny->new( | 
| 35 |  |  |  |  |  |  | parent     => Int, | 
| 36 |  |  |  |  |  |  | constraint => sub { $_ >= 9 and $_ < $BITS_MASK }, | 
| 37 |  |  |  |  |  |  | message    => sub { "$_ isn't between 9 and $BITS_MASK" }, | 
| 38 |  |  |  |  |  |  | ), | 
| 39 |  |  |  |  |  |  | ); | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | has init_code_size => ( | 
| 43 |  |  |  |  |  |  | is      => 'ro', | 
| 44 |  |  |  |  |  |  | default => 9, | 
| 45 |  |  |  |  |  |  | isa     => Type::Tiny->new( | 
| 46 |  |  |  |  |  |  | parent     => Int, | 
| 47 |  |  |  |  |  |  | constraint => sub { $_ >= 9 and $_ <= $BITS_MASK }, | 
| 48 |  |  |  |  |  |  | message    => sub { "$_ isn't between 9 and $BITS_MASK" }, | 
| 49 |  |  |  |  |  |  | ), | 
| 50 |  |  |  |  |  |  | ); | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | has _code_size => ( # current bits | 
| 53 |  |  |  |  |  |  | is       => 'rw', | 
| 54 |  |  |  |  |  |  | clearer  => 1, | 
| 55 |  |  |  |  |  |  | lazy     => 1, | 
| 56 |  |  |  |  |  |  | builder  => sub { | 
| 57 | 12 |  |  | 12 |  | 2243 | $_[0]->init_code_size; | 
| 58 |  |  |  |  |  |  | }, | 
| 59 |  |  |  |  |  |  | ); | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | has _buf => ( | 
| 62 |  |  |  |  |  |  | is      => 'lazy', | 
| 63 |  |  |  |  |  |  | clearer => 1, | 
| 64 |  |  |  |  |  |  | builder => sub { | 
| 65 | 11 |  |  | 11 |  | 1951 | my $self = shift; | 
| 66 |  |  |  |  |  |  |  | 
| 67 | 11 | 100 |  |  |  | 129 | my $buf = $MAGIC | 
| 68 |  |  |  |  |  |  | . chr( $self->max_code_size | ( $self->block_mode ? $BLOCK_MASK : 0 ) ); | 
| 69 |  |  |  |  |  |  |  | 
| 70 | 11 |  |  |  |  | 54 | $self->_buf_size( length($buf) * 8 ); | 
| 71 | 11 |  |  |  |  | 57 | return \$buf; | 
| 72 |  |  |  |  |  |  | }, | 
| 73 |  |  |  |  |  |  | ); | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  | has _buf_size => ( #track our endpoint in bits | 
| 76 |  |  |  |  |  |  | is      => 'rw', | 
| 77 |  |  |  |  |  |  | ); | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | has _code_table => ( | 
| 80 |  |  |  |  |  |  | is      => 'ro', | 
| 81 |  |  |  |  |  |  | lazy    => 1, | 
| 82 |  |  |  |  |  |  | clearer => 1, | 
| 83 |  |  |  |  |  |  | builder => sub { | 
| 84 |  |  |  |  |  |  | return { | 
| 85 | 12 |  |  | 12 |  | 1986 | map { chr($_) => $_ } 0 .. 255 | 
|  | 3072 |  |  |  |  | 9480 |  | 
| 86 |  |  |  |  |  |  | }; | 
| 87 |  |  |  |  |  |  | }, | 
| 88 |  |  |  |  |  |  | ); | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | has _next_code => ( | 
| 91 |  |  |  |  |  |  | is      => 'rw', | 
| 92 |  |  |  |  |  |  | lazy    => 1, | 
| 93 |  |  |  |  |  |  | clearer => 1, | 
| 94 |  |  |  |  |  |  | builder => sub { | 
| 95 | 12 | 100 |  | 12 |  | 2128 | $_[0]->block_mode ? 257 : 256; | 
| 96 |  |  |  |  |  |  | }, | 
| 97 |  |  |  |  |  |  | ); | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | sub compress { | 
| 101 | 11 |  |  | 11 | 1 | 1861959 | my $self = shift; | 
| 102 | 11 |  |  |  |  | 25 | my ( $str ) = @_; | 
| 103 |  |  |  |  |  |  |  | 
| 104 | 11 |  |  |  |  | 343 | $self->reset; | 
| 105 |  |  |  |  |  |  |  | 
| 106 | 11 |  |  |  |  | 33 | my $seen = ''; | 
| 107 | 11 |  |  |  |  | 48 | for ( 0 .. length($str) ){ | 
| 108 | 8539300 |  |  |  |  | 14060685 | my $char = substr($str, $_, 1); | 
| 109 |  |  |  |  |  |  |  | 
| 110 | 8539300 | 100 |  |  |  | 207713244 | if ( exists $self->_code_table->{ $seen . $char } ){ | 
| 111 | 8420572 |  |  |  |  | 81319266 | $seen .= $char; | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  | else { | 
| 114 | 118728 |  |  |  |  | 3775445 | $self->_buf_write( $self->_code_table->{ $seen } ); | 
| 115 |  |  |  |  |  |  |  | 
| 116 | 118728 |  |  |  |  | 459212 | $self->_new_code( $seen . $char ); | 
| 117 |  |  |  |  |  |  |  | 
| 118 | 118728 |  |  |  |  | 895497 | $seen = $char; | 
| 119 |  |  |  |  |  |  | } | 
| 120 |  |  |  |  |  |  | } | 
| 121 | 11 |  |  |  |  | 240 | $self->_buf_write( $self->_code_table->{ $seen } );  #last bit of input | 
| 122 |  |  |  |  |  |  |  | 
| 123 | 11 |  |  |  |  | 19 | return ${ $self->_buf }; | 
|  | 11 |  |  |  |  | 261 |  | 
| 124 |  |  |  |  |  |  | } | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | sub reset { | 
| 129 | 11 |  |  | 11 | 1 | 21 | my $self = shift; | 
| 130 |  |  |  |  |  |  |  | 
| 131 | 11 |  |  |  |  | 50 | $self->_reset_code_table; | 
| 132 | 11 |  |  |  |  | 1953 | $self->_clear_buf; | 
| 133 | 11 |  |  |  |  | 7369 | $self->_buf_size( 0 ); | 
| 134 |  |  |  |  |  |  | } | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | sub _reset_code_table { | 
| 138 | 12 |  |  | 12 |  | 24 | my $self = shift; | 
| 139 |  |  |  |  |  |  |  | 
| 140 | 12 |  |  |  |  | 311 | $self->_clear_code_table; | 
| 141 | 12 |  |  |  |  | 2730 | $self->_clear_next_code; | 
| 142 | 12 |  |  |  |  | 2062 | $self->_clear_code_size; | 
| 143 |  |  |  |  |  |  | } | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | sub _new_code { | 
| 146 | 118728 |  |  | 118728 |  | 203803 | my $self = shift; | 
| 147 | 118728 |  |  |  |  | 192828 | my ( $data ) = @_; | 
| 148 |  |  |  |  |  |  |  | 
| 149 | 118728 |  |  |  |  | 3269555 | my $code = $self->_next_code; | 
| 150 |  |  |  |  |  |  |  | 
| 151 | 118728 | 100 |  |  |  | 3871345 | if ( $code == (2 ** $self->_code_size) ){ | 
| 152 | 53 | 100 |  |  |  | 1498 | if ( $self->_code_size < $self->max_code_size ){ | 
|  |  | 100 |  |  |  |  |  | 
| 153 |  |  |  |  |  |  |  | 
| 154 | 19 |  |  |  |  | 566 | $self->_code_size($self->_code_size + 1 ); | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | } | 
| 157 |  |  |  |  |  |  | elsif ( $self->block_mode ){ | 
| 158 |  |  |  |  |  |  | # FINISHME | 
| 159 |  |  |  |  |  |  | # if compress(1) comparable we need to do a code table reset | 
| 160 |  |  |  |  |  |  | # ... when the ratio falls after reaching this point. | 
| 161 |  |  |  |  |  |  | # this doesn't need to be perfect, the only part that needs | 
| 162 |  |  |  |  |  |  | # match algorithm-wise is what code tables are built the same | 
| 163 |  |  |  |  |  |  | # after a reset. | 
| 164 |  |  |  |  |  |  | #~ warn "Resetting code table at $code"; | 
| 165 | 1 |  |  |  |  | 18 | $self->_reset_code_table; | 
| 166 | 1 |  |  |  |  | 10 | $self->_buf_write( $RESET_CODE ); | 
| 167 |  |  |  |  |  |  | } | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  |  | 
| 170 | 118728 | 100 |  |  |  | 3988135 | if ( $code >= (2 ** $self->_code_size) ){ | 
| 171 | 34 |  |  |  |  | 330 | return; # must not have been able to increase bits, we're capped | 
| 172 |  |  |  |  |  |  | } | 
| 173 |  |  |  |  |  |  |  | 
| 174 | 118694 |  |  |  |  | 3859654 | $self->_code_table->{ $data } = $code; | 
| 175 | 118694 |  |  |  |  | 46797678 | $self->_next_code( $code + 1 ); | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | } | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | sub _buf_write { | 
| 180 | 118740 |  |  | 118740 |  | 1167095 | my $self = shift; | 
| 181 | 118740 |  |  |  |  | 191424 | my ( $code ) = @_; | 
| 182 |  |  |  |  |  |  |  | 
| 183 | 118740 | 50 |  |  |  | 352867 | return unless defined $code; | 
| 184 |  |  |  |  |  |  |  | 
| 185 | 118740 |  |  |  |  | 3179926 | my $code_size = $self->_code_size; | 
| 186 | 118740 |  |  |  |  | 3878429 | my $buf       = $self->_buf; | 
| 187 | 118740 |  |  |  |  | 1042755 | my $buf_size  = $self->_buf_size; | 
| 188 |  |  |  |  |  |  |  | 
| 189 | 118740 | 50 |  |  |  | 467121 | if ( $code > ( 2 ** $code_size ) ){ | 
| 190 | 0 |  |  |  |  | 0 | die "Code value $code too high for current code size $code_size"; | 
| 191 |  |  |  |  |  |  | } | 
| 192 | 118740 | 100 |  |  |  | 428770 | my $wpos = $self->lsb_first ? $buf_size : ( $buf_size + $code_size - 1 ); | 
| 193 |  |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | #~ warn "write $code \tat $code_size bits\toffset $wpos (byte ".int($wpos/8) . ')'; | 
| 195 |  |  |  |  |  |  |  | 
| 196 | 118740 | 50 |  |  |  | 277291 | if ( $code == 1 ){ | 
| 197 | 0 |  |  |  |  | 0 | vec( $$buf, $wpos, 1 ) = 1; | 
| 198 |  |  |  |  |  |  | } | 
| 199 |  |  |  |  |  |  | else { | 
| 200 | 118740 |  |  |  |  | 339437 | for my $bit ( 0 .. $code_size-1 ){ | 
| 201 |  |  |  |  |  |  |  | 
| 202 | 1770101 | 100 |  |  |  | 4095579 | if ( ($code >> $bit) & 1 ){ | 
| 203 | 912893 | 100 |  |  |  | 3976504 | vec( $$buf, $wpos + ($self->lsb_first ? $bit : 0 - $bit ), 1 ) = 1; | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  | } | 
| 206 |  |  |  |  |  |  | } | 
| 207 |  |  |  |  |  |  |  | 
| 208 | 118740 |  |  |  |  | 445850 | $self->_buf_size( $buf_size + $code_size ); | 
| 209 |  |  |  |  |  |  | } | 
| 210 |  |  |  |  |  |  |  | 
| 211 |  |  |  |  |  |  | 1; | 
| 212 |  |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | __END__ |