File Coverage

blib/lib/EBook/Ishmael/MobiHuff.pm

Criterion	Covered	Total	%
statement	11	91	12.0
branch	0	16	0.0
condition	0	3	0.0
subroutine	4	8	50.0
pod	2	2	100.0
total	17	120	14.1

line	stmt	bran	cond	sub	pod	time	code
1							package EBook::Ishmael::MobiHuff;
2	17			17		351	use 5.016;
	17					67
3							our $VERSION = '2.01';
4	17			17		118	use strict;
	17					39
	17					470
5	17			17		81	use warnings;
	17					33
	17					1170
6
7	17			17		108	use List::Util qw(max min);
	17					56
	17					23288
8
9							our $UNPACK_Q = !!eval { pack "Q>", 1 };
10
11							# Many thanks to Calibre, much of the code in this module was based on their
12							# huffman decoder.
13
14							my $HUFF_HDR = pack "A4 N", 'HUFF', 24;
15							my $CDIC_HDR = pack "A4 N", 'CDIC', 16;
16
17							sub _load_huff {
18
19	0			0			my $self = shift;
20	0						my $huff = shift;
21
22	0	0					unless (substr($huff, 0, 8) eq $HUFF_HDR) {
23	0						die "Invalid MOBI HUFF header\n";
24							}
25
26	0						my @off = unpack "N N", substr $huff, 8, 8;
27
28	0						@{ $self->{dict1} } = map {
29
30	0						my $len = $_ & 0x1f;
	0
31	0						my $term = $_ & 0x80;
32	0						my $max = $_ >> 8;
33
34	0	0					if ($len == 0) {
35	0						die "Invalid MOBI HUFF dictionary\n";
36							}
37
38	0	0	0				if ($len <= 8 and !$term) {
39	0						die "Invalid MOBI HUFF dictionary\n";
40							}
41
42	0						$max = (($max + 1) << (32 - $len)) - 1;
43
44	0						[ $len, $term, $max ];
45
46							} unpack "N256", substr $huff, $off[0], 4 * 256;
47
48	0						my @dict2 = unpack "N64", substr $huff, $off[1], 4 * 64;
49
50	0						my @mins = (0, map { $dict2[$_] } grep { $_ % 2 == 0 } (0 .. $#dict2));
	0
	0
51	0						my @maxs = (0, map { $dict2[$_] } grep { $_ % 2 != 0 } (0 .. $#dict2));
	0
	0
52
53	0						$self->{mincode} = [ map { $mins[$_] << (32 - $_) } (0 .. $#mins) ];
	0
54	0						$self->{maxcode} = [ map { (($maxs[$_] + 1) << (32 - $_)) - 1 } (0 .. $#maxs) ];
	0
55
56	0						return 1;
57
58							}
59
60							sub _load_cdic {
61
62	0			0			my $self = shift;
63	0						my $cdic = shift;
64
65	0	0					unless (substr($cdic, 0, 8) eq $CDIC_HDR) {
66	0						die "Invalid MOBI CDIC header\n";
67							}
68
69	0						my ($phrases, $bits) = unpack "N N", substr $cdic, 8, 8;
70
71	0						my $n = min(1 << $bits, $phrases - @{ $self->{dictionary} });
	0
72
73	0						push @{ $self->{dictionary} }, map {
74
75	0						my $blen = unpack "n", substr $cdic, 16 + $_;
	0
76
77							[
78	0						substr($cdic, 18 + $_, $blen & 0x7fff),
79							$blen & 0x8000,
80							];
81
82							} unpack "n$n", substr $cdic, 16;
83
84	0						return 1;
85
86							}
87
88							sub new {
89
90	0			0	1		my $class = shift;
91	0						my $huff = shift;
92	0						my @cdic = @_;
93
94	0						my $self = {
95							dict1 => [],
96							dictionary => [],
97							mincode => [],
98							maxcode => [],
99							};
100
101	0						bless $self, $class;
102
103	0						$self->_load_huff($huff);
104
105	0						for my $c (@cdic) {
106	0						$self->_load_cdic($c);
107							}
108
109	0						return $self;
110
111							}
112
113							sub decode {
114
115	0			0	1		my $self = shift;
116	0						my $data = shift;
117
118	0						my $left = length($data) * 8;
119	0						$data .= "\x00" x 8;
120	0						my $pos = 0;
121	0						my $x = unpack "Q>", $data;
122	0						my $n = 32;
123
124	0						my $s = '';
125
126	0						while (1) {
127
128	0	0					if ($n <= 0) {
129	0						$pos += 4;
130	0						$x = unpack "Q>", substr $data, $pos, 8;
131	0						$n += 32;
132							}
133	0						my $code = ($x >> $n) & ((1 << 32) - 1);
134
135	0						my ($len, $term, $max) = @{ $self->{dict1}[$code >> 24] };
	0
136	0	0					unless ($term) {
137	0						$len += 1 while $code < $self->{mincode}[$len];
138	0						$max = $self->{maxcode}[$len];
139							}
140
141	0						$n -= $len;
142	0						$left -= $len;
143	0	0					last if $left < 0;
144
145	0						my $r = ($max - $code) >> (32 - $len);
146
147	0						my ($slice, $flag) = @{ $self->{dictionary}[$r] };
	0
148
149	0	0					unless ($flag) {
150	0						$self->{dictionary}[$r] = [];
151	0						$slice = $self->decode($slice);
152	0						$self->{dictionary}[$r] = [ $slice, 1 ];
153							}
154
155	0						$s .= $slice;
156
157							}
158
159	0						return $s;
160
161							}
162
163							1;
164
165							=head1 NAME
166
167							EBook::Ishmael::MobiHuff - Huff/CDIC decoder for MOBI/AZW
168
169							=head1 SYNOPSIS
170
171							use EBook::Ishmael::MobiHuff;
172
173							my $mh = EBook::Ishmael::MobiHuff->new($huff, @cdics);
174							my $decode = $mh->decode($data);
175
176							=head1 DESCRIPTION
177
178							B is a module that provides an object-oriented
179							interface for decoding Huff/CDIC-encoded data found in MOBI/AZW ebooks. This is
180							developer documentation, please consult the L manual for user
181							documentation.
182
183							=head1 METHODS
184
185							=over 4
186
187							=item $mh = EBook::Ishmael::MobiHuff->new($huff, @cdics)
188
189							Returns a blessed C object and reads Huff/CDIC data
190							from C<$huff> and C<@cdics>. C<$huff> is the record data for the C record,
191							C<@cdics> is an array of record data for eacah C record.
192
193							=item $data = $mh->decode($encode)
194
195							Returns the decoded C<$data> from C<$encode>.
196
197							=back
198
199							=head1 AUTHOR
200
201							Written by Samuel Young, Esamyoung12788@gmail.comE.
202
203							This project's source can be found on its
204							L. Comments and pull
205							requests are welcome!
206
207							=head1 COPYRIGHT
208
209							Copyright (C) 2025-2026 Samuel Young
210
211							This program is free software: you can redistribute it and/or modify
212							it under the terms of the GNU General Public License as published by
213							the Free Software Foundation, either version 3 of the License, or
214							(at your option) any later version.
215
216							=cut