| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package EBook::Ishmael::EBook::Mobi; |
|
2
|
17
|
|
|
17
|
|
10664
|
use 5.016; |
|
|
17
|
|
|
|
|
63
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '2.03'; |
|
4
|
17
|
|
|
17
|
|
120
|
use strict; |
|
|
17
|
|
|
|
|
35
|
|
|
|
17
|
|
|
|
|
486
|
|
|
5
|
17
|
|
|
17
|
|
84
|
use warnings; |
|
|
17
|
|
|
|
|
33
|
|
|
|
17
|
|
|
|
|
1011
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
17
|
|
|
17
|
|
94
|
use Encode qw(from_to); |
|
|
17
|
|
|
|
|
27
|
|
|
|
17
|
|
|
|
|
957
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
17
|
|
|
17
|
|
147
|
use XML::LibXML; |
|
|
17
|
|
|
|
|
37
|
|
|
|
17
|
|
|
|
|
133
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
17
|
|
|
17
|
|
10830
|
use EBook::Ishmael::Decode qw(palmdoc_decode); |
|
|
17
|
|
|
|
|
59
|
|
|
|
17
|
|
|
|
|
1419
|
|
|
12
|
17
|
|
|
17
|
|
149
|
use EBook::Ishmael::ImageID qw(image_id); |
|
|
17
|
|
|
|
|
41
|
|
|
|
17
|
|
|
|
|
850
|
|
|
13
|
17
|
|
|
17
|
|
8291
|
use EBook::Ishmael::PDB; |
|
|
17
|
|
|
|
|
53
|
|
|
|
17
|
|
|
|
|
771
|
|
|
14
|
17
|
|
|
17
|
|
140
|
use EBook::Ishmael::Time qw(guess_time); |
|
|
17
|
|
|
|
|
47
|
|
|
|
17
|
|
|
|
|
125005
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# Many thanks to Tommy Persson, the original author of mobi2html, a script |
|
17
|
|
|
|
|
|
|
# which much of this code is based off of. |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# TODO: Implement AZW4 support |
|
20
|
|
|
|
|
|
|
# TODO: Add support for UTF16 MOBIs (65002) |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# PDB type and creator identifiers; heuristic() compares the 4-byte fields at
# file offsets 60 and 64 against these.
my $TYPE = 'BOOK';
my $CREATOR = 'MOBI';

my $RECSIZE = 4096;

# Index value used to mean "no such index/record" (all 32 bits set).
my $NULL_INDEX = 0xffffffff;

# True if this perl can unpack 64-bit big-endian integers ("Q>"), which the
# Huffman decoder below relies on. On perls without 64-bit support the unpack
# dies and the eval yields undef. The eval block must end in an explicit true
# value: the result of the unpack itself (0, or nothing for short input) is
# false and would make the probe report "unsupported" everywhere.
my $UNPACK_Q = !! eval { my @probe = unpack "Q>", "\0" x 8; 1 };
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Class method: decide whether $file looks like a MOBI book this module
# should handle.
#
#   $class  - invocant (unused)
#   $file   - path of the file, used only for the size check
#   $fh     - open, seekable read handle on the same file
#
# Returns 0 when the file is smaller than 68 bytes, when the byte at offset
# 32 is not NUL, or when the type/creator fields at offsets 60/64 are not
# $TYPE/$CREATOR. Otherwise returns true unless the 32-bit value found 36
# bytes into the location named at offset 78 equals 8 (presumably the MOBI
# version of the first record, with 8 handled elsewhere - confirm).
sub heuristic {

    my ($class, $file, $fh) = @_;

    # Too short to contain the fields probed below.
    return 0 if (-s $file) < 68;

    my $null;
    seek $fh, 32, 0;
    read $fh, $null, 1;
    return 0 if $null ne "\0";

    my ($type, $creator);
    seek $fh, 60, 0;
    read $fh, $type, 4;
    read $fh, $creator, 4;
    return 0 if $type ne $TYPE or $creator ne $CREATOR;

    # Offset 78 holds a big-endian 32-bit file offset; the value we want
    # lives 36 bytes past it.
    my ($recoff, $version);
    seek $fh, 78, 0;
    read $fh, $recoff, 4;
    seek $fh, unpack("N", $recoff) + 36, 0;
    read $fh, $version, 4;

    return unpack("N", $version) != 8;

}
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# Many thanks to Calibre, much of the code in this module was based on their |
|
65
|
|
|
|
|
|
|
# huffman decoder. |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Decoder for MOBI HUFF/CDIC compressed text.
#
#   my $huff = EBook::Ishmael::EBook::Mobi::MobiHuff->new($huff_rec, @cdic_recs);
#   my $text = $huff->decode($compressed);
#
# Requires a perl with 64-bit integer support (unpack "Q>" and 32-bit shifts
# of values wider than 32 bits).
package EBook::Ishmael::EBook::Mobi::MobiHuff {

    # _load_cdic() needs min(); it has to be imported into *this* package,
    # imports made by the enclosing file's main package are not visible here.
    use List::Util qw(min);

    # Expected first 8 bytes of a HUFF and of a CDIC record.
    my $HUFF_HDR = pack "A4 N", 'HUFF', 24;
    my $CDIC_HDR = pack "A4 N", 'CDIC', 16;

    # Parse a HUFF record into $self->{dict1}, {mincode} and {maxcode}.
    # Dies with "Invalid MOBI HUFF ..." on a bad header or dictionary entry.
    # Returns 1.
    sub _load_huff {

        my $self = shift;
        my $huff = shift;

        unless (substr($huff, 0, 8) eq $HUFF_HDR) {
            die "Invalid MOBI HUFF header\n";
        }

        # Byte offsets of the two tables inside the record.
        my @off = unpack "N N", substr $huff, 8, 8;

        # First table: 256 entries, indexed by the top byte of a code. Each
        # entry becomes [ code length, terminal flag, max code ].
        @{ $self->{dict1} } = map {

            my $len  = $_ & 0x1f;
            my $term = $_ & 0x80;
            my $max  = $_ >> 8;

            if ($len == 0) {
                die "Invalid MOBI HUFF dictionary\n";
            }

            # Codes of 8 bits or fewer are fully determined by the lookup
            # byte, so they must be marked terminal.
            if ($len <= 8 and !$term) {
                die "Invalid MOBI HUFF dictionary\n";
            }

            # Left-align the maximum in a 32-bit window.
            $max = (($max + 1) << (32 - $len)) - 1;

            [ $len, $term, $max ];

        } unpack "N256", substr $huff, $off[0], 4 * 256;

        # Second table: 32 (min, max) pairs; index 0 of each derived list is
        # a placeholder so that the lists can be indexed by code length.
        my @dict2 = unpack "N64", substr $huff, $off[1], 4 * 64;

        my @mins = (0, map { $dict2[$_] } grep { $_ % 2 == 0 } (0 .. $#dict2));
        my @maxs = (0, map { $dict2[$_] } grep { $_ % 2 != 0 } (0 .. $#dict2));

        $self->{mincode} = [ map { $mins[$_] << (32 - $_) } (0 .. $#mins) ];
        $self->{maxcode} = [ map { (($maxs[$_] + 1) << (32 - $_)) - 1 } (0 .. $#maxs) ];

        return 1;

    }

    # Append the phrases of one CDIC record to $self->{dictionary}. Each
    # phrase is stored as [ bytes, flag ], where a true flag means the bytes
    # are already literal text. Dies on a bad header. Returns 1.
    sub _load_cdic {

        my $self = shift;
        my $cdic = shift;

        unless (substr($cdic, 0, 8) eq $CDIC_HDR) {
            die "Invalid MOBI CDIC header\n";
        }

        my ($phrases, $bits) = unpack "N N", substr $cdic, 8, 8;

        # Number of phrases in this record: at most 2**bits, and never more
        # than what is still missing from the overall phrase count.
        my $n = min(1 << $bits, $phrases - @{ $self->{dictionary} });

        # The record body starts (at byte 16) with $n 16-bit offsets, each
        # relative to byte 16 and pointing at a 16-bit length word followed
        # by the phrase bytes.
        push @{ $self->{dictionary} }, map {

            my $blen = unpack "n", substr $cdic, 16 + $_;

            [
                substr($cdic, 18 + $_, $blen & 0x7fff),
                $blen & 0x8000,
            ];

        } unpack "n$n", substr $cdic, 16;

        return 1;

    }

    # Constructor: new($class, $huff_record, @cdic_records).
    # Dies if any record is malformed (see _load_huff / _load_cdic).
    sub new {

        my $class = shift;
        my $huff  = shift;
        my @cdic  = @_;

        my $self = {
            dict1      => [],
            dictionary => [],
            mincode    => [],
            maxcode    => [],
        };

        bless $self, $class;

        $self->_load_huff($huff);

        for my $c (@cdic) {
            $self->_load_cdic($c);
        }

        return $self;

    }

    # Decode a Huffman-compressed byte string and return the expanded text.
    # Dictionary phrases that are themselves compressed are decoded
    # recursively on first use and cached in decoded form.
    sub decode {

        my $self = shift;
        my $data = shift;

        # Bits of real input remaining; the padding lets us always read a
        # full 64-bit word.
        my $left = length($data) * 8;
        $data .= "\x00" x 8;
        my $pos = 0;
        my $x = unpack "Q>", $data;
        # Number of bits in $x below the current 32-bit code window.
        my $n = 32;

        my $s = '';

        while (1) {

            # Window exhausted: slide forward by 32 bits.
            if ($n <= 0) {
                $pos += 4;
                $x = unpack "Q>", substr $data, $pos, 8;
                $n += 32;
            }
            my $code = ($x >> $n) & ((1 << 32) - 1);

            my ($len, $term, $max) = @{ $self->{dict1}[$code >> 24] };
            unless ($term) {
                # Long code: grow the length until the code fits.
                $len += 1 while $code < $self->{mincode}[$len];
                $max = $self->{maxcode}[$len];
            }

            $n -= $len;
            $left -= $len;
            last if $left < 0;

            # Index of the phrase this code stands for.
            my $r = ($max - $code) >> (32 - $len);

            my ($slice, $flag) = @{ $self->{dictionary}[$r] };

            unless ($flag) {
                # Blank the entry while it is being expanded, then cache the
                # decoded phrase.
                $self->{dictionary}[$r] = [];
                $slice = $self->decode($slice);
                $self->{dictionary}[$r] = [ $slice, 1 ];
            }

            $s .= $slice;

        }

        return $s;

    }

}
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
sub _clean_html { |
|
221
|
|
|
|
|
|
|
|
|
222
|
5
|
|
|
5
|
|
11
|
my $html = shift; |
|
223
|
|
|
|
|
|
|
|
|
224
|
5
|
|
|
|
|
275
|
$$html =~ s/
|
|
225
|
5
|
|
|
|
|
242
|
$$html =~ s/
|
|
226
|
5
|
|
|
|
|
252
|
$$html =~ s/<\/mbp:pagebreak>//g; |
|
227
|
5
|
|
|
|
|
94
|
$$html =~ s/.*?<\/guide>//g; |
|
228
|
5
|
|
|
|
|
77
|
$$html =~ s/<\/?mbp:nu>//g; |
|
229
|
5
|
|
|
|
|
242
|
$$html =~ s/<\/?mbp:section//g; |
|
230
|
5
|
|
|
|
|
70
|
$$html =~ s/<\/?mbp:frameset>//g; |
|
231
|
5
|
|
|
|
|
70
|
$$html =~ s/<\/?mbp:slave-frame>//g; |
|
232
|
|
|
|
|
|
|
|
|
233
|
5
|
|
|
|
|
13
|
return 1; |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
# Decode the size value stored in the last (up to) four bytes of $data.
# Bytes are read front to back; each contributes its low 7 bits, and a byte
# with the high bit set restarts the accumulation from zero, so the value is
# made of the last high-bit byte and everything after it.
# Returns the decoded integer.
sub _trailing_entry_size {

    my ($data) = @_;

    my $size = 0;

    for my $byte (unpack "C4", substr($data, -4)) {
        $size = 0 if $byte & 0x80;
        $size = ($size << 7) | ($byte & 0x7f);
    }

    return $size;

}
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Method: total number of trailing bytes at the end of record data $data.
#
# For each of the $self->{_trailers} trailing entries, the entry size is
# decoded from the end of the data, added to the total, and that many bytes
# are stripped from a private copy before the next entry is examined. If bit
# 0 of $self->{_extra_data} is set, one more trailer is counted whose size is
# (low two bits of the now-last byte) + 1.
#
# Returns the byte count; the caller's $data is not modified.
sub _trailing_entries_size {

    my ($self, $data) = @_;

    my $total = 0;

    for my $pass (1 .. $self->{_trailers}) {
        my $size = _trailing_entry_size($data);
        $total += $size;
        # Drop this entry so the next one becomes the tail.
        substr $data, -$size, $size, '';
    }

    $total += (ord(substr $data, -1) & 3) + 1 if $self->{_extra_data} & 1;

    return $total;

}
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# Index processing code was adapted from KindleUnpack |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# Method: read the INDX index whose header lives in PDB record $idx.
#
# Returns a two-element list ( $outtbl, $ctoc ):
#   $outtbl - array ref of [ $text, $tagmap ] pairs, one per index entry
#             ($tagmap as produced by _get_tagmap)
#   $ctoc   - hash ref mapping CTOC offsets to names, merged from all CTOC
#             records of this index (each record's offsets are shifted by a
#             further 0x10000)
#
# When $idx is $NULL_INDEX both are empty.
# Dies if a record is not a valid INDX section (see _parse_indx_header).
sub _get_index_data {

    my $self = shift;
    my $idx  = shift;

    # Return the same shape as the normal path so that callers doing
    # "my ($outtbl, $ctoc) = ..." get usable references. (In scalar context
    # this still evaluates to an empty hash ref.)
    return ( [], {} ) if $idx == $NULL_INDEX;

    my $outtbl = [];
    my $ctoc   = {};

    my $data;
    $$data = $self->{_pdb}->record($idx)->data;

    my ($idxhdr, undef, $hordt2) = $self->_parse_indx_header($data);
    my $icount = $idxhdr->{count};
    my $roff = 0;
    # CTOC records follow the header record and its $icount data records.
    my $off = $idx + $icount + 1;

    for my $i (0 .. $idxhdr->{nctoc} - 1) {
        my $cdata = $self->{_pdb}->record($off + $i)->data;
        my $ctocdict = $self->_read_ctoc(\$cdata);
        for my $j (sort keys %$ctocdict) {
            $ctoc->{ $j + $roff } = $ctocdict->{ $j };
        }
        $roff += 0x10000;
    }

    # The tag table is looked for right after the header, at its stated length.
    my $tagstart = $idxhdr->{len};
    my ($ctrlcount, $tagtbl) = _read_tag_section($tagstart, $data);

    for my $i ($idx + 1 .. $idx + 1 + $icount - 1) {
        my $d = $self->{_pdb}->record($i)->data;
        my ($hdrinfo) = $self->_parse_indx_header(\$d);
        my $idxtpos = $hdrinfo->{start};
        my $ecount = $hdrinfo->{count};

        # Table of 16-bit entry positions, 4 bytes past {start}.
        my $idxposits = [];
        for my $j (0 .. $ecount - 1) {
            my $pos = unpack "n", substr $d, $idxtpos + 4 + (2 * $j), 2;
            push @$idxposits, $pos;
        }

        for my $j (0 .. $ecount - 1) {
            my $spos = $idxposits->[$j];
            # undef for the last entry
            my $epos = $idxposits->[$j + 1];
            # Each entry starts with a length byte and that many text bytes.
            my $txtlen = ord(substr $d, $spos, 1);
            my $txt = substr $d, $spos + 1, $txtlen;
            if (@$hordt2) {
                # Translate every byte through the header's ORDT2 table.
                $txt = join '',
                    map { chr $hordt2->[ ord $_ ] }
                    split //, $txt;
            }
            my $tagmap = _get_tagmap(
                $ctrlcount,
                $tagtbl,
                \$d,
                $spos + 1 + $txtlen,
                $epos
            );
            push @$outtbl, [ $txt, $tagmap ];
        }
    }

    return ( $outtbl, $ctoc );

}
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
# Method: parse the header of an INDX section.
#
#   $data - reference to the raw record bytes
#
# Returns ( $header, $ordt1, $ordt2 ):
#   $header - hash ref of the thirteen 32-bit fields that follow the 'INDX'
#             magic (len nul1 type gen start count code lng total ordt ligt
#             nligt nctoc)
#   $ordt1  - array ref of 8-bit ORDT values (empty when there is no ORDT)
#   $ordt2  - array ref of 16-bit ORDT values (empty when there is no ORDT)
#
# Dies with "Index section is not INDX\n" on a bad magic and with
# "Corrupted INDX record\n" when ORDT data is announced but malformed.
sub _parse_indx_header {

    my ($self, $data) = @_;

    die "Index section is not INDX\n" if substr($$data, 0, 4) ne 'INDX';

    my @fields = qw(
        len nul1 type gen start count code lng total ordt ligt nligt nctoc
    );
    my $nfields = @fields;

    my %header;
    @header{ @fields } = unpack "N$nfields", substr $$data, 4, 4 * $nfields;

    # ORDT bookkeeping lives at a fixed position: entry-count flag, number
    # of entries, pointers to both tables, and one further value that is
    # not used here.
    my ($ocnt, $oentries, $op1, $op2, $otagx)
        = unpack "N N N N N", substr $$data, 0xa4, 4 * 5;

    my ($ordt1, $ordt2) = ([], []);

    if ($header{code} == 0xfdea or $ocnt != 0 or $oentries > 0) {

        die "Corrupted INDX record\n" if $ocnt != 1;

        # Both pointers must land on an 'ORDT' marker.
        for my $ptr ($op1, $op2) {
            die "Corrupted INDX record\n" if substr($$data, $ptr, 4) ne 'ORDT';
        }

        $ordt1 = [ unpack("C$oentries", substr $$data, $op1 + 4, $oentries) ];
        $ordt2 = [ unpack("n$oentries", substr $$data, $op2 + 4, $oentries * 2) ];

    }

    return ( \%header, $ordt1, $ordt2 );

}
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# Method: parse one CTOC record.
#
#   $data - reference to the raw record bytes
#
# The record is a sequence of entries, each a variable-width length (see
# _vwv) followed by that many bytes of name. Parsing stops at the end of the
# data or at a NUL byte where an entry would start.
#
# Returns a hash ref mapping the byte offset at which each entry starts to
# the entry's name.
sub _read_ctoc {

    my $self = shift;
    my $data = shift;

    my $ctoc = {};

    my $off = 0;
    my $len = length $$data;

    while ($off < $len) {

        last if substr($$data, $off, 1) eq "\0";

        my $idxoff = $off;

        my ($pos, $ilen) = _vwv($data, $off);
        $off += $pos;

        my $name = substr $$data, $off, $ilen;
        $off += $ilen;

        # Store into the $ctoc declared above. Writing "my $ctoc->{...}" here
        # would declare a fresh, loop-local $ctoc on every pass and leave the
        # returned hash empty.
        $ctoc->{ $idxoff } = $name;

    }

    return $ctoc;

}
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
# Read a variable-width value from $$data starting at byte offset $off.
#
# Bytes are consumed front to back, each contributing its low 7 bits (most
# significant group first); the byte with the high bit set is the last one.
#
# Returns ( $consumed, $value ): the number of bytes read and the decoded
# integer.
# Dies if the data ends before a terminating byte is found; without this
# check a truncated value would make the loop spin forever, since reading
# past the end keeps yielding zero bytes.
sub _vwv {

    my $data = shift;
    my $off  = shift;

    my $value   = 0;
    my $consume = 0;
    my $limit   = length $$data;

    while (1) {

        if ($off + $consume >= $limit) {
            die "Truncated variable-width value in MOBI index\n";
        }

        my $byte = ord(substr $$data, $off + $consume, 1);
        $consume++;

        $value = ($value << 7) | ($byte & 0x7f);

        last if $byte & 0x80;

    }

    return ( $consume, $value );

}
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
# Read the TAGX tag table located at byte offset $start of $$data.
#
# Returns ( $ctrlcount, $tags ):
#   $ctrlcount - the 32-bit value stored 8 bytes into the section
#   $tags      - array ref with one [ b0, b1, b2, b3 ] entry for every 4-byte
#                group between byte 12 of the section and the length stored
#                4 bytes into the section
#
# If there is no 'TAGX' marker at $start, returns ( 0, [] ).
sub _read_tag_section {

    my ($start, $data) = @_;

    return ( 0, [] ) unless substr($$data, $start, 4) eq 'TAGX';

    my $foff      = unpack "N", substr $$data, $start + 4, 4;
    my $ctrlcount = unpack "N", substr $$data, $start + 8, 4;

    my @tags;
    my $rel = 12;
    while ($rel < $foff) {
        push @tags, [ unpack "C4", substr $$data, $start + $rel, 4 ];
        $rel += 4;
    }

    return ( $ctrlcount, \@tags );

}
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
# Count how many of the lowest $bits bits of $val are set.
#
#   $val  - integer to inspect
#   $bits - number of low-order bits to look at (default 8)
#
# Returns the count.
sub _count_setbits {

    my ($val, $bits) = @_;
    $bits //= 8;

    my $set = 0;

    for my $pass (1 .. $bits) {
        $set++ if ($val & 0x01) == 0x01;
        $val >>= 1;
    }

    return $set;

}
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
# Decode the tag values of one index entry.
#
#   $ctrlcount - number of control bytes at the start of the tag data
#   $tagtbl    - tag table from _read_tag_section: array ref of
#                [ tag, values-per-entry, mask, end-flag ]
#   $entry     - reference to the raw record bytes
#   $spos      - offset of the entry's control bytes within $$entry
#   $epos      - offset of the next entry (currently unused)
#
# Returns a hash ref mapping each tag that is present in the entry to an
# array ref of its decoded values.
sub _get_tagmap {

    my $ctrlcount = shift;
    my $tagtbl    = shift;
    my $entry     = shift;
    my $spos      = shift;
    my $epos      = shift;

    my $tags   = [];
    my $tagmap = {};
    my $ctrli  = 0;
    # Value data begins right after the control bytes.
    my $start  = $spos + $ctrlcount;

    # Pass 1: work out from the control bytes which tags are present and how
    # their values are counted. Each @$tags element is
    # [ tag, value count or undef, byte count or undef, values per entry ].
    for my $t (@$tagtbl) {
        my ($tag, $values, $mask, $endflag) = @$t;

        # An end-flag row moves on to the next control byte.
        if ($endflag == 1) {
            $ctrli++;
            next;
        }

        my $cbyte = ord(substr $$entry, $spos + $ctrli, 1);
        my $val = $cbyte & $mask;
        next if $val == 0;

        if ($val == $mask) {
            if (_count_setbits($mask) > 1) {
                # All bits of a multi-bit mask set: a variable-width value
                # follows that gives the length of the tag's data in bytes.
                my ($consume, $nbytes) = _vwv($entry, $start);
                $start += $consume;
                push @$tags, [ $tag, undef, $nbytes, $values ];
            } else {
                push @$tags, [ $tag, 1, undef, $values ];
            }
        } else {
            # Shift the masked bits down to get the value count.
            while (($mask & 0x01) == 0) {
                $mask >>= 1;
                $val >>= 1;
            }
            push @$tags, [ $tag, $val, undef, $values ];
        }
    }

    # Pass 2: read the values themselves.
    for my $t (@$tags) {
        my ($tag, $count, $bytes, $per_entry) = @$t;
        my $values = [];
        if (defined $count) {
            for my $i (1 .. $count) {
                for my $j (1 .. $per_entry) {
                    my ($consume, $data) = _vwv($entry, $start);
                    $start += $consume;
                    push @$values, $data;
                }
            }
        } else {
            my $constotal = 0;
            while ($constotal < $bytes) {
                my ($consume, $data) = _vwv($entry, $start);
                $start += $consume;
                # Track how much of the byte budget has been used; without
                # this the loop condition could never become false.
                $constotal += $consume;
                push @$values, $data;
            }
            # Should we warn if $constotal does not match $bytes?
        }
        $tagmap->{ $tag } = $values;
    }

    return $tagmap;

}
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
# Method: build the KF8 lookup tables from the book's index records.
#
# Depending on which of $self->{_fdst}, {_skelidx}, {_fragidx} and
# {_guideidx} differ from $NULL_INDEX, fills:
#   {_fdsttbl}  - flow start offsets from the FDST record, followed by
#                 {_textlen} as the final boundary
#   {_skeltbl}  - [ number, name, tag 1 value, tag 6 value 0, tag 6 value 1 ]
#   {_fragtbl}  - [ int(text), ctoc name, tag 3, tag 4, tag 6 value 0,
#                   tag 6 value 1 ]
#   {_guidetbl} - [ type, title, number ]
#
# Dies if the FDST record lacks its 'FDST' marker. Returns 1.
sub _kf8_init {

    my ($self) = @_;

    unless ($self->{_fdst} == $NULL_INDEX) {

        my $fdst = $self->{_pdb}->record($self->{_fdst})->data;
        die "KF8 Mobi missing FDST info\n" if substr($fdst, 0, 4) ne 'FDST';

        # The section count is followed by that many pairs of 32-bit values;
        # only the first value of each pair is kept.
        my $secnum = unpack "N", substr $fdst, 0x08, 4;
        my $nvals  = $secnum * 2;
        my @bounds = unpack "N$nvals", substr $fdst, 12, 4 * $nvals;
        my @starts = @bounds[ grep { $_ % 2 == 0 } 0 .. $#bounds ];

        $self->{_fdsttbl} = [ @starts ];
        push @{ $self->{_fdsttbl} }, $self->{_textlen};

    }

    unless ($self->{_skelidx} == $NULL_INDEX) {

        my ($entries) = $self->_get_index_data($self->{_skelidx});

        for my $num (0 .. $#$entries) {
            my ($name, $tags) = @{ $entries->[$num] };
            push @{ $self->{_skeltbl} }, [
                $num, $name, $tags->{1}[0], $tags->{6}[0], $tags->{6}[1]
            ];
        }

    }

    unless ($self->{_fragidx} == $NULL_INDEX) {

        my ($entries, $ctoc) = $self->_get_index_data($self->{_fragidx});

        for my $entry (@$entries) {
            my ($txt, $tags) = @$entry;
            # The CTOC name may be undef when the CTOC has no entry for this
            # offset; it is not used for anything at the moment.
            my $cdat = $ctoc->{ $tags->{2}[0] };
            push @{ $self->{_fragtbl} }, [
                int($txt), $cdat, $tags->{3}[0], $tags->{4}[0],
                $tags->{6}[0], $tags->{6}[1]
            ];
        }

    }

    unless ($self->{_guideidx} == $NULL_INDEX) {

        my ($entries, $ctoc) = $self->_get_index_data($self->{_guideidx});

        for my $entry (@$entries) {
            my ($rtype, $tags) = @$entry;
            my $rtitle = $ctoc->{ $tags->{1}[0] };

            # Tag 6 takes precedence over tag 3 when both are present.
            my $fno;
            $fno = $tags->{3}[0] if exists $tags->{3};
            $fno = $tags->{6}[0] if exists $tags->{6};

            push @{ $self->{_guidetbl} }, [ $rtype, $rtitle, $fno ];
        }

    }

    return 1;

}
|
625
|
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
# Method: assemble the XHTML parts of a KF8 book.
#
# The XHTML source is the first flow of $self->rawml (bounded by the first
# two entries of {_fdsttbl}). For every skeleton in {_skeltbl} the skeleton
# text is cut out of the source and its fragments - taken in order from
# {_fragtbl}, with their bytes stored back to back right after the skeleton -
# are inserted at their recorded positions.
#
# Returns the list of assembled parts, one per skeleton.
sub _kf8_xhtml {

    my ($self) = @_;

    my $rawml = $self->rawml;

    # xhtml is the first flow piece
    my $flow_start = $self->{_fdsttbl}[0];
    my $source = substr(
        $rawml,
        $flow_start,
        $self->{_fdsttbl}[1] - $flow_start
    );

    my @parts;
    # Index of the next unused fragment; runs across all skeletons.
    my $fragptr = 0;

    for my $skel (@{ $self->{_skeltbl} }) {

        my (undef, undef, $fragcnt, $skelpos, $skellen) = @$skel;

        my $skeleton = substr $source, $skelpos, $skellen;
        # Fragment bytes start immediately after the skeleton.
        my $cursor = $skelpos + $skellen;

        for my $n (1 .. $fragcnt) {

            my ($inpos, undef, undef, undef, undef, $len)
                = @{ $self->{_fragtbl}[$fragptr] };

            my $slice = substr $source, $cursor, $len;

            # Insert positions are absolute; make them skeleton-relative.
            my $at = $inpos - $skelpos;
            $skeleton
                = substr($skeleton, 0, $at) . $slice . substr($skeleton, $at);

            $cursor += $len;
            $fragptr++;

        }

        push @parts, $skeleton;

    }

    return @parts;

}
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
# Method: return the decoded text of text record $rec.
#
# The data is taken from PDB record $rec + 1, its trailing entries (see
# _trailing_entries_size) are stripped, and the remainder is decoded
# according to $self->{_compression}:
#   1     - returned as is
#   2     - palmdoc_decode()
#   17480 - $self->{_huff}->decode()
# No meaningful value is returned for any other compression type.
sub _decode_record {

    my ($self, $rec) = @_;

    ++$rec;

    my $encode = $self->{_pdb}->record($rec)->data;

    # Cut the trailing entries off the end of the record.
    my $trail = $self->_trailing_entries_size($encode);
    substr $encode, -$trail, $trail, '';

    return $encode                         if $self->{_compression} == 1;
    return palmdoc_decode($encode)         if $self->{_compression} == 2;
    return $self->{_huff}->decode($encode) if $self->{_compression} == 17480;

}
|
698
|
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
# TODO: Could probably optimize this. |
|
700
|
|
|
|
|
|
|
# Method: apply the records of an EXTH block to the book's metadata.
#
#   $exth - raw EXTH bytes: a 12-byte header (4-byte identifier, 32-bit
#           length, 32-bit record count) followed by the records, each a
#           32-bit id, a 32-bit total size and (size - 8) bytes of content
#
# Records with a known id update $self->{Metadata}, or, for id 201, set
# $self->{_coverrec} relative to $self->{_imgrec}; all others are skipped.
# Returns 1.
sub _read_exth {

    my ($self, $exth) = @_;

    # Record id => handler taking the record's content.
    my %handler_for = (
        100 => sub { $self->{Metadata}->add_author($_[0]) },
        101 => sub { $self->{Metadata}->add_contributor($_[0]) },
        103 => sub { $self->{Metadata}->set_description($_[0]) },
        104 => sub { $self->{Metadata}->set_id($_[0]) },
        105 => sub { $self->{Metadata}->add_genre($_[0]) },
        # A date that cannot be parsed results in set_created() being
        # called without a value.
        106 => sub { $self->{Metadata}->set_created(eval { guess_time($_[0]) }) },
        108 => sub { $self->{Metadata}->add_contributor($_[0]) },
        114 => sub { $self->{Metadata}->set_format('MOBI ' . $_[0]) },
        # Content is a 32-bit offset added to the first image record.
        201 => sub {
            return unless defined $self->{_imgrec};
            $self->{_coverrec} = $self->{_imgrec} + unpack "N", $_[0];
        },
        524 => sub { $self->{Metadata}->add_language($_[0]) },
    );

    my (undef, undef, $items) = unpack "a4 N N", $exth;

    # Records start right after the 12-byte header.
    my $pos = 12;

    for my $n (1 .. $items) {

        # The size has to be known before the content can be unpacked.
        my (undef, $size) = unpack "N N", substr $exth, $pos;
        my $contlen = $size - 8;
        my ($id, undef, $content) = unpack "N N a$contlen", substr $exth, $pos;

        $handler_for{ $id }->($content) if exists $handler_for{ $id };

        $pos += $size;

    }

    return 1;

}
|
743
|
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
# Construct a Mobi object from a MOBI/AZW/KF8 file.
#
# Arguments (positional):
#   $class - class name.
#   $file  - path of the file to read.
#   $enc   - accepted for interface compatibility; not used by this
#            constructor.
#   $net   - stored as $self->{Network}; defaults to 1 when undefined. The
#            html/raw methods pass its negation to XML::LibXML as no_network.
#
# Returns the blessed object. Dies if the file uses an unsupported
# compression type, has a text record size other than 4096, is encrypted,
# uses a text encoding other than code page 1252 or 65001, or needs Huffman
# decoding on a perl that cannot unpack 64-bit integers.
sub new {

    my $class = shift;
    my $file  = shift;
    my $enc   = shift;
    my $net   = shift // 1;

    # rel2abs() is called below; load File::Spec here rather than relying on
    # some other module having loaded it already.
    require File::Spec;

    my $self = {
        Source       => undef,
        Metadata     => EBook::Ishmael::EBook::Metadata->new,
        Network      => $net,
        _pdb         => undef,
        _compression => undef,
        _textlen     => undef,
        _recnum      => undef,
        _recsize     => undef,
        _encryption  => undef,
        _doctype     => undef,
        _length      => undef,
        _type        => undef,
        _codepage    => undef,
        _uid         => undef,
        _version     => undef,
        _exth_flag   => undef,
        _extra_data  => undef,
        _trailers    => 0,
        _huff        => undef,
        _imgrec      => undef,
        _coverrec    => undef,
        _lastcont    => undef,
        _images      => [],
        # kf8 stuff
        _skelidx     => undef,
        _skeltbl     => [],
        _fragidx     => undef,
        _fragtbl     => [],
        _guideidx    => undef,
        _guidetbl    => [],
        _fdst        => undef,
        _fdsttbl     => [ 0, $NULL_INDEX ],
    };

    bless $self, $class;

    $self->{Source} = File::Spec->rel2abs($file);

    $self->{_pdb} = EBook::Ishmael::PDB->new($file);

    # Record 0 holds the PalmDoc header, the MOBI header and (optionally) the
    # EXTH block and title.
    my $hdr = $self->{_pdb}->record(0)->data;

    # PalmDoc header (first 16 bytes of record 0).
    (
        $self->{_compression},
        undef,
        $self->{_textlen},
        $self->{_recnum},
        $self->{_recsize},
        $self->{_encryption},
        undef,
    ) = unpack "n n N n n n n", $hdr;

    # 1 = stored as-is, 2 = PalmDoc compression, 17480 = Huffman (MobiHuff).
    unless (
        $self->{_compression} == 1 or
        $self->{_compression} == 2 or
        $self->{_compression} == 17480
    ) {
        die "Mobi $self->{Source} uses an unsupported compression level\n";
    }

    if ($self->{_recsize} != 4096) {
        die "$self->{Source} is not a Mobi file\n";
    }

    unless ($self->{_encryption} == 0) {
        die "Cannot read encrypted Mobi $self->{Source}\n";
    }

    # Start of the MOBI header, immediately after the PalmDoc header.
    (
        $self->{_doctype},
        $self->{_length},
        $self->{_type},
        $self->{_codepage},
        $self->{_uid},
        $self->{_version},
    ) = unpack "a4 N N N N N", substr $hdr, 16, 4 * 6;

    # 1252 is handled as cp1252 and 65001 as utf-8 elsewhere in this module.
    unless ($self->{_codepage} == 1252 or $self->{_codepage} == 65001) {
        die "Mobi $self->{Source} uses an unsupported text encoding\n";
    }

    # Read some parts of the Mobi header that we care about.
    my ($toff, $tlen)    = unpack "N N", substr $hdr, 0x54, 8;  # title
    $self->{_imgrec}     = unpack "N",   substr $hdr, 0x6c, 4;  # first image
    my ($hoff, $hcount)  = unpack "N N", substr $hdr, 0x70, 8;  # huff records
    $self->{_exth_flag}  = unpack "N",   substr $hdr, 0x80, 4;
    $self->{_lastcont}   = unpack "n",   substr $hdr, 0xc2, 2;
    $self->{_extra_data} = unpack "n",   substr $hdr, 0xf2, 2;

    if ($self->{_compression} == 17480) {

        unless ($UNPACK_Q) {
            die "Cannot read AZW $self->{Source}; perl does not support " .
                "unpacking 64-bit integers\n";
        }

        my @huffs = map { $self->{_pdb}->record($_)->data } ($hoff .. $hoff + $hcount - 1);
        $self->{_huff} = EBook::Ishmael::EBook::Mobi::MobiHuff->new(@huffs);

    }

    # Count the set bits above bit 0 of the extra-data flags; each one adds
    # to the per-record trailer count.
    if ($self->{_length} >= 0xe3 and $self->{_version} >= 5) {
        my $flags = $self->{_extra_data};
        while ($flags > 1) {
            $self->{_trailers}++ if $flags & 2;
            $flags >>= 1;
        }
    }

    if ($self->{_version} == 8) {
        $self->{_fdst}     = unpack "N", substr $hdr, 0xc0, 4;
        $self->{_fragidx}  = unpack "N", substr $hdr, 0xf8, 4;
        $self->{_skelidx}  = unpack "N", substr $hdr, 0xfc, 4;
        $self->{_guideidx} = unpack "N", substr $hdr, 0x104, 4;
        $self->_kf8_init;
    }

    # Clamp the last content record to the records that actually exist.
    if ($self->{_lastcont} > $self->{_pdb}->recnum - 1) {
        $self->{_lastcont} = $self->{_pdb}->recnum - 1;
    }

    # An image record index at or past the last content record means there
    # are no images.
    if ($self->{_imgrec} >= $self->{_lastcont}) {
        undef $self->{_imgrec};
    }

    # Index every record in the image range that image_id() recognises.
    if (defined $self->{_imgrec}) {
        for my $i ($self->{_imgrec} .. $self->{_lastcont}) {
            my $img = $self->{_pdb}->record($i)->data;
            my $format = image_id($img);
            next if not defined $format;
            push @{ $self->{_images} }, [ $i, $format ];
        }
    }

    # The EXTH flags field is a bitfield; only bit 0x40 signals that an EXTH
    # block follows the MOBI header.
    if ($self->{_exth_flag} & 0x40) {
        $self->_read_exth(substr $hdr, $self->{_length} + 16);
    }

    # Discard a cover record that does not point at a recognised image.
    if (
        defined $self->{_coverrec} and
        not grep { $self->{_coverrec} == $_->[0] } @{ $self->{_images} }
    ) {
        undef $self->{_coverrec};
    }

    $self->{Metadata}->set_title(substr $hdr, $toff, $tlen);

    if (
        not defined $self->{Metadata}->created or
        # If the PDB's created date is greater than the MOBI's EXTH one,
        # probably means a corrupted EXTH date.
        $self->{_pdb}->cdate > $self->{Metadata}->created
    ) {
        $self->{Metadata}->set_created($self->{_pdb}->cdate);
    }

    if ($self->{_pdb}->mdate) {
        $self->{Metadata}->set_modified($self->{_pdb}->mdate);
    }

    if ($self->{_version} == 8) {
        $self->{Metadata}->set_format('KF8');
    } elsif (not defined $self->{Metadata}->format) {
        $self->{Metadata}->set_format('MOBI');
    }

    return $self;

}
|
920
|
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
# Return the book's text as one string, built by decoding and concatenating
# text records 0 .. _recnum - 1 via _decode_record().
#
# Named parameters:
#   decode - if true and the book's code page is 1252, convert the text from
#            cp1252 to utf-8 in place. Defaults to 0.
#   clean  - if true, run _clean_html() over the text. Defaults to 0.
#
# Dies if the cp1252 -> utf-8 conversion fails.
sub rawml {

    my $self  = shift;
    my %param = @_;

    my $decode = $param{decode} // 0;
    my $clean  = $param{clean}  // 0;

    my $cont =
        join '',
        map { $self->_decode_record($_) }
        0 .. $self->{_recnum} - 1;

    _clean_html(\$cont) if $clean;

    if ($decode and $self->{_codepage} == 1252) {
        # from_to() returns the octet length of the converted text, which is
        # 0 (false) for an empty book; only undef signals a failure.
        defined(from_to($cont, "cp1252", "utf-8"))
            or die "Failed to encode Mobi $self->{Source} text as utf-8\n";
    }

    return $cont;

}
|
944
|
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
# Render the book as HTML.
#
# For version 8 (KF8) books, each part returned by _kf8_xhtml() is parsed and
# the serialised children of its /html/body are concatenated; parts without a
# body are skipped. For other books the cleaned rawml is parsed with the
# book's encoding and the whole document element is serialised.
#
# Arguments:
#   $out - optional output path. When defined, the HTML is written there
#          through a :utf8 layer and $out is returned; otherwise the HTML
#          string itself is returned.
#
# Dies if the output file cannot be opened, written or closed.
sub html {

    my $self = shift;
    my $out  = shift;

    # Start from an empty string so a KF8 book with no usable parts yields ''
    # rather than undef (which would also warn when printed).
    my $html = '';

    if ($self->{_version} == 8) {

        for my $part ($self->_kf8_xhtml) {

            my $dom = XML::LibXML->load_html(
                string     => $part,
                no_network => !$self->{Network},
                recover    => 2,
            );

            my ($body) = $dom->findnodes('/html/body') or next;

            $html .= join '', map { $_->toString } $body->childNodes;

        }

    } else {

        my $rawml = $self->rawml(clean => 1);
        my $enc = $self->{_codepage} == 1252 ? "cp1252" : "utf-8";
        my $dom = XML::LibXML->load_html(
            string     => $rawml,
            no_network => !$self->{Network},
            encoding   => $enc,
            recover    => 2
        );
        $html = $dom->documentElement->toString;

    }

    if (defined $out) {
        open my $fh, '>', $out
            or die "Failed to open $out for writing: $!\n";
        binmode $fh, ':utf8';
        print { $fh } $html
            or die "Failed to write to $out: $!\n";
        # Buffered write errors only surface at close, so check it.
        close $fh
            or die "Failed to close $out: $!\n";
        return $out;
    } else {
        return $html;
    }

}
|
993
|
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
# Render the book as plain text.
#
# For version 8 (KF8) books, each part returned by _kf8_xhtml() is parsed and
# the text content of its /html/body is concatenated; parts without a body
# are skipped. For other books the cleaned rawml is parsed with the book's
# encoding and the text content of the document element is used.
#
# Arguments:
#   $out - optional output path. When defined, the text is written there
#          through a :utf8 layer and $out is returned; otherwise the text
#          itself is returned.
#
# Dies if the output file cannot be opened, written or closed.
sub raw {

    my $self = shift;
    my $out  = shift;

    # Start from an empty string so a KF8 book with no usable parts yields ''
    # rather than undef (which would also warn when printed).
    my $raw = '';

    if ($self->{_version} == 8) {

        for my $part ($self->_kf8_xhtml) {
            my $dom = XML::LibXML->load_html(
                string     => $part,
                no_network => !$self->{Network},
                recover    => 2,
            );
            my ($body) = $dom->findnodes('/html/body') or next;
            $raw .= $body->textContent;
        }

    } else {

        my $rawml = $self->rawml(clean => 1);
        my $enc = $self->{_codepage} == 1252 ? "cp1252" : "utf-8";
        my $dom = XML::LibXML->load_html(
            string     => $rawml,
            no_network => !$self->{Network},
            encoding   => $enc,
            recover    => 2,
        );

        $raw = $dom->documentElement->textContent;

    }

    if (defined $out) {
        open my $fh, '>', $out
            or die "Failed to open $out for writing: $!\n";
        binmode $fh, ':utf8';
        print { $fh } $raw
            or die "Failed to write to $out: $!\n";
        # Buffered write errors only surface at close, so check it.
        close $fh
            or die "Failed to close $out: $!\n";
        return $out;
    } else {
        return $raw;
    }

}
|
1040
|
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
# Accessor for the metadata object stored under $self->{Metadata}.
sub metadata {

    my ($self) = @_;

    return $self->{Metadata};

}
|
1048
|
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
# True if a cover record index is recorded in $self->{_coverrec}.
sub has_cover {

    my ($self) = @_;

    my $record = $self->{_coverrec};

    return defined $record;

}
|
1056
|
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
# Fetch the cover image.
#
# Returns ($data, $format), where $data is the cover record's bytes and
# $format is what image_id() reports for them. Returns (undef, undef) when
# the book has no cover or the record is not a recognised image. The second
# argument, $out, is accepted but not used.
sub cover {

    my ($self, $out) = @_;

    if (not $self->has_cover) {
        return (undef, undef);
    }

    my $record = $self->{_pdb}->record($self->{_coverrec});
    my $data   = $record->data;
    my $kind   = image_id($data);

    if (defined $kind) {
        return ($data, $kind);
    }

    return (undef, undef);

}
|
1071
|
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
# Number of entries in the image table, $self->{_images}.
sub image_num {

    my ($self) = @_;

    my $images = $self->{_images};

    return scalar @$images;

}
|
1079
|
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
# Fetch the $n-th image (0-based) from the image table.
#
# Returns ($data, $format), where $data is the bytes of the PDB record listed
# in $self->{_images}[$n] and $format is the format stored alongside it.
# Returns (undef, undef) when $n is outside 0 .. image_num - 1.
sub image {

    my $self = shift;
    my $n    = shift;

    # Reject negative indices too: Perl would otherwise count them from the
    # end of the table (or hit an undefined record when the table is empty).
    if ($n < 0 or $n >= $self->image_num) {
        return (undef, undef);
    }

    my $img = $self->{_pdb}->record($self->{_images}->[$n][0])->data;

    return ($img, $self->{_images}[$n][1]);

}
|
1094
|
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
1; |