line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# Palm::Doc.pm |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# Palm::PDB helper for handling Palm Doc databases |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Copyright (C) 2004 Christophe Beauregard |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# $Id: Doc.pm,v 1.19 2005/05/12 01:36:49 cpb Exp $ |
8
|
|
|
|
|
|
|
|
9
|
9
|
|
|
9
|
|
54
|
use strict; |
|
9
|
|
|
|
|
16
|
|
|
9
|
|
|
|
|
537
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
package EBook::MOBI::MobiPerl::Palm::Doc; |
12
|
|
|
|
|
|
|
|
13
|
9
|
|
|
9
|
|
54
|
use EBook::MOBI::MobiPerl::Palm::PDB; |
|
9
|
|
|
|
|
18
|
|
|
9
|
|
|
|
|
244
|
|
14
|
9
|
|
|
9
|
|
5562
|
use EBook::MOBI::MobiPerl::Palm::Raw(); |
|
9
|
|
|
|
|
21
|
|
|
9
|
|
|
|
|
209
|
|
15
|
9
|
|
|
9
|
|
52
|
use vars qw( $VERSION @ISA ); |
|
9
|
|
|
|
|
19
|
|
|
9
|
|
|
|
|
944
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
$VERSION = do { my @r = (q$Revision: 1.19 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
@ISA = qw( EBook::MOBI::MobiPerl::Palm::Raw ); |
20
|
|
|
|
|
|
|
|
21
|
9
|
|
|
9
|
|
52
|
use constant DOC_UNCOMPRESSED => scalar 1; |
|
9
|
|
|
|
|
16
|
|
|
9
|
|
|
|
|
661
|
|
22
|
9
|
|
|
9
|
|
48
|
use constant DOC_COMPRESSED => scalar 2; |
|
9
|
|
|
|
|
23
|
|
|
9
|
|
|
|
|
454
|
|
23
|
9
|
|
|
9
|
|
43
|
use constant DOC_RECSIZE => scalar 4096; |
|
9
|
|
|
|
|
30
|
|
|
9
|
|
|
|
|
17447
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 NAME |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
Palm::Doc - Handler for Palm Doc books |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 SYNOPSIS |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
use Palm::Doc; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head1 DESCRIPTION |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
Helper for reading and writing Palm Doc books. The interface is based on |
36
|
|
|
|
|
|
|
L since it just makes sense. However, because of the nature |
37
|
|
|
|
|
|
|
of these databases, record-level processing is just a Bad Idea. Use |
38
|
|
|
|
|
|
|
the C and C calls rather than do direct access of the |
39
|
|
|
|
|
|
|
C<@records> array. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 EXAMPLES |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Convert a text file to a .pdb: |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
use Palm::Doc; |
46
|
|
|
|
|
|
|
my $doc = new Palm::Doc; |
47
|
|
|
|
|
|
|
$doc->textfile( $ARGV[0] ); |
48
|
|
|
|
|
|
|
$doc->Write( $ARGV[0] . ".pdb" ); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Convert an HTML file to a .prc: |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
use HTML::TreeBuilder; |
53
|
|
|
|
|
|
|
use HTML::FormatText; |
54
|
|
|
|
|
|
|
use Palm::Doc; |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
my $tree = HTML::TreeBuilder->new_from_file( $ARGV[0] ); |
57
|
|
|
|
|
|
|
my $formatter = HTML::FormatText->new( leftmargin => 0, rightmargin => 80 ); |
58
|
|
|
|
|
|
|
my $doc = new Palm::Doc; |
59
|
|
|
|
|
|
|
$doc->{attributes}{resource} = 1; |
60
|
|
|
|
|
|
|
$doc->text( $formatter->format( $tree ) ); |
61
|
|
|
|
|
|
|
$doc->Write( $ARGV[0] . ".prc" ); |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=cut |
64
|
|
|
|
|
|
|
#' |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
sub import |
67
|
|
|
|
|
|
|
{ |
68
|
9
|
|
|
9
|
|
60
|
&EBook::MOBI::MobiPerl::Palm::PDB::RegisterPDBHandlers( __PACKAGE__, [ "REAd", "TEXt" ], ); |
69
|
9
|
|
|
|
|
73
|
&EBook::MOBI::MobiPerl::Palm::PDB::RegisterPRCHandlers( __PACKAGE__, [ "REAd", "TEXt" ], ); |
70
|
9
|
|
|
|
|
40
|
&EBook::MOBI::MobiPerl::Palm::PDB::RegisterPDBHandlers( __PACKAGE__, [ "MOBI", "BOOK" ], ); |
71
|
9
|
|
|
|
|
41
|
&EBook::MOBI::MobiPerl::Palm::PDB::RegisterPRCHandlers( __PACKAGE__, [ "MOBI", "BOOK" ], ); |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head2 new |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
$doc = new Palm::Doc; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Create a new Doc object. By default, it's not a resource database. Setting |
79
|
|
|
|
|
|
|
C<$self->{attributes}{resource}> to C<1> before any manipulations will |
80
|
|
|
|
|
|
|
cause it to become a resource database. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=cut |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub new |
85
|
|
|
|
|
|
|
{ |
86
|
3
|
|
|
3
|
1
|
7
|
my $class = shift; |
87
|
3
|
|
|
|
|
38
|
my $self = $class->SUPER::new(@_); |
88
|
|
|
|
|
|
|
|
89
|
3
|
|
|
|
|
23
|
$self->{'creator'} = 'REAd'; |
90
|
3
|
|
|
|
|
8
|
$self->{'type'} = 'TEXt'; |
91
|
|
|
|
|
|
|
|
92
|
3
|
|
|
|
|
9
|
$self->{attributes}{resource} = 0; |
93
|
|
|
|
|
|
|
|
94
|
3
|
|
|
|
|
9
|
$self->{appinfo} = undef; |
95
|
3
|
|
|
|
|
7
|
$self->{sort} = undef; |
96
|
3
|
|
|
|
|
8
|
$self->{records} = []; |
97
|
|
|
|
|
|
|
|
98
|
3
|
|
|
|
|
18
|
return $self; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# determine if the given (raw) record is a Doc header record and fill in the |
102
|
|
|
|
|
|
|
# record with appropriate fields if it is. |
103
|
|
|
|
|
|
|
sub _parse_headerrec($) { |
104
|
3
|
|
|
3
|
|
6
|
my $record = shift; |
105
|
3
|
50
|
|
|
|
11
|
return undef unless exists $record->{'data'}; |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# Doc header is minimum of 16 bytes |
108
|
3
|
50
|
|
|
|
12
|
return undef if length $record->{'data'} < 16; |
109
|
|
|
|
|
|
|
|
110
|
3
|
|
|
|
|
24
|
my ($version,$spare,$ulen, $records, $recsize, $position) |
111
|
|
|
|
|
|
|
= unpack( 'n n N n n N', $record->{'data'} ); |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# BEGIN: BORIS DAEPPEN JULY 2012 |
114
|
|
|
|
|
|
|
# *********************** |
115
|
|
|
|
|
|
|
# This code just prints stuff... which is what I don't like |
116
|
|
|
|
|
|
|
# One should not print to stderr just for fun! |
117
|
|
|
|
|
|
|
# so this is why I put this out of work... |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
#my $h = sprintf ("%x", $version); |
120
|
|
|
|
|
|
|
#print STDERR "Version: $version - $h - "; |
121
|
|
|
|
|
|
|
#if ($version == DOC_COMPRESSED) { |
122
|
|
|
|
|
|
|
#print STDERR " DOC_COMPRESSED\n"; |
123
|
|
|
|
|
|
|
#} |
124
|
|
|
|
|
|
|
#if ($version == DOC_UNCOMPRESSED) { |
125
|
|
|
|
|
|
|
#print STDERR " DOC_UNCOMPRESSED\n"; |
126
|
|
|
|
|
|
|
#} |
127
|
|
|
|
|
|
|
#if ($version != DOC_UNCOMPRESSED and $version != DOC_COMPRESSED) { |
128
|
|
|
|
|
|
|
#print STDERR " probably HUFFDIC_COMPRESSED - CANNOT BE DECOMPRESSED!!!\n"; |
129
|
|
|
|
|
|
|
#} |
130
|
|
|
|
|
|
|
# *********************** |
131
|
|
|
|
|
|
|
# END: BORIS DAEPPEN JULY 2012 |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# the header is followed by a list of record sizes. We don't use |
134
|
|
|
|
|
|
|
# this since we can guess the sizes pretty easily by looking at |
135
|
|
|
|
|
|
|
# the actual records. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# According to the spec, $version is either 1 (uncompressed) |
138
|
|
|
|
|
|
|
# or 2 (compress), while spare is always zero. AportisDoc supposedly sets |
139
|
|
|
|
|
|
|
# spare to something else, so screw AportisDoc. |
140
|
3
|
50
|
33
|
|
|
23
|
return undef if $version != DOC_UNCOMPRESSED and $version != DOC_COMPRESSED; |
141
|
3
|
50
|
|
|
|
10
|
return undef if $spare != 0; |
142
|
|
|
|
|
|
|
|
143
|
3
|
|
|
|
|
7
|
$record->{'version'} = $version; |
144
|
3
|
|
|
|
|
6
|
$record->{'length'} = $ulen; |
145
|
3
|
|
|
|
|
6
|
$record->{'records'} = $records; |
146
|
3
|
|
|
|
|
6
|
$record->{'recsize'} = $recsize; |
147
|
3
|
|
|
|
|
7
|
$record->{'position'} = $position; |
148
|
|
|
|
|
|
|
|
149
|
3
|
|
|
|
|
17
|
return $record; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
sub _compress_record($$) { |
153
|
3
|
|
|
3
|
|
16
|
my ($version,$in) = @_; |
154
|
3
|
50
|
|
|
|
10
|
return $in if $version == DOC_UNCOMPRESSED; |
155
|
|
|
|
|
|
|
|
156
|
3
|
|
|
|
|
6
|
my $out = ''; |
157
|
|
|
|
|
|
|
|
158
|
3
|
|
|
|
|
7
|
my $lin = length $in; |
159
|
3
|
|
|
|
|
4
|
my $i = 0; |
160
|
3
|
|
|
|
|
12
|
while( $i < $lin ) { |
161
|
|
|
|
|
|
|
# See http://patb.dyndns.org/Programming/PilotDoc.htm for the code type |
162
|
|
|
|
|
|
|
# taxonomy. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# Try type B compression first. |
165
|
|
|
|
|
|
|
# If the next 3 to 10 bytes are already in the compressed buffer, we can |
166
|
|
|
|
|
|
|
# encode them into a 2 byte sequence. Don't bother too close to the ends, |
167
|
|
|
|
|
|
|
# however... Makes the boundary conditions simpler. |
168
|
171
|
100
|
100
|
|
|
634
|
if( $i > 10 and $lin - $i > 10 ) { |
169
|
116
|
|
|
|
|
141
|
my $chunk = ''; |
170
|
116
|
|
|
|
|
134
|
my $match = -1; |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# the preamble is what'll be in the decoders output buffer. |
173
|
116
|
|
|
|
|
186
|
my $preamble = substr( $in, 0, $i ); |
174
|
116
|
|
|
|
|
254
|
for( my $j = 10; $j >= 3; $j -- ) { |
175
|
905
|
|
|
|
|
1191
|
$chunk = substr( $in, $i, $j ); # grab next $j characters |
176
|
905
|
|
|
|
|
1004
|
$match = rindex( $preamble, $chunk ); # in the output? |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# type B code has a 2047 byte sliding window, so matches have to be |
179
|
|
|
|
|
|
|
# within that range to be useful |
180
|
905
|
100
|
66
|
|
|
1797
|
last if $match >= 0 and ($i - $match) <= 2047; |
181
|
893
|
|
|
|
|
1943
|
$match = -1; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
116
|
|
|
|
|
151
|
my $n = length $chunk; |
185
|
116
|
50
|
66
|
|
|
322
|
if( $match >= 0 and $n <= 10 and $n >= 3 ) { |
|
|
|
66
|
|
|
|
|
186
|
12
|
|
|
|
|
17
|
my $m = $i - $match; |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# first 2 bits are 10, next 11 are offset, next 3 are length-3 |
189
|
12
|
|
|
|
|
42
|
$out .= pack( "n", 0x8000 + (($m<<3)&0x3ff8) + ($n-3) ); |
190
|
|
|
|
|
|
|
|
191
|
12
|
|
|
|
|
19
|
$i += $n; |
192
|
|
|
|
|
|
|
|
193
|
12
|
|
|
|
|
29
|
next; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
159
|
|
|
|
|
255
|
my $ch = substr( $in, $i ++, 1 ); |
198
|
159
|
|
|
|
|
192
|
my $och = ord($ch); |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# Try type C compression. |
201
|
159
|
100
|
100
|
|
|
647
|
if( $i+1 < $lin and $ch eq ' ' ) { |
202
|
13
|
|
|
|
|
24
|
my $nch = substr( $in, $i, 1 ); |
203
|
13
|
|
|
|
|
18
|
my $onch = ord($nch); |
204
|
|
|
|
|
|
|
|
205
|
13
|
100
|
100
|
|
|
65
|
if( $onch >= 0x40 and $onch < 0x80 ) { |
206
|
|
|
|
|
|
|
# space plus ASCII character compression |
207
|
10
|
|
|
|
|
24
|
$out .= chr($onch ^ 0x80); |
208
|
10
|
|
|
|
|
14
|
$i ++; |
209
|
|
|
|
|
|
|
|
210
|
10
|
|
|
|
|
28
|
next; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
149
|
100
|
66
|
|
|
765
|
if( $och == 0 or ($och >= 9 and $och < 0x80) ) { |
|
|
|
33
|
|
|
|
|
215
|
|
|
|
|
|
|
# pass through |
216
|
147
|
|
|
|
|
364
|
$out .= $ch; |
217
|
|
|
|
|
|
|
} else { |
218
|
|
|
|
|
|
|
# type A code. This is essentially an 'escape' like '\\' in strings. |
219
|
|
|
|
|
|
|
# For efficiency, it's best to encode as long a sequence as |
220
|
|
|
|
|
|
|
# possible with one copy. This might seem like it would cause us to miss |
221
|
|
|
|
|
|
|
# out on a type B sequence, but in actuality keeping long binary strings |
222
|
|
|
|
|
|
|
# together improves the likelyhood of a later type B sequence than |
223
|
|
|
|
|
|
|
# interspersing them with x01's. |
224
|
|
|
|
|
|
|
|
225
|
2
|
|
|
|
|
5
|
my $next = substr($in,$i - 1); |
226
|
2
|
50
|
|
|
|
23
|
if( $next =~ /([\x01-\x08\x80-\xff]{1,8})/o ) { |
227
|
2
|
|
|
|
|
7
|
my $binseq = $1; |
228
|
2
|
|
|
|
|
5
|
$out .= chr(length $binseq); |
229
|
2
|
|
|
|
|
4
|
$out .= $binseq; |
230
|
2
|
|
|
|
|
8
|
$i += length( $binseq ) - 1; # first char, $ch, is already counted |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
3
|
|
|
|
|
20
|
return $out; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
# algorithm taken from makedoc7.cpp with reference to |
239
|
|
|
|
|
|
|
# http://patb.dyndns.org/Programming/PilotDoc.htm and |
240
|
|
|
|
|
|
|
# http://www.pyrite.org/doc_format.html |
241
|
|
|
|
|
|
|
sub _decompress_record($$) { |
242
|
0
|
|
|
0
|
|
0
|
my ($version,$in) = @_; |
243
|
0
|
0
|
|
|
|
0
|
return $in if $version == DOC_UNCOMPRESSED; |
244
|
|
|
|
|
|
|
|
245
|
0
|
|
|
|
|
0
|
my $out = ''; |
246
|
|
|
|
|
|
|
|
247
|
0
|
|
|
|
|
0
|
my $lin = length $in; |
248
|
0
|
|
|
|
|
0
|
my $i = 0; |
249
|
0
|
|
|
|
|
0
|
while( $i < $lin ) { |
250
|
0
|
|
|
|
|
0
|
my $ch = substr( $in, $i ++, 1 ); |
251
|
0
|
|
|
|
|
0
|
my $och = ord($ch); |
252
|
|
|
|
|
|
|
|
253
|
0
|
0
|
0
|
|
|
0
|
if( $och >= 1 and $och <= 8 ) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
254
|
|
|
|
|
|
|
# copy this many bytes... basically a way to 'escape' data |
255
|
0
|
|
|
|
|
0
|
$out .= substr( $in, $i, $och ); |
256
|
0
|
|
|
|
|
0
|
$i += $och; |
257
|
|
|
|
|
|
|
} elsif( $och < 0x80 ) { |
258
|
|
|
|
|
|
|
# pass through 0, 9-0x7f |
259
|
0
|
|
|
|
|
0
|
$out .= $ch; |
260
|
|
|
|
|
|
|
} elsif( $och >= 0xc0 ) { |
261
|
|
|
|
|
|
|
# 0xc0-0xff are 'space' plus ASCII char |
262
|
0
|
|
|
|
|
0
|
$out .= ' '; |
263
|
0
|
|
|
|
|
0
|
$out .= chr($och ^ 0x80); |
264
|
|
|
|
|
|
|
} else { |
265
|
|
|
|
|
|
|
# 0x80-0xbf is sequence from already decompressed buffer |
266
|
0
|
|
|
|
|
0
|
my $nch = substr( $in, $i ++, 1 ); |
267
|
0
|
|
|
|
|
0
|
$och = ($och << 8) + ord($nch); |
268
|
0
|
|
|
|
|
0
|
my $m = ($och & 0x3fff) >> 3; |
269
|
0
|
|
|
|
|
0
|
my $n = ($och & 0x7) + 3; |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
# This isn't very perl-like, but a simple |
272
|
|
|
|
|
|
|
# substr($out,$lo-$m,$n) doesn't work. |
273
|
0
|
|
|
|
|
0
|
my $lo = length $out; |
274
|
0
|
|
|
|
|
0
|
for( my $j = 0; $j < $n; $j ++, $lo ++ ) { |
275
|
0
|
0
|
|
|
|
0
|
die "bad Doc compression" unless ($lo-$m) >= 0; |
276
|
0
|
|
|
|
|
0
|
$out .= substr( $out, $lo-$m, 1 ); |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
|
281
|
0
|
|
|
|
|
0
|
return $out; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
sub Write { |
285
|
3
|
|
|
3
|
1
|
7
|
my $self = shift; |
286
|
|
|
|
|
|
|
|
287
|
3
|
|
|
|
|
10
|
my $prc = $self->{attributes}{resource}; |
288
|
3
|
50
|
|
|
|
22
|
my $recs = $prc ? $self->{'resources'} : $self->{'records'}; |
289
|
3
|
|
|
|
|
7
|
my $header = $recs->[0]; |
290
|
3
|
50
|
|
|
|
34
|
unless( defined _parse_headerrec($header) ) { |
291
|
0
|
|
|
|
|
0
|
die "@_: Doesn't appear to be a correct book..."; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
3
|
|
|
|
|
32
|
$self->SUPER::Write(@_); |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=head2 text |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
$text = $doc->text; |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
Return the contents of the Doc database. |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
$text = $doc->text( @text ); |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
Set the contents of the Doc book to the specified arguments. All the list arguments |
306
|
|
|
|
|
|
|
will simply be concatenated together. |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=cut |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
sub text { |
311
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
312
|
|
|
|
|
|
|
|
313
|
0
|
|
|
|
|
|
my $body = ''; |
314
|
0
|
|
|
|
|
|
my $prc = $self->{attributes}{resource}; |
315
|
|
|
|
|
|
|
|
316
|
0
|
0
|
|
|
|
|
if( @_ > 0 ) { |
|
|
0
|
|
|
|
|
|
317
|
0
|
|
|
|
|
|
$body = join( '', @_ ); |
318
|
|
|
|
|
|
|
|
319
|
0
|
|
|
|
|
|
my $version = DOC_COMPRESSED; |
320
|
|
|
|
|
|
|
|
321
|
0
|
|
|
|
|
|
$self->{'records'} = []; |
322
|
0
|
|
|
|
|
|
$self->{'resources'} = []; |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
# first record is the header |
325
|
0
|
0
|
|
|
|
|
my $header = $prc ? $self->append_Resource() : $self->append_Record(); |
326
|
0
|
|
|
|
|
|
$header->{'version'} = $version; |
327
|
0
|
|
|
|
|
|
$header->{'length'} = 0; |
328
|
0
|
|
|
|
|
|
$header->{'records'} = 0; |
329
|
0
|
|
|
|
|
|
$header->{'recsize'} = DOC_RECSIZE; |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
# break the document into record-sized chunks |
332
|
0
|
|
|
|
|
|
for( my $i = 0; $i < length($body); $i += DOC_RECSIZE ) { |
333
|
0
|
0
|
|
|
|
|
my $record = $prc ? $self->append_Resource : $self->append_Record; |
334
|
0
|
|
|
|
|
|
my $chunk = substr($body,$i,DOC_RECSIZE); |
335
|
0
|
|
|
|
|
|
$record->{'data'} = _compress_record( $version, $chunk ); |
336
|
|
|
|
|
|
|
|
337
|
0
|
|
|
|
|
|
$header->{'records'} ++; |
338
|
0
|
|
|
|
|
|
$header->{'length'} += length $body; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
0
|
0
|
|
|
|
|
$header->{'recsize'} = $header->{'length'} |
342
|
|
|
|
|
|
|
if $header->{'length'} < DOC_RECSIZE; |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
# pack up the header |
345
|
0
|
|
|
|
|
|
$header->{'data'} = pack( 'n xx N n n N', |
346
|
|
|
|
|
|
|
$header->{'version'}, $header->{'length'}, |
347
|
|
|
|
|
|
|
$header->{'records'}, $header->{'recsize'}, 0 ); |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
} elsif( defined wantarray ) { |
350
|
|
|
|
|
|
|
|
351
|
0
|
0
|
|
|
|
|
my $recs = $prc ? $self->{'resources'} : $self->{'records'}; |
352
|
|
|
|
|
|
|
|
353
|
0
|
|
|
|
|
|
my $header = $recs->[0]; |
354
|
0
|
0
|
|
|
|
|
if( defined _parse_headerrec($header) ) { |
355
|
|
|
|
|
|
|
# a proper Doc file should be fine, but if it's not Doc |
356
|
|
|
|
|
|
|
# compression like some Mobi docs seem to be we want to |
357
|
|
|
|
|
|
|
# bail early. Otherwise we end up with a huge stream of |
358
|
|
|
|
|
|
|
# substr() errors and we _still_ don't get any content. |
359
|
0
|
|
|
|
|
|
eval { |
360
|
0
|
0
|
|
0
|
0
|
|
sub min { return ($_[0]<$_[1]) ? $_[0] : $_[1] } |
361
|
0
|
|
|
|
|
|
my $maxi = min($#$recs, $header->{'records'}); |
362
|
0
|
|
|
|
|
|
for( my $i = 1; $i <= $maxi; $i ++ ) { |
363
|
0
|
|
|
|
|
|
my $data = $recs->[$i]->{'data'}; |
364
|
0
|
|
|
|
|
|
my $len = length($data); |
365
|
0
|
|
|
|
|
|
my $overlap = ""; |
366
|
0
|
0
|
|
|
|
|
if ($self->{multibyteoverlap}) { |
367
|
0
|
|
|
|
|
|
my $c = chop $data; |
368
|
0
|
|
|
|
|
|
print STDERR "I:$i - $len - ", int($c), "\n"; |
369
|
0
|
|
|
|
|
|
my $n = $c & 7; |
370
|
0
|
|
|
|
|
|
foreach (0..$n-1) { |
371
|
0
|
|
|
|
|
|
$overlap = (chop $data) . $overlap; |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
0
|
|
|
|
|
|
$body .= _decompress_record( $header->{'version'}, |
376
|
|
|
|
|
|
|
$data ); |
377
|
0
|
|
|
|
|
|
$body .= $overlap; |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
}; |
380
|
0
|
0
|
|
|
|
|
return undef if $@; |
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
|
384
|
0
|
|
|
|
|
|
return $body; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=head2 textfile |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
$doc->textfile( "README.txt" ); |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
Set the contents of the Doc to the contents of the file and sets the name of the PDB to |
392
|
|
|
|
|
|
|
the specified filename. |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=cut |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
sub textfile($$) { |
397
|
0
|
|
|
0
|
1
|
|
my ($self, $filename) = @_; |
398
|
|
|
|
|
|
|
|
399
|
0
|
0
|
|
|
|
|
open IN, "< $filename" or return undef; |
400
|
0
|
|
|
|
|
|
binmode IN; |
401
|
0
|
|
|
|
|
|
$self->text( '', ); |
402
|
0
|
|
|
|
|
|
close IN; |
403
|
|
|
|
|
|
|
|
404
|
0
|
|
|
|
|
|
$self->{'name'} = $filename; |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
1; |
408
|
|
|
|
|
|
|
__END__ |