| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # Palm::Doc.pm | 
| 2 |  |  |  |  |  |  | # | 
| 3 |  |  |  |  |  |  | # Palm::PDB helper for handling Palm Doc databases | 
| 4 |  |  |  |  |  |  | # | 
| 5 |  |  |  |  |  |  | # Copyright (C) 2004 Christophe Beauregard | 
| 6 |  |  |  |  |  |  | # | 
| 7 |  |  |  |  |  |  | # $Id: Doc.pm,v 1.19 2005/05/12 01:36:49 cpb Exp $ | 
| 8 |  |  |  |  |  |  |  | 
| 9 | 9 |  |  | 9 |  | 54 | use strict; | 
|  | 9 |  |  |  |  | 16 |  | 
|  | 9 |  |  |  |  | 537 |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | package EBook::MOBI::MobiPerl::Palm::Doc; | 
| 12 |  |  |  |  |  |  |  | 
| 13 | 9 |  |  | 9 |  | 54 | use EBook::MOBI::MobiPerl::Palm::PDB; | 
|  | 9 |  |  |  |  | 18 |  | 
|  | 9 |  |  |  |  | 244 |  | 
| 14 | 9 |  |  | 9 |  | 5562 | use EBook::MOBI::MobiPerl::Palm::Raw(); | 
|  | 9 |  |  |  |  | 21 |  | 
|  | 9 |  |  |  |  | 209 |  | 
| 15 | 9 |  |  | 9 |  | 52 | use vars qw( $VERSION @ISA ); | 
|  | 9 |  |  |  |  | 19 |  | 
|  | 9 |  |  |  |  | 944 |  | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | $VERSION = do { my @r = (q$Revision: 1.19 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | @ISA = qw( EBook::MOBI::MobiPerl::Palm::Raw ); | 
| 20 |  |  |  |  |  |  |  | 
| 21 | 9 |  |  | 9 |  | 52 | use constant DOC_UNCOMPRESSED => scalar 1; | 
|  | 9 |  |  |  |  | 16 |  | 
|  | 9 |  |  |  |  | 661 |  | 
| 22 | 9 |  |  | 9 |  | 48 | use constant DOC_COMPRESSED => scalar 2; | 
|  | 9 |  |  |  |  | 23 |  | 
|  | 9 |  |  |  |  | 454 |  | 
| 23 | 9 |  |  | 9 |  | 43 | use constant DOC_RECSIZE => scalar 4096; | 
|  | 9 |  |  |  |  | 30 |  | 
|  | 9 |  |  |  |  | 17447 |  | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | =head1 NAME | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | Palm::Doc - Handler for Palm Doc books | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | use Palm::Doc; | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | Helper for reading and writing Palm Doc books. The interface is based on | 
| 36 |  |  |  |  |  |  | L since it just makes sense. However, because of the nature | 
| 37 |  |  |  |  |  |  | of these databases, record-level processing is just a Bad Idea. Use | 
| 38 |  |  |  |  |  |  | the C and C calls rather than do direct access of the | 
| 39 |  |  |  |  |  |  | C<@records> array. | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | =head1 EXAMPLES | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | Convert a text file to a .pdb: | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | use Palm::Doc; | 
| 46 |  |  |  |  |  |  | my $doc = new Palm::Doc; | 
| 47 |  |  |  |  |  |  | $doc->textfile( $ARGV[0] ); | 
| 48 |  |  |  |  |  |  | $doc->Write( $ARGV[0] . ".pdb" ); | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | Convert an HTML file to a .prc: | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | use HTML::TreeBuilder; | 
| 53 |  |  |  |  |  |  | use HTML::FormatText; | 
| 54 |  |  |  |  |  |  | use Palm::Doc; | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | my $tree = HTML::TreeBuilder->new_from_file( $ARGV[0] ); | 
| 57 |  |  |  |  |  |  | my $formatter = HTML::FormatText->new( leftmargin => 0, rightmargin => 80 ); | 
| 58 |  |  |  |  |  |  | my $doc = new Palm::Doc; | 
| 59 |  |  |  |  |  |  | $doc->{attributes}{resource} = 1; | 
| 60 |  |  |  |  |  |  | $doc->text( $formatter->format( $tree ) ); | 
| 61 |  |  |  |  |  |  | $doc->Write( $ARGV[0] . ".prc" ); | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | =cut | 
| 64 |  |  |  |  |  |  | #' | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | sub import | 
| 67 |  |  |  |  |  |  | { | 
| 68 | 9 |  |  | 9 |  | 60 | &EBook::MOBI::MobiPerl::Palm::PDB::RegisterPDBHandlers( __PACKAGE__, [ "REAd", "TEXt" ], ); | 
| 69 | 9 |  |  |  |  | 73 | &EBook::MOBI::MobiPerl::Palm::PDB::RegisterPRCHandlers( __PACKAGE__, [ "REAd", "TEXt" ], ); | 
| 70 | 9 |  |  |  |  | 40 | &EBook::MOBI::MobiPerl::Palm::PDB::RegisterPDBHandlers( __PACKAGE__, [ "MOBI", "BOOK" ], ); | 
| 71 | 9 |  |  |  |  | 41 | &EBook::MOBI::MobiPerl::Palm::PDB::RegisterPRCHandlers( __PACKAGE__, [ "MOBI", "BOOK" ], ); | 
| 72 |  |  |  |  |  |  | } | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | =head2 new | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | $doc = new Palm::Doc; | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | Create a new Doc object. By default, it's not a resource database. Setting | 
| 79 |  |  |  |  |  |  | C<$self->{attributes}{resource}> to C<1> before any manipulations will | 
| 80 |  |  |  |  |  |  | cause it to become a resource database. | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | =cut | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | sub new | 
| 85 |  |  |  |  |  |  | { | 
| 86 | 3 |  |  | 3 | 1 | 7 | my $class = shift; | 
| 87 | 3 |  |  |  |  | 38 | my $self = $class->SUPER::new(@_); | 
| 88 |  |  |  |  |  |  |  | 
| 89 | 3 |  |  |  |  | 23 | $self->{'creator'} = 'REAd'; | 
| 90 | 3 |  |  |  |  | 8 | $self->{'type'} = 'TEXt'; | 
| 91 |  |  |  |  |  |  |  | 
| 92 | 3 |  |  |  |  | 9 | $self->{attributes}{resource} = 0; | 
| 93 |  |  |  |  |  |  |  | 
| 94 | 3 |  |  |  |  | 9 | $self->{appinfo} = undef; | 
| 95 | 3 |  |  |  |  | 7 | $self->{sort} = undef; | 
| 96 | 3 |  |  |  |  | 8 | $self->{records} = []; | 
| 97 |  |  |  |  |  |  |  | 
| 98 | 3 |  |  |  |  | 18 | return $self; | 
| 99 |  |  |  |  |  |  | } | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | # determine if the given (raw) record is a Doc header record and fill in the | 
| 102 |  |  |  |  |  |  | # record with appropriate fields if it is. | 
| 103 |  |  |  |  |  |  | sub _parse_headerrec($) { | 
| 104 | 3 |  |  | 3 |  | 6 | my $record = shift; | 
| 105 | 3 | 50 |  |  |  | 11 | return undef unless exists $record->{'data'}; | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | # Doc header is minimum of 16 bytes | 
| 108 | 3 | 50 |  |  |  | 12 | return undef if length $record->{'data'} < 16; | 
| 109 |  |  |  |  |  |  |  | 
| 110 | 3 |  |  |  |  | 24 | my ($version,$spare,$ulen, $records, $recsize, $position) | 
| 111 |  |  |  |  |  |  | = unpack( 'n n N n n N', $record->{'data'} ); | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | # BEGIN: BORIS DAEPPEN JULY 2012 | 
| 114 |  |  |  |  |  |  | # *********************** | 
| 115 |  |  |  |  |  |  | # This code just prints stuff... which is what I don't like | 
| 116 |  |  |  |  |  |  | # One should not print to stderr just for fun! | 
| 117 |  |  |  |  |  |  | # so this is why I put this out of work... | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | #my $h = sprintf ("%x", $version); | 
| 120 |  |  |  |  |  |  | #print STDERR "Version: $version - $h - "; | 
| 121 |  |  |  |  |  |  | #if ($version == DOC_COMPRESSED) { | 
| 122 |  |  |  |  |  |  | #print STDERR " DOC_COMPRESSED\n"; | 
| 123 |  |  |  |  |  |  | #} | 
| 124 |  |  |  |  |  |  | #if ($version == DOC_UNCOMPRESSED) { | 
| 125 |  |  |  |  |  |  | #print STDERR " DOC_UNCOMPRESSED\n"; | 
| 126 |  |  |  |  |  |  | #} | 
| 127 |  |  |  |  |  |  | #if ($version != DOC_UNCOMPRESSED and $version != DOC_COMPRESSED) { | 
| 128 |  |  |  |  |  |  | #print STDERR " probably HUFFDIC_COMPRESSED - CANNOT BE DECOMPRESSED!!!\n"; | 
| 129 |  |  |  |  |  |  | #} | 
| 130 |  |  |  |  |  |  | # *********************** | 
| 131 |  |  |  |  |  |  | # END: BORIS DAEPPEN JULY 2012 | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | # the header is followed by a list of record sizes. We don't use | 
| 134 |  |  |  |  |  |  | # this since we can guess the sizes pretty easily by looking at | 
| 135 |  |  |  |  |  |  | # the actual records. | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | # According to the spec, $version is either 1 (uncompressed) | 
| 138 |  |  |  |  |  |  | # or 2 (compress), while spare is always zero. AportisDoc supposedly sets | 
| 139 |  |  |  |  |  |  | # spare to something else, so screw AportisDoc. | 
| 140 | 3 | 50 | 33 |  |  | 23 | return undef if $version != DOC_UNCOMPRESSED and $version != DOC_COMPRESSED; | 
| 141 | 3 | 50 |  |  |  | 10 | return undef if $spare != 0; | 
| 142 |  |  |  |  |  |  |  | 
| 143 | 3 |  |  |  |  | 7 | $record->{'version'} = $version; | 
| 144 | 3 |  |  |  |  | 6 | $record->{'length'} = $ulen; | 
| 145 | 3 |  |  |  |  | 6 | $record->{'records'} = $records; | 
| 146 | 3 |  |  |  |  | 6 | $record->{'recsize'} = $recsize; | 
| 147 | 3 |  |  |  |  | 7 | $record->{'position'} = $position; | 
| 148 |  |  |  |  |  |  |  | 
| 149 | 3 |  |  |  |  | 17 | return $record; | 
| 150 |  |  |  |  |  |  | } | 
| 151 |  |  |  |  |  |  |  | 
| 152 |  |  |  |  |  |  | sub _compress_record($$) { | 
| 153 | 3 |  |  | 3 |  | 16 | my ($version,$in) = @_; | 
| 154 | 3 | 50 |  |  |  | 10 | return $in if $version == DOC_UNCOMPRESSED; | 
| 155 |  |  |  |  |  |  |  | 
| 156 | 3 |  |  |  |  | 6 | my $out = ''; | 
| 157 |  |  |  |  |  |  |  | 
| 158 | 3 |  |  |  |  | 7 | my $lin = length $in; | 
| 159 | 3 |  |  |  |  | 4 | my $i = 0; | 
| 160 | 3 |  |  |  |  | 12 | while( $i < $lin ) { | 
| 161 |  |  |  |  |  |  | # See http://patb.dyndns.org/Programming/PilotDoc.htm for the code type | 
| 162 |  |  |  |  |  |  | # taxonomy. | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | # Try type B compression first. | 
| 165 |  |  |  |  |  |  | # If the next 3 to 10 bytes are already in the compressed buffer, we can | 
| 166 |  |  |  |  |  |  | # encode them into a 2 byte sequence. Don't bother too close to the ends, | 
| 167 |  |  |  |  |  |  | # however... Makes the boundary conditions simpler. | 
| 168 | 171 | 100 | 100 |  |  | 634 | if( $i > 10 and $lin - $i > 10 ) { | 
| 169 | 116 |  |  |  |  | 141 | my $chunk = ''; | 
| 170 | 116 |  |  |  |  | 134 | my $match = -1; | 
| 171 |  |  |  |  |  |  |  | 
| 172 |  |  |  |  |  |  | # the preamble is what'll be in the decoders output buffer. | 
| 173 | 116 |  |  |  |  | 186 | my $preamble = substr( $in, 0, $i ); | 
| 174 | 116 |  |  |  |  | 254 | for( my $j = 10; $j >= 3; $j -- ) { | 
| 175 | 905 |  |  |  |  | 1191 | $chunk = substr( $in, $i, $j );	# grab next $j characters | 
| 176 | 905 |  |  |  |  | 1004 | $match = rindex( $preamble, $chunk );	# in the output? | 
| 177 |  |  |  |  |  |  |  | 
| 178 |  |  |  |  |  |  | # type B code has a 2047 byte sliding window, so matches have to be | 
| 179 |  |  |  |  |  |  | # within that range to be useful | 
| 180 | 905 | 100 | 66 |  |  | 1797 | last if $match >= 0 and ($i - $match) <= 2047; | 
| 181 | 893 |  |  |  |  | 1943 | $match = -1; | 
| 182 |  |  |  |  |  |  | } | 
| 183 |  |  |  |  |  |  |  | 
| 184 | 116 |  |  |  |  | 151 | my $n = length $chunk; | 
| 185 | 116 | 50 | 66 |  |  | 322 | if( $match >= 0 and $n <= 10 and $n >= 3 ) { | 
|  |  |  | 66 |  |  |  |  | 
| 186 | 12 |  |  |  |  | 17 | my $m = $i - $match; | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | # first 2 bits are 10, next 11 are offset, next 3 are length-3 | 
| 189 | 12 |  |  |  |  | 42 | $out .= pack( "n", 0x8000 + (($m<<3)&0x3ff8) + ($n-3) ); | 
| 190 |  |  |  |  |  |  |  | 
| 191 | 12 |  |  |  |  | 19 | $i += $n; | 
| 192 |  |  |  |  |  |  |  | 
| 193 | 12 |  |  |  |  | 29 | next; | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  | } | 
| 196 |  |  |  |  |  |  |  | 
| 197 | 159 |  |  |  |  | 255 | my $ch = substr( $in, $i ++, 1 ); | 
| 198 | 159 |  |  |  |  | 192 | my $och = ord($ch); | 
| 199 |  |  |  |  |  |  |  | 
| 200 |  |  |  |  |  |  | # Try type C compression. | 
| 201 | 159 | 100 | 100 |  |  | 647 | if( $i+1 < $lin and $ch eq ' ' ) { | 
| 202 | 13 |  |  |  |  | 24 | my $nch = substr( $in, $i, 1 ); | 
| 203 | 13 |  |  |  |  | 18 | my $onch = ord($nch); | 
| 204 |  |  |  |  |  |  |  | 
| 205 | 13 | 100 | 100 |  |  | 65 | if( $onch >= 0x40 and $onch < 0x80 ) { | 
| 206 |  |  |  |  |  |  | # space plus ASCII character compression | 
| 207 | 10 |  |  |  |  | 24 | $out .= chr($onch ^ 0x80); | 
| 208 | 10 |  |  |  |  | 14 | $i ++; | 
| 209 |  |  |  |  |  |  |  | 
| 210 | 10 |  |  |  |  | 28 | next; | 
| 211 |  |  |  |  |  |  | } | 
| 212 |  |  |  |  |  |  | } | 
| 213 |  |  |  |  |  |  |  | 
| 214 | 149 | 100 | 66 |  |  | 765 | if( $och == 0 or ($och >= 9 and $och < 0x80) ) { | 
|  |  |  | 33 |  |  |  |  | 
| 215 |  |  |  |  |  |  | # pass through | 
| 216 | 147 |  |  |  |  | 364 | $out .= $ch; | 
| 217 |  |  |  |  |  |  | } else { | 
| 218 |  |  |  |  |  |  | # type A code. This is essentially an 'escape' like '\\' in strings. | 
| 219 |  |  |  |  |  |  | # For efficiency, it's best to encode as long a sequence as | 
| 220 |  |  |  |  |  |  | # possible with one copy. This might seem like it would cause us to miss | 
| 221 |  |  |  |  |  |  | # out on a type B sequence, but in actuality keeping long binary strings | 
| 222 |  |  |  |  |  |  | # together improves the likelyhood of a later type B sequence than | 
| 223 |  |  |  |  |  |  | # interspersing them with x01's. | 
| 224 |  |  |  |  |  |  |  | 
| 225 | 2 |  |  |  |  | 5 | my $next = substr($in,$i - 1); | 
| 226 | 2 | 50 |  |  |  | 23 | if( $next =~ /([\x01-\x08\x80-\xff]{1,8})/o ) { | 
| 227 | 2 |  |  |  |  | 7 | my $binseq = $1; | 
| 228 | 2 |  |  |  |  | 5 | $out .= chr(length $binseq); | 
| 229 | 2 |  |  |  |  | 4 | $out .= $binseq; | 
| 230 | 2 |  |  |  |  | 8 | $i += length( $binseq ) - 1;	# first char, $ch, is already counted | 
| 231 |  |  |  |  |  |  | } | 
| 232 |  |  |  |  |  |  | } | 
| 233 |  |  |  |  |  |  | } | 
| 234 |  |  |  |  |  |  |  | 
| 235 | 3 |  |  |  |  | 20 | return $out; | 
| 236 |  |  |  |  |  |  | } | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | # algorithm taken from makedoc7.cpp with reference to | 
| 239 |  |  |  |  |  |  | # http://patb.dyndns.org/Programming/PilotDoc.htm and | 
| 240 |  |  |  |  |  |  | # http://www.pyrite.org/doc_format.html | 
| 241 |  |  |  |  |  |  | sub _decompress_record($$) { | 
| 242 | 0 |  |  | 0 |  | 0 | my ($version,$in) = @_; | 
| 243 | 0 | 0 |  |  |  | 0 | return $in if $version == DOC_UNCOMPRESSED; | 
| 244 |  |  |  |  |  |  |  | 
| 245 | 0 |  |  |  |  | 0 | my $out = ''; | 
| 246 |  |  |  |  |  |  |  | 
| 247 | 0 |  |  |  |  | 0 | my $lin = length $in; | 
| 248 | 0 |  |  |  |  | 0 | my $i = 0; | 
| 249 | 0 |  |  |  |  | 0 | while( $i < $lin ) { | 
| 250 | 0 |  |  |  |  | 0 | my $ch = substr( $in, $i ++, 1 ); | 
| 251 | 0 |  |  |  |  | 0 | my $och = ord($ch); | 
| 252 |  |  |  |  |  |  |  | 
| 253 | 0 | 0 | 0 |  |  | 0 | if( $och >= 1 and $och <= 8 ) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 254 |  |  |  |  |  |  | # copy this many bytes... basically a way to 'escape' data | 
| 255 | 0 |  |  |  |  | 0 | $out .= substr( $in, $i, $och ); | 
| 256 | 0 |  |  |  |  | 0 | $i += $och; | 
| 257 |  |  |  |  |  |  | } elsif( $och < 0x80 ) { | 
| 258 |  |  |  |  |  |  | # pass through 0, 9-0x7f | 
| 259 | 0 |  |  |  |  | 0 | $out .= $ch; | 
| 260 |  |  |  |  |  |  | } elsif( $och >= 0xc0 ) { | 
| 261 |  |  |  |  |  |  | # 0xc0-0xff are 'space' plus ASCII char | 
| 262 | 0 |  |  |  |  | 0 | $out .= ' '; | 
| 263 | 0 |  |  |  |  | 0 | $out .= chr($och ^ 0x80); | 
| 264 |  |  |  |  |  |  | } else { | 
| 265 |  |  |  |  |  |  | # 0x80-0xbf is sequence from already decompressed buffer | 
| 266 | 0 |  |  |  |  | 0 | my $nch = substr( $in, $i ++, 1 ); | 
| 267 | 0 |  |  |  |  | 0 | $och = ($och << 8) + ord($nch); | 
| 268 | 0 |  |  |  |  | 0 | my $m = ($och & 0x3fff) >> 3; | 
| 269 | 0 |  |  |  |  | 0 | my $n = ($och & 0x7) + 3; | 
| 270 |  |  |  |  |  |  |  | 
| 271 |  |  |  |  |  |  | # This isn't very perl-like, but a simple | 
| 272 |  |  |  |  |  |  | # substr($out,$lo-$m,$n) doesn't work. | 
| 273 | 0 |  |  |  |  | 0 | my $lo = length $out; | 
| 274 | 0 |  |  |  |  | 0 | for( my $j = 0; $j < $n; $j ++, $lo ++ ) { | 
| 275 | 0 | 0 |  |  |  | 0 | die "bad Doc compression" unless ($lo-$m) >= 0; | 
| 276 | 0 |  |  |  |  | 0 | $out .= substr( $out, $lo-$m, 1 ); | 
| 277 |  |  |  |  |  |  | } | 
| 278 |  |  |  |  |  |  | } | 
| 279 |  |  |  |  |  |  | } | 
| 280 |  |  |  |  |  |  |  | 
| 281 | 0 |  |  |  |  | 0 | return $out; | 
| 282 |  |  |  |  |  |  | } | 
| 283 |  |  |  |  |  |  |  | 
| 284 |  |  |  |  |  |  | sub Write { | 
| 285 | 3 |  |  | 3 | 1 | 7 | my $self = shift; | 
| 286 |  |  |  |  |  |  |  | 
| 287 | 3 |  |  |  |  | 10 | my $prc = $self->{attributes}{resource}; | 
| 288 | 3 | 50 |  |  |  | 22 | my $recs = $prc ? $self->{'resources'} : $self->{'records'}; | 
| 289 | 3 |  |  |  |  | 7 | my $header = $recs->[0]; | 
| 290 | 3 | 50 |  |  |  | 34 | unless( defined _parse_headerrec($header) ) { | 
| 291 | 0 |  |  |  |  | 0 | die "@_: Doesn't appear to be a correct book..."; | 
| 292 |  |  |  |  |  |  | } | 
| 293 |  |  |  |  |  |  |  | 
| 294 | 3 |  |  |  |  | 32 | $self->SUPER::Write(@_); | 
| 295 |  |  |  |  |  |  | } | 
| 296 |  |  |  |  |  |  |  | 
| 297 |  |  |  |  |  |  | =head2 text | 
| 298 |  |  |  |  |  |  |  | 
| 299 |  |  |  |  |  |  | $text = $doc->text; | 
| 300 |  |  |  |  |  |  |  | 
| 301 |  |  |  |  |  |  | Return the contents of the Doc database. | 
| 302 |  |  |  |  |  |  |  | 
| 303 |  |  |  |  |  |  | $text = $doc->text( @text ); | 
| 304 |  |  |  |  |  |  |  | 
| 305 |  |  |  |  |  |  | Set the contents of the Doc book to the specified arguments. All the list arguments | 
| 306 |  |  |  |  |  |  | will simply be concatenated together. | 
| 307 |  |  |  |  |  |  |  | 
| 308 |  |  |  |  |  |  | =cut | 
| 309 |  |  |  |  |  |  |  | 
| 310 |  |  |  |  |  |  | sub text { | 
| 311 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 312 |  |  |  |  |  |  |  | 
| 313 | 0 |  |  |  |  |  | my $body = ''; | 
| 314 | 0 |  |  |  |  |  | my $prc = $self->{attributes}{resource}; | 
| 315 |  |  |  |  |  |  |  | 
| 316 | 0 | 0 |  |  |  |  | if( @_ > 0 ) { | 
|  |  | 0 |  |  |  |  |  | 
| 317 | 0 |  |  |  |  |  | $body = join( '', @_ ); | 
| 318 |  |  |  |  |  |  |  | 
| 319 | 0 |  |  |  |  |  | my $version = DOC_COMPRESSED; | 
| 320 |  |  |  |  |  |  |  | 
| 321 | 0 |  |  |  |  |  | $self->{'records'} = []; | 
| 322 | 0 |  |  |  |  |  | $self->{'resources'} = []; | 
| 323 |  |  |  |  |  |  |  | 
| 324 |  |  |  |  |  |  | # first record is the header | 
| 325 | 0 | 0 |  |  |  |  | my $header = $prc ? $self->append_Resource() : $self->append_Record(); | 
| 326 | 0 |  |  |  |  |  | $header->{'version'} = $version; | 
| 327 | 0 |  |  |  |  |  | $header->{'length'} = 0; | 
| 328 | 0 |  |  |  |  |  | $header->{'records'} = 0; | 
| 329 | 0 |  |  |  |  |  | $header->{'recsize'} = DOC_RECSIZE; | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | # break the document into record-sized chunks | 
| 332 | 0 |  |  |  |  |  | for( my $i = 0; $i < length($body); $i += DOC_RECSIZE ) { | 
| 333 | 0 | 0 |  |  |  |  | my $record = $prc ? $self->append_Resource : $self->append_Record; | 
| 334 | 0 |  |  |  |  |  | my $chunk = substr($body,$i,DOC_RECSIZE); | 
| 335 | 0 |  |  |  |  |  | $record->{'data'} = _compress_record( $version, $chunk ); | 
| 336 |  |  |  |  |  |  |  | 
| 337 | 0 |  |  |  |  |  | $header->{'records'} ++; | 
| 338 | 0 |  |  |  |  |  | $header->{'length'} += length $body; | 
| 339 |  |  |  |  |  |  | } | 
| 340 |  |  |  |  |  |  |  | 
| 341 | 0 | 0 |  |  |  |  | $header->{'recsize'} = $header->{'length'} | 
| 342 |  |  |  |  |  |  | if $header->{'length'} < DOC_RECSIZE; | 
| 343 |  |  |  |  |  |  |  | 
| 344 |  |  |  |  |  |  | # pack up the header | 
| 345 | 0 |  |  |  |  |  | $header->{'data'} = pack( 'n xx N n n N', | 
| 346 |  |  |  |  |  |  | $header->{'version'}, $header->{'length'}, | 
| 347 |  |  |  |  |  |  | $header->{'records'}, $header->{'recsize'}, 0 ); | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | } elsif( defined wantarray ) { | 
| 350 |  |  |  |  |  |  |  | 
| 351 | 0 | 0 |  |  |  |  | my $recs = $prc ? $self->{'resources'} : $self->{'records'}; | 
| 352 |  |  |  |  |  |  |  | 
| 353 | 0 |  |  |  |  |  | my $header = $recs->[0]; | 
| 354 | 0 | 0 |  |  |  |  | if( defined _parse_headerrec($header) ) { | 
| 355 |  |  |  |  |  |  | # a proper Doc file should be fine, but if it's not Doc | 
| 356 |  |  |  |  |  |  | # compression like some Mobi docs seem to be we want to | 
| 357 |  |  |  |  |  |  | # bail early. Otherwise we end up with a huge stream of | 
| 358 |  |  |  |  |  |  | # substr() errors and we _still_ don't get any content. | 
| 359 | 0 |  |  |  |  |  | eval { | 
| 360 | 0 | 0 |  | 0 | 0 |  | sub min { return ($_[0]<$_[1]) ? $_[0] : $_[1] } | 
| 361 | 0 |  |  |  |  |  | my $maxi = min($#$recs, $header->{'records'}); | 
| 362 | 0 |  |  |  |  |  | for( my $i = 1; $i <= $maxi; $i ++ ) { | 
| 363 | 0 |  |  |  |  |  | my $data = $recs->[$i]->{'data'}; | 
| 364 | 0 |  |  |  |  |  | my $len = length($data); | 
| 365 | 0 |  |  |  |  |  | my $overlap = ""; | 
| 366 | 0 | 0 |  |  |  |  | if ($self->{multibyteoverlap}) { | 
| 367 | 0 |  |  |  |  |  | my $c = chop $data; | 
| 368 | 0 |  |  |  |  |  | print STDERR "I:$i - $len - ", int($c), "\n"; | 
| 369 | 0 |  |  |  |  |  | my $n = $c & 7; | 
| 370 | 0 |  |  |  |  |  | foreach (0..$n-1) { | 
| 371 | 0 |  |  |  |  |  | $overlap = (chop $data) . $overlap; | 
| 372 |  |  |  |  |  |  | } | 
| 373 |  |  |  |  |  |  | } | 
| 374 |  |  |  |  |  |  |  | 
| 375 | 0 |  |  |  |  |  | $body .= _decompress_record( $header->{'version'}, | 
| 376 |  |  |  |  |  |  | $data ); | 
| 377 | 0 |  |  |  |  |  | $body .= $overlap; | 
| 378 |  |  |  |  |  |  | } | 
| 379 |  |  |  |  |  |  | }; | 
| 380 | 0 | 0 |  |  |  |  | return undef if $@; | 
| 381 |  |  |  |  |  |  | } | 
| 382 |  |  |  |  |  |  | } | 
| 383 |  |  |  |  |  |  |  | 
| 384 | 0 |  |  |  |  |  | return $body; | 
| 385 |  |  |  |  |  |  | } | 
| 386 |  |  |  |  |  |  |  | 
| 387 |  |  |  |  |  |  | =head2 textfile | 
| 388 |  |  |  |  |  |  |  | 
| 389 |  |  |  |  |  |  | $doc->textfile( "README.txt" ); | 
| 390 |  |  |  |  |  |  |  | 
| 391 |  |  |  |  |  |  | Set the contents of the Doc to the contents of the file and sets the name of the PDB to | 
| 392 |  |  |  |  |  |  | the specified filename. | 
| 393 |  |  |  |  |  |  |  | 
| 394 |  |  |  |  |  |  | =cut | 
| 395 |  |  |  |  |  |  |  | 
| 396 |  |  |  |  |  |  | sub textfile($$) { | 
| 397 | 0 |  |  | 0 | 1 |  | my ($self, $filename) = @_; | 
| 398 |  |  |  |  |  |  |  | 
| 399 | 0 | 0 |  |  |  |  | open IN, "< $filename" or return undef; | 
| 400 | 0 |  |  |  |  |  | binmode IN; | 
| 401 | 0 |  |  |  |  |  | $self->text( '',  ); | 
| 402 | 0 |  |  |  |  |  | close IN; | 
| 403 |  |  |  |  |  |  |  | 
| 404 | 0 |  |  |  |  |  | $self->{'name'} = $filename; | 
| 405 |  |  |  |  |  |  | } | 
| 406 |  |  |  |  |  |  |  | 
| 407 |  |  |  |  |  |  | 1; | 
| 408 |  |  |  |  |  |  | __END__ |