File Coverage

blib/lib/Data/Embed/Reader.pm
Criterion Covered Total %
statement 125 134 93.2
branch 25 36 69.4
condition 3 5 60.0
subroutine 18 22 81.8
pod 4 4 100.0
total 175 201 87.0


line stmt bran cond sub pod time code
1             package Data::Embed::Reader;
2              
3 9     9   3218 use strict;
  9         12  
  9         198  
4 9     9   25 use warnings;
  9         8  
  9         177  
5 9     9   24 use English qw< -no_match_vars >;
  9         7  
  9         29  
6 9     9   2340 use Fcntl qw< :seek >;
  9         10  
  9         782  
7 9     9   34 use Log::Log4perl::Tiny qw< :easy :dead_if_first >;
  9         10  
  9         51  
8 9     9   6970 use Storable qw< dclone >;
  9         19377  
  9         478  
9 9     9   4172 use Data::Embed::File;
  9         16  
  9         220  
10 9     9   3466 use Data::Embed::Util qw< :constants unescape >;
  9         11  
  9         9303  
11              
12             our $VERSION = '0.32'; # make indexer happy
13              
14             sub new {
15 20     20 1 21 my $package = shift;
16 20         27 my $input = shift;
17              
18             # Undocumented, keep additional parameters around...
19 20 50 33     56 my %args = (scalar(@_) && ref($_[0])) ? %{$_[0]} : @_;
  0         0  
20 20         40 my $self = bless {args => \%args}, $package;
21              
22             # If a GLOB, just assign a default filename for logs and set
23             # binary mode :raw
24 20 100       45 if (ref($input) eq 'GLOB') {
25 10         18 DEBUG $package, ': input is a GLOB';
26 10         92 $self->{filename} = '';
27 10 50       33 binmode $input, ":raw"
28             or LOGCROAK "binmode() to ':raw' failed";
29 10         13 $self->{fh} = $input;
30             } ## end if (ref($input) eq 'GLOB')
31             else { # otherwise... it's a filename
32 10         18 DEBUG $package,
33             ': input is a file or other thing that can be open-ed';
34 10         98 $self->{filename} = $input;
35 10 50   1   113 open $self->{fh}, '<', $input
  1         4  
  1         1  
  1         6  
36             or LOGCROAK "open('$input'): $OS_ERROR";
37 10         589 binmode $self->{fh}, ':raw';
38             } ## end else [ if (ref($input) eq 'GLOB')]
39              
40 20         44 return $self;
41             } ## end sub new
42              
43             sub files {
44 10     10 1 74 my $files = shift->_ensure_index()->{files};
45 10 50       46 return wantarray() ? @$files : $files;
46             }
47              
48             sub prefix {
49 4     4 1 4 return shift->_ensure_index()->{_prefix};
50             }
51              
52             sub reset {
53 0     0 1 0 my $self = shift;
54 0         0 delete $self->{$_} for qw< files index >;
55 0         0 return $self;
56             }
57              
58             ######## PRIVATE METHODS ##############################################
59              
60             # Get the index of the embedded files as a L object.
61             # You should normally not need this index, because it is parsed
62 10     10   12 sub _index { return shift->_ensure_index()->{_index}; }
63              
64             sub _ensure_index {
65 24     24   43 my $self = shift;
66              
67             # rebuild cache if not in place
68 24 100       41 if (!exists $self->{files}) {
69             my $index = $self->_load_index()
70             || {
71             _prefix => Data::Embed::File->new(
72             fh => $self->{fh},
73             filename => $self->{filename},
74             name => 'Data::Embed prefix data',
75             offset => 0,
76             length => scalar(__size($self->{fh})),
77             ),
78             files => [],
79             _index => Data::Embed::File->new(
80             fh => $self->{fh},
81             filename => $self->{filename},
82             name => 'Data::Embed index',
83             length => 0,
84 20   100     28 offset => scalar(__size($self->{fh})),
85             ),
86             };
87 20         114 %$self = (%$self, %$index);
88             } ## end if (!exists $self->{files...})
89              
90             # return a reference to $self, for easy chaining
91 24         50 return $self;
92             } ## end sub _ensure_index
93              
94             sub _load_index {
95 20     20   12 my $self = shift;
96              
97             # read the index section from the end of the file, or bail out
98 20 100       31 defined(my $index_text = $self->_read_index())
99             or return;
100 9         7 my $index_length = length($index_text);
101              
102             # trim to isolate the data in the section
103 9         6 my $terminator_length = length TERMINATOR();
104 9         8 substr $index_text, 0, length(STARTER()), '';
105 9         9 substr $index_text, -$terminator_length, $terminator_length, '';
106 9         17 DEBUG "index contents is '$index_text'";
107              
108             # iterate over the index that has been read. Each line in this
109             # index is assumed to contain a pair length/name
110 9         49 my $data_length = 0;
111 9         6 my ($fh, $filename) = @{$self}{qw< fh filename >};
  9         12  
112             my @files = map {
113 9 50       42 my ($length, $name) = m{\A \s* (\d+) \s+ (\S*) \s*\z}mxs
  16         59  
114             or LOGCROAK "index line is not compliant: >$_<";
115 16         26 $name = Data::Embed::Util::unescape($name);
116              
117             # the offset at which "this" file lives is equal to the length
118             # of all data considered so far
119 16         14 my $offset = $data_length;
120              
121             # the addition of this file increases the data length with the
122             # size of the section, plus two bytes for separating newlines
123 16         17 $data_length += $length + 2;
124             {
125 16         47 fh => $fh,
126             filename => $filename,
127             name => $name,
128             length => $length,
129             offset => $offset, # to be adjusted further
130             };
131             } split /\n+/, $index_text;
132              
133             # Now we established the full length of the data section, so it's
134             # possible to adjust all offsets for all files (remember that the
135             # files are assumed to be at the end of the embedding file)
136 9         12 my $full_length = __size($fh);
137 9         12 my $offset_correction = $full_length - $index_length - $data_length;
138 9         14 for my $file (@files) {
139             $file =
140             Data::Embed::File->new(%$file,
141 16         62 offset => ($file->{offset} + $offset_correction),);
142             }
143              
144             # return the files in the index and the index itself, all as
145             # Data::Embed::File objects for consistency
146             return {
147 9         19 _prefix => Data::Embed::File->new(
148             fh => $fh,
149             filename => $filename,
150             name => 'Data::Embed prefix data',
151             length => $offset_correction,
152             offset => 0,
153             ),
154             files => \@files,
155             _index => Data::Embed::File->new(
156             fh => $fh,
157             filename => $filename,
158             name => 'Data::Embed index',
159             length => $index_length,
160             offset => $data_length + $offset_correction,
161             ),
162             };
163             } ## end sub _load_index
164              
165             sub __size {
166 51     51   37 my $fh = shift;
167 51         96 my $size = -s $fh;
168 51 100       67 if (!defined $size) {
169 41         57 DEBUG "getting size via seek";
170 41         249 my $current = tell $fh;
171 41         36 seek $fh, 0, SEEK_END;
172 41         27 $size = tell $fh;
173 41         97 DEBUG "size: $size";
174 41         229 seek $fh, $current, SEEK_SET;
175             } ## end if (!defined $size)
176 51         142 return $size;
177             } ## end sub __size
178              
179             # read the last section of the file, looking for the index
180             sub _read_index {
181 20     20   12 my $self = shift;
182 20         16 my ($fh, $filename) = @{$self}{qw< fh filename >};
  20         29  
183 20         63 DEBUG "_read_index(): fh[$fh] filename[$filename]";
184 20         144 my $full_length = __size($fh); # length of the whole stream/file
185              
186             # look for TERMINATOR at the very end of the file
187 20         17 my $terminator = TERMINATOR;
188              
189             # is there enough data?
190 20         14 my $terminator_length = length $terminator;
191 20 100       54 return unless $full_length > $terminator_length;
192              
193             # read exactly that number of bytes from the end of the file
194             # and compare with the TERMINATOR
195 15         27 my $ending = $self->_read(($terminator_length) x 2);
196 15 100       38 return unless $ending eq $terminator;
197 9         18 DEBUG "found terminator";
198              
199             # TERMINATOR is in place, this is promising. Now let's look for
200             # STARTER going backwards in the file
201 9         48 my $starter = STARTER;
202 9         8 my $readable = $full_length - $terminator_length;
203 9         7 my $chunk_size = 80; # we'll read this number of bytes per time
204 9         6 my $starter_position; # this will tell us where the STARTER begins
205 9         15 while ($readable) { # loop until there's stuff to read backwards
206              
207             # how many bytes to read? $chunk_size if possible, what remains
208             # otherwise
209 9 100       13 my $n = ($readable > $chunk_size) ? $chunk_size : $readable;
210 9         13 my $chunk = $self->_read($n, $n + length $ending);
211              
212             # we're reading backwards, so the new $chunk as to be pre-pended
213 9         11 $ending = $chunk . $ending;
214 9     0   25 TRACE sub { "ENDING: >$ending<" };
  0         0  
215              
216             # Look for the STARTER. We have to work on the full $ending
217             # instead of the shorter last $chunk because the STARTER might
218             # have been split across two reads
219 9         59 $starter_position = CORE::index $ending, $starter;
220              
221             # finding the STARTER is a good exit condition
222 9 50       18 last if $starter_position >= 0;
223              
224             # otherwise note that we already read some bytes and go on
225 0         0 $readable -= $n;
226             } ## end while ($readable)
227              
228             # if $starter_position is not valid (i.e. -1) then we did not find
229             # the STARTER and we exit with a failure (not an exception, the whole
230             # thing might not be in place at all, although the presence of the
231             # TERMINATOR is suspect anyway...)
232 9 50       22 return unless $starter_position >= 0;
233 9         13 DEBUG "found starter";
234              
235             # trim the available buffer $ending to isolate the index and return it
236 9         54 substr $ending, 0, $starter_position, '';
237 9         25 return $ending;
238             } ## end sub _read_index
239              
240             # read data from the underlying stream, using offsets from the end
241             # of the stream
242             sub _read {
243 24     24   18 my $self = shift;
244 24         33 my @args = my ($count, $offset_from_end) = @_;
245 24         18 my ($fh, $filename) = @{$self}{qw< fh filename >};
  24         26  
246             DEBUG
247 24     0   75 sub { my $args = join ', ', @args; "_read($args) [file: $filename]" };
  0         0  
  0         0  
248              
249 24 50       178 LOGDIE '_read(): offset from end cannot be less than count'
250             if $offset_from_end < $count;
251 24         44 DEBUG "seeking $offset_from_end to the end";
252 24 50       153 seek $fh, -$offset_from_end, SEEK_END
253             or LOGCROAK "seek('$filename'): $OS_ERROR";
254              
255 24         18 my $buffer = '';
256 24         33 while ($count) {
257 24         15 my $chunk;
258 24 50       61 defined(my $nread = read $fh, $chunk, $count)
259             or LOGCROAK "read('$filename'): $OS_ERROR";
260 24     0   60 TRACE sub { "read $nread bytes, '$chunk'" };
  0         0  
261 24         187 DEBUG "read $nread out of $count bytes needed";
262 24 50       145 LOGCROAK "unexpectedly reached end of file"
263             unless $nread;
264 24         24 $buffer .= $chunk;
265 24         41 $count -= $nread;
266             } ## end while ($count)
267 24         35 return $buffer;
268             } ## end sub _read
269              
270             1;