File Coverage

blib/lib/Data/Embed/Reader.pm

Criterion	Covered	Total	%
statement	125	134	93.2
branch	25	36	69.4
condition	3	5	60.0
subroutine	18	22	81.8
pod	4	4	100.0
total	175	201	87.0

line	stmt	bran	cond	sub	pod	time	code
1							package Data::Embed::Reader;
2
3	9			9		3218	use strict;
	9					12
	9					198
4	9			9		25	use warnings;
	9					8
	9					177
5	9			9		24	use English qw< -no_match_vars >;
	9					7
	9					29
6	9			9		2340	use Fcntl qw< :seek >;
	9					10
	9					782
7	9			9		34	use Log::Log4perl::Tiny qw< :easy :dead_if_first >;
	9					10
	9					51
8	9			9		6970	use Storable qw< dclone >;
	9					19377
	9					478
9	9			9		4172	use Data::Embed::File;
	9					16
	9					220
10	9			9		3466	use Data::Embed::Util qw< :constants unescape >;
	9					11
	9					9303
11
12							our $VERSION = '0.32'; # make indexer happy
13
14							sub new {
15	20			20	1	21	my $package = shift;
16	20					27	my $input = shift;
17
18							# Undocumented, keep additional parameters around...
19	20	50	33			56	my %args = (scalar(@_) && ref($_[0])) ? %{$_[0]} : @_;
	0					0
20	20					40	my $self = bless {args => \%args}, $package;
21
22							# If a GLOB, just assign a default filename for logs and set
23							# binary mode :raw
24	20	100				45	if (ref($input) eq 'GLOB') {
25	10					18	DEBUG $package, ': input is a GLOB';
26	10					92	$self->{filename} = '';
27	10	50				33	binmode $input, ":raw"
28							or LOGCROAK "binmode() to ':raw' failed";
29	10					13	$self->{fh} = $input;
30							} ## end if (ref($input) eq 'GLOB')
31							else { # otherwise... it's a filename
32	10					18	DEBUG $package,
33							': input is a file or other thing that can be open-ed';
34	10					98	$self->{filename} = $input;
35	10	50		1		113	open $self->{fh}, '<', $input
	1					4
	1					1
	1					6
36							or LOGCROAK "open('$input'): $OS_ERROR";
37	10					589	binmode $self->{fh}, ':raw';
38							} ## end else [ if (ref($input) eq 'GLOB')]
39
40	20					44	return $self;
41							} ## end sub new
42
43							sub files {
44	10			10	1	74	my $files = shift->_ensure_index()->{files};
45	10	50				46	return wantarray() ? @$files : $files;
46							}
47
48							sub prefix {
49	4			4	1	4	return shift->_ensure_index()->{_prefix};
50							}
51
52							sub reset {
53	0			0	1	0	my $self = shift;
54	0					0	delete $self->{$_} for qw< files index >;
55	0					0	return $self;
56							}
57
58							######## PRIVATE METHODS ##############################################
59
60							# Get the index of the embedded files as a L object.
61							# You should normally not need this index, because it is parsed
62	10			10		12	sub _index { return shift->_ensure_index()->{_index}; }
63
64							sub _ensure_index {
65	24			24		43	my $self = shift;
66
67							# rebuild cache if not in place
68	24	100				41	if (!exists $self->{files}) {
69							my $index = $self->_load_index()
70							\|\| {
71							_prefix => Data::Embed::File->new(
72							fh => $self->{fh},
73							filename => $self->{filename},
74							name => 'Data::Embed prefix data',
75							offset => 0,
76							length => scalar(__size($self->{fh})),
77							),
78							files => [],
79							_index => Data::Embed::File->new(
80							fh => $self->{fh},
81							filename => $self->{filename},
82							name => 'Data::Embed index',
83							length => 0,
84	20		100			28	offset => scalar(__size($self->{fh})),
85							),
86							};
87	20					114	%$self = (%$self, %$index);
88							} ## end if (!exists $self->{files...})
89
90							# return a reference to $self, for easy chaining
91	24					50	return $self;
92							} ## end sub _ensure_index
93
94							sub _load_index {
95	20			20		12	my $self = shift;
96
97							# read the index section from the end of the file, or bail out
98	20	100				31	defined(my $index_text = $self->_read_index())
99							or return;
100	9					7	my $index_length = length($index_text);
101
102							# trim to isolate the data in the section
103	9					6	my $terminator_length = length TERMINATOR();
104	9					8	substr $index_text, 0, length(STARTER()), '';
105	9					9	substr $index_text, -$terminator_length, $terminator_length, '';
106	9					17	DEBUG "index contents is '$index_text'";
107
108							# iterate over the index that has been read. Each line in this
109							# index is assumed to contain a pair length/name
110	9					49	my $data_length = 0;
111	9					6	my ($fh, $filename) = @{$self}{qw< fh filename >};
	9					12
112							my @files = map {
113	9	50				42	my ($length, $name) = m{\A \s* (\d+) \s+ (\S) \s\z}mxs
	16					59
114							or LOGCROAK "index line is not compliant: >$_<";
115	16					26	$name = Data::Embed::Util::unescape($name);
116
117							# the offset at which "this" file lives is equal to the length
118							# of all data considered so far
119	16					14	my $offset = $data_length;
120
121							# the addition of this file increases the data length with the
122							# size of the section, plus two bytes for separating newlines
123	16					17	$data_length += $length + 2;
124							{
125	16					47	fh => $fh,
126							filename => $filename,
127							name => $name,
128							length => $length,
129							offset => $offset, # to be adjusted further
130							};
131							} split /\n+/, $index_text;
132
133							# Now we established the full length of the data section, so it's
134							# possible to adjust all offsets for all files (remember that the
135							# files are assumed to be at the end of the embedding file)
136	9					12	my $full_length = __size($fh);
137	9					12	my $offset_correction = $full_length - $index_length - $data_length;
138	9					14	for my $file (@files) {
139							$file =
140							Data::Embed::File->new(%$file,
141	16					62	offset => ($file->{offset} + $offset_correction),);
142							}
143
144							# return the files in the index and the index itself, all as
145							# Data::Embed::File objects for consistency
146							return {
147	9					19	_prefix => Data::Embed::File->new(
148							fh => $fh,
149							filename => $filename,
150							name => 'Data::Embed prefix data',
151							length => $offset_correction,
152							offset => 0,
153							),
154							files => \@files,
155							_index => Data::Embed::File->new(
156							fh => $fh,
157							filename => $filename,
158							name => 'Data::Embed index',
159							length => $index_length,
160							offset => $data_length + $offset_correction,
161							),
162							};
163							} ## end sub _load_index
164
165							sub __size {
166	51			51		37	my $fh = shift;
167	51					96	my $size = -s $fh;
168	51	100				67	if (!defined $size) {
169	41					57	DEBUG "getting size via seek";
170	41					249	my $current = tell $fh;
171	41					36	seek $fh, 0, SEEK_END;
172	41					27	$size = tell $fh;
173	41					97	DEBUG "size: $size";
174	41					229	seek $fh, $current, SEEK_SET;
175							} ## end if (!defined $size)
176	51					142	return $size;
177							} ## end sub __size
178
179							# read the last section of the file, looking for the index
180							sub _read_index {
181	20			20		12	my $self = shift;
182	20					16	my ($fh, $filename) = @{$self}{qw< fh filename >};
	20					29
183	20					63	DEBUG "_read_index(): fh[$fh] filename[$filename]";
184	20					144	my $full_length = __size($fh); # length of the whole stream/file
185
186							# look for TERMINATOR at the very end of the file
187	20					17	my $terminator = TERMINATOR;
188
189							# is there enough data?
190	20					14	my $terminator_length = length $terminator;
191	20	100				54	return unless $full_length > $terminator_length;
192
193							# read exactly that number of bytes from the end of the file
194							# and compare with the TERMINATOR
195	15					27	my $ending = $self->_read(($terminator_length) x 2);
196	15	100				38	return unless $ending eq $terminator;
197	9					18	DEBUG "found terminator";
198
199							# TERMINATOR is in place, this is promising. Now let's look for
200							# STARTER going backwards in the file
201	9					48	my $starter = STARTER;
202	9					8	my $readable = $full_length - $terminator_length;
203	9					7	my $chunk_size = 80; # we'll read this number of bytes per time
204	9					6	my $starter_position; # this will tell us where the STARTER begins
205	9					15	while ($readable) { # loop until there's stuff to read backwards
206
207							# how many bytes to read? $chunk_size if possible, what remains
208							# otherwise
209	9	100				13	my $n = ($readable > $chunk_size) ? $chunk_size : $readable;
210	9					13	my $chunk = $self->_read($n, $n + length $ending);
211
212							# we're reading backwards, so the new $chunk as to be pre-pended
213	9					11	$ending = $chunk . $ending;
214	9			0		25	TRACE sub { "ENDING: >$ending<" };
	0					0
215
216							# Look for the STARTER. We have to work on the full $ending
217							# instead of the shorter last $chunk because the STARTER might
218							# have been split across two reads
219	9					59	$starter_position = CORE::index $ending, $starter;
220
221							# finding the STARTER is a good exit condition
222	9	50				18	last if $starter_position >= 0;
223
224							# otherwise note that we already read some bytes and go on
225	0					0	$readable -= $n;
226							} ## end while ($readable)
227
228							# if $starter_position is not valid (i.e. -1) then we did not find
229							# the STARTER and we exit with a failure (not an exception, the whole
230							# thing might not be in place at all, although the presence of the
231							# TERMINATOR is suspect anyway...)
232	9	50				22	return unless $starter_position >= 0;
233	9					13	DEBUG "found starter";
234
235							# trim the available buffer $ending to isolate the index and return it
236	9					54	substr $ending, 0, $starter_position, '';
237	9					25	return $ending;
238							} ## end sub _read_index
239
240							# read data from the underlying stream, using offsets from the end
241							# of the stream
242							sub _read {
243	24			24		18	my $self = shift;
244	24					33	my @args = my ($count, $offset_from_end) = @_;
245	24					18	my ($fh, $filename) = @{$self}{qw< fh filename >};
	24					26
246							DEBUG
247	24			0		75	sub { my $args = join ', ', @args; "_read($args) [file: $filename]" };
	0					0
	0					0
248
249	24	50				178	LOGDIE '_read(): offset from end cannot be less than count'
250							if $offset_from_end < $count;
251	24					44	DEBUG "seeking $offset_from_end to the end";
252	24	50				153	seek $fh, -$offset_from_end, SEEK_END
253							or LOGCROAK "seek('$filename'): $OS_ERROR";
254
255	24					18	my $buffer = '';
256	24					33	while ($count) {
257	24					15	my $chunk;
258	24	50				61	defined(my $nread = read $fh, $chunk, $count)
259							or LOGCROAK "read('$filename'): $OS_ERROR";
260	24			0		60	TRACE sub { "read $nread bytes, '$chunk'" };
	0					0
261	24					187	DEBUG "read $nread out of $count bytes needed";
262	24	50				145	LOGCROAK "unexpectedly reached end of file"
263							unless $nread;
264	24					24	$buffer .= $chunk;
265	24					41	$count -= $nread;
266							} ## end while ($count)
267	24					35	return $buffer;
268							} ## end sub _read
269
270							1;