File Coverage

lib/Tie/File.pm

Criterion	Covered	Total	%
statement	846	996	84.9
branch	350	458	76.4
condition	56	82	68.2
subroutine	100	110	90.9
pod	7	8	87.5
total	1359	1654	82.1

line	stmt	bran	cond	sub	pod	time	code
1
2							package Tie::File;
3							require 5.005;
4	37			37		201926	use Carp ':DEFAULT', 'confess';
	37					300
	37					5882
5	37			37		12725	use POSIX 'SEEK_SET';
	37					208954
	37					210
6	37			37		38152	use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX', 'LOCK_SH', 'O_WRONLY', 'O_RDONLY';
	37					68
	37					281634
7							sub O_ACCMODE () { O_RDONLY \| O_RDWR \| O_WRONLY }
8
9
10							$VERSION = "1.05";
11							my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes
12							my $DEFAULT_AUTODEFER_THRESHHOLD = 3; # 3 records
13							my $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD = 65536; # 16 disk blocksful
14
15							my %good_opt = map {$_ => 1, "-$_" => 1}
16							qw(memory dw_size mode recsep discipline
17							autodefer autochomp autodefer_threshhold concurrent);
18
19							sub TIEARRAY {
20	2985	50		2985		1614489	if (@_ % 2 != 0) {
21	0					0	croak "usage: tie \@array, $_[0], filename, [option => value]...";
22							}
23	2985					7762	my ($pack, $file, %opts) = @_;
24
25							# transform '-foo' keys into 'foo' keys
26	2985					10507	for my $key (keys %opts) {
27	57	50				183	unless ($good_opt{$key}) {
28	0					0	croak("$pack: Unrecognized option '$key'\n");
29							}
30	57					294	my $okey = $key;
31	57	50				202	if ($key =~ s/^-+//) {
32	0					0	$opts{$key} = delete $opts{$okey};
33							}
34							}
35
36	2985	50				7727	if ($opts{concurrent}) {
37	0					0	croak("$pack: concurrent access not supported yet\n");
38							}
39
40	2985	100				6687	unless (defined $opts{memory}) {
41							# default is the larger of the default cache size and the
42							# deferred-write buffer size (if specified)
43	2981					5229	$opts{memory} = $DEFAULT_MEMORY_SIZE;
44							$opts{memory} = $opts{dw_size}
45	2981	50	33			7136	if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE;
46							# Dora Winifred Read
47							}
48	2985	100				6795	$opts{dw_size} = $opts{memory} unless defined $opts{dw_size};
49	2985	50				6072	if ($opts{dw_size} > $opts{memory}) {
50	0					0	croak("$pack: dw_size may not be larger than total memory allocation\n");
51							}
52							# are we in deferred-write mode?
53	2985	50				6347	$opts{defer} = 0 unless defined $opts{defer};
54	2985					5640	$opts{deferred} = {}; # no records are presently deferred
55	2985					4986	$opts{deferred_s} = 0; # count of total bytes in ->{deferred}
56	2985					4815	$opts{deferred_max} = -1; # empty
57
58							# What's a good way to arrange that this class can be overridden?
59	2985					15466	$opts{cache} = Tie::File::Cache->new($opts{memory});
60
61							# autodeferment is enabled by default
62	2985	100				11006	$opts{autodefer} = 1 unless defined $opts{autodefer};
63	2985					5029	$opts{autodeferring} = 0; # but is not initially active
64	2985					5028	$opts{ad_history} = [];
65							$opts{autodefer_threshhold} = $DEFAULT_AUTODEFER_THRESHHOLD
66	2985	50				6418	unless defined $opts{autodefer_threshhold};
67							$opts{autodefer_filelen_threshhold} = $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD
68	2985	50				6520	unless defined $opts{autodefer_filelen_threshhold};
69
70	2985					5454	$opts{offsets} = [0];
71	2985					5363	$opts{filename} = $file;
72	2985	100				6008	unless (defined $opts{recsep}) {
73	2972					6616	$opts{recsep} = _default_recsep();
74							}
75	2985					8878	$opts{recseplen} = length($opts{recsep});
76	2985	50				6039	if ($opts{recseplen} == 0) {
77	0					0	croak "Empty record separator not supported by $pack";
78							}
79
80	2985	100				5988	$opts{autochomp} = 1 unless defined $opts{autochomp};
81
82	2985	100				5943	$opts{mode} = O_CREAT\|O_RDWR unless defined $opts{mode};
83	2985					6835	$opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
84	2985					4668	$opts{sawlastrec} = undef;
85
86	2985					4032	my $fh;
87
88	2985	100				15571	if (UNIVERSAL::isa($file, 'GLOB')) {
		50
89							# We use 1 here on the theory that some systems
90							# may not indicate failure if we use 0.
91							# MSWin32 does not indicate failure with 0, but I don't know if
92							# it will indicate failure with 1 or not.
93	2	100				18	unless (seek $file, 1, SEEK_SET) {
94	1					227	croak "$pack: your filehandle does not appear to be seekable";
95							}
96	1					6	seek $file, 0, SEEK_SET; # put it back
97	1					3	$fh = $file; # setting binmode is the user's problem
98							} elsif (ref $file) {
99	0					0	croak "usage: tie \@array, $pack, filename, [option => value]...";
100							} else {
101							# $fh = \do { local *FH }; # XXX this is buggy
102	2983	50				6789	if ($] < 5.006) {
103							# perl 5.005 and earlier don't autovivify filehandles
104	0					0	require Symbol;
105	0					0	$fh = Symbol::gensym();
106							}
107	2983	50				106230	sysopen $fh, $file, $opts{mode}, 0666 or return;
108	2983					13777	binmode $fh;
109	2983					8100	++$opts{ourfh};
110							}
111	2984					4458	{ my $ofh = select $fh; $\| = 1; select $ofh } # autoflush on write
	2984					11106
	2984					7782
	2984					9002
112	2984	50	33			8218	if (defined $opts{discipline} && $] >= 5.006) {
113							# This avoids a compile-time warning under 5.005
114	0					0	eval 'binmode($fh, $opts{discipline})';
115	0	0				0	croak $@ if $@ =~ /unknown discipline/i;
116	0	0				0	die if $@;
117							}
118	2984					5425	$opts{fh} = $fh;
119
120	2984					18587	bless \%opts => $pack;
121							}
122
123							sub FETCH {
124	995			995		9650	my ($self, $n) = @_;
125	995					1249	my $rec;
126
127							# check the defer buffer
128	995	100				2107	$rec = $self->{deferred}{$n} if exists $self->{deferred}{$n};
129	995	100				2313	$rec = $self->_fetch($n) unless defined $rec;
130
131							# inlined _chomp1
132							substr($rec, - $self->{recseplen}) = ""
133	995	100	100			3637	if defined $rec && $self->{autochomp};
134	995					3864	$rec;
135							}
136
137							# Chomp many records in-place; return nothing useful
138							sub _chomp {
139	144			144		210	my $self = shift;
140	144	100				315	return unless $self->{autochomp};
141	47	50				100	if ($self->{autochomp}) {
142	47					85	for (@_) {
143	65	50				108	next unless defined;
144	65					133	substr($_, - $self->{recseplen}) = "";
145							}
146							}
147							}
148
149							# Chomp one record in-place; return modified record
150							sub _chomp1 {
151	242			242		451	my ($self, $rec) = @_;
152	242	100				618	return $rec unless $self->{autochomp};
153	217	100				519	return unless defined $rec;
154	135					282	substr($rec, - $self->{recseplen}) = "";
155	135					468	$rec;
156							}
157
158							sub _fetch {
159	1753			1753		2736	my ($self, $n) = @_;
160
161							# check the record cache
162	1753					2169	{ my $cached = $self->{cache}->lookup($n);
	1753					3384
163	1753	100				3961	return $cached if defined $cached;
164							}
165
166	1157	100				1395	if ($#{$self->{offsets}} < $n) {
	1157					2480
167	22	100				97	return if $self->{eof}; # request for record beyond end of file
168	10					37	my $o = $self->_fill_offsets_to($n);
169							# If it's still undefined, there is no such record, so return 'undef'
170	10	100				35	return unless defined $o;
171							}
172
173	1139					1722	my $fh = $self->{FH};
174	1139					2688	$self->_seek($n); # we can do this now that offsets is populated
175	1139					3138	my $rec = $self->_read_record;
176
177							# If we happen to have just read the first record, check to see if
178							# the length of the record matches what 'tell' says. If not, Tie::File
179							# won't work, and should drop dead.
180							#
181							# if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) {
182							# if (defined $self->{discipline}) {
183							# croak "I/O discipline $self->{discipline} not supported";
184							# } else {
185							# croak "File encoding not supported";
186							# }
187							# }
188
189	1139	100	66			5541	$self->{cache}->insert($n, $rec) if defined $rec && not $self->{flushing};
190	1139					2343	$rec;
191							}
192
193							sub STORE {
194	330			330		4322	my ($self, $n, $rec) = @_;
195	330	50				664	die "STORE called from _check_integrity!" if $DIAGNOSTIC;
196
197	330					821	$self->_fixrecs($rec);
198
199	330	100				729	if ($self->{autodefer}) {
200	190					408	$self->_annotate_ad_history($n);
201							}
202
203	330	100				607	return $self->_store_deferred($n, $rec) if $self->_is_deferring;
204
205
206							# We need this to decide whether the new record will fit
207							# It incidentally populates the offsets table
208							# Note we have to do this before we alter the cache
209							# 20020324 Wait, but this DOES alter the cache. TODO BUG?
210	259					596	my $oldrec = $self->_fetch($n);
211
212	259	100				553	if (not defined $oldrec) {
213							# We're storing a record beyond the end of the file
214	40					123	$self->_extend_file_to($n+1);
215	40					117	$oldrec = $self->{recsep};
216							}
217							# return if $oldrec eq $rec; # don't bother
218	259					464	my $len_diff = length($rec) - length($oldrec);
219
220							# length($oldrec) here is not consistent with text mode TODO XXX BUG
221	259					783	$self->_mtwrite($rec, $self->{offsets}[$n], length($oldrec));
222	259					1058	$self->_oadjust([$n, 1, $rec]);
223	259					754	$self->{cache}->update($n, $rec);
224							}
225
226							sub _store_deferred {
227	73			73		135	my ($self, $n, $rec) = @_;
228	73					165	$self->{cache}->remove($n);
229	73					118	my $old_deferred = $self->{deferred}{$n};
230
231	73	100	100			275	if (defined $self->{deferred_max} && $n > $self->{deferred_max}) {
232	68					109	$self->{deferred_max} = $n;
233							}
234	73					138	$self->{deferred}{$n} = $rec;
235
236	73					107	my $len_diff = length($rec);
237	73	100				136	$len_diff -= length($old_deferred) if defined $old_deferred;
238	73					107	$self->{deferred_s} += $len_diff;
239	73					184	$self->{cache}->adj_limit(-$len_diff);
240	73	100				204	if ($self->{deferred_s} > $self->{dw_size}) {
		100
241	1					4	$self->_flush;
242							} elsif ($self->_cache_too_full) {
243	2					13	$self->_cache_flush;
244							}
245							}
246
247							# Remove a single record from the deferred-write buffer without writing it
248							# The record need not be present
249							sub _delete_deferred {
250	6			6		11	my ($self, $n) = @_;
251	6					15	my $rec = delete $self->{deferred}{$n};
252	6	100				22	return unless defined $rec;
253
254	4	50	33			20	if (defined $self->{deferred_max}
255							&& $n == $self->{deferred_max}) {
256	4					10	undef $self->{deferred_max};
257							}
258
259	4					16	$self->{deferred_s} -= length $rec;
260	4					11	$self->{cache}->adj_limit(length $rec);
261							}
262
263							sub FETCHSIZE {
264	530			530		1647	my $self = shift;
265	530	100				1072	my $n = $self->{eof} ? $#{$self->{offsets}} : $self->_fill_offsets;
	508					895
266
267	530					1071	my $top_deferred = $self->_defer_max;
268	530	100	66			1907	$n = $top_deferred+1 if defined $top_deferred && $n < $top_deferred+1;
269	530					1137	$n;
270							}
271
272							sub STORESIZE {
273	16			16		432	my ($self, $len) = @_;
274
275	16	100				53	if ($self->{autodefer}) {
276	15					34	$self->_annotate_ad_history('STORESIZE');
277							}
278
279	16					40	my $olen = $self->FETCHSIZE;
280	16	50				41	return if $len == $olen; # Woo-hoo!
281
282							# file gets longer
283	16	100				39	if ($len > $olen) {
284	6	100				13	if ($self->_is_deferring) {
285	1					4	for ($olen .. $len-1) {
286	2					5	$self->_store_deferred($_, $self->{recsep});
287							}
288							} else {
289	5					13	$self->_extend_file_to($len);
290							}
291	6					23	return;
292							}
293
294							# file gets shorter
295	10	100				28	if ($self->_is_deferring) {
296							# TODO maybe replace this with map-plus-assignment?
297	2					4	for (grep $_ >= $len, keys %{$self->{deferred}}) {
	2					14
298	2					7	$self->_delete_deferred($_);
299							}
300	2					5	$self->{deferred_max} = $len-1;
301							}
302
303	10					32	$self->_seek($len);
304	10					52	$self->_chop_file;
305	10					37	$#{$self->{offsets}} = $len;
	10					42
306							# $self->{offsets}[0] = 0; # in case we just chopped this
307
308	10					46	$self->{cache}->remove(grep $_ >= $len, $self->{cache}->ckeys);
309							}
310
311							### OPTIMIZE ME
312							### It should not be necessary to do FETCHSIZE
313							### Just seek to the end of the file.
314							sub PUSH {
315	5			5		151	my $self = shift;
316	5					14	$self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
317
318							# No need to return:
319							# $self->FETCHSIZE; # because av.c takes care of this for me
320							}
321
322							sub POP {
323	3			3		111	my $self = shift;
324	3					7	my $size = $self->FETCHSIZE;
325	3	100				8	return if $size == 0;
326							# print STDERR "# POPPITY POP POP POP\n";
327	2					9	scalar $self->SPLICE($size-1, 1);
328							}
329
330							sub SHIFT {
331	3			3		113	my $self = shift;
332	3					7	scalar $self->SPLICE(0, 1);
333							}
334
335							sub UNSHIFT {
336	3			3		155	my $self = shift;
337	3					23	$self->SPLICE(0, 0, @_);
338							# $self->FETCHSIZE; # av.c takes care of this for me
339							}
340
341							sub CLEAR {
342	31			31		1323	my $self = shift;
343
344	31	100				94	if ($self->{autodefer}) {
345	21					152	$self->_annotate_ad_history('CLEAR');
346							}
347
348	31					110	$self->_seekb(0);
349	31					134	$self->_chop_file;
350	31					181	$self->{cache}->set_limit($self->{memory});
351	31					94	$self->{cache}->empty;
352	31					54	@{$self->{offsets}} = (0);
	31					86
353	31					51	%{$self->{deferred}}= ();
	31					58
354	31					55	$self->{deferred_s} = 0;
355	31					133	$self->{deferred_max} = -1;
356							}
357
358							sub EXTEND {
359	28			28		103	my ($self, $n) = @_;
360
361							# No need to pre-extend anything in this case
362	28	100				69	return if $self->_is_deferring;
363
364	26					89	$self->_fill_offsets_to($n);
365	26					81	$self->_extend_file_to($n);
366							}
367
368							sub DELETE {
369	9			9		431	my ($self, $n) = @_;
370
371	9	100				59	if ($self->{autodefer}) {
372	4					8	$self->_annotate_ad_history('DELETE');
373							}
374
375	9					21	my $lastrec = $self->FETCHSIZE-1;
376	9					23	my $rec = $self->FETCH($n);
377	9	100				25	$self->_delete_deferred($n) if $self->_is_deferring;
378	9	100				29	if ($n == $lastrec) {
		100
379	4					15	$self->_seek($n);
380	4					17	$self->_chop_file;
381	4					12	$#{$self->{offsets}}--;
	4					18
382	4					23	$self->{cache}->remove($n);
383							# perhaps in this case I should also remove trailing null records?
384							# 20020316
385							# Note that delete @a[-3..-1] deletes the records in the wrong order,
386							# so we only chop the very last one out of the file. We could repair this
387							# by tracking deleted records inside the object.
388							} elsif ($n < $lastrec) {
389	4					11	$self->STORE($n, "");
390							}
391	9					158	$rec;
392							}
393
394							sub EXISTS {
395	11			11		409	my ($self, $n) = @_;
396	11	100				31	return 1 if exists $self->{deferred}{$n};
397	10					20	$n < $self->FETCHSIZE;
398							}
399
400							sub SPLICE {
401	388			388		11051	my $self = shift;
402
403	388	100				955	if ($self->{autodefer}) {
404	334					713	$self->_annotate_ad_history('SPLICE');
405							}
406
407	388	100				750	$self->_flush if $self->_is_deferring; # move this up?
408	388	100				758	if (wantarray) {
409	144					316	$self->_chomp(my @a = $self->_splice(@_));
410	144					642	@a;
411							} else {
412	244					629	$self->_chomp1(scalar $self->_splice(@_));
413							}
414							}
415
416							sub DESTROY {
417	2984			2984		29480	my $self = shift;
418	2984	100				6790	$self->flush if $self->_is_deferring;
419	2984	50				11207	$self->{cache}->delink if defined $self->{cache}; # break circular link
420	2984	100	66			11763	if ($self->{fh} and $self->{ourfh}) {
421	2983					6479	delete $self->{ourfh};
422	2983					53147	close delete $self->{fh};
423							}
424							}
425
426							sub _splice {
427	388			388		880	my ($self, $pos, $nrecs, @data) = @_;
428	388					526	my @result;
429
430	388	100				698	$pos = 0 unless defined $pos;
431
432							# Deal with negative and other out-of-range positions
433							# Also set default for $nrecs
434							{
435	388					511	my $oldsize = $self->FETCHSIZE;
	388					757
436	388	100				746	$nrecs = $oldsize unless defined $nrecs;
437	388					544	my $oldpos = $pos;
438
439	388	100				675	if ($pos < 0) {
440	73					112	$pos += $oldsize;
441	73	100				142	if ($pos < 0) {
442	2					429	croak "Modification of non-creatable array value attempted, " .
443							"subscript $oldpos";
444							}
445							}
446
447	386	100				700	if ($pos > $oldsize) {
448	14	100				47	return unless @data;
449	8					27	$pos = $oldsize; # This is what perl does for normal arrays
450							}
451
452							# The manual is very unclear here
453	380	100				607	if ($nrecs < 0) {
454	10					18	$nrecs = $oldsize - $pos + $nrecs;
455	10	100				21	$nrecs = 0 if $nrecs < 0;
456							}
457
458							# nrecs is too big---it really means "until the end"
459							# 20030507
460	380	100				745	if ($nrecs + $pos > $oldsize) {
461	32					87	$nrecs = $oldsize - $pos;
462							}
463							}
464
465	380					951	$self->_fixrecs(@data);
466	380					828	my $data = join '', @data;
467	380					518	my $datalen = length $data;
468	380					446	my $oldlen = 0;
469
470							# compute length of data being removed
471	380					955	for ($pos .. $pos+$nrecs-1) {
472	507	50				975	last unless defined $self->_fill_offsets_to($_);
473	507					944	my $rec = $self->_fetch($_);
474	507	50				959	last unless defined $rec;
475	507					873	push @result, $rec;
476
477							# Why don't we just use length($rec) here?
478							# Because that record might have come from the cache. _splice
479							# might have been called to flush out the deferred-write records,
480							# and in this case length($rec) is the length of the record to be
481							# written, not the length of the actual record in the file. But
482							# the offsets are still true. 20020322
483							$oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_]
484	507	50				1523	if defined $self->{offsets}[$_+1];
485							}
486	380					978	$self->_fill_offsets_to($pos+$nrecs);
487
488							# Modify the file
489	380					1030	$self->_mtwrite($data, $self->{offsets}[$pos], $oldlen);
490							# Adjust the offsets table
491	380					1734	$self->_oadjust([$pos, $nrecs, @data]);
492
493							{ # Take this read cache stuff out into a separate function
494							# You made a half-attempt to put it into _oadjust.
495							# Finish something like that up eventually.
496							# STORE also needs to do something similarish
497
498							# update the read cache, part 1
499							# modified records
500	380					702	for ($pos .. $pos+$nrecs-1) {
	380					698
501	507					793	my $new = $data[$_-$pos];
502	507	100				876	if (defined $new) {
503	191					395	$self->{cache}->update($_, $new);
504							} else {
505	316					616	$self->{cache}->remove($_);
506							}
507							}
508
509							# update the read cache, part 2
510							# moved records - records past the site of the change
511							# need to be renumbered
512							# Maybe merge this with the previous block?
513							{
514	380					497	my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->ckeys;
	380					783
515	380					853	my @newkeys = map $_-$nrecs+@data, @oldkeys;
516	380					853	$self->{cache}->rekey(\@oldkeys, \@newkeys);
517							}
518
519							# Now there might be too much data in the cache, if we spliced out
520							# some short records and spliced in some long ones. If so, flush
521							# the cache.
522	380					875	$self->_cache_flush;
523							}
524
525							# Yes, the return value of 'splice' is actually this complicated
526	380	100				1672	wantarray ? @result : @result ? $result[-1] : undef;
		100
527							}
528
529
530							# write data into the file
531							# $data is the data to be written.
532							# it should be written at position $pos, and should overwrite
533							# exactly $len of the following bytes.
534							# Note that if length($data) > $len, the subsequent bytes will have to
535							# be moved up, and if length($data) < $len, they will have to
536							# be moved down
537							sub _twrite {
538	179			179		1190	my ($self, $data, $pos, $len) = @_;
539
540	179	50				433	unless (defined $pos) {
541	0					0	die "\$pos was undefined in _twrite";
542							}
543
544	179					303	my $len_diff = length($data) - $len;
545
546	179	100				365	if ($len_diff == 0) { # Woo-hoo!
547	30					61	my $fh = $self->{fh};
548	30					77	$self->_seekb($pos);
549	30					104	$self->_write_record($data);
550	30					92	return; # well, that was easy.
551							}
552
553							# the two records are of different lengths
554							# our strategy here: rewrite the tail of the file,
555							# reading ahead one buffer at a time
556							# $bufsize is required to be at least as large as the data we're overwriting
557	149					338	my $bufsize = _bufsize($len_diff);
558	149					333	my ($writepos, $readpos) = ($pos, $pos+$len);
559	149					236	my $next_block;
560							my $more_data;
561
562							# Seems like there ought to be a way to avoid the repeated code
563							# and the special case here. The read(1) is also a little weird.
564							# Think about this.
565	149					225	do {
566	376					961	$self->_seekb($readpos);
567	376					4568	my $br = read $self->{fh}, $next_block, $bufsize;
568	376					2327	$more_data = read $self->{fh}, my($dummy), 1;
569	376					1095	$self->_seekb($writepos);
570	376					1201	$self->_write_record($data);
571	376					782	$readpos += $br;
572	376					513	$writepos += length $data;
573	376					1277	$data = $next_block;
574							} while $more_data;
575	149					379	$self->_seekb($writepos);
576	149					453	$self->_write_record($next_block);
577
578							# There might be leftover data at the end of the file
579	149	100				577	$self->_chop_file if $len_diff < 0;
580							}
581
582							# _iwrite(D, S, E)
583							# Insert text D at position S.
584							# Let C = E-S-\|D\|. If C < 0; die.
585							# Data in [S,S+C) is copied to [S+D,S+D+C) = [S+D,E).
586							# Data in [S+C = E-D, E) is returned. Data in [E, oo) is untouched.
587							#
588							# In a later version, don't read the entire intervening area into
589							# memory at once; do the copying block by block.
590							sub _iwrite {
591	101			101		481	my $self = shift;
592	101					263	my ($D, $s, $e) = @_;
593	101					180	my $d = length $D;
594	101					157	my $c = $e-$s-$d;
595	101					281	local *FH = $self->{fh};
596	101	50				216	confess "Not enough space to insert $d bytes between $s and $e"
597							if $c < 0;
598	101	50				164	confess "[$s,$e) is an invalid insertion range" if $e < $s;
599
600	101					233	$self->_seekb($s);
601	101					1711	read FH, my $buf, $e-$s;
602
603	101					787	$D .= substr($buf, 0, $c, "");
604
605	101					283	$self->_seekb($s);
606	101					368	$self->_write_record($D);
607
608	101					698	return $buf;
609							}
610
611							# Like _twrite, but the data-pos-len triple may be repeated; you may
612							# write several chunks. All the writing will be done in
613							# one pass. Chunks SHALL be in ascending order and SHALL NOT overlap.
614							sub _mtwrite {
615	2913			2913		12611	my $self = shift;
616	2913					4809	my $unwritten = "";
617	2913					4020	my $delta = 0;
618
619	2913	50				7204	@_ % 3 == 0
620							or die "Arguments to _mtwrite did not come in groups of three";
621
622	2913					7114	while (@_) {
623	5097					15290	my ($data, $pos, $len) = splice @_, 0, 3;
624	5097					7771	my $end = $pos + $len; # The OLD end of the segment to be replaced
625	5097					19776	$data = $unwritten . $data;
626	5097					9704	$delta -= length($unwritten);
627	5097					7946	$unwritten = "";
628	5097					6901	$pos += $delta; # This is where the data goes now
629	5097					7191	my $dlen = length $data;
630	5097					13100	$self->_seekb($pos);
631	5097	100				13982	if ($len >= $dlen) { # the data will fit
632	4240					11188	$self->_write_record($data);
633	4240					8904	$delta += ($dlen - $len); # everything following moves down by this much
634	4240					7275	$data = ""; # All the data in the buffer has been written
635							} else { # won't fit
636	857					2562	my $writable = substr($data, 0, $len - $delta, "");
637	857					2256	$self->_write_record($writable);
638	857					2177	$delta += ($dlen - $len); # everything following moves down by this much
639							}
640
641							# At this point we've written some but maybe not all of the data.
642							# There might be a gap to close up, or $data might still contain a
643							# bunch of unwritten data that didn't fit.
644	5097					7584	my $ndlen = length $data;
645	5097	100				11152	if ($delta == 0) {
		100
646	885					1771	$self->_write_record($data);
647							} elsif ($delta < 0) {
648							# upcopy (close up gap)
649	3368	100				6009	if (@_) {
650	1263					3472	$self->_upcopy($end, $end + $delta, $_[1] - $end);
651							} else {
652	2105					5034	$self->_upcopy($end, $end + $delta);
653							}
654							} else {
655							# downcopy (insert data that didn't fit; replace this data in memory
656							# with _later_ data that doesn't fit)
657	844	100				1587	if (@_) {
658	346					905	$unwritten = $self->_downcopy($data, $end, $_[1] - $end);
659							} else {
660							# Make the file longer to accommodate the last segment that doesn't
661	498					1152	$unwritten = $self->_downcopy($data, $end);
662							}
663							}
664							}
665							}
666
667							# Copy block of data of length $len from position $spos to position $dpos
668							# $dpos must be <= $spos
669							#
670							# If $len is undefined, go all the way to the end of the file
671							# and then truncate it ($spos - $dpos bytes will be removed)
672							sub _upcopy {
673	3422			3422		5972	my $blocksize = 8192;
674	3422					6790	my ($self, $spos, $dpos, $len) = @_;
675	3422	50				8335	if ($dpos > $spos) {
		100
676	0					0	die "source ($spos) was upstream of destination ($dpos) in _upcopy";
677							} elsif ($dpos == $spos) {
678	16					45	return;
679							}
680
681	3406		100			11002	while (! defined ($len) \|\| $len > 0) {
682	7962	100				15106	my $readsize = ! defined($len) ? $blocksize
		100
683							: $len > $blocksize ? $blocksize
684							: $len;
685
686	7962					13971	my $fh = $self->{fh};
687	7962					20164	$self->_seekb($spos);
688	7962					80408	my $bytes_read = read $fh, my($data), $readsize;
689	7962					24649	$self->_seekb($dpos);
690	7962	100				20968	if ($data eq "") {
691	2119					5521	$self->_chop_file;
692	2119					21941	last;
693							}
694	5843					13697	$self->_write_record($data);
695	5843					12302	$spos += $bytes_read;
696	5843					7531	$dpos += $bytes_read;
697	5843	100				23642	$len -= $bytes_read if defined $len;
698							}
699							}
700
701							# Write $data into a block of length $len at position $pos,
702							# moving everything in the block forwards to make room.
703							# Instead of writing the last length($data) bytes from the block
704							# (because there isn't room for them any longer) return them.
705							#
706							# Undefined $len means 'until the end of the file'
707							sub _downcopy {
708	1202			1202		10525	my $blocksize = 8192;
709	1202					2797	my ($self, $data, $pos, $len) = @_;
710	1202					2061	my $fh = $self->{fh};
711
712	1202		100			3957	while (! defined $len \|\| $len > 0) {
713	2345	100				4642	my $readsize = ! defined($len) ? $blocksize
		100
714							: $len > $blocksize? $blocksize : $len;
715	2345					5738	$self->_seekb($pos);
716	2345					25066	read $fh, my($old), $readsize;
717	2345					6202	my $last_read_was_short = length($old) < $readsize;
718	2345					8269	$data .= $old;
719	2345					3108	my $writable;
720	2345	100				4223	if ($last_read_was_short) {
721							# If last read was short, then $data now contains the entire rest
722							# of the file, so there's no need to write only one block of it
723	677					1392	$writable = $data;
724	677					1073	$data = "";
725							} else {
726	1668					6790	$writable = substr($data, 0, $readsize, "");
727							}
728	2345	100				5036	last if $writable eq "";
729	2340					6007	$self->_seekb($pos);
730	2340					7301	$self->_write_record($writable);
731	2340	100	66			8766	last if $last_read_was_short && $data eq "";
732	1668	100				3041	$len -= $readsize if defined $len;
733	1668					5180	$pos += $readsize;
734							}
735	1202					5973	return $data;
736							}
737
738							# Adjust the object data structures following an '_mtwrite'
739							# Arguments are
740							# [$pos, $nrecs, @length] items
741							# indicating that $nrecs records were removed at $recpos (a record offset)
742							# and replaced with records of length @length...
743							# Arguments guarantee that $recpos is strictly increasing.
744							# No return value
745							sub _oadjust {
746	662			662		1016	my $self = shift;
747	662					854	my $delta = 0;
748	662					805	my $delta_recs = 0;
749	662					852	my $prev_end = -1;
750	662					845	my %newkeys;
751
752	662					1228	for (@_) {
753	664					1627	my ($pos, $nrecs, @data) = @$_;
754	664					909	$pos += $delta_recs;
755
756							# Adjust the offsets of the records after the previous batch up
757							# to the first new one of this batch
758	664					1564	for my $i ($prev_end+2 .. $pos - 1) {
759	1047					1476	$self->{offsets}[$i] += $delta;
760	1047					1930	$newkey{$i} = $i + $delta_recs;
761							}
762
763	664					1111	$prev_end = $pos + @data - 1; # last record moved on this pass
764
765							# Remove the offsets for the removed records;
766							# replace with the offsets for the inserted records
767	664					1360	my @newoff = ($self->{offsets}[$pos] + $delta);
768	664					1308	for my $i (0 .. $#data) {
769	767					1138	my $newlen = length $data[$i];
770	767					1217	push @newoff, $newoff[$i] + $newlen;
771	767					1199	$delta += $newlen;
772							}
773
774	664					1342	for my $i ($pos .. $pos+$nrecs-1) {
775	822	100				1079	last if $i+1 > $#{$self->{offsets}};
	822					1809
776	812					1454	my $oldlen = $self->{offsets}[$i+1] - $self->{offsets}[$i];
777	812					1189	$delta -= $oldlen;
778							}
779
780							# # also this data has changed, so update it in the cache
781							# for (0 .. $#data) {
782							# $self->{cache}->update($pos + $_, $data[$_]);
783							# }
784							# if ($delta_recs) {
785							# my @oldkeys = grep $_ >= $pos + @data, $self->{cache}->ckeys;
786							# my @newkeys = map $_ + $delta_recs, @oldkeys;
787							# $self->{cache}->rekey(\@oldkeys, \@newkeys);
788							# }
789
790							# replace old offsets with new
791	664					868	splice @{$self->{offsets}}, $pos, $nrecs+1, @newoff;
	664					1627
792							# What if we just spliced out the end of the offsets table?
793							# shouldn't we clear $self->{eof}? Test for this XXX BUG TODO
794
795	664					1461	$delta_recs += @data - $nrecs; # net change in total number of records
796							}
797
798							# The trailing records at the very end of the file
799	662	100				1305	if ($delta) {
800	562					789	for my $i ($prev_end+2 .. $#{$self->{offsets}}) {
	562					1149
801	1393					1894	$self->{offsets}[$i] += $delta;
802							}
803							}
804
805							# If we scrubbed out all known offsets, regenerate the trivial table
806							# that knows that the file does indeed start at 0.
807	662	50				900	$self->{offsets}[0] = 0 unless @{$self->{offsets}};
	662					1353
808							# If the file got longer, the offsets table is no longer complete
809							# $self->{eof} = 0 if $delta_recs > 0;
810
811							# Now there might be too much data in the cache, if we spliced out
812							# some short records and spliced in some long ones. If so, flush
813							# the cache.
814	662					1445	$self->_cache_flush;
815							}
816
817							# If a record does not already end with the appropriate terminator
818							# string, append one.
819							sub _fixrecs {
820	710			710		980	my $self = shift;
821	710					1415	for (@_) {
822	782	100				1383	$_ = "" unless defined $_;
823							$_ .= $self->{recsep}
824	782	100				2622	unless substr($_, - $self->{recseplen}) eq $self->{recsep};
825							}
826							}
827
828
829							################################################################
830							#
831							# Basic read, write, and seek
832							#
833
834							# seek to the beginning of record #$n
835							# Assumes that the offsets table is already correctly populated
836							#
837							# Note that $n=-1 has a special meaning here: It means the start of
838							# the last known record; this may or may not be the very last record
839							# in the file, depending on whether the offsets table is fully populated.
840							#
841							sub _seek {
842	1283			1283		1937	my ($self, $n) = @_;
843	1283					1881	my $o = $self->{offsets}[$n];
844	1283	50				2233	defined($o)
845							or confess("logic error: undefined offset for record $n");
846	1283	50				13324	seek $self->{fh}, $o, SEEK_SET
847							or confess "Couldn't seek filehandle: $!"; # "Should never happen."
848							}
849
850							# seek to byte $b in the file
851							sub _seekb {
852	26870			26870		42704	my ($self, $b) = @_;
853	26870	50				211375	seek $self->{fh}, $b, SEEK_SET
854							or die "Couldn't seek filehandle: $!"; # "Should never happen."
855							}
856
857							# populate the offsets table up to the beginning of record $n
858							# return the offset of record $n
859							sub _fill_offsets_to {
860	949			949		1530	my ($self, $n) = @_;
861
862	949	100				2410	return $self->{offsets}[$n] if $self->{eof};
863
864	17					56	my $fh = $self->{fh};
865	17					60	local *OFF = $self->{offsets};
866	17					33	my $rec;
867
868	17					76	until ($#OFF >= $n) {
869	27					95	$self->_seek(-1); # tricky -- see comment at _seek
870	27					96	$rec = $self->_read_record;
871	27	100				91	if (defined $rec) {
872	14					61	push @OFF, int(tell $fh); # Tels says that int() saves memory here
873							} else {
874	13					35	$self->{eof} = 1;
875	13					42	return; # It turns out there is no such record
876							}
877							}
878
879							# we have now read all the records up to record n-1,
880							# so we can return the offset of record n
881	4					13	$OFF[$n];
882							}
883
884							sub _fill_offsets {
885	22			22		82	my ($self) = @_;
886
887	22					47	my $fh = $self->{fh};
888	22					77	local *OFF = $self->{offsets};
889
890	22					92	$self->_seek(-1); # tricky -- see comment at _seek
891
892							# Tels says that inlining read_record() would make this loop
893							# five times faster. 20030508
894	22					91	while ( defined $self->_read_record()) {
895							# int() saves us memory here
896	166					441	push @OFF, int(tell $fh);
897							}
898
899	22					67	$self->{eof} = 1;
900	22					71	$#OFF;
901							}
902
903							# assumes that $rec is already suitably terminated
904							sub _write_record {
905	15049			15049		33759	my ($self, $rec) = @_;
906	15049					24428	my $fh = $self->{fh};
907	15049					52150	local $\ = "";
908	15049	50				222407	print $fh $rec
909							or die "Couldn't write record: $!"; # "Should never happen."
910							# $self->{_written} += length($rec);
911							}
912
913							sub _read_record {
914	1354			1354		1941	my $self = shift;
915	1354					1607	my $rec;
916	1354					1748	{ local $/ = $self->{recsep};
	1354					5378
917	1354					1991	my $fh = $self->{fh};
918	1354					11548	$rec = <$fh>;
919							}
920	1354	100				3555	return unless defined $rec;
921	1287	100				3648	if (substr($rec, -$self->{recseplen}) ne $self->{recsep}) {
922							# improperly terminated final record --- quietly fix it.
923							# my $ac = substr($rec, -$self->{recseplen});
924							# $ac =~ s/\n/\\n/g;
925	7					12	$self->{sawlastrec} = 1;
926	7	100				16	unless ($self->{rdonly}) {
927	4					11	local $\ = "";
928	4					7	my $fh = $self->{fh};
929	4					67	print $fh $self->{recsep};
930							}
931	7					21	$rec .= $self->{recsep};
932							}
933							# $self->{_read} += length($rec) if defined $rec;
934	1287					2734	$rec;
935							}
936
937							sub _rw_stats {
938	0			0		0	my $self = shift;
939	0					0	@{$self}{'_read', '_written'};
	0					0
940							}
941
942							################################################################
943							#
944							# Read cache management
945
946							sub _cache_flush {
947	1044			1044		1696	my ($self) = @_;
948	1044					2497	$self->{cache}->reduce_size_to($self->{memory} - $self->{deferred_s});
949							}
950
951							sub _cache_too_full {
952	72			72		104	my $self = shift;
953	72					132	$self->{cache}->bytes + $self->{deferred_s} >= $self->{memory};
954							}
955
956							################################################################
957							#
958							# File custodial services
959							#
960
961
962							# We have read to the end of the file and have the offsets table
963							# entirely populated. Now we need to write a new record beyond
964							# the end of the file. We prepare for this by writing
965							# empty records into the file up to the position we want
966							#
967							# assumes that the offsets table already contains the offset of record $n,
968							# if it exists, and extends to the end of the file if not.
969							sub _extend_file_to {
970	81			81		163	my ($self, $n) = @_;
971	81					225	$self->_seek(-1); # position after the end of the last record
972	81					274	my $pos = $self->{offsets}[-1];
973
974							# the offsets table has one entry more than the total number of records
975	81					145	my $extras = $n - $#{$self->{offsets}};
	81					209
976
977							# Todo : just use $self->{recsep} x $extras here?
978	81					258	while ($extras-- > 0) {
979	228					666	$self->_write_record($self->{recsep});
980	228					480	push @{$self->{offsets}}, int(tell $self->{fh});
	228					1153
981							}
982							}
983
984							# Truncate the file at the current position
985							sub _chop_file {
986	2235			2235		3493	my $self = shift;
987	2235					659838	truncate $self->{fh}, tell($self->{fh});
988							}
989
990
991							# compute the size of a buffer suitable for moving
992							# all the data in a file forward $n bytes
993							# ($n may be negative)
994							# The result should be at least $n.
995							sub _bufsize {
996	149			149		273	my $n = shift;
997	149	100				375	return 8192 if $n <= 0;
998	78					107	my $b = $n & ~8191;
999	78	100				171	$b += 8192 if $n & 8191;
1000	78					126	$b;
1001							}
1002
1003							################################################################
1004							#
1005							# Miscellaneous public methods
1006							#
1007
1008							# Lock the file
1009							sub flock {
1010	2			2	1	22	my ($self, $op) = @_;
1011	2	50				6	unless (@_ <= 3) {
1012	0					0	my $pack = ref $self;
1013	0					0	croak "Usage: $pack\->flock([OPERATION])";
1014							}
1015	2					3	my $fh = $self->{fh};
1016	2	100				5	$op = LOCK_EX unless defined $op;
1017	2					17	my $locked = flock $fh, $op;
1018
1019	2	100	66			12	if ($locked && ($op & (LOCK_EX \| LOCK_SH))) {
1020							# If you're locking the file, then presumably it's because
1021							# there might have been a write access by another process.
1022							# In that case, the read cache contents and the offsets table
1023							# might be invalid, so discard them. 20030508
1024	1					3	$self->{offsets} = [0];
1025	1					4	$self->{cache}->empty;
1026							}
1027
1028	2					5	$locked;
1029							}
1030
1031							# Get/set autochomp option
1032							sub autochomp {
1033	4			4	1	31	my $self = shift;
1034	4	100				10	if (@_) {
1035	2					5	my $old = $self->{autochomp};
1036	2					4	$self->{autochomp} = shift;
1037	2					6	$old;
1038							} else {
1039	2					5	$self->{autochomp};
1040							}
1041							}
1042
1043							# Get offset table entries; returns offset of nth record
1044							sub offset {
1045	22			22	1	894	my ($self, $n) = @_;
1046
1047	22	100				25	if ($#{$self->{offsets}} < $n) {
	22					64
1048	4	100				20	return if $self->{eof}; # request for record beyond the end of file
1049	1					4	my $o = $self->_fill_offsets_to($n);
1050							# If it's still undefined, there is no such record, so return 'undef'
1051	1	50				8	return unless defined $o;
1052							}
1053
1054	18					79	$self->{offsets}[$n];
1055							}
1056
1057							sub discard_offsets {
1058	0			0	0	0	my $self = shift;
1059	0					0	$self->{offsets} = [0];
1060							}
1061
1062							################################################################
1063							#
1064							# Matters related to deferred writing
1065							#
1066
1067							# Defer writes
1068							sub defer {
1069	22			22	1	1153	my $self = shift;
1070	22					71	$self->_stop_autodeferring;
1071	22					35	@{$self->{ad_history}} = ();
	22					45
1072	22					69	$self->{defer} = 1;
1073							}
1074
1075							# Flush deferred writes
1076							#
1077							# This could be better optimized to write the file in one pass, instead
1078							# of one pass per block of records. But that will require modifications
1079							# to _twrite, so I should have a good _twrite test suite first.
1080							sub flush {
1081	16			16	1	549	my $self = shift;
1082
1083	16					45	$self->_flush;
1084	16					62	$self->{defer} = 0;
1085							}
1086
1087							sub _old_flush {
1088	0			0		0	my $self = shift;
1089	0					0	my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
	0					0
	0					0
1090
1091	0					0	while (@writable) {
1092							# gather all consecutive records from the front of @writable
1093	0					0	my $first_rec = shift @writable;
1094	0					0	my $last_rec = $first_rec+1;
1095	0		0			0	++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
1096	0					0	--$last_rec;
1097	0					0	$self->_fill_offsets_to($last_rec);
1098	0					0	$self->_extend_file_to($last_rec);
1099							$self->_splice($first_rec, $last_rec-$first_rec+1,
1100	0					0	@{$self->{deferred}}{$first_rec .. $last_rec});
	0					0
1101							}
1102
1103	0					0	$self->_discard; # clear out defered-write-cache
1104							}
1105
1106							sub _flush {
1107	23			23		30	my $self = shift;
1108	23					44	my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
	55					173
	23					104
1109	23					47	my @args;
1110							my @adjust;
1111
1112	23					59	while (@writable) {
1113							# gather all consecutive records from the front of @writable
1114	25					55	my $first_rec = shift @writable;
1115	25					41	my $last_rec = $first_rec+1;
1116	25		100			161	++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
1117	25					38	--$last_rec;
1118	25					68	my $end = $self->_fill_offsets_to($last_rec+1);
1119	25	100				53	if (not defined $end) {
1120	10					29	$self->_extend_file_to($last_rec);
1121	10					22	$end = $self->{offsets}[$last_rec];
1122							}
1123	25					58	my ($start) = $self->{offsets}[$first_rec];
1124							push @args,
1125	25					56	join("", @{$self->{deferred}}{$first_rec .. $last_rec}), # data
	25					111
1126							$start, # position
1127							$end-$start; # length
1128							push @adjust, [$first_rec, # starting at this position...
1129							$last_rec-$first_rec+1, # this many records...
1130							# are replaced with these...
1131	25					158	@{$self->{deferred}}{$first_rec .. $last_rec},
	25					146
1132							];
1133							}
1134
1135	23					82	$self->_mtwrite(@args); # write multiple record groups
1136	23					72	$self->_discard; # clear out defered-write-cache
1137	23					59	$self->_oadjust(@adjust);
1138							}
1139
1140							# Discard deferred writes and disable future deferred writes
1141							sub discard {
1142	6			6	1	238	my $self = shift;
1143	6					22	$self->_discard;
1144	6					21	$self->{defer} = 0;
1145							}
1146
1147							# Discard deferred writes, but retain old deferred writing mode
1148							sub _discard {
1149	29			29		43	my $self = shift;
1150	29					69	%{$self->{deferred}} = ();
	29					87
1151	29					51	$self->{deferred_s} = 0;
1152	29					58	$self->{deferred_max} = -1;
1153	29					71	$self->{cache}->set_limit($self->{memory});
1154							}
1155
1156							# Deferred writing is enabled, either explicitly ($self->{defer})
1157							# or automatically ($self->{autodeferring})
1158							sub _is_deferring {
1159	4515			4515		6287	my $self = shift;
1160	4515	100				16774	$self->{defer} \|\| $self->{autodeferring};
1161							}
1162
1163							# The largest record number of any deferred record
1164							sub _defer_max {
1165	530			530		717	my $self = shift;
1166	530	100				1347	return $self->{deferred_max} if defined $self->{deferred_max};
1167	1					2	my $max = -1;
1168	1					2	for my $key (keys %{$self->{deferred}}) {
	1					3
1169	1	50				5	$max = $key if $key > $max;
1170							}
1171	1					3	$self->{deferred_max} = $max;
1172	1					2	$max;
1173							}
1174
1175							################################################################
1176							#
1177							# Matters related to autodeferment
1178							#
1179
1180							# Get/set autodefer option
1181							sub autodefer {
1182	2			2	1	102	my $self = shift;
1183	2	50				8	if (@_) {
1184	2					4	my $old = $self->{autodefer};
1185	2					3	$self->{autodefer} = shift;
1186	2	100				8	if ($old) {
1187	1					3	$self->_stop_autodeferring;
1188	1					2	@{$self->{ad_history}} = ();
	1					3
1189							}
1190	2					5	$old;
1191							} else {
1192	0					0	$self->{autodefer};
1193							}
1194							}
1195
1196							# The user is trying to store record #$n Record that in the history,
1197							# and then enable (or disable) autodeferment if that seems useful.
1198							# Note that it's OK for $n to be a non-number, as long as the function
1199							# is prepared to deal with that. Nobody else looks at the ad_history.
1200							#
1201							# Now, what does the ad_history mean, and what is this function doing?
1202							# Essentially, the idea is to enable autodeferring when we see that the
1203							# user has made three consecutive STORE calls to three consecutive records.
1204							# ("Three" is actually ->{autodefer_threshhold}.)
1205							# A STORE call for record #$n inserts $n into the autodefer history,
1206							# and if the history contains three consecutive records, we enable
1207							# autodeferment. An ad_history of [X, Y] means that the most recent
1208							# STOREs were for records X, X+1, ..., Y, in that order.
1209							#
1210							# Inserting a nonconsecutive number erases the history and starts over.
1211							#
1212							# Performing a special operation like SPLICE erases the history.
1213							#
1214							# There's one special case: CLEAR means that CLEAR was just called.
1215							# In this case, we prime the history with [-2, -1] so that if the next
1216							# write is for record 0, autodeferring goes on immediately. This is for
1217							# the common special case of "@a = (...)".
1218							#
1219							sub _annotate_ad_history {
1220	564			564		1023	my ($self, $n) = @_;
1221	564	50				1087	return unless $self->{autodefer}; # feature is disabled
1222	564	100				1069	return if $self->{defer}; # already in explicit defer mode
1223	498	100				1243	return unless $self->{offsets}[-1] >= $self->{autodefer_filelen_threshhold};
1224
1225	25					45	local *H = $self->{ad_history};
1226	25	100				128	if ($n eq 'CLEAR') {
		50
1227	2					5	@H = (-2, -1); # prime the history with fake records
1228	2					7	$self->_stop_autodeferring;
1229							} elsif ($n =~ /^\d+$/) {
1230	23	100				48	if (@H == 0) {
1231	1					5	@H = ($n, $n);
1232							} else { # @H == 2
1233	22	100				42	if ($H[1] == $n-1) { # another consecutive record
1234	19					38	$H[1]++;
1235	19	100				48	if ($H[1] - $H[0] + 1 >= $self->{autodefer_threshhold}) {
1236	16					34	$self->{autodeferring} = 1;
1237							}
1238							} else { # nonconsecutive- erase and start over
1239	3					7	@H = ($n, $n);
1240	3					22	$self->_stop_autodeferring;
1241							}
1242							}
1243							} else { # SPLICE or STORESIZE or some such
1244	0					0	@H = ();
1245	0					0	$self->_stop_autodeferring;
1246							}
1247							}
1248
1249							# If autodeferring was enabled, cut it out and discard the history
1250							sub _stop_autodeferring {
1251	28			28		41	my $self = shift;
1252	28	100				80	if ($self->{autodeferring}) {
1253	5					11	$self->_flush;
1254							}
1255	28					102	$self->{autodeferring} = 0;
1256							}
1257
1258							################################################################
1259
1260
1261							# This is NOT a method. It is here for two reasons:
1262							# 1. To factor a fairly complicated block out of the constructor
1263							# 2. To provide access for the test suite, which need to be sure
1264							# files are being written properly.
1265							sub _default_recsep {
1266	2996			2996		8090	my $recsep = $/;
1267	2996	50				9413	if ($^O eq 'MSWin32') { # Dos too?
1268							# Windows users expect files to be terminated with \r\n
1269							# But $/ is set to \n instead
1270							# Note that this also transforms \n\n into \r\n\r\n.
1271							# That is a feature.
1272	0					0	$recsep =~ s/\n/\r\n/g;
1273							}
1274	2996					7155	$recsep;
1275							}
1276
1277							# Utility function for _check_integrity
1278							sub _ci_warn {
1279	0			0		0	my $msg = shift;
1280	0					0	$msg =~ s/\n/\\n/g;
1281	0					0	$msg =~ s/\r/\\r/g;
1282	0					0	print "# $msg\n";
1283							}
1284
1285							# Given a file, make sure the cache is consistent with the
1286							# file contents and the internal data structures are consistent with
1287							# each other. Returns true if everything checks out, false if not
1288							#
1289							# The $file argument is no longer used. It is retained for compatibility
1290							# with the existing test suite.
1291							sub _check_integrity {
1292	380			380		6523	my ($self, $file, $warn) = @_;
1293	380					674	my $rsl = $self->{recseplen};
1294	380					591	my $rs = $self->{recsep};
1295	380					523	my $good = 1;
1296	380					684	local *_; # local $_ does not work here
1297	380					599	local $DIAGNOSTIC = 1;
1298
1299	380	50				1277	if (not defined $rs) {
		50
		50
1300	0					0	_ci_warn("recsep is undef!");
1301	0					0	$good = 0;
1302							} elsif ($rs eq "") {
1303	0					0	_ci_warn("recsep is empty!");
1304	0					0	$good = 0;
1305							} elsif ($rsl != length $rs) {
1306	0					0	my $ln = length $rs;
1307	0					0	_ci_warn("recsep <$rs> has length $ln, should be $rsl");
1308	0					0	$good = 0;
1309							}
1310
1311	380	50				948	if (not defined $self->{offsets}[0]) {
		50
1312	0					0	_ci_warn("offset 0 is missing!");
1313	0					0	$good = 0;
1314
1315							} elsif ($self->{offsets}[0] != 0) {
1316	0					0	_ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!");
1317	0					0	$good = 0;
1318							}
1319
1320	380					511	my $cached = 0;
1321							{
1322	380					461	local *F = $self->{fh};
	380					811
1323	380					2943	seek F, 0, SEEK_SET;
1324	380					1697	local $. = 0;
1325	380					1132	local $/ = $rs;
1326
1327	380					3433	while () {
1328	2132					3652	my $n = $. - 1;
1329	2132					3750	my $cached = $self->{cache}->_produce($n);
1330	2132					3277	my $offset = $self->{offsets}[$.];
1331	2132					2704	my $ao = tell F;
1332	2132	50	66			5825	if (defined $offset && $offset != $ao) {
1333	0					0	_ci_warn("rec $n: offset <$offset> actual <$ao>");
1334	0					0	$good = 0;
1335							}
1336	2132	50	66			4552	if (defined $cached && $_ ne $cached && ! $self->{deferred}{$n}) {
			33
1337	0					0	$good = 0;
1338	0					0	_ci_warn("rec $n: cached <$cached> actual <$_>");
1339							}
1340	2132	50	66			4592	if (defined $cached && substr($cached, -$rsl) ne $rs) {
1341	0					0	$good = 0;
1342	0					0	_ci_warn("rec $n in the cache is missing the record separator");
1343							}
1344	2132	50	66			7169	if (! defined $offset && $self->{eof}) {
1345	0					0	$good = 0;
1346	0					0	_ci_warn("The offset table was marked complete, but it is missing " .
1347							"element $.");
1348							}
1349							}
1350	380	50				739	if (@{$self->{offsets}} > $.+1) {
	380					1407
1351	0					0	$good = 0;
1352	0					0	my $n = @{$self->{offsets}};
	0					0
1353	0					0	_ci_warn("The offset table has $n items, but the file has only $.");
1354							}
1355
1356	380					1022	my $deferring = $self->_is_deferring;
1357	380					900	for my $n ($self->{cache}->ckeys) {
1358	938					1592	my $r = $self->{cache}->_produce($n);
1359	938					1313	$cached += length($r);
1360	938	50				2274	next if $n+1 <= $.; # checked this already
1361	0					0	_ci_warn("spurious caching of record $n");
1362	0					0	$good = 0;
1363							}
1364	380					842	my $b = $self->{cache}->bytes;
1365	380	50				1627	if ($cached != $b) {
1366	0					0	_ci_warn("cache size is $b, should be $cached");
1367	0					0	$good = 0;
1368							}
1369							}
1370
1371							# That cache has its own set of tests
1372	380	50				886	$good = 0 unless $self->{cache}->_check_integrity;
1373
1374							# Now let's check the deferbuffer
1375							# Unless deferred writing is enabled, it should be empty
1376	380	50	66			740	if (! $self->_is_deferring && %{$self->{deferred}}) {
	331					913
1377	0					0	_ci_warn("deferred writing disabled, but deferbuffer nonempty");
1378	0					0	$good = 0;
1379							}
1380
1381							# Any record in the deferbuffer should not be present in the readcache
1382	380					553	my $deferred_s = 0;
1383	380					535	while (my ($n, $r) = each %{$self->{deferred}}) {
	467					1331
1384	87					117	$deferred_s += length($r);
1385	87	50				150	if (defined $self->{cache}->_produce($n)) {
1386	0					0	_ci_warn("record $n is in the deferbuffer and the readcache");
1387	0					0	$good = 0;
1388							}
1389	87	50				229	if (substr($r, -$rsl) ne $rs) {
1390	0					0	_ci_warn("rec $n in the deferbuffer is missing the record separator");
1391	0					0	$good = 0;
1392							}
1393							}
1394
1395							# Total size of deferbuffer should match internal total
1396	380	50				832	if ($deferred_s != $self->{deferred_s}) {
1397	0					0	_ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s");
1398	0					0	$good = 0;
1399							}
1400
1401							# Total size of deferbuffer should not exceed the specified limit
1402	380	50				731	if ($deferred_s > $self->{dw_size}) {
1403	0					0	_ci_warn("buffer size is $self->{deferred_s} which exceeds the limit " .
1404							"of $self->{dw_size}");
1405	0					0	$good = 0;
1406							}
1407
1408							# Total size of cached data should not exceed the specified limit
1409	380	50				808	if ($deferred_s + $cached > $self->{memory}) {
1410	0					0	my $total = $deferred_s + $cached;
1411	0					0	_ci_warn("total stored data size is $total which exceeds the limit " .
1412							"of $self->{memory}");
1413	0					0	$good = 0;
1414							}
1415
1416							# Stuff related to autodeferment
1417	380	50	66			787	if (!$self->{autodefer} && @{$self->{ad_history}}) {
	129					331
1418	0					0	_ci_warn("autodefer is disabled, but ad_history is nonempty");
1419	0					0	$good = 0;
1420							}
1421	380	50	66			723	if ($self->{autodeferring} && $self->{defer}) {
1422	0					0	_ci_warn("both autodeferring and explicit deferring are active");
1423	0					0	$good = 0;
1424							}
1425	380	100				459	if (@{$self->{ad_history}} == 0) {
	380	50				790
1426							# That's OK, no additional tests required
1427	14					23	} elsif (@{$self->{ad_history}} == 2) {
1428	14					18	my @non_number = grep !/^-?\d+$/, @{$self->{ad_history}};
	14					80
1429	14	50				53	if (@non_number) {
		50
1430	0					0	my $msg;
1431	0					0	{ local $" = ')(';
	0					0
1432	0					0	$msg = "ad_history contains non-numbers (@{$self->{ad_history}})";
	0					0
1433							}
1434	0					0	_ci_warn($msg);
1435	0					0	$good = 0;
1436							} elsif ($self->{ad_history}[1] < $self->{ad_history}[0]) {
1437	0					0	_ci_warn("ad_history has nonsensical values @{$self->{ad_history}}");
	0					0
1438	0					0	$good = 0;
1439							}
1440							} else {
1441	0					0	_ci_warn("ad_history has bad length <@{$self->{ad_history}}>");
	0					0
1442	0					0	$good = 0;
1443							}
1444
1445	380					1256	$good;
1446							}
1447
1448							################################################################
1449							#
1450							# Tie::File::Cache
1451							#
1452							# Read cache
1453
1454							package Tie::File::Cache;
1455							$Tie::File::Cache::VERSION = $Tie::File::VERSION;
1456	37			37		350	use Carp ':DEFAULT', 'confess';
	37					72
	37					6924
1457
1458							sub HEAP () { 0 }
1459							sub HASH () { 1 }
1460							sub MAX () { 2 }
1461							sub BYTES() { 3 }
1462							#sub STAT () { 4 } # Array with request statistics for each record
1463							#sub MISS () { 5 } # Total number of cache misses
1464							#sub REQ () { 6 } # Total number of cache requests
1465	37			37		300	use strict 'vars';
	37					106
	37					51625
1466
1467							sub new {
1468	2986			2986		6242	my ($pack, $max) = @_;
1469	2986					7830	local *_;
1470	2986	50				7038	croak "missing argument to ->new" unless defined $max;
1471	2986					5440	my $self = [];
1472	2986					5725	bless $self => $pack;
1473	2986					11719	@$self = (Tie::File::Heap->new($self), {}, $max, 0);
1474	2986					8700	$self;
1475							}
1476
1477							sub adj_limit {
1478	77			77		135	my ($self, $n) = @_;
1479	77					116	$self->[MAX] += $n;
1480							}
1481
1482							sub set_limit {
1483	60			60		188	my ($self, $n) = @_;
1484	60					183	$self->[MAX] = $n;
1485							}
1486
1487							# For internal use only
1488							# Will be called by the heap structure to notify us that a certain
1489							# piece of data has moved from one heap element to another.
1490							# $k is the hash key of the item
1491							# $n is the new index into the heap at which it is stored
1492							# If $n is undefined, the item has been removed from the heap.
1493							sub _heap_move {
1494	3501			3501		5498	my ($self, $k, $n) = @_;
1495	3501	100				5480	if (defined $n) {
1496	3071					5567	$self->[HASH]{$k} = $n;
1497							} else {
1498	430					820	delete $self->[HASH]{$k};
1499							}
1500							}
1501
1502							sub insert {
1503	1177			1177		2565	my ($self, $key, $val) = @_;
1504	1177					2310	local *_;
1505	1177	50				2122	croak "missing argument to ->insert" unless defined $key;
1506	1177	50				2154	unless (defined $self->[MAX]) {
1507	0					0	confess "undefined max" ;
1508							}
1509	1177	50				1953	confess "undefined val" unless defined $val;
1510	1177	100				2172	return if length($val) > $self->[MAX];
1511
1512							# if ($self->[STAT]) {
1513							# $self->[STAT][$key] = 1;
1514							# return;
1515							# }
1516
1517	1094					1609	my $oldnode = $self->[HASH]{$key};
1518	1094	50				1947	if (defined $oldnode) {
1519	0					0	my $oldval = $self->[HEAP]->set_val($oldnode, $val);
1520	0					0	$self->[BYTES] -= length($oldval);
1521							} else {
1522	1094					2223	$self->[HEAP]->insert($key, $val);
1523							}
1524	1094					1671	$self->[BYTES] += length($val);
1525	1094	100				2750	$self->flush if $self->[BYTES] > $self->[MAX];
1526							}
1527
1528							sub expire {
1529	56			56		213	my $self = shift;
1530	56					88	my $old_data = $self->[HEAP]->popheap;
1531	56	100				108	return unless defined $old_data;
1532	50					65	$self->[BYTES] -= length $old_data;
1533	50					106	$old_data;
1534							}
1535
1536							sub remove {
1537	434			434		767	my ($self, @keys) = @_;
1538	434					540	my @result;
1539
1540							# if ($self->[STAT]) {
1541							# for my $key (@keys) {
1542							# $self->[STAT][$key] = 0;
1543							# }
1544							# return;
1545							# }
1546
1547	434					698	for my $key (@keys) {
1548	435	100				1054	next unless exists $self->[HASH]{$key};
1549	336					708	my $old_data = $self->[HEAP]->remove($self->[HASH]{$key});
1550	336					508	$self->[BYTES] -= length $old_data;
1551	336					649	push @result, $old_data;
1552							}
1553	434					915	@result;
1554							}
1555
1556							sub lookup {
1557	1780			1780		2854	my ($self, $key) = @_;
1558	1780					3269	local *_;
1559	1780	50				3199	croak "missing argument to ->lookup" unless defined $key;
1560
1561							# if ($self->[STAT]) {
1562							# $self->[MISS]++ if $self->[STAT][$key]++ == 0;
1563							# $self->[REQ]++;
1564							# my $hit_rate = 1 - $self->[MISS] / $self->[REQ];
1565							# # Do some testing to determine this threshhold
1566							# $#$self = STAT - 1 if $hit_rate > 0.20;
1567							# }
1568
1569	1780	100				3348	if (exists $self->[HASH]{$key}) {
1570	621					1301	$self->[HEAP]->lookup($self->[HASH]{$key});
1571							} else {
1572	1159					2871	return;
1573							}
1574							}
1575
1576							# For internal use only
1577							sub _produce {
1578	3204			3204		4839	my ($self, $key) = @_;
1579	3204					4675	my $loc = $self->[HASH]{$key};
1580	3204	100				5727	return unless defined $loc;
1581	1923					3706	$self->[HEAP][$loc][2];
1582							}
1583
1584							# For internal use only
1585							sub _promote {
1586	5			5		21	my ($self, $key) = @_;
1587	5					13	$self->[HEAP]->promote($self->[HASH]{$key});
1588							}
1589
1590							sub empty {
1591	86			86		2418	my ($self) = @_;
1592	86					143	%{$self->[HASH]} = ();
	86					269
1593	86					169	$self->[BYTES] = 0;
1594	86					227	$self->[HEAP]->empty;
1595							# @{$self->[STAT]} = ();
1596							# $self->[MISS] = 0;
1597							# $self->[REQ] = 0;
1598							}
1599
1600							sub is_empty {
1601	3			3		19	my ($self) = @_;
1602	3					4	keys %{$self->[HASH]} == 0;
	3					11
1603							}
1604
1605							sub update {
1606	461			461		936	my ($self, $key, $val) = @_;
1607	461					1053	local *_;
1608	461	50				1182	croak "missing argument to ->update" unless defined $key;
1609	461	100				1233	if (length($val) > $self->[MAX]) {
		100
1610	21					47	my ($oldval) = $self->remove($key);
1611	21	50				40	$self->[BYTES] -= length($oldval) if defined $oldval;
1612							} elsif (exists $self->[HASH]{$key}) {
1613	405					996	my $oldval = $self->[HEAP]->set_val($self->[HASH]{$key}, $val);
1614	405					673	$self->[BYTES] += length($val);
1615	405	50				856	$self->[BYTES] -= length($oldval) if defined $oldval;
1616							} else {
1617	35					119	$self->[HEAP]->insert($key, $val);
1618	35					64	$self->[BYTES] += length($val);
1619							}
1620	461					852	$self->flush;
1621							}
1622
1623							sub rekey {
1624	381			381		660	my ($self, $okeys, $nkeys) = @_;
1625	381					826	local *_;
1626	381					483	my %map;
1627	381					759	@map{@$okeys} = @$nkeys;
1628	381	50				743	croak "missing argument to ->rekey" unless defined $nkeys;
1629	381	50				711	croak "length mismatch in ->rekey arguments" unless @$nkeys == @$okeys;
1630	381					490	my %adjusted; # map new keys to heap indices
1631							# You should be able to cut this to one loop TODO XXX
1632	381					784	for (0 .. $#$okeys) {
1633	407					958	$adjusted{$nkeys->[$_]} = delete $self->[HASH]{$okeys->[$_]};
1634							}
1635	381					1512	while (my ($nk, $ix) = each %adjusted) {
1636							# @{$self->[HASH]}{keys %adjusted} = values %adjusted;
1637	407					838	$self->[HEAP]->rekey($ix, $nk);
1638	407					1199	$self->[HASH]{$nk} = $ix;
1639							}
1640							}
1641
1642							sub ckeys {
1643	789			789		1428	my $self = shift;
1644	789					1013	my @a = keys %{$self->[HASH]};
	789					2131
1645	789					2622	@a;
1646							}
1647
1648							# Return total amount of cached data
1649							sub bytes {
1650	507			507		1014	my $self = shift;
1651	507					1069	$self->[BYTES];
1652							}
1653
1654							# Expire oldest item from cache until cache size is smaller than $max
1655							sub reduce_size_to {
1656	1080			1080		1652	my ($self, $max) = @_;
1657	1080					2926	until ($self->[BYTES] <= $max) {
1658							# Note that Tie::File::Cache::expire has been inlined here
1659	44					90	my $old_data = $self->[HEAP]->popheap;
1660	44	50				82	return unless defined $old_data;
1661	44					211	$self->[BYTES] -= length $old_data;
1662							}
1663							}
1664
1665							# Why not just $self->reduce_size_to($self->[MAX])?
1666							# Try this when things stabilize TODO XXX
1667							# If the cache is too full, expire the oldest records
1668							sub flush {
1669	482			482		660	my $self = shift;
1670	482	100				2074	$self->reduce_size_to($self->[MAX]) if $self->[BYTES] > $self->[MAX];
1671							}
1672
1673							# For internal use only
1674							sub _produce_lru {
1675	1			1		10	my $self = shift;
1676	1					4	$self->[HEAP]->expire_order;
1677							}
1678
1679	37			37		15936	BEGIN { *_ci_warn = \&Tie::File::_ci_warn }
1680
1681							sub _check_integrity { # For CACHE
1682	406			406		798	my $self = shift;
1683	406					531	my $good = 1;
1684
1685							# Test HEAP
1686	406	50				948	$self->[HEAP]->_check_integrity or $good = 0;
1687
1688							# Test HASH
1689	406					555	my $bytes = 0;
1690	406					537	for my $k (keys %{$self->[HASH]}) {
	406					944
1691	1063	50	66			4299	if ($k ne '0' && $k !~ /^[1-9][0-9]*$/) {
1692	0					0	$good = 0;
1693	0					0	_ci_warn "Cache hash key <$k> is non-numeric";
1694							}
1695
1696	1063					1618	my $h = $self->[HASH]{$k};
1697	1063	50				2062	if (! defined $h) {
		50
1698	0					0	$good = 0;
1699	0					0	_ci_warn "Heap index number for key $k is undefined";
1700							} elsif ($h == 0) {
1701	0					0	$good = 0;
1702	0					0	_ci_warn "Heap index number for key $k is zero";
1703							} else {
1704	1063					1368	my $j = $self->[HEAP][$h];
1705	1063	50				1455	if (! defined $j) {
1706	0					0	$good = 0;
1707	0					0	_ci_warn "Heap contents key $k (=> $h) are undefined";
1708							} else {
1709	1063					1371	$bytes += length($j->[2]);
1710	1063	50				2240	if ($k ne $j->[1]) {
1711	0					0	$good = 0;
1712	0					0	_ci_warn "Heap contents key $k (=> $h) is $j->[1], should be $k";
1713							}
1714							}
1715							}
1716							}
1717
1718							# Test BYTES
1719	406	50				918	if ($bytes != $self->[BYTES]) {
1720	0					0	$good = 0;
1721	0					0	_ci_warn "Total data in cache is $bytes, expected $self->[BYTES]";
1722							}
1723
1724							# Test MAX
1725	406	50				769	if ($bytes > $self->[MAX]) {
1726	0					0	$good = 0;
1727	0					0	_ci_warn "Total data in cache is $bytes, exceeds maximum $self->[MAX]";
1728							}
1729
1730	406					779	return $good;
1731							}
1732
1733							sub delink {
1734	2984			2984		4344	my $self = shift;
1735	2984					9595	$self->[HEAP] = undef; # Bye bye heap
1736							}
1737
1738							################################################################
1739							#
1740							# Tie::File::Heap
1741							#
1742							# Heap data structure for use by cache LRU routines
1743
1744							package Tie::File::Heap;
1745	37			37		317	use Carp ':DEFAULT', 'confess';
	37					67
	37					49867
1746							$Tie::File::Heap::VERSION = $Tie::File::Cache::VERSION;
1747							sub SEQ () { 0 };
1748							sub KEY () { 1 };
1749							sub DAT () { 2 };
1750
1751							sub new {
1752	2986			2986		5904	my ($pack, $cache) = @_;
1753							die "$pack: Parent cache object $cache does not support _heap_move method"
1754	2986	50				6218	unless eval { $cache->can('_heap_move') };
	2986					13124
1755	2986					7906	my $self = [[0,$cache,0]];
1756	2986					10785	bless $self => $pack;
1757							}
1758
1759							# Allocate a new sequence number, larger than all previously allocated numbers
1760							sub _nseq {
1761	2160			2160		2979	my $self = shift;
1762	2160					3918	$self->[0][0]++;
1763							}
1764
1765							sub _cache {
1766	0			0		0	my $self = shift;
1767	0					0	$self->[0][1];
1768							}
1769
1770							sub _nelts {
1771	0			0		0	my $self = shift;
1772	0					0	$self->[0][2];
1773							}
1774
1775							sub _nelts_inc {
1776	1129			1129		1507	my $self = shift;
1777	1129					1836	++$self->[0][2];
1778							}
1779
1780							sub _nelts_dec {
1781	430			430		567	my $self = shift;
1782	430					599	--$self->[0][2];
1783							}
1784
1785							sub is_empty {
1786	0			0		0	my $self = shift;
1787	0					0	$self->_nelts == 0;
1788							}
1789
1790							sub empty {
1791	86			86		160	my $self = shift;
1792	86					467	$#$self = 0;
1793	86					160	$self->[0][2] = 0;
1794	86					183	$self->[0][0] = 0; # might as well reset the sequence numbers
1795							}
1796
1797							# notify the parent cache object that we moved something
1798							sub _heap_move {
1799	0			0		0	my $self = shift;
1800	0					0	$self->_cache->_heap_move(@_);
1801							}
1802
1803							# Insert a piece of data into the heap with the indicated sequence number.
1804							# The item with the smallest sequence number is always at the top.
1805							# If no sequence number is specified, allocate a new one and insert the
1806							# item at the bottom.
1807							sub insert {
1808	1129			1129		2143	my ($self, $key, $data, $seq) = @_;
1809	1129	50				2538	$seq = $self->_nseq unless defined $seq;
1810	1129					3283	$self->_insert_new([$seq, $key, $data]);
1811							}
1812
1813							# Insert a new, fresh item at the bottom of the heap
1814							sub _insert_new {
1815	1129			1129		1718	my ($self, $item) = @_;
1816	1129					1597	my $i = @$self;
1817	1129					3197	$i = int($i/2) until defined $self->[$i/2];
1818	1129					1869	$self->[$i] = $item;
1819	1129					2560	$self->[0][1]->_heap_move($self->[$i][KEY], $i);
1820	1129					2042	$self->_nelts_inc;
1821							}
1822
1823							# Insert [$data, $seq] pair at or below item $i in the heap.
1824							# If $i is omitted, default to 1 (the top element.)
1825							sub _insert {
1826	0			0		0	my ($self, $item, $i) = @_;
1827							# $self->_check_loc($i) if defined $i;
1828	0	0				0	$i = 1 unless defined $i;
1829	0					0	until (! defined $self->[$i]) {
1830	0	0				0	if ($self->[$i][SEQ] > $item->[SEQ]) { # inserted item is older
1831	0					0	($self->[$i], $item) = ($item, $self->[$i]);
1832	0					0	$self->[0][1]->_heap_move($self->[$i][KEY], $i);
1833							}
1834							# If either is undefined, go that way. Otherwise, choose at random
1835	0					0	my $dir;
1836	0	0				0	$dir = 0 if !defined $self->[2*$i];
1837	0	0				0	$dir = 1 if !defined $self->[2*$i+1];
1838	0	0				0	$dir = int(rand(2)) unless defined $dir;
1839	0					0	$i = 2*$i + $dir;
1840							}
1841	0					0	$self->[$i] = $item;
1842	0					0	$self->[0][1]->_heap_move($self->[$i][KEY], $i);
1843	0					0	$self->_nelts_inc;
1844							}
1845
1846							# Remove the item at node $i from the heap, moving child items upwards.
1847							# The item with the smallest sequence number is always at the top.
1848							# Moving items upwards maintains this condition.
1849							# Return the removed item. Return undef if there was no item at node $i.
1850							sub remove {
1851	436			436		747	my ($self, $i) = @_;
1852	436	50				758	$i = 1 unless defined $i;
1853	436					603	my $top = $self->[$i];
1854	436	100				730	return unless defined $top;
1855	430					512	while (1) {
1856	912					1058	my $ii;
1857	912					1436	my ($L, $R) = (2$i, 2$i+1);
1858
1859							# If either is undefined, go the other way.
1860							# Otherwise, go towards the smallest.
1861	912	100	100			2517	last unless defined $self->[$L] \|\| defined $self->[$R];
1862	482	100				791	$ii = $R if not defined $self->[$L];
1863	482	100				770	$ii = $L if not defined $self->[$R];
1864	482	100				758	unless (defined $ii) {
1865	158	100				297	$ii = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1866							}
1867
1868	482					632	$self->[$i] = $self->[$ii]; # Promote child to fill vacated spot
1869	482					1012	$self->[0][1]->_heap_move($self->[$i][KEY], $i);
1870	482					595	$i = $ii; # Fill new vacated spot
1871							}
1872	430					1021	$self->[0][1]->_heap_move($top->[KEY], undef);
1873	430					589	undef $self->[$i];
1874	430					878	$self->_nelts_dec;
1875	430					964	return $top->[DAT];
1876							}
1877
1878							sub popheap {
1879	100			100		116	my $self = shift;
1880	100					171	$self->remove(1);
1881							}
1882
1883							# set the sequence number of the indicated item to a higher number
1884							# than any other item in the heap, and bubble the item down to the
1885							# bottom.
1886							sub promote {
1887	1031			1031		1503	my ($self, $n) = @_;
1888							# $self->_check_loc($n);
1889	1031					1827	$self->[$n][SEQ] = $self->_nseq;
1890	1031					1436	my $i = $n;
1891	1031					1299	while (1) {
1892	1761					2917	my ($L, $R) = (2$i, 2$i+1);
1893	1761					2047	my $dir;
1894	1761	100	100			5108	last unless defined $self->[$L] \|\| defined $self->[$R];
1895	730	100				1206	$dir = $R unless defined $self->[$L];
1896	730	100				1264	$dir = $L unless defined $self->[$R];
1897	730	100				1220	unless (defined $dir) {
1898	453	100				898	$dir = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1899							}
1900	730					931	@{$self}[$i, $dir] = @{$self}[$dir, $i];
	730					1136
	730					1068
1901	730					1554	for ($i, $dir) {
1902	1460	50				3242	$self->[0][1]->_heap_move($self->[$_][KEY], $_) if defined $self->[$_];
1903							}
1904	730					945	$i = $dir;
1905							}
1906							}
1907
1908							# Return item $n from the heap, promoting its LRU status
1909							sub lookup {
1910	621			621		982	my ($self, $n) = @_;
1911							# $self->_check_loc($n);
1912	621					1277	my $val = $self->[$n];
1913	621					1369	$self->promote($n);
1914	621					1562	$val->[DAT];
1915							}
1916
1917
1918							# Assign a new value for node $n, promoting it to the bottom of the heap
1919							sub set_val {
1920	405			405		768	my ($self, $n, $val) = @_;
1921							# $self->_check_loc($n);
1922	405					649	my $oval = $self->[$n][DAT];
1923	405					611	$self->[$n][DAT] = $val;
1924	405					909	$self->promote($n);
1925	405					793	return $oval;
1926							}
1927
1928							# The hash key has changed for an item;
1929							# alter the heap's record of the hash key
1930							sub rekey {
1931	407			407		597	my ($self, $n, $new_key) = @_;
1932							# $self->_check_loc($n);
1933	407					629	$self->[$n][KEY] = $new_key;
1934							}
1935
1936							sub _check_loc {
1937	0			0		0	my ($self, $n) = @_;
1938	0					0	unless (1 \|\| defined $self->[$n]) {
1939							confess "_check_loc($n) failed";
1940							}
1941							}
1942
1943	37			37		18095	BEGIN { *_ci_warn = \&Tie::File::_ci_warn }
1944
1945							sub _check_integrity {
1946	406			406		534	my $self = shift;
1947	406					528	my $good = 1;
1948	406					493	my %seq;
1949
1950	406	50				614	unless (eval {$self->[0][1]->isa("Tie::File::Cache")}) {
	406					1657
1951	0					0	_ci_warn "Element 0 of heap corrupt";
1952	0					0	$good = 0;
1953							}
1954	406	50				923	$good = 0 unless $self->_satisfies_heap_condition(1);
1955	406					581	for my $i (2 .. $#{$self}) {
	406					943
1956	1933					2818	my $p = int($i/2); # index of parent node
1957	1933	50	66			4258	if (defined $self->[$i] && ! defined $self->[$p]) {
1958	0					0	_ci_warn "Element $i of heap defined, but parent $p isn't";
1959	0					0	$good = 0;
1960							}
1961
1962	1933	100				3186	if (defined $self->[$i]) {
1963	824	50				1512	if ($seq{$self->[$i][SEQ]}) {
1964	0					0	my $seq = $self->[$i][SEQ];
1965	0					0	_ci_warn "Nodes $i and $seq{$seq} both have SEQ=$seq";
1966	0					0	$good = 0;
1967							} else {
1968	824					1624	$seq{$self->[$i][SEQ]} = $i;
1969							}
1970							}
1971							}
1972
1973	406					1070	return $good;
1974							}
1975
1976							sub _satisfies_heap_condition {
1977	1230			1230		1562	my $self = shift;
1978	1230		50			1987	my $n = shift \|\| 1;
1979	1230					1475	my $good = 1;
1980	1230					1734	for (0, 1) {
1981	2460					3055	my $c = $n*2 + $_;
1982	2460	100				4323	next unless defined $self->[$c];
1983	824	50				1430	if ($self->[$n][SEQ] >= $self->[$c]) {
1984	0					0	_ci_warn "Node $n of heap does not predate node $c";
1985	0					0	$good = 0 ;
1986							}
1987	824	50				1350	$good = 0 unless $self->_satisfies_heap_condition($c);
1988							}
1989	1230					2339	return $good;
1990							}
1991
1992							# Return a list of all the values, sorted by expiration order
1993							sub expire_order {
1994	1			1		2	my $self = shift;
1995	1					3	my @nodes = sort {$a->[SEQ] <=> $b->[SEQ]} $self->_nodes;
	3					8
1996	1					3	map { $_->[KEY] } @nodes;
	3					9
1997							}
1998
1999							sub _nodes {
2000	7			7		9	my $self = shift;
2001	7		100			15	my $i = shift \|\| 1;
2002	7	100				24	return unless defined $self->[$i];
2003	3					22	($self->[$i], $self->_nodes($i2), $self->_nodes($i2+1));
2004							}
2005
2006							"Cogito, ergo sum."; # don't forget to return a true value from the file
2007
2008							__END__