line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
114025
|
use strict; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
104
|
|
4
|
2
|
|
|
2
|
|
14
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
199
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package File::Bidirectional; |
7
|
2
|
|
|
2
|
|
13
|
use Carp; |
|
2
|
|
|
|
|
8
|
|
|
2
|
|
|
|
|
202
|
|
8
|
2
|
|
|
2
|
|
13
|
use Fcntl qw/:seek O_RDONLY/; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
940
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our $VERSION = '0.01'; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=pod |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
File::Bidirectional - Read a file line-by-line either forwards or backwards |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use File::Bidirectional; |
21
|
|
|
|
|
|
|
my $file = "/var/log/large_file"; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Object interface |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# start from the last line |
26
|
|
|
|
|
|
|
my $fh = File::Bidirectional->new($file, {origin => -1}) |
27
|
|
|
|
|
|
|
or die $!; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# read backwards until point of interest |
30
|
|
|
|
|
|
|
while (my $line = $fh->readline()) { |
31
|
|
|
|
|
|
|
last if $line =~ /RECORD_START/; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# switch directions |
35
|
|
|
|
|
|
|
$fh->switch(); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# read forwards until point of interest |
38
|
|
|
|
|
|
|
while (my $line = $fh->readline()) { |
39
|
|
|
|
|
|
|
last if $line =~ /RECORD_END/; |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Tied Handle Interface |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
local *F; |
45
|
|
|
|
|
|
|
tie *F, "File::Bidirectional", $file, {origin => 1} |
46
|
|
|
|
|
|
|
or die $!; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
while (my $line = ) { ... } |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
(tied *F)->switch(); |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 DESCRIPTION |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
File::Bidirectional reads a file line-by-line in either the forwards or |
55
|
|
|
|
|
|
|
backwards direction. It supports an object interface as well as a tied |
56
|
|
|
|
|
|
|
filehandle interface, and should be straight-forward to use. It is also memory |
57
|
|
|
|
|
|
|
efficient, since it is intended to be used on files too large to be efficiently |
58
|
|
|
|
|
|
|
slurped into an array and traversed backwards. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
The direction in which to traverse the file can be changed at anytime, but it is |
61
|
|
|
|
|
|
|
important to note that the last-read line will be repeated when this happens. |
62
|
|
|
|
|
|
|
See C to see why this is so. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
On non-Unix platforms, this module attempts to immitate native Perl in |
65
|
|
|
|
|
|
|
converting the line endings. Currently, this is limited and untested, so please |
66
|
|
|
|
|
|
|
see L for more information. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head1 MOTIVATION |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
I had a C file describing the changes in a large (> 200MB) file. Based on |
71
|
|
|
|
|
|
|
the line numbers in the C, I have to repeatedly read backwards and |
72
|
|
|
|
|
|
|
forwards in the large file to obtain the context lines before and after the |
73
|
|
|
|
|
|
|
C changes. The number of context lines vary, thus it was a little more |
74
|
|
|
|
|
|
|
involved than regenerating the C with an appropriate C<--context> option. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
I decided to publish this module as I thought others might have similar needs. |
77
|
|
|
|
|
|
|
Reading large log files backwards is probably the most common of these, but if |
78
|
|
|
|
|
|
|
you have any other interesting uses, do let me know. |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=cut |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# globals |
83
|
|
|
|
|
|
|
my ($BLOCK_SIZE); |
84
|
|
|
|
|
|
|
BEGIN { |
85
|
|
|
|
|
|
|
# defaults - can be changed through constructor |
86
|
2
|
|
|
2
|
|
4
|
$BLOCK_SIZE = 1024 * 8; |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# _read_line() and _eof() are used as sensible defaults. we will fix up the |
89
|
|
|
|
|
|
|
# aliases again later to optimize away one indirection function call |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# tied interface |
92
|
2
|
|
|
|
|
8
|
*TIEHANDLE = \&new; |
93
|
2
|
|
|
|
|
6
|
*READLINE = \&_read_line; |
94
|
2
|
|
|
|
|
6
|
*EOF = \&_eof; |
95
|
2
|
|
|
|
|
5
|
*CLOSE = \&close; |
96
|
2
|
|
|
|
|
6
|
*TELL = \&tell; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# IO::Handle compatability |
99
|
2
|
|
|
|
|
5
|
*getline = \&_read_line; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# File::ReadBackwards compatability |
102
|
2
|
|
|
|
|
6
|
*get_handle = \&fh; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# aliases |
105
|
2
|
|
|
|
|
4
|
*readline = \&_read_line; |
106
|
2
|
|
|
|
|
5057
|
*eof = \&_eof; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=pod |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head1 CONSTRUCTOR (CLASS METHODS) |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=over |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=item new $file, \%option |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
$fh = File::Bidirectional->new($file); |
118
|
|
|
|
|
|
|
$fh = File::Bidirectional->new($file, {mode => 'forward'}); |
119
|
|
|
|
|
|
|
$fh = File::Bidirectional->new($file, {mode => 'backward'}); |
120
|
|
|
|
|
|
|
$fh = File::Bidirectional->new($file, {origin => -1}); |
121
|
|
|
|
|
|
|
$fh = File::Bidirectional->new($file, \%option); |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Has the file name as the first parameter, and a hashref of options as an |
124
|
|
|
|
|
|
|
optional second parameter. Upon success, it will return the object. For invalid |
125
|
|
|
|
|
|
|
parameters, it will C. For L errors, it returns |
126
|
|
|
|
|
|
|
undef and sets the error code in L. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
The list of valid options are: |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=over |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=item mode |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
Can be either C (bi-directional), C or C. The C |
135
|
|
|
|
|
|
|
and C modes are restrictive: the file is read from the first and last |
136
|
|
|
|
|
|
|
line respectively, and switching directions is prohibited. The C mode |
137
|
|
|
|
|
|
|
allows direction switching, and will start from the first line by default (use |
138
|
|
|
|
|
|
|
the C option to change that.) The default is C. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=item origin |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
Can be either C<1> or C<-1>. These denote whether the first or last line of the |
143
|
|
|
|
|
|
|
file is considered as line 1 by C. (C will always start |
144
|
|
|
|
|
|
|
from line 1.) C can only be set if the C option is C. The |
145
|
|
|
|
|
|
|
default is C<1>. |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=item binmode |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Can be any true or false expression. It is analogous to the L |
150
|
|
|
|
|
|
|
built-in function. On systems that distinguish between binary and text files, |
151
|
|
|
|
|
|
|
notably DOS and Windows-based systems, this is important. A true value will |
152
|
|
|
|
|
|
|
preserve C<\r\n> as is; a false value will convert C<\r\n> to C<\n>. The |
153
|
|
|
|
|
|
|
default is false. |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=item separator |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
Can be any scalar string. It is analogous to the L variable. |
158
|
|
|
|
|
|
|
C determines C's notion of what a line is. The |
159
|
|
|
|
|
|
|
default is L, which in turn defaults to C<"\n">. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Caveat: The Perl-ish magic that occurs when L is C<""> does not |
162
|
|
|
|
|
|
|
happen yet. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=item regex |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Can be any true or false expression. It determines whether the C |
167
|
|
|
|
|
|
|
option is a regex or a string. The default is false. |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=item block_size |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Can be any positive integer. This is the size of a single block read by the |
172
|
|
|
|
|
|
|
underlying L. The default is 8192. |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=back |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=back |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=head1 INSTANCE METHODS |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=cut |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
sub new { |
184
|
206
|
|
|
206
|
1
|
2265608
|
my ($class, $file, $option) = @_; |
185
|
206
|
50
|
|
|
|
813
|
croak "expected class method" |
186
|
|
|
|
|
|
|
unless defined $class; |
187
|
206
|
50
|
|
|
|
609
|
croak "expected filename" |
188
|
|
|
|
|
|
|
unless defined $file; |
189
|
206
|
50
|
66
|
|
|
7064
|
croak "expected hashref for parameters" |
190
|
|
|
|
|
|
|
unless !defined $option || ref($option) eq 'HASH'; |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# block size and buffer size |
193
|
206
|
|
33
|
|
|
1088
|
my $block_size = $option->{block_size} || $BLOCK_SIZE; |
194
|
206
|
50
|
33
|
|
|
2195
|
croak "expected block_size to be positive integer" |
195
|
|
|
|
|
|
|
unless $block_size =~ /^\d+$/ && $block_size > 0; |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# default separator is $/ |
198
|
206
|
|
|
|
|
562
|
my $sep = $option->{separator}; |
199
|
206
|
50
|
|
|
|
851
|
$sep = $/ |
200
|
|
|
|
|
|
|
unless defined $sep; |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# default is not to treat separator as regex |
203
|
206
|
|
|
|
|
421
|
my $sep_re = $option->{regex}; |
204
|
206
|
50
|
|
|
|
519
|
$sep_re = 0 |
205
|
|
|
|
|
|
|
unless defined $sep_re; |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# pre-compile regular expression |
208
|
206
|
50
|
|
|
|
2735
|
my $re = ($sep_re) ? qr/(.*?$sep|.+)/ : qr/(.*?\Q$sep\E|.+)/; |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# translation takes place on DOS (without binmode), Mac etc. |
211
|
206
|
|
|
|
|
505
|
my $binmode = $option->{binmode}; |
212
|
206
|
50
|
33
|
|
|
768
|
my $translate = |
|
|
50
|
|
|
|
|
|
213
|
|
|
|
|
|
|
(_is_dos() && !$binmode) ? qr/\015\012/ : |
214
|
|
|
|
|
|
|
(_is_mac()) ? qr/\015/ : |
215
|
|
|
|
|
|
|
undef; |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
# default mode is bidirectional |
218
|
206
|
|
|
|
|
868
|
my $mode = $option->{mode}; |
219
|
206
|
50
|
66
|
|
|
13140
|
croak "expected mode to be [bi|forward|backward]" |
220
|
|
|
|
|
|
|
unless !defined $mode || $mode =~ /^(bi|forward|backward)$/; |
221
|
206
|
100
|
|
|
|
514
|
$mode = 'bi' unless defined $mode; |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# origin can only be explicitly set for bidirectional |
224
|
206
|
|
|
|
|
311
|
my $origin = $option->{origin}; |
225
|
206
|
50
|
66
|
|
|
894
|
croak "expected origin only for mode \"bi\"" |
226
|
|
|
|
|
|
|
unless !defined $origin || $mode eq 'bi'; |
227
|
206
|
50
|
66
|
|
|
1449
|
croak "expected origin to be [1|-1]" |
228
|
|
|
|
|
|
|
unless !defined $origin || $origin =~ /^(1|-1)$/; |
229
|
206
|
100
|
|
|
|
420
|
if (!defined $origin) { |
230
|
70
|
50
|
|
|
|
238
|
$origin = |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
231
|
|
|
|
|
|
|
($mode eq 'bi') ? 1 : |
232
|
|
|
|
|
|
|
($mode eq 'forward') ? 1 : |
233
|
|
|
|
|
|
|
($mode eq 'backward') ? -1 : undef; |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# file size |
237
|
206
|
|
|
|
|
4037
|
my $file_size = -s $file; |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
# set starting point of cursor to coincide with the origin |
240
|
206
|
100
|
|
|
|
869
|
my $start = ($origin == 1) ? 0 : $file_size; |
241
|
|
|
|
|
|
|
|
242
|
206
|
50
|
|
|
|
21280
|
sysopen my $fh, $file, O_RDONLY |
243
|
|
|
|
|
|
|
or return undef; |
244
|
206
|
|
|
|
|
658
|
binmode $fh; |
245
|
|
|
|
|
|
|
|
246
|
206
|
|
|
|
|
4891
|
my $x = { |
247
|
|
|
|
|
|
|
mode => $mode, # mode |
248
|
|
|
|
|
|
|
fh => $fh, # filehandle |
249
|
|
|
|
|
|
|
cur => $start, # physical cursor on filehandle |
250
|
|
|
|
|
|
|
buffer => [], # buffer |
251
|
|
|
|
|
|
|
origin => $origin, # 1: first line as line 1 / -1: last line as line 1 |
252
|
|
|
|
|
|
|
move => $origin, # 1: moving forwards / -1: moving backwards |
253
|
|
|
|
|
|
|
line => 0, # forward: line read / backward: line to be read |
254
|
|
|
|
|
|
|
re => $re, # regular expression for separator |
255
|
|
|
|
|
|
|
translate => $translate, |
256
|
|
|
|
|
|
|
file_size => $file_size, |
257
|
|
|
|
|
|
|
block_size => $block_size, |
258
|
|
|
|
|
|
|
}; |
259
|
|
|
|
|
|
|
|
260
|
206
|
|
|
|
|
550
|
bless ($x, $class); |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# fixup the aliases to save a method call for readline |
263
|
206
|
|
|
|
|
731
|
$x->_fixup_alias(); |
264
|
|
|
|
|
|
|
|
265
|
206
|
|
|
|
|
1267
|
return $x; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=pod |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=over |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=item readline |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
while (my $line = $fh->readline()) { ... } |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
Returns the subsequent line. This refers either to the next line when the |
277
|
|
|
|
|
|
|
direction is forwards, or to the previous line when the direction is backwards. |
278
|
|
|
|
|
|
|
The direction can be changed with C. C is returned when there |
279
|
|
|
|
|
|
|
are no more lines to be read. |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=item getline |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
An alias for C. It exists for compatability with the IO::* classes. |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=item eof |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
Returns true when C will return an C, false otherwise. |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=item switch |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
$fh->switch(); |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
Switches the current direction in which we are reading the file. It will |
294
|
|
|
|
|
|
|
L if the C option in the constructor is set to C or |
295
|
|
|
|
|
|
|
C. |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
Note that switching directions will cause the last-read line to be repeated by |
298
|
|
|
|
|
|
|
C. |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=cut |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
# reverse movement direction |
303
|
|
|
|
|
|
|
sub switch { |
304
|
1932
|
|
|
1932
|
1
|
16178
|
my ($x) = @_; |
305
|
|
|
|
|
|
|
|
306
|
1932
|
50
|
|
|
|
21920
|
croak "needs to be bidirectional mode to switch directions" |
307
|
|
|
|
|
|
|
unless $x->{mode} eq 'bi'; |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
# get current tell() before changing direction and invalidating the buffer |
310
|
1932
|
|
|
|
|
6594
|
$x->{cur} = $x->tell(); |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
# invalidate the buffer |
313
|
1932
|
|
|
|
|
2962
|
undef @{$x->{buffer}}; |
|
1932
|
|
|
|
|
270428
|
|
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# change direction |
316
|
1932
|
|
|
|
|
5176
|
$x->{move} *= -1; |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
# fixup aliases for readline() and eof() after changing direction |
319
|
1932
|
|
|
|
|
7079
|
$x->_fixup_alias(); |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=pod |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=item close |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
$fh->close(); |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
Closes the underlying filehandle and releases the memory allocated for its |
329
|
|
|
|
|
|
|
buffer. On success it returns true, otherwise it returns false with the error |
330
|
|
|
|
|
|
|
code found in L. All subsequent C calls will return |
331
|
|
|
|
|
|
|
undef, and C, its last value. |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=cut |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
# close file and destroy state |
336
|
|
|
|
|
|
|
sub close { |
337
|
205
|
|
|
205
|
1
|
3631222
|
my ($x) = @_; |
338
|
205
|
|
|
|
|
1515
|
undef @{$x->{buffer}}; |
|
205
|
|
|
|
|
32701
|
|
339
|
205
|
100
|
|
|
|
805
|
$x->{cur} = ($x->{move} == 1) ? $x->{file_size} : 0; |
340
|
205
|
50
|
|
|
|
43420
|
CORE::close($x->{fh}) |
341
|
|
|
|
|
|
|
or return undef; |
342
|
205
|
|
|
|
|
823
|
return 1; |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=pod |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=item direction $direction |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
Takes an optional parameter: 1 for reading forwards, -1 for reading backwards, |
350
|
|
|
|
|
|
|
L otherwise. If an argument for the parameter is provided, the |
351
|
|
|
|
|
|
|
direction will be switched if necessary. Either way, it returns the (new) |
352
|
|
|
|
|
|
|
direction. |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=cut |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub direction { |
357
|
0
|
|
|
0
|
1
|
0
|
my ($x, $direction) = @_; |
358
|
0
|
0
|
0
|
|
|
0
|
croak "expected direction to be [1|-1]" |
359
|
|
|
|
|
|
|
unless !defined $direction || $direction =~ /^(1|-1)$/; |
360
|
|
|
|
|
|
|
|
361
|
0
|
0
|
0
|
|
|
0
|
if (defined $direction && $direction != $x->{move}) { |
362
|
0
|
|
|
|
|
0
|
$x->switch(); |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
|
365
|
0
|
|
|
|
|
0
|
return $x->{move}; |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=pod |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
=item line_num |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
my $fh=File::Bidirectional->new($file); n=$fh->line_num(); # n = 0 |
373
|
|
|
|
|
|
|
$fh->readline(); n=$fh->line_num(); # n = 1 |
374
|
|
|
|
|
|
|
$fh->readline(); n=$fh->line_num(); # n = 2 |
375
|
|
|
|
|
|
|
$fh->switch(); n=$fh->line_num(); # n = 2 |
376
|
|
|
|
|
|
|
$fh->readline(); n=$fh->line_num(); # n = 1 |
377
|
|
|
|
|
|
|
$fh->readline(); n=$fh->line_num(); # n = 0 |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Returns the current line number. It is analogous to L. |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
For a file with I logical lines, the line number ranges from 0 to I. When |
382
|
|
|
|
|
|
|
reading away from the origin (forwards if the first line is the origin), its |
383
|
|
|
|
|
|
|
behavior is always identical to that of L - it refers to the number |
384
|
|
|
|
|
|
|
of lines that has been read. When reading towards the origin, it refers to the |
385
|
|
|
|
|
|
|
number of lines that can still be read. |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
When C is called, the direction is changed, but the line number |
388
|
|
|
|
|
|
|
remains the same. Therefore, the last-read line before changing directions will |
389
|
|
|
|
|
|
|
be repeated by C. |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=cut |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
# current line number, 1-based |
394
|
|
|
|
|
|
|
# forward: the line that has just been read |
395
|
|
|
|
|
|
|
# backward: the line that is going to be read |
396
|
|
|
|
|
|
|
sub line_num { |
397
|
0
|
|
|
0
|
1
|
0
|
my ($x) = @_; |
398
|
0
|
|
|
|
|
0
|
return $x->{line}; |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=pod |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=item tell |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
Returns the current position of the filehandle. |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=cut |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
# logical cursor on filehandle |
410
|
|
|
|
|
|
|
sub tell { |
411
|
2082
|
|
|
2082
|
1
|
4300
|
my ($x) = @_; |
412
|
2082
|
|
|
|
|
5013
|
my $pos = 0; |
413
|
2082
|
|
|
|
|
8277
|
for my $s (@{$x->{buffer}}) { |
|
2082
|
|
|
|
|
6847
|
|
414
|
2175866
|
|
|
|
|
3239603
|
$pos += length $s; |
415
|
|
|
|
|
|
|
} |
416
|
2082
|
100
|
|
|
|
22981
|
return ($x->{move} == 1) ? $x->{cur} - $pos : $x->{cur} + $pos; |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=pod |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item fh |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Returns the underlying filehandle. This is mainly useful for file-locking. |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
Notice that this actually breaks the encapsulation of File::Bidirectional, |
426
|
|
|
|
|
|
|
therefore it becomes the user's responsibility to ensure that nothing bad |
427
|
|
|
|
|
|
|
happens to the underlying filehandle. For example, it should definitely not be |
428
|
|
|
|
|
|
|
closed. |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
The underlying filehandle will be returned with its seek position set to what is |
431
|
|
|
|
|
|
|
returned by C. It should generally be okay for this seek position to be |
432
|
|
|
|
|
|
|
modified (the object remembers its own seek position and will always restore |
433
|
|
|
|
|
|
|
it). Any other operations on the filehandle, however, is very likely to void |
434
|
|
|
|
|
|
|
your warranty. =) |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=cut |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
sub fh { |
439
|
48
|
|
|
48
|
1
|
8748
|
my ($x) = @_; |
440
|
48
|
|
|
|
|
173
|
sysseek($x->{fh}, $x->tell(), SEEK_SET); |
441
|
48
|
|
|
|
|
174
|
return $x->{fh}; |
442
|
|
|
|
|
|
|
} |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# used only as fail-safe default |
447
|
|
|
|
|
|
|
sub _read_line { |
448
|
0
|
|
|
0
|
|
0
|
my ($x) = @_; |
449
|
|
|
|
|
|
|
return |
450
|
0
|
0
|
|
|
|
0
|
($x->{move} == 1 ) ? $x->_next_line() : |
|
|
0
|
|
|
|
|
|
451
|
|
|
|
|
|
|
($x->{move} == -1) ? $x->_prev_line() : |
452
|
|
|
|
|
|
|
undef; |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
sub _next_line { |
457
|
144241
|
|
|
144241
|
|
890855
|
my ($x) = @_; |
458
|
|
|
|
|
|
|
# 1. more than 1 line is in the buffer, so the top of the buffer is a |
459
|
|
|
|
|
|
|
# complete line |
460
|
|
|
|
|
|
|
# 2. only line -1 (last line) remains in the buffer |
461
|
|
|
|
|
|
|
# 3. nothing else to read, i.e. return undef |
462
|
144241
|
|
|
|
|
176550
|
while (1) { |
463
|
145390
|
100
|
100
|
|
|
176217
|
if (@{$x->{buffer}} > 1 || $x->{cur} == $x->{file_size}) { |
|
145390
|
|
|
|
|
533561
|
|
464
|
144241
|
|
|
|
|
154957
|
my $line = shift @{$x->{buffer}}; |
|
144241
|
|
|
|
|
295491
|
|
465
|
144241
|
100
|
|
|
|
479352
|
$x->{line} += $x->{origin} if defined $line; |
466
|
144241
|
|
|
|
|
461520
|
return $line; |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
# no complete line, so read something |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
# reading forward is easy - just sysseek() to where the bottom of the |
472
|
|
|
|
|
|
|
# buffer is, and let sysread() do the rest |
473
|
1149
|
50
|
|
|
|
17590
|
sysseek($x->{fh}, $x->{cur}, SEEK_SET) |
474
|
|
|
|
|
|
|
or croak $!; |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
# sysread returns undef for errors; |
477
|
|
|
|
|
|
|
# due to the pre-condition, 0 should not occur either |
478
|
1149
|
|
|
|
|
2262
|
my $tmp; |
479
|
1149
|
50
|
|
|
|
93341
|
my $size = sysread($x->{fh}, $tmp, $x->{block_size}) |
480
|
|
|
|
|
|
|
or croak $!; |
481
|
1149
|
|
|
|
|
2378
|
$x->{cur} += $size; |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
# prepend to the temp the leftover partial line in the buffer |
484
|
88
|
|
|
|
|
946
|
$tmp = pop (@{$x->{buffer}}) . $tmp |
|
1149
|
|
|
|
|
4104
|
|
485
|
1149
|
100
|
|
|
|
1717
|
if (@{$x->{buffer}}); |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
# platform-dependent translation |
488
|
1149
|
50
|
|
|
|
6681
|
$tmp =~ s/$x->{translate}/\n/ |
489
|
|
|
|
|
|
|
if defined $x->{translate}; |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
# split the temp and store it in the buffer |
492
|
1149
|
|
|
|
|
1242343
|
@{$x->{buffer}} = $tmp =~ /$x->{re}/gs; |
|
1149
|
|
|
|
|
242084
|
|
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
} |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
sub _prev_line { |
497
|
144217
|
|
|
144217
|
|
820618
|
my ($x) = @_; |
498
|
144217
|
|
|
|
|
204444
|
while (1) { |
499
|
|
|
|
|
|
|
# 1. more than 1 line is in the buffer, so the bottom of the buffer is |
500
|
|
|
|
|
|
|
# a complete line |
501
|
|
|
|
|
|
|
# 2. only line 1 remains in the buffer |
502
|
|
|
|
|
|
|
# 3. nothing else to read, i.e. return undef |
503
|
145366
|
100
|
100
|
|
|
150140
|
if (@{$x->{buffer}} > 1 || $x->{cur} == 0) { |
|
145366
|
|
|
|
|
511982
|
|
504
|
144217
|
|
|
|
|
197138
|
my $line = pop @{$x->{buffer}}; |
|
144217
|
|
|
|
|
342741
|
|
505
|
144217
|
100
|
|
|
|
461576
|
$x->{line} -= $x->{origin} if defined $line; |
506
|
144217
|
|
|
|
|
491626
|
return $line; |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
# no complete line, so read something |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
# reading backward requires us to first calculate where the top of the |
512
|
|
|
|
|
|
|
# buffer will reach. be careful to handle trailing bytes properly. |
513
|
1149
|
|
|
|
|
3291
|
my $read_size = $x->{block_size}; |
514
|
1149
|
|
|
|
|
3517
|
$x->{cur} -= $x->{block_size}; |
515
|
1149
|
100
|
|
|
|
5726
|
if ($x->{cur} < 0) { |
516
|
440
|
|
|
|
|
1012
|
$read_size += $x->{cur}; |
517
|
440
|
|
|
|
|
883
|
$x->{cur} = 0; |
518
|
|
|
|
|
|
|
} |
519
|
1149
|
50
|
|
|
|
40799
|
sysseek($x->{fh}, $x->{cur}, SEEK_SET) |
520
|
|
|
|
|
|
|
or croak $!; |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
# sysread returns undef for errors; |
523
|
|
|
|
|
|
|
# due to the pre-condition, 0 should not occur either |
524
|
1149
|
|
|
|
|
4822
|
my $tmp = ''; |
525
|
1149
|
50
|
|
|
|
58916
|
sysread($x->{fh}, $tmp, $read_size) == $read_size |
526
|
|
|
|
|
|
|
or croak $!; |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
# append to the temp the leftover partial line in the buffer |
529
|
88
|
|
|
|
|
505
|
$tmp .= pop @{$x->{buffer}} |
|
1149
|
|
|
|
|
3749
|
|
530
|
1149
|
100
|
|
|
|
1943
|
if (@{$x->{buffer}}); |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
# platform-dependent translation |
533
|
1149
|
50
|
|
|
|
3268
|
$tmp =~ s/$x->{translate}/\n/ |
534
|
|
|
|
|
|
|
if defined $x->{translate}; |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
# split the temp and store it in the buffer |
537
|
1149
|
|
|
|
|
1133904
|
@{$x->{buffer}} = $tmp =~ /$x->{re}/gs; |
|
1149
|
|
|
|
|
259224
|
|
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
} |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
# used only as fail-safe default |
542
|
|
|
|
|
|
|
sub _eof { |
543
|
0
|
|
|
0
|
|
0
|
my ($x) = @_; |
544
|
|
|
|
|
|
|
return |
545
|
0
|
0
|
|
|
|
0
|
($x->{move} == 1 ) ? $x->next_eof() : |
|
|
0
|
|
|
|
|
|
546
|
|
|
|
|
|
|
($x->{move} == -1) ? $x->prev_eof() : |
547
|
|
|
|
|
|
|
undef; |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
sub _next_eof { |
551
|
51
|
|
|
51
|
|
282
|
my ($x) = @_; |
552
|
51
|
|
100
|
|
|
212
|
return $x->{cur} == $x->{file_size} && @{$x->{buffer}} == 0; |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
sub _prev_eof { |
556
|
51
|
|
|
51
|
|
370
|
my ($x) = @_; |
557
|
51
|
|
100
|
|
|
213
|
return $x->{cur} == 0 && @{$x->{buffer}} == 0; |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
# fixes up our aliases so that we eliminate the indirection functions |
561
|
|
|
|
|
|
|
# _read_line() and _eof() |
562
|
|
|
|
|
|
|
sub _fixup_alias { |
563
|
2138
|
|
|
2138
|
|
4154
|
my ($x) = @_; |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
# TODO: walk through the symbol table to do this automatically? |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
# redefining aliases |
568
|
2
|
|
|
2
|
|
15
|
no warnings qw/redefine/; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
1177
|
|
569
|
2138
|
50
|
|
|
|
20674
|
*READLINE = ($x->{move} == 1) ? \&_next_line : ($x->{move} == -1) ? \&_prev_line : undef; |
|
|
100
|
|
|
|
|
|
570
|
2138
|
50
|
|
|
|
44003
|
*getline = ($x->{move} == 1) ? \&_next_line : ($x->{move} == -1) ? \&_prev_line : undef; |
|
|
100
|
|
|
|
|
|
571
|
2138
|
50
|
|
|
|
14157
|
*readline = ($x->{move} == 1) ? \&_next_line : ($x->{move} == -1) ? \&_prev_line : undef; |
|
|
100
|
|
|
|
|
|
572
|
2138
|
50
|
|
|
|
15909
|
*eof = ($x->{move} == 1) ? \&_next_eof : ($x->{move} == -1) ? \&_prev_eof : undef; |
|
|
100
|
|
|
|
|
|
573
|
2138
|
50
|
|
|
|
23008
|
*EOF = ($x->{move} == 1) ? \&_next_eof : ($x->{move} == -1) ? \&_prev_eof : undef; |
|
|
100
|
|
|
|
|
|
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
# function |
578
|
|
|
|
|
|
|
sub _is_dos { |
579
|
206
|
|
|
206
|
|
3278
|
return $^O =~ /^(dos|os2|mswin32|cygwin)$/i; |
580
|
|
|
|
|
|
|
} |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
# function |
583
|
|
|
|
|
|
|
sub _is_mac { |
584
|
206
|
|
|
206
|
|
1438
|
return $^O =~ /^(macos)$/i; |
585
|
|
|
|
|
|
|
} |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
sub _dump { |
588
|
0
|
|
|
0
|
|
|
my ($x) = @_; |
589
|
0
|
|
|
|
|
|
require YAML; |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
# YAML crashes for regexes |
592
|
0
|
|
|
|
|
|
my %h = map {$_ => $x->{$_}} grep {!/^re$/} keys %$x; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
593
|
0
|
|
|
|
|
|
return YAML::Dump(\%h); |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
=pod |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=back |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
=head1 TIED HANDLE INTERFACE |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
local *F; |
603
|
|
|
|
|
|
|
tie *F, "File::Bidirectional", $file, {origin => 1} |
604
|
|
|
|
|
|
|
or die $!; |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
while (my $line = ) { ... } |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
(tied *F)->switch(); |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
The C, C, C, C and C are aliased to the |
611
|
|
|
|
|
|
|
constructor and the lower-case method names, respectively. All other tied |
612
|
|
|
|
|
|
|
operations, such as seeking and writing, are unsupported and will generate an |
613
|
|
|
|
|
|
|
unknown method area. |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
To use the other methods, it is necessary to get at the reference to the object |
616
|
|
|
|
|
|
|
underlying the tied variable via L. |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=head1 LINE ENDINGS |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Currently, File::Bidirectional attempts to imitate Perl by converting the |
621
|
|
|
|
|
|
|
platform-specific line separator into C<\n>. Currently, this only means |
622
|
|
|
|
|
|
|
converting C<\r> on MacOS, and C<\r\n> on DOS and Windows-type systems (when the |
623
|
|
|
|
|
|
|
C option is not set). |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
So far, this module has only been tested on Unix where line endings do not need |
626
|
|
|
|
|
|
|
to be converted, thus it will be greatly appreciated if users can feedback |
627
|
|
|
|
|
|
|
whether the line endings conversion work on their respective platforms. |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
=head1 BENCHMARKS |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
As would be expected, File::Bidirectional is hardly as fast as native Perl I/O. To |
632
|
|
|
|
|
|
|
break the news gently, it can be up to an order of magnitude slower... |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
Reading through a 250MB file with various methods yield the following numbers: |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
Method | Time (s) |
637
|
|
|
|
|
|
|
-------------------------------------- |
638
|
|
|
|
|
|
|
Native Perl | 5 |
639
|
|
|
|
|
|
|
IO::File | 16 |
640
|
|
|
|
|
|
|
File::Bidirectional (OO) | 42 |
641
|
|
|
|
|
|
|
File::Bidirectional (tied) | 51 |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
To be optimistic about it, in the best case File::Bidirectional takes 2.6 times |
644
|
|
|
|
|
|
|
the time taken for IO::File. For smaller files, the absolute time difference |
645
|
|
|
|
|
|
|
may be less noticeable, so you will have to decide if the tradeoff is worth it |
646
|
|
|
|
|
|
|
for your application. It is about as fast as I can make it without dropping |
647
|
|
|
|
|
|
|
down into C, but if anybody has a compelling need for speed or ideas on how to |
648
|
|
|
|
|
|
|
optimize things, please do drop me a line. |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
The benchmarks were performed circa 2005, on a Pentium-4 machine with clockspeed |
651
|
|
|
|
|
|
|
2.8GHz, a 7200rpm IDE harddisk, running Debian sarge and ext3. The programs |
652
|
|
|
|
|
|
|
tested were the respective variants of |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
while (my $line = <$fh>) { chomp $line; } |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
The record separator was simply C<"\n"> and no newline translation took place. |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=head1 AUTHOR |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
Kian Win Ong, cpan@bulk.squeakyblue.com |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=head1 COPYRIGHT |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
Copyright (C) 2005 by Kian Win Ong. All rights reserved. This program is free |
665
|
|
|
|
|
|
|
software; you can redistribute it and/or modify it under the same terms as Perl |
666
|
|
|
|
|
|
|
itself. This can be either the GNU General Public License or the Artistic |
667
|
|
|
|
|
|
|
License, as specified in the Perl README file. |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
Thanks goes out to Uri Guttman, the author of File::ReadBackwards, from which I |
672
|
|
|
|
|
|
|
stole a bunch of code and tests. =) |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=cut |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
1; |