File Coverage

blib/lib/Logfile/Tail.pm
Criterion Covered Total %
statement 296 296 100.0
branch 122 122 100.0
condition 26 26 100.0
subroutine 36 36 100.0
pod 6 6 100.0
total 486 486 100.0


line stmt bran cond sub pod time code
1             package Logfile::Tail;
2              
3             # ABSTRACT: Tails log files with the ability to
4              
5             =head1 NAME
6              
7             Logfile::Tail - read log files
8              
9             =head1 SYNOPSIS
10              
11             use Logfile::Tail ();
12             my $file = new Logfile::Tail('/var/log/messages');
13             while (<$file>) {
14             # process the line
15             }
16              
17             and later in different process
18              
19             my $file = new Logfile::Tail('/var/log/messages');
20              
21             and continue reading where we've left out the last time. Also possible
22             is to explicitly save the current position:
23              
24             my $file = new Logfile::Tail('/var/log/messages',
25             { autocommit => 0 });
26             my $line = $file->getline();
27             $file->commit();
28              
29             =cut
30              
31 4     4   299866 use strict;
  4         37  
  4         146  
32 4     4   24 use warnings FATAL => 'all';
  4         7  
  4         258  
33              
34             our $VERSION = '0.7001_03';
35              
36 4     4   2207 use Symbol ();
  4         3497  
  4         102  
37 4     4   2097 use IO::File ();
  4         32224  
  4         116  
38 4     4   2647 use Digest::SHA ();
  4         13583  
  4         135  
39 4     4   39 use File::Spec ();
  4         8  
  4         94  
40 4     4   19 use Fcntl qw( O_RDWR O_CREAT );
  4         18  
  4         257  
41 4     4   27 use Cwd ();
  4         7  
  4         10453  
42              
43             sub new {
44 40     40 1 95374 my $class = shift;
45              
46 40         249 my $self = Symbol::gensym();
47 40         970 bless $self, $class;
48 40         464 tie *$self, $self;
49              
50 40 100       207 if (@_) {
51 39 100       182 $self->open(@_) or return;
52             }
53              
54 32         229 return $self;
55             }
56              
57             my $STATUS_SUBDIR = '.logfile-tail-status';
58             my $CHECK_LENGTH = 512;
59             sub open {
60 39     39 1 98 my $self = shift;
61              
62 39         115 my $filename = shift;
63 39 100 100     205 if (@_ and ref $_[-1] eq 'HASH') {
64 7         32 *$self->{opts} = pop @_;
65             }
66 39 100       195 if (not exists *$self->{opts}{autocommit}) {
67 35         128 *$self->{opts}{autocommit} = 1;
68             }
69              
70 39         225 my ($archive, $offset, $checksum) = $self->_load_data_from_status($filename);
71 39 100       137 return unless defined $offset;
72              
73 35         106 my $need_commit = *$self->{opts}{autocommit};
74 35 100       111 if (not defined $checksum) {
75 11         26 $need_commit = 1;
76             }
77              
78 35 100       195 my ($fh, $content) = $self->_open(defined $archive ? $filename . $archive : $filename, $offset);
79 35 100 100     459 if (not defined $fh) {
    100          
    100          
80 4 100       49 if (not defined $archive) {
81 2         11 return;
82             }
83 2         12 my ($older_fh, $older_archive, $older_content) = $self->_get_archive($archive, 'older', $offset, $checksum);
84 2 100       15 if (defined $older_fh) {
85 1         4 $fh = $older_fh;
86 1         2 $content = $older_content;
87 1         3 $archive = $older_archive;
88             } else {
89 1         6 return;
90             }
91             } elsif (not defined $checksum) {
92 9         43 $content = $self->_seek_to($fh, 0);
93             } elsif (not defined $content
94             or $checksum ne Digest::SHA::sha256_hex($content)) {
95 9         50 my ($older_fh, $older_archive, $older_content) = $self->_get_archive($archive, 'older', $offset, $checksum);
96 9 100       31 if (defined $older_fh) {
97 5         21 $fh->close();
98 5         94 $fh = $older_fh;
99 5         11 $content = $older_content;
100 5         12 $archive = $older_archive;
101             } else {
102 4         20 $content = $self->_seek_to($fh, 0);
103             }
104             }
105              
106 32         93 my $layers = $_[0];
107 32 100 100     134 if (defined $layers and $layers =~ /<:/) {
108 2         12 $layers =~ s!<:!<:scalar:!;
109             } else {
110 30         68 $layers = '<:scalar';
111             }
112              
113 32         81 my $buffer = '';
114 32         111 *$self->{int_buffer} = \$buffer;
115 32         65 my $int_fh;
116 32     3   58 eval { open $int_fh, $layers, *$self->{int_buffer} };
  32     2   705  
  3         54  
  3         7  
  3         46  
  2         794  
  2         4  
  2         14  
117 32 100       20263 if ($@) {
118 1         8 warn "$@\n";
119 1         24 return;
120             };
121 31         127 *$self->{int_fh} = $int_fh;
122              
123 31         96 *$self->{_fh} = $fh;
124 31         88 *$self->{data_array} = [ $content ];
125 31         125 *$self->{data_length} = length $content;
126 31         84 *$self->{archive} = $archive;
127              
128 31 100       86 if ($need_commit) {
129 28         98 $self->commit();
130             }
131 31         1286 1;
132             }
133              
134             sub _open {
135 75     75   195 my ($self, $filename, $offset) = @_;
136 75 100       444 my $fh = new IO::File or return;
137 73 100       2436 $fh->open($filename, '<:raw') or return;
138              
139 65 100       3697 if ($offset > 0) {
140 33         140 my $content = $self->_seek_to($fh, $offset);
141 33         131 return ($fh, $content);
142             }
143 32         135 return ($fh, '');
144             }
145              
146             sub _fh {
147 2067     2067   2487 *{$_[0]}->{_fh};
  2067         3841  
148             }
149              
150             sub _seek_to {
151 46     46   130 my ($self, $fh, $offset) = @_;
152              
153 46         96 my $offset_start = $offset - $CHECK_LENGTH;
154 46 100       135 $offset_start = 0 if $offset_start < 0;
155              
156             # no point in checking the return value, seek will
157             # go beyond the end of the file anyway
158 46         252 $fh->seek($offset_start, 0);
159              
160 46         615 my $buffer = '';
161 46         156 while ($offset - $offset_start > 0) {
162 36         197 my $read = $fh->read($buffer, $offset - $offset_start, length($buffer));
163             # $read is not defined for example when we try to read directory
164 36 100 100     892 last if not defined $read or $read <= 0;
165 28         108 $offset_start += $read;
166             }
167 46 100       128 if ($offset_start == $offset) {
168 38         127 return $buffer;
169             } else {
170 8         26 return;
171             }
172             }
173              
174             sub _load_data_from_status {
175 39     39   105 my ($self, $log_filename) = @_;
176 39         1553 my $abs_filename = Cwd::abs_path($log_filename);
177 39 100       187 if (not defined $abs_filename) {
178             # can we access the file at all?
179 1         13 warn "Cannot access file [$log_filename]\n";
180 1         9 return;
181             }
182 38         640 my @abs_stat = stat $abs_filename;
183 38 100 100     707 if (defined $abs_stat[1] and (stat $log_filename)[1] == $abs_stat[1]) {
184 35         113 $log_filename = $abs_filename;
185             }
186              
187 38         194 *$self->{filename} = $log_filename;
188              
189 38         114 my $status_filename = *$self->{opts}{status_file};
190 38 100       112 if (not defined $status_filename) {
191 35         365 $status_filename = Digest::SHA::sha256_hex($log_filename);
192             }
193 38         119 my $status_dir = *$self->{opts}{status_dir};
194 38 100       133 if (not defined $status_dir) {
    100          
195 35         90 $status_dir = $STATUS_SUBDIR;
196             } elsif ($status_dir eq '') {
197 1         3 $status_dir = '.';
198             }
199 38 100       542 if (not -d $status_dir) {
200 4         249 mkdir $status_dir, 0775;
201             }
202 38         861 my $status_path = File::Spec->catfile($status_dir, $status_filename);
203 38         362 my $status_fh = new IO::File $status_path, O_RDWR | O_CREAT;
204 38 100       4677 if (not defined $status_fh) {
205 1         31 warn "Error reading/creating status file [$status_path]\n";
206 1         21 return;
207             }
208 37         135 *$self->{status_fh} = $status_fh;
209              
210 37         689 my $status_line = <$status_fh>;
211 37         177 my ($offset, $checksum, $archive_filename) = (0, undef, undef);
212 37 100       118 if (defined $status_line) {
213 26 100       322 if (not $status_line =~ /^File \[(.+?)\] (?:archive \[(.+)\] )?offset \[(\d+)\] checksum \[([0-9a-z]+)\]\n/) {
214 1         13 warn "Status file [$status_path] has bad format\n";
215 1         9 return;
216             }
217 25         111 my $check_filename = $1;
218 25         63 $archive_filename = $2;
219 25         65 $offset = $3;
220 25         115 $checksum = $4;
221 25 100       98 if ($check_filename ne $log_filename) {
222 1         15 warn "Status file [$status_path] is for file [$check_filename] while expected [$log_filename]\n";
223 1         11 return;
224             }
225             }
226              
227 35         206 return ($archive_filename, $offset, $checksum);
228             }
229              
230             sub _save_offset_to_status {
231 62     62   154 my ($self, $offset) = @_;
232 62         136 my $log_filename = *$self->{filename};
233 62         124 my $status_fh = *$self->{status_fh};
234 62         190 my $checksum = $self->_get_current_checksum();
235 62         266 $status_fh->seek(0, 0);
236 62 100       1108 my $archive_text = defined *$self->{archive} ? " archive [@{[ *$self->{archive} ]}]" : '';
  15         86  
237 62         511 $status_fh->printflush("File [$log_filename]$archive_text offset [$offset] checksum [$checksum]\n");
238 62         4890 $status_fh->truncate($status_fh->tell);
239             }
240              
241             sub _push_to_data {
242 2056     2056   2928 my $self = shift;
243 2056         2715 my $chunk = shift;
244 2056 100       3809 if (length($chunk) >= $CHECK_LENGTH) {
245 1         8 *$self->{data_array} = [ substr $chunk, -$CHECK_LENGTH ];
246 1         3 *$self->{data_length} = $CHECK_LENGTH;
247 1         2 return;
248             }
249 2055         3043 my $data = *$self->{data_array};
250 2055         2703 my $data_length = *$self->{data_length};
251 2055         3402 push @$data, $chunk;
252 2055         2587 $data_length += length($chunk);
253 2055         3937 while ($data_length - length($data->[0]) >= $CHECK_LENGTH) {
254 1895         2498 $data_length -= length($data->[0]);
255 1895         3831 shift @$data;
256             }
257 2055         3358 *$self->{data_length} = $data_length;
258             }
259              
260             sub _get_current_checksum {
261 66     66   148 my $self = shift;
262 66         109 my $data_length = *$self->{data_length};
263 66         124 my $data = *$self->{data_array};
264 66         92 my $i = 0;
265 66         454 my $digest = new Digest::SHA('sha256');
266 66 100       1493 if ($data_length > $CHECK_LENGTH) {
267 1         15 $digest->add(substr($data->[0], $data_length - $CHECK_LENGTH));
268 1         2 $i++;
269             }
270 66         224 for (; $i <= $#$data; $i++) {
271 136         569 $digest->add($data->[$i]);
272             }
273 66         775 return $digest->hexdigest();
274             }
275              
276             sub _get_archive {
277 36     36   156 my ($self, $start, $older_newer, $offset, $checksum) = @_;
278 36         117 my @types = ( '-', '.' );
279 36         60 my $start_num;
280 36 100       102 if (defined $start) {
281 27         81 @types = substr($start, 0, 1);
282 27         49 $start_num = substr($start, 1);
283             }
284 36         84 my $filename = *$self->{filename};
285 36         103 for my $t (@types) {
286 43         64 my $srt;
287 43 100       104 if ($t eq '.') {
288 30 100       68 if ($older_newer eq 'newer') {
289 17     98   87 $srt = sub { $_[1] <=> $_[0] };
  98         318  
290             } else {
291 13     34   73 $srt = sub { $_[0] <=> $_[1] };
  34         103  
292             }
293             } else {
294 13 100       35 if ($older_newer eq 'newer') {
295 4     14   27 $srt = sub { $_[0] cmp $_[1] };
  14         52  
296             } else {
297 9     2   76 $srt = sub { $_[1] cmp $_[0] };
  2         8  
298             }
299             }
300 50         132 my @archives = map { "$t$_" } # make it a suffix
301 36         73 sort { $srt->($a, $b) } # sort properly
302 125 100       353 grep { not defined $start_num or $srt->($_, $start_num) == 1} # only newer / older
303 125         429 grep { /^[0-9]+$/ } # only numerical suffixes
304 43         4334 map { substr($_, length($filename) + 1) } # only get the numerical suffixes
  125         342  
305             glob "$filename$t*"; # we look at file.1, file.2 or file-20091231, ...
306 43 100 100     546 if ($older_newer eq 'newer' and -f $filename) {
307 17         61 push @archives, '';
308             }
309 43         160 for my $a (@archives) {
310 40   100     300 my ($fh, $content) = $self->_open($filename . $a, ($offset || 0));
311 40 100       364 if (not defined $fh) {
312 6         18 next;
313             }
314 34 100       81 if (defined $checksum) {
315 14 100 100     132 if (defined $content
316             and $checksum eq Digest::SHA::sha256_hex($content)) {
317 9         79 return ($fh, $a, $content);
318             }
319             } else {
320 20 100       151 return ($fh, ($a eq '' ? undef : $a), $content);
321             }
322 5         19 $fh->close();
323             }
324             }
325 7         31 return;
326             }
327              
328             sub _close_status {
329 31     31   130 my ($self, $offset) = @_;
330 31         110 my $status_fh = delete *$self->{status_fh};
331 31 100       203 $status_fh->close() if defined $status_fh;
332             }
333              
334             sub _getline {
335 2067     2067   2858 my $self = shift;
336 2067         3418 my $fh = $self->_fh;
337 2067 100       3684 if (defined $fh) {
338 2066         2992 my $buffer_ref = *$self->{int_buffer};
339 2089         2683 DO_GETLINE:
340             my $ret = undef;
341 2089         34830 $$buffer_ref = $fh->getline();
342 2089 100       47002 if (not defined $$buffer_ref) {
343             # we are at the end of the current file
344             # we need to check if the file was rotated
345             # in the meantime
346 33         428 my @fh_stat = stat($fh);
347 33         126 my $filename = *$self->{filename};
348 33 100       653 my @file_stat = stat($filename . ( defined *$self->{archive} ? *$self->{archive} : '' ));
349 33 100 100     387 if (not @file_stat or "@fh_stat[0, 1]" ne "@file_stat[0, 1]") {
    100          
350             # our file was rotated, or generally
351             # is no longer where it was when
352             # we started to read
353             my ($older_fh, $older_archive, $older_content)
354 4         24 = $self->_get_archive(*$self->{archive}, 'older', $fh->tell, $self->_get_current_checksum);
355 4 100       12 if (not defined $older_fh) {
356             # we have lost the file / sync
357 1         7 return;
358             }
359 3         30 *$self->{_fh}->close();
360 3         63 *$self->{_fh} = $fh = $older_fh;
361 3         12 *$self->{data_array} = [ $older_content ];
362 3         8 *$self->{data_length} = length $older_content;
363 3         8 *$self->{archive} = $older_archive;
364 3         44 goto DO_GETLINE;
365             } elsif (defined *$self->{archive}) {
366             # our file was not rotated
367             # however, if our file is in fact
368             # a rotate file, we should go to the
369             # next one
370 21         79 my ($newer_fh, $newer_archive) = $self->_get_archive(*$self->{archive}, 'newer');
371 21 100       54 if (not defined $newer_fh) {
372 1         5 return;
373             }
374 20         101 *$self->{_fh}->close();
375 20         410 *$self->{_fh} = $fh = $newer_fh;
376 20         81 *$self->{data_array} = [ '' ];
377 20         42 *$self->{data_length} = 0;
378 20         31 *$self->{archive} = $newer_archive;
379 20         233 goto DO_GETLINE;
380             }
381 8         38 return;
382             }
383 2056         4774 $self->_push_to_data($$buffer_ref);
384 2056         3401 seek(*$self->{int_fh}, 0, 0);
385 2056         35050 my $line = *$self->{int_fh}->getline();
386 2056         44520 return $line;
387             } else {
388 1         3 return undef;
389             }
390             }
391              
392             sub getline {
393 43     43 1 1990 my $self = shift;
394 43         144 my $ret = $self->_getline();
395 4     4   59 no warnings 'uninitialized';
  4         12  
  4         713  
396 43 100       170 if (*$self->{opts}{autocommit} == 2) {
397 1         6 $self->commit();
398             }
399 43         271 return $ret;
400             }
401              
402             sub getlines {
403 7     7 1 27 my $self = shift;
404 7         16 my @out;
405 7         14 while (1) {
406 2024         3686 my $l = $self->_getline();
407 2024 100       4029 if (not defined $l) {
408 7         18 last;
409             }
410 2017         3464 push @out, $l;
411             }
412 4     4   46 no warnings 'uninitialized';
  4         10  
  4         1524  
413 7 100       32 if (*$self->{opts}{autocommit} == 2) {
414 1         6 $self->commit();
415             }
416 7         380 @out;
417             }
418              
419             sub commit {
420 62     62 1 117 my $self = shift;
421 62         129 my $fh = *$self->{_fh};
422 62         300 my $offset = $fh->tell;
423 62         504 $self->_save_offset_to_status($offset);
424             }
425              
426             sub close {
427 31     31 1 4190 my $self = shift;
428 31 100       150 if (*$self->{opts}{autocommit}) {
429 27         75 $self->commit();
430             }
431 31         1312 $self->_close_status();
432 31         674 my $fh = delete *$self->{_fh};
433 31 100       163 $fh->close() if defined $fh;
434             }
435              
436             sub TIEHANDLE() {
437 41 100   41   1964 if (ref $_[0]) {
438             # if we already have object, probably called from new(),
439             # just return that
440 40         162 return $_[0];
441             } else {
442 1         3 my $class = shift;
443 1         8 return $class->new(@_);
444             }
445             }
446              
447             sub READLINE() {
448 29 100   29   16479 goto &getlines if wantarray;
449 24         126 goto &getline;
450             }
451              
452             sub CLOSE() {
453 1     1   3 my $self = shift;
454 1         5 $self->close();
455             }
456              
457             sub DESTROY() {
458 39     39   2409 my $self = shift;
459 39 100       801 $self->close() if defined *$self->{_fh};
460             }
461              
462             1;
463              
464             =head1 DESCRIPTION
465              
466             Log files are files that are generated by various running programs.
467             They are generally only appended to. When parsing information from
468             log files, it is important to only read each record / line once,
469             both for performance and for accounting and statistics reasons.
470              
471             The C provides an easy way to achieve the
472             read-just-once processing of log files.
473              
474             The module remembers for each file the position where it left
475             out the last time, in external status file, and upon next invocation
476             it seeks to the remembered position. It also stores checksum
477             of 512 bytes before that position, and if the checksum does not
478             match the file content the next time it is read, it will try to
479             find the rotated file and read the end of it before advancing to
480             newer rotated file or to the current log file.
481              
482             Both .num and -date suffixed rotated files are supported.
483              
484             =head1 METHODS
485              
486             =over 4
487              
488             =item new()
489              
490             =item new( FILENAME [,MODE [,PERMS]], [ { attributes } ] )
491              
492             =item new( FILENAME, IOLAYERS, [ { attributes } ] )
493              
494             Constructor, creates new C object. Like C,
495             it passes any parameters to method C; it actually creates
496             an C handle internally.
497              
498             Returns new object, or undef upon error.
499              
500             =item open( FILENAME [,MODE [,PERMS]], [ { attributes } ] )
501              
502             =item open( FILENAME, IOLAYERS, [ { attributes } ] )
503              
504             Opens the file using C. If the file was read before, the
505             offset where the reading left out the last time is read from an
506             external file in the ./.logfile-tail-status directory and seek is
507             made to that offset, to continue reading at the last remembered
508             position.
509              
510             If however checksum, which is also stored with the offset, does not
511             match the current content of the file (512 bytes before the offset
512             are checked), the module assumes that the file was rotated / reused
513             / truncated in the mean time since the last read. It will try to
514             find the checksum among the rotated files. If no match is found,
515             it will reset the offset to zero and start from the beginning of
516             the file.
517              
518             Returns true, or undef upon error.
519              
520             The attributes are passed as an optional hashref of key => value
521             pairs. The supported attribute is
522              
523             =over 4
524              
525             =item autocommit
526              
527             Value 0 means that no saving takes place; you need to save explicitly
528             using the commit() method.
529              
530             Value 1 (the default) means that position is saved when the object is
531             closed via explicit close() call, or when it is destroyed. The value
532             is also saved upon the first open.
533              
534             Value 2 causes the position to be save in all cases as value 1,
535             plus after each successful read.
536              
537             =item status_dir
538              
539             The attribute specifies the directory (or subdirectory of current
540             directory) which is used to hold status files. By default,
541             ./.logfile-tail-status directory is used. To store the status
542             files in the current directory, pass empty string or dot (.).
543              
544             =item status_file
545              
546             The attribute specifies the name of the status file which is used to
547             hold the offset and SHA256 checksum of 512 bytes before the offset.
548             By default, SHA256 of the full (absolute) logfile filename is used
549             as the status file name.
550              
551             =back
552              
553             =item commit()
554              
555             Explicitly save the current position and checksum in the status file.
556              
557             Returns true, or undef upon error.
558              
559             =item close()
560              
561             Closes the internal filehandle. It stores the current position
562             and checksum in an external file in the ./.logfile-tail-status
563             directory.
564              
565             Returns true, or undef upon error.
566              
567             =item getline()
568              
569             Line <$fh> in scalar context.
570              
571             =item getlines()
572              
573             Line <$fh> in list context.
574              
575             =back
576              
577             =head1 AUTHOR AND LICENSE
578              
579             Copyright (c) 2010--2021 Jan Pazdziora.
580              
581             Logfile::Tail is free software. You can redistribute it and/or modify
582             it under the terms of either:
583              
584             a) the GNU General Public License, version 2 or 3;
585              
586             b) the Artistic License, either the original or version 2.0.
587