File Coverage

blib/lib/Logfile/Tail.pm
Criterion Covered Total %
statement 295 295 100.0
branch 123 124 99.1
condition 26 26 100.0
subroutine 35 35 100.0
pod 6 6 100.0
total 485 486 99.7


line stmt bran cond sub pod time code
1              
2             package Logfile::Tail;
3              
4             =head1 NAME
5              
6             Logfile::Tail - read log files
7              
8             =head1 SYNOPSIS
9              
10             use Logfile::Tail ();
11             my $file = new Logfile::Tail('/var/log/messages');
12             while (<$file>) {
13             # process the line
14             }
15              
16             and later in different process
17              
18             my $file = new Logfile::Tail('/var/log/messages');
19              
20             and continue reading where we've left out the last time. Also possible
21             is to explicitly save the current position:
22              
23             my $file = new Logfile::Tail('/var/log/messages',
24             { autocommit => 0 });
25             my $line = $file->getline();
26             $file->commit();
27              
28             =cut
29              
30 13     13   486794 use strict;
  13         25  
  13         548  
31 13     13   113 use warnings FATAL => 'all';
  13         27  
  13         1194  
32              
33             our $VERSION = '0.9';
34              
35 13     13   6224 use Symbol ();
  13         18502  
  13         418  
36 13     13   6145 use IO::File ();
  13         109303  
  13         413  
37 13     13   7560 use Digest::SHA ();
  13         48596  
  13         513  
38 13     13   135 use File::Spec ();
  13         23  
  13         490  
39 13     13   68 use Fcntl qw( O_RDWR O_CREAT );
  13         23  
  13         862  
40 13     13   85 use Cwd ();
  13         24  
  13         43471  
41              
42             sub new {
43 46     46 1 766395 my $class = shift;
44              
45 46         259 my $self = Symbol::gensym();
46 46         889 bless $self, $class;
47 46         661 tie *$self, $self;
48              
49 46 100       194 if (@_) {
50 45 100       265 $self->open(@_) or return;
51             }
52              
53 36         266 return $self;
54             }
55              
56             my $STATUS_SUBDIR = '.logfile-tail-status';
57             my $CHECK_LENGTH = 512;
58             sub open {
59 45     45 1 122 my $self = shift;
60              
61 45         115 my $filename = shift;
62 45 100 100     236 if (@_ and ref $_[-1] eq 'HASH') {
63 13         55 *$self->{opts} = pop @_;
64             }
65 45 100       215 if (not exists *$self->{opts}{autocommit}) {
66 41         128 *$self->{opts}{autocommit} = 1;
67             }
68              
69 45         213 my ($archive, $offset, $checksum) = $self->_load_data_from_status($filename);
70 45 100       185 return unless defined $offset;
71              
72 40         104 my $need_commit = *$self->{opts}{autocommit};
73 40 100       137 if (not defined $checksum) {
74 13         40 $need_commit = 1;
75             }
76              
77 40 100       241 my ($fh, $content) = $self->_open(defined $archive ? $filename . $archive : $filename, $offset);
78 40 100 100     541 if (not defined $fh) {
    100          
    100          
79 5 100       21 if (not defined $archive) {
80 3         53 return;
81             }
82 2         14 my ($older_fh, $older_archive, $older_content) = $self->_get_archive($archive, 'older', $offset, $checksum);
83 2 100       7 if (defined $older_fh) {
84 1         2 $fh = $older_fh;
85 1         55 $content = $older_content;
86 1         3 $archive = $older_archive;
87             } else {
88 1         8 return;
89             }
90             } elsif (not defined $checksum) {
91 11         44 $content = $self->_seek_to($fh, 0);
92             } elsif (not defined $content
93             or $checksum ne Digest::SHA::sha256_hex($content)) {
94 9         50 my ($older_fh, $older_archive, $older_content) = $self->_get_archive($archive, 'older', $offset, $checksum);
95 9 100       30 if (defined $older_fh) {
96 5         21 $fh->close();
97 5         86 $fh = $older_fh;
98 5         12 $content = $older_content;
99 5         11 $archive = $older_archive;
100             } else {
101 4         14 $content = $self->_seek_to($fh, 0);
102             }
103             }
104              
105 36         76 my $layers = $_[0];
106 36 100 100     148 if (defined $layers and $layers =~ /<:/) {
107 2         8 $layers =~ s!<:!<:scalar:!;
108             } else {
109 34         67 $layers = '<:scalar';
110             }
111              
112 36         76 my $buffer = '';
113 36         110 *$self->{int_buffer} = \$buffer;
114 36         56 my $int_fh;
115 36     2   84 eval { open $int_fh, $layers, *$self->{int_buffer} };
  36         464  
  2         1824  
  2         32  
  2         11  
116 36 100       6384 if ($@) {
117 1         9 warn "$@\n";
118 1         52 return;
119             };
120 35         108 *$self->{int_fh} = $int_fh;
121              
122 35         100 *$self->{_fh} = $fh;
123 35         108 *$self->{data_array} = [ $content ];
124 35         117 *$self->{data_length} = length $content;
125 35         110 *$self->{archive} = $archive;
126              
127 35 100       98 if ($need_commit) {
128 32         108 $self->commit();
129             }
130 35         2675 1;
131             }
132              
133             sub _open {
134 80     80   204 my ($self, $filename, $offset) = @_;
135 80 100       442 my $fh = new IO::File or return;
136 78 100       2606 $fh->open($filename, '<:raw') or return;
137              
138 69 100       3515 if ($offset > 0) {
139 35         132 my $content = $self->_seek_to($fh, $offset);
140 35         123 return ($fh, $content);
141             }
142 34         109 return ($fh, '');
143             }
144              
145             sub _fh {
146 2083     2083   1947 *{$_[0]}->{_fh};
  2083         2723  
147             }
148              
149             sub _seek_to {
150 50     50   118 my ($self, $fh, $offset) = @_;
151              
152 50         95 my $offset_start = $offset - $CHECK_LENGTH;
153 50 100       168 $offset_start = 0 if $offset_start < 0;
154              
155             # no point in checking the return value, seek will
156             # go beyond the end of the file anyway
157 50         359 $fh->seek($offset_start, 0);
158              
159 50         453 my $buffer = '';
160 50         150 while ($offset - $offset_start > 0) {
161 38         188 my $read = $fh->read($buffer, $offset - $offset_start, length($buffer));
162             # $read is not defined for example when we try to read directory
163 38 100 100     1194 last if not defined $read or $read <= 0;
164 30         81 $offset_start += $read;
165             }
166 50 100       147 if ($offset_start == $offset) {
167 42         117 return $buffer;
168             } else {
169 8         21 return;
170             }
171             }
172              
173             sub _load_data_from_status {
174 45     45   123 my ($self, $log_filename) = @_;
175 45         2003 my $abs_filename = Cwd::abs_path($log_filename);
176 45 100       189 if (not defined $abs_filename) {
177             # can we access the file at all?
178 1         16 warn "Cannot access file [$log_filename]\n";
179 1         22 return;
180             }
181 44         692 my @abs_stat = stat $abs_filename;
182 44 100 100     618 if (defined $abs_stat[1] and (stat $log_filename)[1] == $abs_stat[1]) {
183 40         91 $log_filename = $abs_filename;
184             }
185              
186 44         184 *$self->{filename} = $log_filename;
187              
188 44         117 my $status_filename = *$self->{opts}{status_file};
189 44 100       164 if (not defined $status_filename) {
190 39         348 $status_filename = Digest::SHA::sha256_hex($log_filename);
191             }
192 44         121 my $status_dir = *$self->{opts}{status_dir};
193 44 100       146 if (not defined $status_dir) {
    100          
194 38         81 $status_dir = $STATUS_SUBDIR;
195             } elsif ($status_dir eq '') {
196 1         3 $status_dir = '.';
197             }
198 44 100       631 if (not -d $status_dir) {
199 5         436 mkdir $status_dir, 0775;
200             }
201 44         957 my $status_path = File::Spec->catfile($status_dir, $status_filename);
202 44         416 my $status_fh = new IO::File $status_path, O_RDWR | O_CREAT;
203 44 100       7375 if (not defined $status_fh) {
204 2         49 warn "Error reading/creating status file [$status_path]\n";
205 2         20 return;
206             }
207 42         153 *$self->{status_fh} = $status_fh;
208              
209 42         991 my $status_line = <$status_fh>;
210 42         144 my ($offset, $checksum, $archive_filename) = (0, undef, undef);
211 42 100       191 if (defined $status_line) {
212 29 100       346 if (not $status_line =~ /^File \[(.+?)\] (?:archive \[(.+)\] )?offset \[(\d+)\] checksum \[([0-9a-z]+)\]\n/) {
213 1         14 warn "Status file [$status_path] has bad format\n";
214 1         13 return;
215             }
216 28         92 my $check_filename = $1;
217 28         87 $archive_filename = $2;
218 28         110 $offset = $3;
219 28         114 $checksum = $4;
220 28 100       103 if ($check_filename ne $log_filename) {
221 1         16 warn "Status file [$status_path] is for file [$check_filename] while expected [$log_filename]\n";
222 1         13 return;
223             }
224             }
225              
226 40         238 return ($archive_filename, $offset, $checksum);
227             }
228              
229             sub _save_offset_to_status {
230 70     70   173 my ($self, $offset) = @_;
231 70         171 my $log_filename = *$self->{filename};
232 70         150 my $status_fh = *$self->{status_fh};
233 70         181 my $checksum = $self->_get_current_checksum();
234 70 50       329 $status_fh->seek(0, 0) if defined $status_fh;
235 70 100       817 my $archive_text = defined *$self->{archive} ? " archive [@{[ *$self->{archive} ]}]" : '';
  15         70  
236 70         423 $status_fh->printflush("File [$log_filename]$archive_text offset [$offset] checksum [$checksum]\n");
237 70         6468 $status_fh->truncate($status_fh->tell);
238             }
239              
240             sub _push_to_data {
241 2068     2068   1930 my $self = shift;
242 2068         2577 my $chunk = shift;
243 2068 100       3116 if (length($chunk) >= $CHECK_LENGTH) {
244 1         7 *$self->{data_array} = [ substr $chunk, -$CHECK_LENGTH ];
245 1         4 *$self->{data_length} = $CHECK_LENGTH;
246 1         3 return;
247             }
248 2067         2241 my $data = *$self->{data_array};
249 2067         2110 my $data_length = *$self->{data_length};
250 2067         2571 push @$data, $chunk;
251 2067         2063 $data_length += length($chunk);
252 2067         3078 while ($data_length - length($data->[0]) >= $CHECK_LENGTH) {
253 1895         2031 $data_length -= length($data->[0]);
254 1895         2989 shift @$data;
255             }
256 2067         2643 *$self->{data_length} = $data_length;
257             }
258              
259             sub _get_current_checksum {
260 74     74   144 my $self = shift;
261 74         150 my $data_length = *$self->{data_length};
262 74         120 my $data = *$self->{data_array};
263 74         93 my $i = 0;
264 74         568 my $digest = new Digest::SHA('sha256');
265 74 100       1709 if ($data_length > $CHECK_LENGTH) {
266 1         11 $digest->add(substr($data->[0], $data_length - $CHECK_LENGTH));
267 1         2 $i++;
268             }
269 74         226 for (; $i <= $#$data; $i++) {
270 156         665 $digest->add($data->[$i]);
271             }
272 74         769 return $digest->hexdigest();
273             }
274              
275             sub _get_archive {
276 36     36   161 my ($self, $start, $older_newer, $offset, $checksum) = @_;
277 36         81 my @types = ( '-', '.' );
278 36         75 my $start_num;
279 36 100       85 if (defined $start) {
280 27         69 @types = substr($start, 0, 1);
281 27         70 $start_num = substr($start, 1);
282             }
283 36         75 my $filename = *$self->{filename};
284 36         70 for my $t (@types) {
285 43         78 my $srt;
286 43 100       91 if ($t eq '.') {
287 30 100       58 if ($older_newer eq 'newer') {
288 17     98   63 $srt = sub { $_[1] <=> $_[0] };
  98         267  
289             } else {
290 13     34   81 $srt = sub { $_[0] <=> $_[1] };
  34         96  
291             }
292             } else {
293 13 100       30 if ($older_newer eq 'newer') {
294 4     14   18 $srt = sub { $_[0] cmp $_[1] };
  14         40  
295             } else {
296 9     2   52 $srt = sub { $_[1] cmp $_[0] };
  2         5  
297             }
298             }
299 50         106 my @archives = map { "$t$_" } # make it a suffix
300 36         54 sort { $srt->($a, $b) } # sort properly
301 125 100       291 grep { not defined $start_num or $srt->($_, $start_num) == 1} # only newer / older
302 125         340 grep { /^[0-9]+$/ } # only numerical suffixes
303 43         5288 map { substr($_, length($filename) + 1) } # only get the numerical suffixes
  125         301  
304             glob "$filename$t*"; # we look at file.1, file.2 or file-20091231, ...
305 43 100 100     446 if ($older_newer eq 'newer' and -f $filename) {
306 17         38 push @archives, '';
307             }
308 43         133 for my $a (@archives) {
309 40   100     257 my ($fh, $content) = $self->_open($filename . $a, ($offset || 0));
310 40 100       343 if (not defined $fh) {
311 6         20 next;
312             }
313 34 100       56 if (defined $checksum) {
314 14 100 100     120 if (defined $content
315             and $checksum eq Digest::SHA::sha256_hex($content)) {
316 9         81 return ($fh, $a, $content);
317             }
318             } else {
319 20 100       135 return ($fh, ($a eq '' ? undef : $a), $content);
320             }
321 5         19 $fh->close();
322             }
323             }
324 7         36 return;
325             }
326              
327             sub _close_status {
328 35     35   100 my ($self, $offset) = @_;
329 35         95 my $status_fh = delete *$self->{status_fh};
330 35 100       245 $status_fh->close() if defined $status_fh;
331             }
332              
333             sub _getline {
334 2083     2083   2112 my $self = shift;
335 2083         2659 my $fh = $self->_fh;
336 2083 100       2753 if (defined $fh) {
337 2082         2320 my $buffer_ref = *$self->{int_buffer};
338 2105         3443 DO_GETLINE:
339             my $ret = undef;
340 2105         5024 $$buffer_ref = $fh->getline();
341 2105 100       9015 if (not defined $$buffer_ref) {
342             # we are at the end of the current file
343             # we need to check if the file was rotated
344             # in the meantime
345 37         389 my @fh_stat = stat($fh);
346 37         79 my $filename = *$self->{filename};
347 37 100       624 my @file_stat = stat($filename . ( defined *$self->{archive} ? *$self->{archive} : '' ));
348 37 100 100     394 if (not @file_stat or "@fh_stat[0, 1]" ne "@file_stat[0, 1]") {
    100          
349             # our file was rotated, or generally
350             # is no longer where it was when
351             # we started to read
352             my ($older_fh, $older_archive, $older_content)
353 4         25 = $self->_get_archive(*$self->{archive}, 'older', $fh->tell, $self->_get_current_checksum);
354 4 100       13 if (not defined $older_fh) {
355             # we have lost the file / sync
356 1         4 return;
357             }
358 3         27 *$self->{_fh}->close();
359 3         65 *$self->{_fh} = $fh = $older_fh;
360 3         10 *$self->{data_array} = [ $older_content ];
361 3         10 *$self->{data_length} = length $older_content;
362 3         4 *$self->{archive} = $older_archive;
363 3         52 goto DO_GETLINE;
364             } elsif (defined *$self->{archive}) {
365             # our file was not rotated
366             # however, if our file is in fact
367             # a rotate file, we should go to the
368             # next one
369 21         51 my ($newer_fh, $newer_archive) = $self->_get_archive(*$self->{archive}, 'newer');
370 21 100       48 if (not defined $newer_fh) {
371             # clear the EOF flag to allow for repeated read
372 1         5 $fh->seek(0, 1);
373 1         9 return;
374             }
375 20         66 *$self->{_fh}->close();
376 20         292 *$self->{_fh} = $fh = $newer_fh;
377 20         75 *$self->{data_array} = [ '' ];
378 20         33 *$self->{data_length} = 0;
379 20         29 *$self->{archive} = $newer_archive;
380 20         256 goto DO_GETLINE;
381             } else {
382             # clear the EOF flag to allow for repeated read
383 12         62 $fh->seek(0, 1);
384             }
385 12         156 return;
386             }
387 2068         3412 $self->_push_to_data($$buffer_ref);
388 2068         2434 seek(*$self->{int_fh}, 0, 0);
389 2068         4160 my $line = *$self->{int_fh}->getline();
390 2068         3283 return $line;
391             } else {
392 1         3 return undef;
393             }
394             }
395              
396             sub getline {
397 59     59 1 2895 my $self = shift;
398 59         184 my $ret = $self->_getline();
399 13     13   115 no warnings 'uninitialized';
  13         26  
  13         2241  
400 59 100       236 if (*$self->{opts}{autocommit} == 2) {
401 1         4 $self->commit();
402             }
403 59         352 return $ret;
404             }
405              
406             sub getlines {
407 7     7 1 18 my $self = shift;
408 7         14 my @out;
409 7         13 while (1) {
410 2024         2631 my $l = $self->_getline();
411 2024 100       3022 if (not defined $l) {
412 7         15 last;
413             }
414 2017         2855 push @out, $l;
415             }
416 13     13   102 no warnings 'uninitialized';
  13         24  
  13         5726  
417 7 100       42 if (*$self->{opts}{autocommit} == 2) {
418 1         5 $self->commit();
419             }
420 7         464 @out;
421             }
422              
423             sub commit {
424 70     70 1 157 my $self = shift;
425 70         136 my $fh = *$self->{_fh};
426 70         366 my $offset = $fh->tell;
427 70         498 $self->_save_offset_to_status($offset);
428             }
429              
430             sub close {
431 35     35 1 4968 my $self = shift;
432 35 100       139 if (*$self->{opts}{autocommit}) {
433 31         87 $self->commit();
434             }
435 35         2631 $self->_close_status();
436 35         777 my $fh = delete *$self->{_fh};
437 35 100       197 $fh->close() if defined $fh;
438             }
439              
440             sub TIEHANDLE() {
441 47 100   47   2897 if (ref $_[0]) {
442             # if we already have object, probably called from new(),
443             # just return that
444 46         206 return $_[0];
445             } else {
446 1         12 my $class = shift;
447 1         6 return $class->new(@_);
448             }
449             }
450              
451             sub READLINE() {
452 29 100   29   23769 goto &getlines if wantarray;
453 24         119 goto &getline;
454             }
455              
456             sub CLOSE() {
457 1     1   3 my $self = shift;
458 1         5 $self->close();
459             }
460              
461             sub DESTROY() {
462 45     45   2521 my $self = shift;
463 45 100       692 $self->close() if defined *$self->{_fh};
464             }
465              
466             1;
467              
468             =head1 DESCRIPTION
469              
470             Log files are files that are generated by various running programs.
471             They are generally only appended to. When parsing information from
472             log files, it is important to only read each record / line once,
473             both for performance and for accounting and statistics reasons.
474              
475             The C provides an easy way to achieve the
476             read-just-once processing of log files.
477              
478             The module remembers for each file the position where it left
479             out the last time, in external status file, and upon next invocation
480             it seeks to the remembered position. It also stores checksum
481             of 512 bytes before that position, and if the checksum does not
482             match the file content the next time it is read, it will try to
483             find the rotated file and read the end of it before advancing to
484             newer rotated file or to the current log file.
485              
486             Both .num and -date suffixed rotated files are supported.
487              
488             =head1 METHODS
489              
490             =over 4
491              
492             =item new()
493              
494             =item new( FILENAME [,MODE [,PERMS]], [ { attributes } ] )
495              
496             =item new( FILENAME, IOLAYERS, [ { attributes } ] )
497              
498             Constructor, creates new C object. Like C,
499             it passes any parameters to method C; it actually creates
500             an C handle internally.
501              
502             Returns new object, or undef upon error.
503              
504             =item open( FILENAME [,MODE [,PERMS]], [ { attributes } ] )
505              
506             =item open( FILENAME, IOLAYERS, [ { attributes } ] )
507              
508             Opens the file using C. If the file was read before, the
509             offset where the reading left out the last time is read from an
510             external file in the ./.logfile-tail-status directory and seek is
511             made to that offset, to continue reading at the last remembered
512             position.
513              
514             If however checksum, which is also stored with the offset, does not
515             match the current content of the file (512 bytes before the offset
516             are checked), the module assumes that the file was rotated / reused
517             / truncated in the mean time since the last read. It will try to
518             find the checksum among the rotated files. If no match is found,
519             it will reset the offset to zero and start from the beginning of
520             the file.
521              
522             Returns true, or undef upon error.
523              
524             The attributes are passed as an optional hashref of key => value
525             pairs. The supported attribute is
526              
527             =over 4
528              
529             =item autocommit
530              
531             Value 0 means that no saving takes place; you need to save explicitly
532             using the commit() method.
533              
534             Value 1 (the default) means that position is saved when the object is
535             closed via explicit close() call, or when it is destroyed. The value
536             is also saved upon the first open.
537              
538             Value 2 causes the position to be save in all cases as value 1,
539             plus after each successful read.
540              
541             =item status_dir
542              
543             The attribute specifies the directory (or subdirectory of current
544             directory) which is used to hold status files. By default,
545             ./.logfile-tail-status directory is used. To store the status
546             files in the current directory, pass empty string or dot (.).
547              
548             =item status_file
549              
550             The attribute specifies the name of the status file which is used to
551             hold the offset and SHA256 checksum of 512 bytes before the offset.
552             By default, SHA256 of the full (absolute) logfile filename is used
553             as the status file name.
554              
555             =back
556              
557             =item commit()
558              
559             Explicitly save the current position and checksum in the status file.
560              
561             Returns true, or undef upon error.
562              
563             =item close()
564              
565             Closes the internal filehandle. It stores the current position
566             and checksum in an external file in the ./.logfile-tail-status
567             directory.
568              
569             Returns true, or undef upon error.
570              
571             =item getline()
572              
573             Line <$fh> in scalar context.
574              
575             =item getlines()
576              
577             Line <$fh> in list context.
578              
579             =back
580              
581             =head1 AUTHOR AND LICENSE
582              
583             Copyright (c) 2010--2024 Jan Pazdziora.
584              
585             Logfile::Tail is free software. You can redistribute it and/or modify
586             it under the terms of either:
587              
588             a) the GNU General Public License, version 2 or 3;
589              
590             b) the Artistic License, either the original or version 2.0.
591