File Coverage

blib/lib/Apache/LogRegex.pm
Criterion Covered Total %
statement 71 74 95.9
branch 31 34 91.1
condition 5 6 83.3
subroutine 12 12 100.0
pod 6 6 100.0
total 125 132 94.7


line stmt bran cond sub pod time code
1             package Apache::LogRegex;
2             $Apache::LogRegex::VERSION = '1.70';
3 1     1   577 use strict;
  1         1  
  1         33  
4 1     1   3 use warnings;
  1         1  
  1         470  
5              
6             sub new {
7 5     5 1 1904 my ($class, $format) = @_;
8              
9 5 100       35 die __PACKAGE__ . '->new() takes 1 argument' unless @_ == 2;
10 3 100       17 die __PACKAGE__ . '->new() argument 1 (FORMAT) is false' unless $format;
11              
12 1         13 my $self = bless {}, $class;
13              
14 1         5 $self->{_format} = $format;
15              
16 1         2 $self->{_regex} = '';
17 1         2 $self->{_regex_fields} = undef;
18              
19 1         2 $self->_parse_format();
20              
21 1         3 return $self;
22             }
23              
24             sub _parse_format {
25 1     1   1 my ($self) = @_;
26              
27 1         2 chomp $self->{_format};
28 1         5 $self->{_format} =~ s#[ \t]+# #;
29 1         2 $self->{_format} =~ s#^ ##;
30 1         1 $self->{_format} =~ s# $##;
31              
32 1         1 my @regex_elements;
33              
34 1         3 for my $element (split /\s+/, $self->{_format}) {
35 9 100       17 my $quotes = $element =~ m/^\\\"/ ? 1 : 0;
36              
37 9 100       14 if ($quotes) {
38 3         5 $element =~ s/^\\\"//;
39 3         5 $element =~ s/\\\"$//;
40             }
41              
42 9         6 push @{ $self->{_regex_fields} }, $self->rename_this_name($element);
  9         14  
43              
44 9         7 my $x = '(\S*)';
45              
46 9 100       19 if ($quotes) {
    100          
    50          
47 3 50 100     13 if ($element eq '%r' or $element =~ m/{Referer}/ or $element =~ m/{User-Agent}/) {
      66        
48 3         7 $x = qr/"([^"\\]*(?:\\.[^"\\]*)*)"/;
49             }
50             else {
51 0         0 $x = '\"([^\"]*)\"';
52             }
53             }
54             elsif ($element =~ m/^%.*t$/) {
55 1         2 $x = '(\[[^\]]+\])';
56             }
57             elsif ($element eq '%U') {
58 0         0 $x = '(.+?)';
59             }
60              
61 9         11 push @regex_elements, $x;
62             }
63              
64 1         4 my $regex_string = join '\s+', @regex_elements;
65 1         50 $self->{_regex} = qr/^$regex_string\s*$/;
66             }
67              
68             sub parse {
69 10     10 1 15440 my ($self, $line) = @_;
70              
71 10 100       34 die __PACKAGE__ . '->parse() takes 1 argument' unless @_ == 2;
72 8 100       19 die __PACKAGE__ . '->parse() argument 1 (LINE) is undefined' unless defined $line;
73              
74 7 100       85 if (my @temp = $line =~ $self->{_regex}) {
75 5         4 my %data;
76 5         6 @data{ @{ $self->{_regex_fields} } } = @temp;
  5         30  
77 5 100       47 return wantarray ? %data : \%data;
78             }
79              
80 2         6 return;
81             }
82              
83             sub generate_parser {
84 2     2 1 484 my ($self, %args) = @_;
85              
86 2         3 my $regex = $self->{_regex};
87 2         5 my @fields = @{ $self->{_regex_fields} };
  2         6  
88              
89 1     1   15 no warnings 'uninitialized';
  1         1  
  1         238  
90              
91 2 100       6 if ($args{reuse_record}) {
92 1         2 my $record = {};
93             return sub {
94 5 100   5   1107 if (@$record{@fields} = $_[0] =~ $regex) {
95 2         5 return $record;
96             } else {
97 3         5 return;
98             }
99             }
100 1         5 } else {
101             return sub {
102 2     2   2919 my $record = {};
103 2 50       38 if (@$record{@fields} = $_[0] =~ $regex) {
104 2         4 return $record;
105             } else {
106 0         0 return;
107             }
108             }
109 1         6 }
110             }
111              
112             sub names {
113 2     2 1 1079 my ($self) = @_;
114              
115 2 100       11 die __PACKAGE__ . '->names() takes no argument' unless @_ == 1;
116              
117 1         1 return @{ $self->{_regex_fields} };
  1         11  
118             }
119              
120             sub regex {
121 2     2 1 585 my ($self) = @_;
122              
123 2 100       13 die __PACKAGE__ . '->regex() takes no argument' unless @_ == 1;
124              
125 1         2 return $self->{_regex};
126             }
127              
128             sub rename_this_name {
129 9     9 1 8 my ($self, $name) = @_;
130              
131 9         10 return $name;
132             }
133              
134             1;
135              
136             =head1 NAME
137              
138             Apache::LogRegex - Parse a line from an Apache logfile into a hash
139              
140             =head1 VERSION
141              
142             version 1.70
143              
144             =head1 SYNOPSIS
145              
146             use Apache::LogRegex;
147              
148             my $lr;
149              
150             eval { $lr = Apache::LogRegex->new($log_format) };
151             die "Unable to parse log line: $@" if ($@);
152              
153             my %data;
154              
155             while ( my $line_from_logfile = <> ) {
156             eval { %data = $lr->parse($line_from_logfile); };
157             if (%data) {
158             # We have data to process
159             } else {
160             # We could not parse this line
161             }
162             }
163              
164             # or generate a closure for better performance
165              
166             my $parser = $lr->generate_parser;
167              
168             while ( my $line_from_logfile = <> ) {
169             my $data = $parser->($line_from_logfile) or last;
170             # We have data to process
171             }
172              
173             =head1 DESCRIPTION
174              
175             =head2 Overview
176              
177             Designed as a simple class to parse Apache log files. It will construct
178             a regex that will parse the given log file format and can then parse
179             lines from the log file line by line returning a hash of each line.
180              
181             The field names of the hash are derived from the log file format. Thus if
182             the format is '%a %t \"%r\" %s %b %T \"%{Referer}i\" ...' then the keys of
183             the hash will be %a, %t, %r, %s, %b, %T and %{Referer}i.
184              
185             Should these key names be unusable, as I guess they probably are, then subclass
186             and provide an override rename_this_name() method that can rename the keys
187             before they are added in the array of field names.
188              
189             This module supports variable spacing between elements, so if you have
190             more than one space between elements in your format or in your log
191             file, that should be OK.
192              
193             =head1 SUBROUTINES/METHODS
194              
195             =head2 Constructor
196              
197             =over 4
198              
199             =item Apache::LogRegex->new( FORMAT )
200              
201             Returns a Apache::LogRegex object that can parse a line from an Apache
202             logfile that was written to with the FORMAT string. The FORMAT
203             string is the CustomLog string from the httpd.conf file.
204              
205             =back
206              
207             =head2 Class and object methods
208              
209             =over 4
210              
211             =item parse( LINE )
212              
213             Given a LINE from an Apache logfile it will parse the line and return
214             all the elements of the line indexed by their corresponding format
215             string. In scalar context this takes the form of a hash reference, in
216             list context a flat paired list. In either context, if the line cannot
217             be parsed a false value will be returned.
218              
219             =item generate_parser( LIST )
220              
221             Generate and return a closure that, when called with a line, will
222             return a hash reference containing the parsed fields, or undef if the
223             parse failed. If LIST is supplied, it is interpreted as a flattened
224             hash of arguments. One argument is recognised; if C is a
225             true value, then the closure will reuse the same hash reference each
226             time it is called. The default is to allocate a new hash for each
227             result.
228              
229             Calling this closure is significantly faster than the C method.
230              
231             =item names()
232              
233             Returns a list of field names that were extracted from the data. Such as
234             '%a', '%t' and '%r' from the above example.
235              
236             =item regex()
237              
238             Returns a copy of the regex that will be used to parse the log file.
239              
240             =item rename_this_name( NAME )
241              
242             Use this method to rename the keys that will be used in the returned hash.
243             The initial NAME is passed in and the method should return the new name.
244              
245             =back
246              
247             =head1 CONFIGURATION AND ENVIRONMENT
248              
249             Perl 5
250              
251             =head1 DIAGNOSTICS
252              
253             The various custom time formats could be problematic but providing that
254             they are encased in '[' and ']' all should be fine.
255              
256             =over 4
257              
258             =item Apache::LogRegex->new() takes 1 argument
259              
260             When the constructor is called it requires one argument. This message is
261             given if more or less arguments were supplied.
262              
263             =item Apache::LogRegex->new() argument 1 (FORMAT) is undefined
264              
265             The correct number of arguments were supplied with the constructor call,
266             however the first argument, FORMAT, was undefined.
267              
268             =item Apache::LogRegex->parse() takes 1 argument
269              
270             When the method is called it requires one argument. This message is
271             given if more or less arguments were supplied.
272              
273             =item Apache::LogRegex->parse() argument 1 (LINE) is undefined
274              
275             The correct number of arguments were supplied with the method call,
276             however the first argument, LINE, was undefined.
277              
278             =item Apache::LogRegex->names() takes no argument
279              
280             When the method is called it requires no arguments. This message is
281             given if some arguments were supplied.
282              
283             =item Apache::LogRegex->regex() takes no argument
284              
285             When the method is called it requires no arguments. This message is
286             given if some arguments were supplied.
287              
288             =back
289              
290             =head1 BUGS
291              
292             None so far
293              
294             =head1 FILES
295              
296             None
297              
298             =head1 SEE ALSO
299              
300             mod_log_config for a description of the Apache format commands
301              
302             =head1 THANKS
303              
304             Peter Hickman wrote the original module and maintained it for
305             several years. He kindly passed maintainership on just prior to
306             the 1.51 release. Most of the features of this module are the
307             fruits of his work. If you find any bugs they are my doing.
308              
309             =head1 AUTHOR
310              
311             Original code by Peter Hickman
312              
313             Additional code by Andrew Kirkpatrick
314              
315             =head1 LICENSE AND COPYRIGHT
316              
317             Original code copyright (c) 2004-2006 Peter Hickman. All rights reserved.
318              
319             Additional code copyright (c) 2013 Andrew Kirkpatrick. All rights reserved.
320              
321             This module is free software. It may be used, redistributed and/or
322             modified under the same terms as Perl itself.
323              
324             =cut