File Coverage

blib/lib/Apache/LogRegex.pm
Criterion Covered Total %
statement 78 81 96.3
branch 33 36 91.6
condition 8 9 88.8
subroutine 13 13 100.0
pod 6 7 85.7
total 138 146 94.5


line stmt bran cond sub pod time code
1             package Apache::LogRegex;
2             $Apache::LogRegex::VERSION = '1.71';
3 1     1   842 use strict;
  1         2  
  1         29  
4 1     1   6 use warnings;
  1         1  
  1         768  
5              
6             sub new {
7 5     5 1 3062 my ($class, $format) = @_;
8              
9 5 100       29 die __PACKAGE__ . '->new() takes 1 argument' unless @_ == 2;
10 3 100       21 die __PACKAGE__ . '->new() argument 1 (FORMAT) is false' unless $format;
11              
12 1         3 my $self = bless {}, $class;
13              
14 1         7 $self->{_format} = $format;
15              
16 1         3 $self->{_regex} = '';
17 1         1 $self->{_regex_fields} = undef;
18              
19 1         4 $self->_parse_format();
20              
21 1         3 return $self;
22             }
23              
24             sub _parse_format {
25 1     1   2 my ($self) = @_;
26              
27             sub quoted_p {
28 11     11 0 32 $_[0] =~ m/^\\\"/;
29             }
30              
31 1         3 chomp $self->{_format};
32 1         5 $self->{_format} =~ s#[ \t]+# #;
33 1         2 $self->{_format} =~ s#^ ##;
34 1         2 $self->{_format} =~ s# $##;
35              
36 1         10 my @format_elements = split /\s+/, $self->{_format};
37 1         1 my $regex_string = '';
38              
39 1         5 for (my $i = 0; $i < @format_elements; $i++) {
40 9         13 my $element = $format_elements[$i];
41 9         17 my $quoted = quoted_p($element);
42              
43 9 100       22 if ($quoted) {
44 3         8 $element =~ s/^\\\"//;
45 3         9 $element =~ s/\\\"$//;
46             }
47              
48 9         10 push @{ $self->{_regex_fields} }, $self->rename_this_name($element);
  9         23  
49              
50 9         12 my $group = '(\S*)';
51              
52 9 100       29 if ($quoted) {
    100          
    50          
53 3 50 100     21 if ($element eq '%r' or $element =~ m/{Referer}/ or $element =~ m/{User-Agent}/) {
      66        
54 3         10 $group = qr/"([^"\\]*(?:\\.[^"\\]*)*)"/;
55             }
56             else {
57 0         0 $group = '\"([^\"]*)\"';
58             }
59             }
60             elsif ($element =~ m/^%.*t$/) {
61 1         2 $group = '(\[[^\]]+\])';
62             }
63             elsif ($element eq '%U') {
64 0         0 $group = '(.+?)';
65             }
66              
67 9         13 $regex_string .= $group;
68              
69             # expect elements separated by whitespace
70 9 100       30 if ($i < $#format_elements) {
71 8         16 my $next_element = $format_elements[$i + 1];
72 8 100 100     30 if ($quoted && quoted_p($next_element)) {
73             # tolerate multiple whitespaces iff both elements are quoted
74 1         4 $regex_string .= '\s+';
75             } else {
76 7         23 $regex_string .= '\s';
77             }
78             }
79             }
80              
81 1         66 $self->{_regex} = qr/^$regex_string\s*$/;
82             }
83              
84             sub parse {
85 10     10 1 6121 my ($self, $line) = @_;
86              
87 10 100       47 die __PACKAGE__ . '->parse() takes 1 argument' unless @_ == 2;
88 8 100       25 die __PACKAGE__ . '->parse() argument 1 (LINE) is undefined' unless defined $line;
89              
90 7 100       95 if (my @temp = $line =~ $self->{_regex}) {
91 5         6 my %data;
92 5         9 @data{ @{ $self->{_regex_fields} } } = @temp;
  5         34  
93 5 100       66 return wantarray ? %data : \%data;
94             }
95              
96 2         6 return;
97             }
98              
99             sub generate_parser {
100 2     2 1 961 my ($self, %args) = @_;
101              
102 2         4 my $regex = $self->{_regex};
103 2         3 my @fields = @{ $self->{_regex_fields} };
  2         9  
104              
105 1     1   14 no warnings 'uninitialized';
  1         2  
  1         343  
106              
107 2 100       23 if ($args{reuse_record}) {
108 1         3 my $record = {};
109             return sub {
110 7 100   7   3463 if (@$record{@fields} = $_[0] =~ $regex) {
111 3         8 return $record;
112             } else {
113 4         12 return;
114             }
115             }
116 1         7 } else {
117             return sub {
118 2     2   1275 my $record = {};
119 2 50       49 if (@$record{@fields} = $_[0] =~ $regex) {
120 2         7 return $record;
121             } else {
122 0         0 return;
123             }
124             }
125 1         7 }
126             }
127              
128             sub names {
129 2     2 1 1991 my ($self) = @_;
130              
131 2 100       14 die __PACKAGE__ . '->names() takes no argument' unless @_ == 1;
132              
133 1         2 return @{ $self->{_regex_fields} };
  1         6  
134             }
135              
136             sub regex {
137 2     2 1 968 my ($self) = @_;
138              
139 2 100       14 die __PACKAGE__ . '->regex() takes no argument' unless @_ == 1;
140              
141 1         3 return $self->{_regex};
142             }
143              
144             sub rename_this_name {
145 9     9 1 14 my ($self, $name) = @_;
146              
147 9         20 return $name;
148             }
149              
150             1;
151              
152             =head1 NAME
153              
154             Apache::LogRegex - Parse a line from an Apache logfile into a hash
155              
156             =head1 VERSION
157              
158             version 1.71
159              
160             =head1 SYNOPSIS
161              
162             use Apache::LogRegex;
163              
164             my $lr;
165              
166             eval { $lr = Apache::LogRegex->new($log_format) };
167             die "Unable to parse log line: $@" if ($@);
168              
169             my %data;
170              
171             while ( my $line_from_logfile = <> ) {
172             eval { %data = $lr->parse($line_from_logfile); };
173             if (%data) {
174             # We have data to process
175             } else {
176             # We could not parse this line
177             }
178             }
179              
180             # or generate a closure for better performance
181              
182             my $parser = $lr->generate_parser;
183              
184             while ( my $line_from_logfile = <> ) {
185             my $data = $parser->($line_from_logfile) or last;
186             # We have data to process
187             }
188              
189             =head1 DESCRIPTION
190              
191             =head2 Overview
192              
193             A simple class to parse Apache access log files. It will construct a
194             regex that will parse the given log file format and can then parse
195             lines from the log file line by line returning a hash of each line.
196              
197             The field names of the hash are derived from the log file format. Thus if
198             the format is '%a %t \"%r\" %s %b %T \"%{Referer}i\" ...' then the keys of
199             the hash will be %a, %t, %r, %s, %b, %T and %{Referer}i.
200              
201             Should these key names be unusable, as I guess they probably are, then subclass
202             and provide an override rename_this_name() method that can rename the keys
203             before they are added in the array of field names.
204              
205             This module supports variable spacing between elements that are
206             surrounded by quotes, so if you have more than one space between those
207             elements in your format or in your log file, that should be OK.
208              
209             =head1 SUBROUTINES/METHODS
210              
211             =head2 Constructor
212              
213             =over 4
214              
215             =item Apache::LogRegex->new( FORMAT )
216              
217             Returns a Apache::LogRegex object that can parse a line from an Apache
218             logfile that was written to with the FORMAT string. The FORMAT
219             string is the CustomLog string from the httpd.conf file.
220              
221             =back
222              
223             =head2 Class and object methods
224              
225             =over 4
226              
227             =item parse( LINE )
228              
229             Given a LINE from an Apache logfile it will parse the line and return
230             all the elements of the line indexed by their corresponding format
231             string. In scalar context this takes the form of a hash reference, in
232             list context a flat paired list. In either context, if the line cannot
233             be parsed a false value will be returned.
234              
235             =item generate_parser( LIST )
236              
237             Generate and return a closure that, when called with a line, will
238             return a hash reference containing the parsed fields, or undef if the
239             parse failed. If LIST is supplied, it is interpreted as a flattened
240             hash of arguments. One argument is recognised; if C is a
241             true value, then the closure will reuse the same hash reference each
242             time it is called. The default is to allocate a new hash for each
243             result.
244              
245             Calling this closure is significantly faster than the C method.
246              
247             =item names()
248              
249             Returns a list of field names that were extracted from the data. Such as
250             '%a', '%t' and '%r' from the above example.
251              
252             =item regex()
253              
254             Returns a copy of the regex that will be used to parse the log file.
255              
256             =item rename_this_name( NAME )
257              
258             Use this method to rename the keys that will be used in the returned hash.
259             The initial NAME is passed in and the method should return the new name.
260              
261             =back
262              
263             =head1 CONFIGURATION AND ENVIRONMENT
264              
265             Perl 5
266              
267             =head1 DIAGNOSTICS
268              
269             The various custom time formats could be problematic but providing that
270             they are encased in '[' and ']' all should be fine.
271              
272             =over 4
273              
274             =item Apache::LogRegex->new() takes 1 argument
275              
276             When the constructor is called it requires one argument. This message is
277             given if more or less arguments were supplied.
278              
279             =item Apache::LogRegex->new() argument 1 (FORMAT) is undefined
280              
281             The correct number of arguments were supplied with the constructor call,
282             however the first argument, FORMAT, was undefined.
283              
284             =item Apache::LogRegex->parse() takes 1 argument
285              
286             When the method is called it requires one argument. This message is
287             given if more or less arguments were supplied.
288              
289             =item Apache::LogRegex->parse() argument 1 (LINE) is undefined
290              
291             The correct number of arguments were supplied with the method call,
292             however the first argument, LINE, was undefined.
293              
294             =item Apache::LogRegex->names() takes no argument
295              
296             When the method is called it requires no arguments. This message is
297             given if some arguments were supplied.
298              
299             =item Apache::LogRegex->regex() takes no argument
300              
301             When the method is called it requires no arguments. This message is
302             given if some arguments were supplied.
303              
304             =back
305              
306             =head1 BUGS
307              
308             None so far
309              
310             =head1 FILES
311              
312             None
313              
314             =head1 SEE ALSO
315              
316             mod_log_config for a description of the Apache format commands
317              
318             =head1 THANKS
319              
320             Peter Hickman wrote the original module and maintained it for
321             several years. He kindly passed maintainership on just prior to
322             the 1.51 release. Most of the features of this module are the
323             fruits of his work. If you find any bugs they are my doing.
324              
325             =head1 AUTHOR
326              
327             Original code by Peter Hickman
328              
329             Additional code by Andrew Kirkpatrick
330              
331             =head1 LICENSE AND COPYRIGHT
332              
333             Original code copyright (c) 2004-2006 Peter Hickman. All rights reserved.
334              
335             Additional code copyright (c) 2013 Andrew Kirkpatrick. All rights reserved.
336              
337             This module is free software. It may be used, redistributed and/or
338             modified under the same terms as Perl itself.
339              
340             =cut