File Coverage

blib/lib/Apache/LogRegex.pm

Criterion	Covered	Total	%
statement	78	81	96.3
branch	33	36	91.6
condition	8	9	88.8
subroutine	13	13	100.0
pod	6	7	85.7
total	138	146	94.5

line	stmt	bran	cond	sub	pod	time	code
1							package Apache::LogRegex;
2							$Apache::LogRegex::VERSION = '1.71';
3	1			1		842	use strict;
	1					2
	1					29
4	1			1		6	use warnings;
	1					1
	1					768
5
6							sub new {
7	5			5	1	3062	my ($class, $format) = @_;
8
9	5	100				29	die __PACKAGE__ . '->new() takes 1 argument' unless @_ == 2;
10	3	100				21	die __PACKAGE__ . '->new() argument 1 (FORMAT) is false' unless $format;
11
12	1					3	my $self = bless {}, $class;
13
14	1					7	$self->{_format} = $format;
15
16	1					3	$self->{_regex} = '';
17	1					1	$self->{_regex_fields} = undef;
18
19	1					4	$self->_parse_format();
20
21	1					3	return $self;
22							}
23
24							sub _parse_format {
25	1			1		2	my ($self) = @_;
26
27							sub quoted_p {
28	11			11	0	32	$_[0] =~ m/^\\\"/;
29							}
30
31	1					3	chomp $self->{_format};
32	1					5	$self->{_format} =~ s#[ \t]+# #;
33	1					2	$self->{_format} =~ s#^ ##;
34	1					2	$self->{_format} =~ s# $##;
35
36	1					10	my @format_elements = split /\s+/, $self->{_format};
37	1					1	my $regex_string = '';
38
39	1					5	for (my $i = 0; $i < @format_elements; $i++) {
40	9					13	my $element = $format_elements[$i];
41	9					17	my $quoted = quoted_p($element);
42
43	9	100				22	if ($quoted) {
44	3					8	$element =~ s/^\\\"//;
45	3					9	$element =~ s/\\\"$//;
46							}
47
48	9					10	push @{ $self->{_regex_fields} }, $self->rename_this_name($element);
	9					23
49
50	9					12	my $group = '(\S*)';
51
52	9	100				29	if ($quoted) {
		100
		50
53	3	50	100			21	if ($element eq '%r' or $element =~ m/{Referer}/ or $element =~ m/{User-Agent}/) {
			66
54	3					10	$group = qr/"([^"\\](?:\\.[^"\\])*)"/;
55							}
56							else {
57	0					0	$group = '\"([^\"]*)\"';
58							}
59							}
60							elsif ($element =~ m/^%.*t$/) {
61	1					2	$group = '(\[[^\]]+\])';
62							}
63							elsif ($element eq '%U') {
64	0					0	$group = '(.+?)';
65							}
66
67	9					13	$regex_string .= $group;
68
69							# expect elements separated by whitespace
70	9	100				30	if ($i < $#format_elements) {
71	8					16	my $next_element = $format_elements[$i + 1];
72	8	100	100			30	if ($quoted && quoted_p($next_element)) {
73							# tolerate multiple whitespaces iff both elements are quoted
74	1					4	$regex_string .= '\s+';
75							} else {
76	7					23	$regex_string .= '\s';
77							}
78							}
79							}
80
81	1					66	$self->{_regex} = qr/^$regex_string\s*$/;
82							}
83
84							sub parse {
85	10			10	1	6121	my ($self, $line) = @_;
86
87	10	100				47	die __PACKAGE__ . '->parse() takes 1 argument' unless @_ == 2;
88	8	100				25	die __PACKAGE__ . '->parse() argument 1 (LINE) is undefined' unless defined $line;
89
90	7	100				95	if (my @temp = $line =~ $self->{_regex}) {
91	5					6	my %data;
92	5					9	@data{ @{ $self->{_regex_fields} } } = @temp;
	5					34
93	5	100				66	return wantarray ? %data : \%data;
94							}
95
96	2					6	return;
97							}
98
99							sub generate_parser {
100	2			2	1	961	my ($self, %args) = @_;
101
102	2					4	my $regex = $self->{_regex};
103	2					3	my @fields = @{ $self->{_regex_fields} };
	2					9
104
105	1			1		14	no warnings 'uninitialized';
	1					2
	1					343
106
107	2	100				23	if ($args{reuse_record}) {
108	1					3	my $record = {};
109							return sub {
110	7	100		7		3463	if (@$record{@fields} = $_[0] =~ $regex) {
111	3					8	return $record;
112							} else {
113	4					12	return;
114							}
115							}
116	1					7	} else {
117							return sub {
118	2			2		1275	my $record = {};
119	2	50				49	if (@$record{@fields} = $_[0] =~ $regex) {
120	2					7	return $record;
121							} else {
122	0					0	return;
123							}
124							}
125	1					7	}
126							}
127
128							sub names {
129	2			2	1	1991	my ($self) = @_;
130
131	2	100				14	die __PACKAGE__ . '->names() takes no argument' unless @_ == 1;
132
133	1					2	return @{ $self->{_regex_fields} };
	1					6
134							}
135
136							sub regex {
137	2			2	1	968	my ($self) = @_;
138
139	2	100				14	die __PACKAGE__ . '->regex() takes no argument' unless @_ == 1;
140
141	1					3	return $self->{_regex};
142							}
143
144							sub rename_this_name {
145	9			9	1	14	my ($self, $name) = @_;
146
147	9					20	return $name;
148							}
149
150							1;
151
152							=head1 NAME
153
154							Apache::LogRegex - Parse a line from an Apache logfile into a hash
155
156							=head1 VERSION
157
158							version 1.71
159
160							=head1 SYNOPSIS
161
162							use Apache::LogRegex;
163
164							my $lr;
165
166							eval { $lr = Apache::LogRegex->new($log_format) };
167							die "Unable to parse log line: $@" if ($@);
168
169							my %data;
170
171							while ( my $line_from_logfile = <> ) {
172							eval { %data = $lr->parse($line_from_logfile); };
173							if (%data) {
174							# We have data to process
175							} else {
176							# We could not parse this line
177							}
178							}
179
180							# or generate a closure for better performance
181
182							my $parser = $lr->generate_parser;
183
184							while ( my $line_from_logfile = <> ) {
185							my $data = $parser->($line_from_logfile) or last;
186							# We have data to process
187							}
188
189							=head1 DESCRIPTION
190
191							=head2 Overview
192
193							A simple class to parse Apache access log files. It will construct a
194							regex that will parse the given log file format and can then parse
195							lines from the log file line by line returning a hash of each line.
196
197							The field names of the hash are derived from the log file format. Thus if
198							the format is '%a %t \"%r\" %s %b %T \"%{Referer}i\" ...' then the keys of
199							the hash will be %a, %t, %r, %s, %b, %T and %{Referer}i.
200
201							Should these key names be unusable, as I guess they probably are, then subclass
202							and provide an override rename_this_name() method that can rename the keys
203							before they are added in the array of field names.
204
205							This module supports variable spacing between elements that are
206							surrounded by quotes, so if you have more than one space between those
207							elements in your format or in your log file, that should be OK.
208
209							=head1 SUBROUTINES/METHODS
210
211							=head2 Constructor
212
213							=over 4
214
215							=item Apache::LogRegex->new( FORMAT )
216
217							Returns a Apache::LogRegex object that can parse a line from an Apache
218							logfile that was written to with the FORMAT string. The FORMAT
219							string is the CustomLog string from the httpd.conf file.
220
221							=back
222
223							=head2 Class and object methods
224
225							=over 4
226
227							=item parse( LINE )
228
229							Given a LINE from an Apache logfile it will parse the line and return
230							all the elements of the line indexed by their corresponding format
231							string. In scalar context this takes the form of a hash reference, in
232							list context a flat paired list. In either context, if the line cannot
233							be parsed a false value will be returned.
234
235							=item generate_parser( LIST )
236
237							Generate and return a closure that, when called with a line, will
238							return a hash reference containing the parsed fields, or undef if the
239							parse failed. If LIST is supplied, it is interpreted as a flattened
240							hash of arguments. One argument is recognised; if C is a
241							true value, then the closure will reuse the same hash reference each
242							time it is called. The default is to allocate a new hash for each
243							result.
244
245							Calling this closure is significantly faster than the C method.
246
247							=item names()
248
249							Returns a list of field names that were extracted from the data. Such as
250							'%a', '%t' and '%r' from the above example.
251
252							=item regex()
253
254							Returns a copy of the regex that will be used to parse the log file.
255
256							=item rename_this_name( NAME )
257
258							Use this method to rename the keys that will be used in the returned hash.
259							The initial NAME is passed in and the method should return the new name.
260
261							=back
262
263							=head1 CONFIGURATION AND ENVIRONMENT
264
265							Perl 5
266
267							=head1 DIAGNOSTICS
268
269							The various custom time formats could be problematic but providing that
270							they are encased in '[' and ']' all should be fine.
271
272							=over 4
273
274							=item Apache::LogRegex->new() takes 1 argument
275
276							When the constructor is called it requires one argument. This message is
277							given if more or less arguments were supplied.
278
279							=item Apache::LogRegex->new() argument 1 (FORMAT) is undefined
280
281							The correct number of arguments were supplied with the constructor call,
282							however the first argument, FORMAT, was undefined.
283
284							=item Apache::LogRegex->parse() takes 1 argument
285
286							When the method is called it requires one argument. This message is
287							given if more or less arguments were supplied.
288
289							=item Apache::LogRegex->parse() argument 1 (LINE) is undefined
290
291							The correct number of arguments were supplied with the method call,
292							however the first argument, LINE, was undefined.
293
294							=item Apache::LogRegex->names() takes no argument
295
296							When the method is called it requires no arguments. This message is
297							given if some arguments were supplied.
298
299							=item Apache::LogRegex->regex() takes no argument
300
301							When the method is called it requires no arguments. This message is
302							given if some arguments were supplied.
303
304							=back
305
306							=head1 BUGS
307
308							None so far
309
310							=head1 FILES
311
312							None
313
314							=head1 SEE ALSO
315
316							mod_log_config for a description of the Apache format commands
317
318							=head1 THANKS
319
320							Peter Hickman wrote the original module and maintained it for
321							several years. He kindly passed maintainership on just prior to
322							the 1.51 release. Most of the features of this module are the
323							fruits of his work. If you find any bugs they are my doing.
324
325							=head1 AUTHOR
326
327							Original code by Peter Hickman
328
329							Additional code by Andrew Kirkpatrick
330
331							=head1 LICENSE AND COPYRIGHT
332
333							Original code copyright (c) 2004-2006 Peter Hickman. All rights reserved.
334
335							Additional code copyright (c) 2013 Andrew Kirkpatrick. All rights reserved.
336
337							This module is free software. It may be used, redistributed and/or
338							modified under the same terms as Perl itself.
339
340							=cut