line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Apache::LogRegex; |
2
|
|
|
|
|
|
|
$Apache::LogRegex::VERSION = '1.71'; |
3
|
1
|
|
|
1
|
|
842
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
4
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
768
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
sub new { |
7
|
5
|
|
|
5
|
1
|
3062
|
my ($class, $format) = @_; |
8
|
|
|
|
|
|
|
|
9
|
5
|
100
|
|
|
|
29
|
die __PACKAGE__ . '->new() takes 1 argument' unless @_ == 2; |
10
|
3
|
100
|
|
|
|
21
|
die __PACKAGE__ . '->new() argument 1 (FORMAT) is false' unless $format; |
11
|
|
|
|
|
|
|
|
12
|
1
|
|
|
|
|
3
|
my $self = bless {}, $class; |
13
|
|
|
|
|
|
|
|
14
|
1
|
|
|
|
|
7
|
$self->{_format} = $format; |
15
|
|
|
|
|
|
|
|
16
|
1
|
|
|
|
|
3
|
$self->{_regex} = ''; |
17
|
1
|
|
|
|
|
1
|
$self->{_regex_fields} = undef; |
18
|
|
|
|
|
|
|
|
19
|
1
|
|
|
|
|
4
|
$self->_parse_format(); |
20
|
|
|
|
|
|
|
|
21
|
1
|
|
|
|
|
3
|
return $self; |
22
|
|
|
|
|
|
|
} |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
sub _parse_format { |
25
|
1
|
|
|
1
|
|
2
|
my ($self) = @_; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub quoted_p { |
28
|
11
|
|
|
11
|
0
|
32
|
$_[0] =~ m/^\\\"/; |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
1
|
|
|
|
|
3
|
chomp $self->{_format}; |
32
|
1
|
|
|
|
|
5
|
$self->{_format} =~ s#[ \t]+# #; |
33
|
1
|
|
|
|
|
2
|
$self->{_format} =~ s#^ ##; |
34
|
1
|
|
|
|
|
2
|
$self->{_format} =~ s# $##; |
35
|
|
|
|
|
|
|
|
36
|
1
|
|
|
|
|
10
|
my @format_elements = split /\s+/, $self->{_format}; |
37
|
1
|
|
|
|
|
1
|
my $regex_string = ''; |
38
|
|
|
|
|
|
|
|
39
|
1
|
|
|
|
|
5
|
for (my $i = 0; $i < @format_elements; $i++) { |
40
|
9
|
|
|
|
|
13
|
my $element = $format_elements[$i]; |
41
|
9
|
|
|
|
|
17
|
my $quoted = quoted_p($element); |
42
|
|
|
|
|
|
|
|
43
|
9
|
100
|
|
|
|
22
|
if ($quoted) { |
44
|
3
|
|
|
|
|
8
|
$element =~ s/^\\\"//; |
45
|
3
|
|
|
|
|
9
|
$element =~ s/\\\"$//; |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
|
48
|
9
|
|
|
|
|
10
|
push @{ $self->{_regex_fields} }, $self->rename_this_name($element); |
|
9
|
|
|
|
|
23
|
|
49
|
|
|
|
|
|
|
|
50
|
9
|
|
|
|
|
12
|
my $group = '(\S*)'; |
51
|
|
|
|
|
|
|
|
52
|
9
|
100
|
|
|
|
29
|
if ($quoted) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
53
|
3
|
50
|
100
|
|
|
21
|
if ($element eq '%r' or $element =~ m/{Referer}/ or $element =~ m/{User-Agent}/) { |
|
|
|
66
|
|
|
|
|
54
|
3
|
|
|
|
|
10
|
$group = qr/"([^"\\]*(?:\\.[^"\\]*)*)"/; |
55
|
|
|
|
|
|
|
} |
56
|
|
|
|
|
|
|
else { |
57
|
0
|
|
|
|
|
0
|
$group = '\"([^\"]*)\"'; |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
elsif ($element =~ m/^%.*t$/) { |
61
|
1
|
|
|
|
|
2
|
$group = '(\[[^\]]+\])'; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
elsif ($element eq '%U') { |
64
|
0
|
|
|
|
|
0
|
$group = '(.+?)'; |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
|
67
|
9
|
|
|
|
|
13
|
$regex_string .= $group; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# expect elements separated by whitespace |
70
|
9
|
100
|
|
|
|
30
|
if ($i < $#format_elements) { |
71
|
8
|
|
|
|
|
16
|
my $next_element = $format_elements[$i + 1]; |
72
|
8
|
100
|
100
|
|
|
30
|
if ($quoted && quoted_p($next_element)) { |
73
|
|
|
|
|
|
|
# tolerate multiple whitespaces iff both elements are quoted |
74
|
1
|
|
|
|
|
4
|
$regex_string .= '\s+'; |
75
|
|
|
|
|
|
|
} else { |
76
|
7
|
|
|
|
|
23
|
$regex_string .= '\s'; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
1
|
|
|
|
|
66
|
$self->{_regex} = qr/^$regex_string\s*$/; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub parse { |
85
|
10
|
|
|
10
|
1
|
6121
|
my ($self, $line) = @_; |
86
|
|
|
|
|
|
|
|
87
|
10
|
100
|
|
|
|
47
|
die __PACKAGE__ . '->parse() takes 1 argument' unless @_ == 2; |
88
|
8
|
100
|
|
|
|
25
|
die __PACKAGE__ . '->parse() argument 1 (LINE) is undefined' unless defined $line; |
89
|
|
|
|
|
|
|
|
90
|
7
|
100
|
|
|
|
95
|
if (my @temp = $line =~ $self->{_regex}) { |
91
|
5
|
|
|
|
|
6
|
my %data; |
92
|
5
|
|
|
|
|
9
|
@data{ @{ $self->{_regex_fields} } } = @temp; |
|
5
|
|
|
|
|
34
|
|
93
|
5
|
100
|
|
|
|
66
|
return wantarray ? %data : \%data; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
2
|
|
|
|
|
6
|
return; |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub generate_parser { |
100
|
2
|
|
|
2
|
1
|
961
|
my ($self, %args) = @_; |
101
|
|
|
|
|
|
|
|
102
|
2
|
|
|
|
|
4
|
my $regex = $self->{_regex}; |
103
|
2
|
|
|
|
|
3
|
my @fields = @{ $self->{_regex_fields} }; |
|
2
|
|
|
|
|
9
|
|
104
|
|
|
|
|
|
|
|
105
|
1
|
|
|
1
|
|
14
|
no warnings 'uninitialized'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
343
|
|
106
|
|
|
|
|
|
|
|
107
|
2
|
100
|
|
|
|
23
|
if ($args{reuse_record}) { |
108
|
1
|
|
|
|
|
3
|
my $record = {}; |
109
|
|
|
|
|
|
|
return sub { |
110
|
7
|
100
|
|
7
|
|
3463
|
if (@$record{@fields} = $_[0] =~ $regex) { |
111
|
3
|
|
|
|
|
8
|
return $record; |
112
|
|
|
|
|
|
|
} else { |
113
|
4
|
|
|
|
|
12
|
return; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
} |
116
|
1
|
|
|
|
|
7
|
} else { |
117
|
|
|
|
|
|
|
return sub { |
118
|
2
|
|
|
2
|
|
1275
|
my $record = {}; |
119
|
2
|
50
|
|
|
|
49
|
if (@$record{@fields} = $_[0] =~ $regex) { |
120
|
2
|
|
|
|
|
7
|
return $record; |
121
|
|
|
|
|
|
|
} else { |
122
|
0
|
|
|
|
|
0
|
return; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
} |
125
|
1
|
|
|
|
|
7
|
} |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub names { |
129
|
2
|
|
|
2
|
1
|
1991
|
my ($self) = @_; |
130
|
|
|
|
|
|
|
|
131
|
2
|
100
|
|
|
|
14
|
die __PACKAGE__ . '->names() takes no argument' unless @_ == 1; |
132
|
|
|
|
|
|
|
|
133
|
1
|
|
|
|
|
2
|
return @{ $self->{_regex_fields} }; |
|
1
|
|
|
|
|
6
|
|
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub regex { |
137
|
2
|
|
|
2
|
1
|
968
|
my ($self) = @_; |
138
|
|
|
|
|
|
|
|
139
|
2
|
100
|
|
|
|
14
|
die __PACKAGE__ . '->regex() takes no argument' unless @_ == 1; |
140
|
|
|
|
|
|
|
|
141
|
1
|
|
|
|
|
3
|
return $self->{_regex}; |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
sub rename_this_name { |
145
|
9
|
|
|
9
|
1
|
14
|
my ($self, $name) = @_; |
146
|
|
|
|
|
|
|
|
147
|
9
|
|
|
|
|
20
|
return $name; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
1; |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 NAME |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Apache::LogRegex - Parse a line from an Apache logfile into a hash |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head1 VERSION |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
version 1.71 |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head1 SYNOPSIS |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
use Apache::LogRegex; |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
my $lr; |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
eval { $lr = Apache::LogRegex->new($log_format) }; |
167
|
|
|
|
|
|
|
die "Unable to parse log format: $@" if ($@); |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
my %data; |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
while ( my $line_from_logfile = <> ) { |
172
|
|
|
|
|
|
|
eval { %data = $lr->parse($line_from_logfile); }; |
173
|
|
|
|
|
|
|
if (%data) { |
174
|
|
|
|
|
|
|
# We have data to process |
175
|
|
|
|
|
|
|
} else { |
176
|
|
|
|
|
|
|
# We could not parse this line |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# or generate a closure for better performance |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
my $parser = $lr->generate_parser; |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
while ( my $line_from_logfile = <> ) { |
185
|
|
|
|
|
|
|
my $data = $parser->($line_from_logfile) or last; |
186
|
|
|
|
|
|
|
# We have data to process |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=head1 DESCRIPTION |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=head2 Overview |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
A simple class to parse Apache access log files. It will construct a |
194
|
|
|
|
|
|
|
regex that will parse the given log file format and can then parse |
195
|
|
|
|
|
|
|
lines from the log file line by line returning a hash of each line. |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
The field names of the hash are derived from the log file format. Thus if |
198
|
|
|
|
|
|
|
the format is '%a %t \"%r\" %s %b %T \"%{Referer}i\" ...' then the keys of |
199
|
|
|
|
|
|
|
the hash will be %a, %t, %r, %s, %b, %T and %{Referer}i. |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
Should these key names be unusable, as I guess they probably are, then subclass |
202
|
|
|
|
|
|
|
and provide an override rename_this_name() method that can rename the keys |
203
|
|
|
|
|
|
|
before they are added in the array of field names. |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
This module supports variable spacing between elements that are |
206
|
|
|
|
|
|
|
surrounded by quotes, so if you have more than one space between those |
207
|
|
|
|
|
|
|
elements in your format or in your log file, that should be OK. |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head2 Constructor |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=over 4 |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=item Apache::LogRegex->new( FORMAT ) |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Returns an Apache::LogRegex object that can parse a line from an Apache |
218
|
|
|
|
|
|
|
logfile that was written to with the FORMAT string. The FORMAT |
219
|
|
|
|
|
|
|
string is the CustomLog string from the httpd.conf file. |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=back |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=head2 Class and object methods |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=over 4 |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=item parse( LINE ) |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
Given a LINE from an Apache logfile it will parse the line and return |
230
|
|
|
|
|
|
|
all the elements of the line indexed by their corresponding format |
231
|
|
|
|
|
|
|
string. In scalar context this takes the form of a hash reference, in |
232
|
|
|
|
|
|
|
list context a flat paired list. In either context, if the line cannot |
233
|
|
|
|
|
|
|
be parsed a false value will be returned. |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=item generate_parser( LIST ) |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
Generate and return a closure that, when called with a line, will |
238
|
|
|
|
|
|
|
return a hash reference containing the parsed fields, or undef if the |
239
|
|
|
|
|
|
|
parse failed. If LIST is supplied, it is interpreted as a flattened |
240
|
|
|
|
|
|
|
hash of arguments. One argument is recognised; if C<reuse_record> is a |
241
|
|
|
|
|
|
|
true value, then the closure will reuse the same hash reference each |
242
|
|
|
|
|
|
|
time it is called. The default is to allocate a new hash for each |
243
|
|
|
|
|
|
|
result. |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
Calling this closure is significantly faster than the C<parse> method. |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=item names() |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Returns a list of field names that were extracted from the data. Such as |
250
|
|
|
|
|
|
|
'%a', '%t' and '%r' from the above example. |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=item regex() |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Returns a copy of the regex that will be used to parse the log file. |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=item rename_this_name( NAME ) |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
Use this method to rename the keys that will be used in the returned hash. |
259
|
|
|
|
|
|
|
The initial NAME is passed in and the method should return the new name. |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=back |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=head1 CONFIGURATION AND ENVIRONMENT |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
Perl 5 |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head1 DIAGNOSTICS |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
The various custom time formats could be problematic but providing that |
270
|
|
|
|
|
|
|
they are encased in '[' and ']' all should be fine. |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=over 4 |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=item Apache::LogRegex->new() takes 1 argument |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
When the constructor is called it requires one argument. This message is |
277
|
|
|
|
|
|
|
given if more or fewer arguments were supplied. |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=item Apache::LogRegex->new() argument 1 (FORMAT) is false |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
The correct number of arguments were supplied with the constructor call, |
282
|
|
|
|
|
|
|
however the first argument, FORMAT, was a false value (undefined, empty or 0). |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=item Apache::LogRegex->parse() takes 1 argument |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
When the method is called it requires one argument. This message is |
287
|
|
|
|
|
|
|
given if more or fewer arguments were supplied. |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=item Apache::LogRegex->parse() argument 1 (LINE) is undefined |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
The correct number of arguments were supplied with the method call, |
292
|
|
|
|
|
|
|
however the first argument, LINE, was undefined. |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=item Apache::LogRegex->names() takes no argument |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
When the method is called it requires no arguments. This message is |
297
|
|
|
|
|
|
|
given if some arguments were supplied. |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=item Apache::LogRegex->regex() takes no argument |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
When the method is called it requires no arguments. This message is |
302
|
|
|
|
|
|
|
given if some arguments were supplied. |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=back |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=head1 BUGS |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
None so far |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=head1 FILES |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
None |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=head1 SEE ALSO |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
mod_log_config for a description of the Apache format commands |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head1 THANKS |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
Peter Hickman wrote the original module and maintained it for |
321
|
|
|
|
|
|
|
several years. He kindly passed maintainership on just prior to |
322
|
|
|
|
|
|
|
the 1.51 release. Most of the features of this module are the |
323
|
|
|
|
|
|
|
fruits of his work. If you find any bugs they are my doing. |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=head1 AUTHOR |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
Original code by Peter Hickman |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
Additional code by Andrew Kirkpatrick |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
Original code copyright (c) 2004-2006 Peter Hickman. All rights reserved. |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
Additional code copyright (c) 2013 Andrew Kirkpatrick. All rights reserved. |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
This module is free software. It may be used, redistributed and/or |
338
|
|
|
|
|
|
|
modified under the same terms as Perl itself. |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=cut |