line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Apache::LogRegex; |
2
|
|
|
|
|
|
|
$Apache::LogRegex::VERSION = '1.70'; |
3
|
1
|
|
|
1
|
|
577
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
33
|
|
4
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
470
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
sub new { |
7
|
5
|
|
|
5
|
1
|
1904
|
my ($class, $format) = @_; |
8
|
|
|
|
|
|
|
|
9
|
5
|
100
|
|
|
|
35
|
die __PACKAGE__ . '->new() takes 1 argument' unless @_ == 2; |
10
|
3
|
100
|
|
|
|
17
|
die __PACKAGE__ . '->new() argument 1 (FORMAT) is false' unless $format; |
11
|
|
|
|
|
|
|
|
12
|
1
|
|
|
|
|
13
|
my $self = bless {}, $class; |
13
|
|
|
|
|
|
|
|
14
|
1
|
|
|
|
|
5
|
$self->{_format} = $format; |
15
|
|
|
|
|
|
|
|
16
|
1
|
|
|
|
|
2
|
$self->{_regex} = ''; |
17
|
1
|
|
|
|
|
2
|
$self->{_regex_fields} = undef; |
18
|
|
|
|
|
|
|
|
19
|
1
|
|
|
|
|
2
|
$self->_parse_format(); |
20
|
|
|
|
|
|
|
|
21
|
1
|
|
|
|
|
3
|
return $self; |
22
|
|
|
|
|
|
|
} |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
sub _parse_format { |
25
|
1
|
|
|
1
|
|
1
|
my ($self) = @_; |
26
|
|
|
|
|
|
|
|
27
|
1
|
|
|
|
|
2
|
chomp $self->{_format}; |
28
|
1
|
|
|
|
|
5
|
$self->{_format} =~ s#[ \t]+# #; |
29
|
1
|
|
|
|
|
2
|
$self->{_format} =~ s#^ ##; |
30
|
1
|
|
|
|
|
1
|
$self->{_format} =~ s# $##; |
31
|
|
|
|
|
|
|
|
32
|
1
|
|
|
|
|
1
|
my @regex_elements; |
33
|
|
|
|
|
|
|
|
34
|
1
|
|
|
|
|
3
|
for my $element (split /\s+/, $self->{_format}) { |
35
|
9
|
100
|
|
|
|
17
|
my $quotes = $element =~ m/^\\\"/ ? 1 : 0; |
36
|
|
|
|
|
|
|
|
37
|
9
|
100
|
|
|
|
14
|
if ($quotes) { |
38
|
3
|
|
|
|
|
5
|
$element =~ s/^\\\"//; |
39
|
3
|
|
|
|
|
5
|
$element =~ s/\\\"$//; |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
9
|
|
|
|
|
6
|
push @{ $self->{_regex_fields} }, $self->rename_this_name($element); |
|
9
|
|
|
|
|
14
|
|
43
|
|
|
|
|
|
|
|
44
|
9
|
|
|
|
|
7
|
my $x = '(\S*)'; |
45
|
|
|
|
|
|
|
|
46
|
9
|
100
|
|
|
|
19
|
if ($quotes) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
47
|
3
|
50
|
100
|
|
|
13
|
if ($element eq '%r' or $element =~ m/{Referer}/ or $element =~ m/{User-Agent}/) { |
|
|
|
66
|
|
|
|
|
48
|
3
|
|
|
|
|
7
|
$x = qr/"([^"\\]*(?:\\.[^"\\]*)*)"/; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
else { |
51
|
0
|
|
|
|
|
0
|
$x = '\"([^\"]*)\"'; |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
elsif ($element =~ m/^%.*t$/) { |
55
|
1
|
|
|
|
|
2
|
$x = '(\[[^\]]+\])'; |
56
|
|
|
|
|
|
|
} |
57
|
|
|
|
|
|
|
elsif ($element eq '%U') { |
58
|
0
|
|
|
|
|
0
|
$x = '(.+?)'; |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
9
|
|
|
|
|
11
|
push @regex_elements, $x; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
|
64
|
1
|
|
|
|
|
4
|
my $regex_string = join '\s+', @regex_elements; |
65
|
1
|
|
|
|
|
50
|
$self->{_regex} = qr/^$regex_string\s*$/; |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub parse { |
69
|
10
|
|
|
10
|
1
|
15440
|
my ($self, $line) = @_; |
70
|
|
|
|
|
|
|
|
71
|
10
|
100
|
|
|
|
34
|
die __PACKAGE__ . '->parse() takes 1 argument' unless @_ == 2; |
72
|
8
|
100
|
|
|
|
19
|
die __PACKAGE__ . '->parse() argument 1 (LINE) is undefined' unless defined $line; |
73
|
|
|
|
|
|
|
|
74
|
7
|
100
|
|
|
|
85
|
if (my @temp = $line =~ $self->{_regex}) { |
75
|
5
|
|
|
|
|
4
|
my %data; |
76
|
5
|
|
|
|
|
6
|
@data{ @{ $self->{_regex_fields} } } = @temp; |
|
5
|
|
|
|
|
30
|
|
77
|
5
|
100
|
|
|
|
47
|
return wantarray ? %data : \%data; |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
|
80
|
2
|
|
|
|
|
6
|
return; |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
sub generate_parser { |
84
|
2
|
|
|
2
|
1
|
484
|
my ($self, %args) = @_; |
85
|
|
|
|
|
|
|
|
86
|
2
|
|
|
|
|
3
|
my $regex = $self->{_regex}; |
87
|
2
|
|
|
|
|
5
|
my @fields = @{ $self->{_regex_fields} }; |
|
2
|
|
|
|
|
6
|
|
88
|
|
|
|
|
|
|
|
89
|
1
|
|
|
1
|
|
15
|
no warnings 'uninitialized'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
238
|
|
90
|
|
|
|
|
|
|
|
91
|
2
|
100
|
|
|
|
6
|
if ($args{reuse_record}) { |
92
|
1
|
|
|
|
|
2
|
my $record = {}; |
93
|
|
|
|
|
|
|
return sub { |
94
|
5
|
100
|
|
5
|
|
1107
|
if (@$record{@fields} = $_[0] =~ $regex) { |
95
|
2
|
|
|
|
|
5
|
return $record; |
96
|
|
|
|
|
|
|
} else { |
97
|
3
|
|
|
|
|
5
|
return; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
} |
100
|
1
|
|
|
|
|
5
|
} else { |
101
|
|
|
|
|
|
|
return sub { |
102
|
2
|
|
|
2
|
|
2919
|
my $record = {}; |
103
|
2
|
50
|
|
|
|
38
|
if (@$record{@fields} = $_[0] =~ $regex) { |
104
|
2
|
|
|
|
|
4
|
return $record; |
105
|
|
|
|
|
|
|
} else { |
106
|
0
|
|
|
|
|
0
|
return; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
} |
109
|
1
|
|
|
|
|
6
|
} |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub names { |
113
|
2
|
|
|
2
|
1
|
1079
|
my ($self) = @_; |
114
|
|
|
|
|
|
|
|
115
|
2
|
100
|
|
|
|
11
|
die __PACKAGE__ . '->names() takes no argument' unless @_ == 1; |
116
|
|
|
|
|
|
|
|
117
|
1
|
|
|
|
|
1
|
return @{ $self->{_regex_fields} }; |
|
1
|
|
|
|
|
11
|
|
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub regex { |
121
|
2
|
|
|
2
|
1
|
585
|
my ($self) = @_; |
122
|
|
|
|
|
|
|
|
123
|
2
|
100
|
|
|
|
13
|
die __PACKAGE__ . '->regex() takes no argument' unless @_ == 1; |
124
|
|
|
|
|
|
|
|
125
|
1
|
|
|
|
|
2
|
return $self->{_regex}; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub rename_this_name { |
129
|
9
|
|
|
9
|
1
|
8
|
my ($self, $name) = @_; |
130
|
|
|
|
|
|
|
|
131
|
9
|
|
|
|
|
10
|
return $name; |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
1; |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head1 NAME |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Apache::LogRegex - Parse a line from an Apache logfile into a hash |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head1 VERSION |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
version 1.70 |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=head1 SYNOPSIS |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
use Apache::LogRegex; |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
my $lr; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
eval { $lr = Apache::LogRegex->new($log_format) }; |
151
|
|
|
|
|
|
|
die "Unable to parse log line: $@" if ($@); |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
my %data; |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
while ( my $line_from_logfile = <> ) { |
156
|
|
|
|
|
|
|
eval { %data = $lr->parse($line_from_logfile); }; |
157
|
|
|
|
|
|
|
if (%data) { |
158
|
|
|
|
|
|
|
# We have data to process |
159
|
|
|
|
|
|
|
} else { |
160
|
|
|
|
|
|
|
# We could not parse this line |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# or generate a closure for better performance |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
my $parser = $lr->generate_parser; |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
while ( my $line_from_logfile = <> ) { |
169
|
|
|
|
|
|
|
my $data = $parser->($line_from_logfile) or last; |
170
|
|
|
|
|
|
|
# We have data to process |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=head1 DESCRIPTION |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=head2 Overview |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
Designed as a simple class to parse Apache log files. It will construct |
178
|
|
|
|
|
|
|
a regex that will parse the given log file format and can then parse |
179
|
|
|
|
|
|
|
lines from the log file line by line returning a hash of each line. |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
The field names of the hash are derived from the log file format. Thus if |
182
|
|
|
|
|
|
|
the format is '%a %t \"%r\" %s %b %T \"%{Referer}i\" ...' then the keys of |
183
|
|
|
|
|
|
|
the hash will be %a, %t, %r, %s, %b, %T and %{Referer}i. |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Should these key names be unusable, as I guess they probably are, then subclass |
186
|
|
|
|
|
|
|
and provide an override rename_this_name() method that can rename the keys |
187
|
|
|
|
|
|
|
before they are added in the array of field names. |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
This module supports variable spacing between elements, so if you have |
190
|
|
|
|
|
|
|
more than one space between elements in your format or in your log |
191
|
|
|
|
|
|
|
file, that should be OK. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head2 Constructor |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=over 4 |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=item Apache::LogRegex->new( FORMAT ) |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
Returns a Apache::LogRegex object that can parse a line from an Apache |
202
|
|
|
|
|
|
|
logfile that was written to with the FORMAT string. The FORMAT |
203
|
|
|
|
|
|
|
string is the CustomLog string from the httpd.conf file. |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=back |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head2 Class and object methods |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=over 4 |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=item parse( LINE ) |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
Given a LINE from an Apache logfile it will parse the line and return |
214
|
|
|
|
|
|
|
all the elements of the line indexed by their corresponding format |
215
|
|
|
|
|
|
|
string. In scalar context this takes the form of a hash reference, in |
216
|
|
|
|
|
|
|
list context a flat paired list. In either context, if the line cannot |
217
|
|
|
|
|
|
|
be parsed a false value will be returned. |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=item generate_parser( LIST ) |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
Generate and return a closure that, when called with a line, will |
222
|
|
|
|
|
|
|
return a hash reference containing the parsed fields, or undef if the |
223
|
|
|
|
|
|
|
parse failed. If LIST is supplied, it is interpreted as a flattened |
224
|
|
|
|
|
|
|
hash of arguments. One argument is recognised; if C is a |
225
|
|
|
|
|
|
|
true value, then the closure will reuse the same hash reference each |
226
|
|
|
|
|
|
|
time it is called. The default is to allocate a new hash for each |
227
|
|
|
|
|
|
|
result. |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
Calling this closure is significantly faster than the C method. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=item names() |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
Returns a list of field names that were extracted from the data. Such as |
234
|
|
|
|
|
|
|
'%a', '%t' and '%r' from the above example. |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=item regex() |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
Returns a copy of the regex that will be used to parse the log file. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=item rename_this_name( NAME ) |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
Use this method to rename the keys that will be used in the returned hash. |
243
|
|
|
|
|
|
|
The initial NAME is passed in and the method should return the new name. |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=back |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=head1 CONFIGURATION AND ENVIRONMENT |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Perl 5 |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=head1 DIAGNOSTICS |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
The various custom time formats could be problematic but providing that |
254
|
|
|
|
|
|
|
they are encased in '[' and ']' all should be fine. |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=over 4 |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=item Apache::LogRegex->new() takes 1 argument |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
When the constructor is called it requires one argument. This message is |
261
|
|
|
|
|
|
|
given if more or less arguments were supplied. |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=item Apache::LogRegex->new() argument 1 (FORMAT) is undefined |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
The correct number of arguments were supplied with the constructor call, |
266
|
|
|
|
|
|
|
however the first argument, FORMAT, was undefined. |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=item Apache::LogRegex->parse() takes 1 argument |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
When the method is called it requires one argument. This message is |
271
|
|
|
|
|
|
|
given if more or less arguments were supplied. |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=item Apache::LogRegex->parse() argument 1 (LINE) is undefined |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
The correct number of arguments were supplied with the method call, |
276
|
|
|
|
|
|
|
however the first argument, LINE, was undefined. |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=item Apache::LogRegex->names() takes no argument |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
When the method is called it requires no arguments. This message is |
281
|
|
|
|
|
|
|
given if some arguments were supplied. |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=item Apache::LogRegex->regex() takes no argument |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
When the method is called it requires no arguments. This message is |
286
|
|
|
|
|
|
|
given if some arguments were supplied. |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=back |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head1 BUGS |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
None so far |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=head1 FILES |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
None |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=head1 SEE ALSO |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
mod_log_config for a description of the Apache format commands |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=head1 THANKS |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
Peter Hickman wrote the original module and maintained it for |
305
|
|
|
|
|
|
|
several years. He kindly passed maintainership on just prior to |
306
|
|
|
|
|
|
|
the 1.51 release. Most of the features of this module are the |
307
|
|
|
|
|
|
|
fruits of his work. If you find any bugs they are my doing. |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=head1 AUTHOR |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
Original code by Peter Hickman |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
Additional code by Andrew Kirkpatrick |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
Original code copyright (c) 2004-2006 Peter Hickman. All rights reserved. |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
Additional code copyright (c) 2013 Andrew Kirkpatrick. All rights reserved. |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
This module is free software. It may be used, redistributed and/or |
322
|
|
|
|
|
|
|
modified under the same terms as Perl itself. |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=cut |