| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package ELFF::Parser; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# ELFF-Parser is a perl module for parsing ELFF formatted log files. |
|
4
|
|
|
|
|
|
|
# |
|
5
|
|
|
|
|
|
|
# Copyright (C) 2007-2010 Mark Warren |
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# This library is free software; you can redistribute it and/or |
|
8
|
|
|
|
|
|
|
# modify it under the terms of the GNU Lesser General Public |
|
9
|
|
|
|
|
|
|
# License as published by the Free Software Foundation; either |
|
10
|
|
|
|
|
|
|
# version 2.1 of the License, or (at your option) any later version. |
|
11
|
|
|
|
|
|
|
# |
|
12
|
|
|
|
|
|
|
# This library is distributed in the hope that it will be useful, |
|
13
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
15
|
|
|
|
|
|
|
# Lesser General Public License for more details. |
|
16
|
|
|
|
|
|
|
# |
|
17
|
|
|
|
|
|
|
# You should have received a copy of the GNU Lesser General Public |
|
18
|
|
|
|
|
|
|
# License along with this library; if not, write to the Free Software |
|
19
|
|
|
|
|
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=pod |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 NAME |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
ELFF::Parser - parse ELFF formatted log files |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
use ELFF::Parser; |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
$p = new ELFF::Parser(); |
|
33
|
|
|
|
|
|
|
while() { |
|
34
|
|
|
|
|
|
|
$res = $p->parse_line($_); |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
if($res->{directive} && $res->{directive} eq 'Start-Date') { |
|
37
|
|
|
|
|
|
|
print "Log starts at $res->{value}\n"; |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
elsif($res->{href}) { |
|
40
|
|
|
|
|
|
|
print $res->{href}{'rs-bytes'}, "\n"; |
|
41
|
|
|
|
|
|
|
} |
|
42
|
|
|
|
|
|
|
elsif($res->{aref}) { |
|
43
|
|
|
|
|
|
|
print "Detected log format change, or no fields directive\n"; |
|
44
|
|
|
|
|
|
|
foreach my $field (@{$res->{aref}}) { |
|
45
|
|
|
|
|
|
|
print " found field: $field\n"; |
|
46
|
|
|
|
|
|
|
} |
|
47
|
|
|
|
|
|
|
print "\n"; |
|
48
|
|
|
|
|
|
|
} |
|
49
|
|
|
|
|
|
|
else { |
|
50
|
|
|
|
|
|
|
print STDERR "Failed to parse log line\n"; |
|
51
|
|
|
|
|
|
|
} |
|
52
|
|
|
|
|
|
|
} |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
C parses ELFF formatted logs. For a description of ELFF |
|
57
|
|
|
|
|
|
|
(Extended Log File Format), see http://www.w3.org/TR/WD-logfile.html. In |
|
58
|
|
|
|
|
|
|
brief, ELFF log files consist of directives (meta-data about the logs) |
|
59
|
|
|
|
|
|
|
and logs. C parses both, extracting log format information |
|
60
|
|
|
|
|
|
|
from the directives and using it to build hashes for each log entry. |
|
61
|
|
|
|
|
|
|
If log format information isn't available or becomes invalidated (see |
|
62
|
|
|
|
|
|
|
the L"ELFF PROBLEMS"> section below), C will return |
|
63
|
|
|
|
|
|
|
arrays for each log entry instead of hashes. |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=over 4 |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=item $ep = new ELFF::Parser() |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Creates a new C object. |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=back |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head1 METHODS |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=over 4 |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=item $res = $ep->parse_line($line) |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Parse an ELFF log line. The returned result will be a hash reference that |
|
82
|
|
|
|
|
|
|
contains different information depending on the state of the object and |
|
83
|
|
|
|
|
|
|
the type of line parsed (i.e. directive or log entry). |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
If the line is a directive, the returned hash will have the following |
|
86
|
|
|
|
|
|
|
keys: |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
$res->{directive} the name of the directive |
|
89
|
|
|
|
|
|
|
$res->{value} the value of the directive |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
If the line is a Fields directive, the result will contain a 'fields' |
|
92
|
|
|
|
|
|
|
key as well, which is an array reference containing the fields. |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
foreach my $field (@{$res->{fields}}) { |
|
95
|
|
|
|
|
|
|
print "Found field $field\n"; |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Since C builds hashes for you for each log entry, you |
|
99
|
|
|
|
|
|
|
generally don't need to worry about the fields. |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
If the line is a log entry, and the C object has parsed |
|
102
|
|
|
|
|
|
|
a fields directive already, the result hash will contain a 'href' |
|
103
|
|
|
|
|
|
|
key whose value is a hash reference containing the log entry data. |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
print "client to proxy bytes: ", $res->{href}{'cs-bytes'}, "\n"; |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
If no fields directive has been parsed, or C detects a |
|
108
|
|
|
|
|
|
|
change in log format (see the L"ELFF PROBLEMS"> section below), an |
|
109
|
|
|
|
|
|
|
array reference may be returned instead: |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
foreach my $field (@{$res->{aref}}) { |
|
112
|
|
|
|
|
|
|
print "data: ", $field, "\n"; |
|
113
|
|
|
|
|
|
|
} |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
If C detects a malformed line, it will return undef. |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=back |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=head1 ELFF PROBLEMS |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
There is one particularly annoying thing about ELFF log files, which is |
|
122
|
|
|
|
|
|
|
that the ELFF standard doesn't require that a new Fields directive be |
|
123
|
|
|
|
|
|
|
inserted into the log file when the log format changes. Because of this, |
|
124
|
|
|
|
|
|
|
if the log format changes in the middle of a log file, there is very |
|
125
|
|
|
|
|
|
|
little that a parser can do to detect the change. All reporting software |
|
126
|
|
|
|
|
|
|
that I have seen simply ignores logs as soon as a change in format |
|
127
|
|
|
|
|
|
|
is detected (i.e. when errors are encountered extracting statistics |
|
128
|
|
|
|
|
|
|
from the logs). This is a shortcoming in the ELFF standard, and I'm |
|
129
|
|
|
|
|
|
|
afraid that C doesn't handle the problem much better. |
|
130
|
|
|
|
|
|
|
C detects log format changes by checking the number of |
|
131
|
|
|
|
|
|
|
fields in each log entry. If the number of fields in a log entry differs |
|
132
|
|
|
|
|
|
|
from the number of fields specified in the Fields directive, C |
|
133
|
|
|
|
|
|
|
will invalidate the format and start returning arrays of fields for |
|
134
|
|
|
|
|
|
|
each message instead of hashes. This way, the log data is still |
|
135
|
|
|
|
|
|
|
available to you, and you can attempt to recover from the problem |
|
136
|
|
|
|
|
|
|
yourself. However, if the number of fields in the log messages |
|
137
|
|
|
|
|
|
|
doesn't change when the log format changes (e.g. when fields are |
|
138
|
|
|
|
|
|
|
re-ordered, or when the same number of fields is added and removed), |
|
139
|
|
|
|
|
|
|
C will not detected the format change. |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
Thankfully, log formats usually don't change on their own, so |
|
142
|
|
|
|
|
|
|
administrators can modify their procedures such that the impact |
|
143
|
|
|
|
|
|
|
of this shortcoming is minimized (e.g. rotate the log file |
|
144
|
|
|
|
|
|
|
immediately after changing the log format to force a new fields |
|
145
|
|
|
|
|
|
|
directive to be logged). |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head1 HOMEPAGE |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
L |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head1 BUGS |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
None that I know of, but please let me know if you find one. Please |
|
154
|
|
|
|
|
|
|
report bugs via the SourceForge tracker. |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head1 AUTHOR |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Copyright (c) 2007 Mark Warren |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head1 LICENSE AND DISCLAIMER |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
This software is distributed under the terms of the GNU Lesser General |
|
163
|
|
|
|
|
|
|
Public License. |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
166
|
|
|
|
|
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
167
|
|
|
|
|
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
168
|
|
|
|
|
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
|
169
|
|
|
|
|
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
170
|
|
|
|
|
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
171
|
|
|
|
|
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
172
|
|
|
|
|
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
173
|
|
|
|
|
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
174
|
|
|
|
|
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
175
|
|
|
|
|
|
|
POSSIBILITY OF SUCH DAMAGE. |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=cut |
|
178
|
|
|
|
|
|
|
|
|
179
|
3
|
|
|
3
|
|
24005
|
use 5.00; |
|
|
3
|
|
|
|
|
12
|
|
|
|
3
|
|
|
|
|
136
|
|
|
180
|
3
|
|
|
3
|
|
20
|
use strict; |
|
|
3
|
|
|
|
|
9
|
|
|
|
3
|
|
|
|
|
98
|
|
|
181
|
3
|
|
|
3
|
|
17
|
use Carp; |
|
|
3
|
|
|
|
|
12
|
|
|
|
3
|
|
|
|
|
1855
|
|
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
our $VERSION = '0.92'; |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub new { |
|
187
|
1
|
|
|
1
|
1
|
265
|
my $class = shift; |
|
188
|
|
|
|
|
|
|
|
|
189
|
1
|
|
|
|
|
4
|
my $self = { |
|
190
|
|
|
|
|
|
|
# we use number of fields to detect log format changes. it's |
|
191
|
|
|
|
|
|
|
# not perfect, but we don't understand the log content, so this |
|
192
|
|
|
|
|
|
|
# is the best that we can do |
|
193
|
|
|
|
|
|
|
'fields' => 0, |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# revmap is used to figure out the name of each field as we |
|
196
|
|
|
|
|
|
|
# build the result hash in parse_line |
|
197
|
|
|
|
|
|
|
'revmap' => {}, |
|
198
|
|
|
|
|
|
|
}; |
|
199
|
|
|
|
|
|
|
|
|
200
|
1
|
|
|
|
|
4
|
return bless $self, $class; |
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub parse_line { |
|
204
|
15
|
|
|
15
|
1
|
7179
|
my ($self, $line) = @_; |
|
205
|
15
|
|
|
|
|
22
|
chomp($line); |
|
206
|
|
|
|
|
|
|
|
|
207
|
15
|
|
|
|
|
23
|
my $res = {}; |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# if the line is a directive, handle it here |
|
210
|
15
|
100
|
66
|
|
|
80
|
if($line && substr($line, 0, 1) eq '#') { |
|
211
|
|
|
|
|
|
|
# some vendors put whitespace between # and the directive name, remove it |
|
212
|
9
|
|
|
|
|
22
|
$line =~ s/^#\s+/#/; |
|
213
|
|
|
|
|
|
|
|
|
214
|
9
|
|
|
|
|
37
|
@$res{('directive', 'value')} = split(/\s+/, $line, 2); |
|
215
|
9
|
|
|
|
|
46
|
$res->{directive} =~ s/(?:^#|:$)//g; |
|
216
|
|
|
|
|
|
|
|
|
217
|
9
|
100
|
|
|
|
24
|
if($res->{directive} eq 'Fields') { |
|
218
|
5
|
|
|
|
|
28
|
$self->{revmap} = tokenize($res->{value}); |
|
219
|
5
|
|
|
|
|
8
|
$self->{fields} = $#{$self->{revmap}}; |
|
|
5
|
|
|
|
|
9
|
|
|
220
|
5
|
|
|
|
|
5
|
@{$res->{fields}} = @{$self->{revmap}}; |
|
|
5
|
|
|
|
|
17
|
|
|
|
5
|
|
|
|
|
8
|
|
|
221
|
|
|
|
|
|
|
} |
|
222
|
|
|
|
|
|
|
|
|
223
|
9
|
|
|
|
|
23
|
return $res; |
|
224
|
|
|
|
|
|
|
} |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# not a directive, regular log |
|
227
|
|
|
|
|
|
|
|
|
228
|
6
|
|
|
|
|
22
|
my $flds = tokenize($line); |
|
229
|
6
|
50
|
|
|
|
13
|
return undef unless $flds; |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
# no field names - return array |
|
232
|
6
|
50
|
|
|
|
14
|
unless($self->{revmap}) { |
|
233
|
0
|
|
|
|
|
0
|
$res->{aref} = $flds; |
|
234
|
0
|
|
|
|
|
0
|
return $res; |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
# change in format, invalidate fields and return array |
|
238
|
6
|
100
|
|
|
|
15
|
if($#$flds != $self->{fields}) { |
|
239
|
1
|
|
|
|
|
3
|
$self->{revmap} = undef; |
|
240
|
1
|
|
|
|
|
3
|
$res->{aref} = $flds; |
|
241
|
1
|
|
|
|
|
3
|
return $res; |
|
242
|
|
|
|
|
|
|
} |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# return href |
|
245
|
5
|
|
|
|
|
6
|
my %href; |
|
246
|
5
|
|
|
|
|
6
|
@href{@{$self->{revmap}}} = @$flds; |
|
|
5
|
|
|
|
|
30
|
|
|
247
|
5
|
|
|
|
|
12
|
$res->{href} = \%href; |
|
248
|
|
|
|
|
|
|
|
|
249
|
5
|
|
|
|
|
22
|
return $res; |
|
250
|
|
|
|
|
|
|
} |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
require XSLoader; |
|
254
|
|
|
|
|
|
|
XSLoader::load('ELFF::Parser', $VERSION); |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
1; |