line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Regexp::Log::Common; |
2
|
|
|
|
|
|
|
|
3
|
6
|
|
|
6
|
|
116455
|
use warnings; |
|
6
|
|
|
|
|
14
|
|
|
6
|
|
|
|
|
317
|
|
4
|
6
|
|
|
6
|
|
30
|
use strict; |
|
6
|
|
|
|
|
6
|
|
|
6
|
|
|
|
|
215
|
|
5
|
6
|
|
|
6
|
|
29
|
use base qw( Regexp::Log ); |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
3695
|
|
6
|
6
|
|
|
6
|
|
9263
|
use vars qw( $VERSION %DEFAULT %FORMAT %REGEXP ); |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
1606
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
$VERSION = '0.10'; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 NAME |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
Regexp::Log::Common - A regular expression parser for the Common Log Format |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 SYNOPSIS |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
my $foo = Regexp::Log::Common->new( |
17
|
|
|
|
|
|
|
format => '%date %request', |
18
|
|
|
|
|
|
|
capture => [qw( ts request )], |
19
|
|
|
|
|
|
|
); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# the format() and capture() methods can be used to set or get |
22
|
|
|
|
|
|
|
$foo->format('%date %request %status %bytes'); |
23
|
|
|
|
|
|
|
$foo->capture(qw( ts req )); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# this is necessary to know in which order |
26
|
|
|
|
|
|
|
# we will receive the captured fields from the regexp |
27
|
|
|
|
|
|
|
my @fields = $foo->capture; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# the all-powerful capturing regexp :-) |
30
|
|
|
|
|
|
|
my $re = $foo->regexp; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
while (<>) { |
33
|
|
|
|
|
|
|
my %data; |
34
|
|
|
|
|
|
|
@data{@fields} = /$re/; # no need for /o, it's a compiled regexp |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# now munge the fields |
37
|
|
|
|
|
|
|
... |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 DESCRIPTION |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Regexp::Log::Common uses Regexp::Log as a base class, to generate regular |
43
|
|
|
|
|
|
|
expressions for performing the usual data munging tasks on log files that |
44
|
|
|
|
|
|
|
cannot be simply split(). |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
This specific module enables the computation of regular expressions for |
47
|
|
|
|
|
|
|
parsing the log files created using the Common Log Format. Examples of |
48
|
|
|
|
|
|
|
this format are the logs generated by the httpd web server using the |
49
|
|
|
|
|
|
|
keyword 'common'. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
The module also allows for the use of the Extended Common Log Format. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
For more information on how to use this module, please see Regexp::Log. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head1 ABSTRACT |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
Enables simple parsing of log files created using the Common Log Format or the |
58
|
|
|
|
|
|
|
Extended Common Log Format, such as the logs generated by the httpd/Apache web |
59
|
|
|
|
|
|
|
server using the keyword 'common'. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=cut |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# default values |
64
|
|
|
|
|
|
|
%DEFAULT = ( |
65
|
|
|
|
|
|
|
format => '%host %rfc %authuser %date %request %status %bytes %referer %useragent', |
66
|
|
|
|
|
|
|
capture => [ 'host', 'rfc', 'authuser', 'date', 'ts', 'request', 'req', |
67
|
|
|
|
|
|
|
'status', 'bytes', 'referer', 'ref', 'useragent', 'ua' ], |
68
|
|
|
|
|
|
|
); |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# predefined format strings |
71
|
|
|
|
|
|
|
%FORMAT = ( |
72
|
|
|
|
|
|
|
':default' => '%host %rfc %authuser %date %request %status %bytes', |
73
|
|
|
|
|
|
|
':common' => '%host %rfc %authuser %date %request %status %bytes', |
74
|
|
|
|
|
|
|
':extended' => '%host %rfc %authuser %date %request %status %bytes %referer %useragent', |
75
|
|
|
|
|
|
|
); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# the regexps that match the various fields |
78
|
|
|
|
|
|
|
%REGEXP = ( |
79
|
|
|
|
|
|
|
# %a Remote IP-address |
80
|
|
|
|
|
|
|
# %A Local IP-address |
81
|
|
|
|
|
|
|
'%a' => '(?#=a)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?#!a)', |
82
|
|
|
|
|
|
|
'%A' => '(?#=A)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?#!A)', |
83
|
|
|
|
|
|
|
'%remoteip' => '(?#=remoteip)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?#!remoteip)', |
84
|
|
|
|
|
|
|
'%localip' => '(?#=localip)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?#!localip)', |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# %B Size of response in bytes, excluding HTTP headers. |
87
|
|
|
|
|
|
|
# %b Size of response in bytes, excluding HTTP headers. In CLF format, i.e. a '-' rather than a 0 when no bytes are sent. |
88
|
|
|
|
|
|
|
'%B' => '(?#=B)\d+(?#!B)', # bytes (non-CLF format) |
89
|
|
|
|
|
|
|
'%b' => '(?#=b)-|\d+(?#!b)', # bytes (CLF format) |
90
|
|
|
|
|
|
|
'%bytes' => '(?#=bytes)-|\d+(?#!bytes)', # bytes (CLF and non-CLF format) |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# %D The time taken to serve the request, in microseconds. |
93
|
|
|
|
|
|
|
'%D' => '(?#=D)\d+(?#!D)', # response time (in microseconds) |
94
|
|
|
|
|
|
|
'%time' => '(?#=time)\d+(?#!time)', # response time (in microseconds) |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# %f Filename |
97
|
|
|
|
|
|
|
'%F' => '(?#=F)\S+(?#!F)', # filename |
98
|
|
|
|
|
|
|
'%filename' => '(?#=filename)\S+(?#!filename)', # filename |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# %h Remote host |
101
|
|
|
|
|
|
|
'%h' => '(?#=h)\S+(?#!h)', # numeric or name of remote host |
102
|
|
|
|
|
|
|
'%host' => '(?#=host)\S+(?#!host)', # numeric or name of remote host |
103
|
|
|
|
|
|
|
'%remotehost' => '(?#=remotehost)\S+(?#!remotehost)', # numeric or name of remote host |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# %H The request protocol |
106
|
|
|
|
|
|
|
'%H' => '(?#=H)\S+(?#!H)', # protocol |
107
|
|
|
|
|
|
|
'%protcol' => '(?#=protocol)\S+(?#!protocol)', # protocol |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# %{Foobar}i The contents of Foobar: header line(s) in the request sent to the server. |
110
|
|
|
|
|
|
|
'%referer' => '(?#=referer)\"(?#=ref).*?(?#!ref)\"(?#!referer)', # "referer" from \"%{Referer}i\" |
111
|
|
|
|
|
|
|
'%useragent' => '(?#=useragent)\"(?#=ua).*?(?#!ua)\"(?#!useragent)', # "user_agent" from \"%{User-Agent}i\" |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# %k Number of keepalive requests handled on this connection. Interesting if KeepAlive is being used, so that, for example, a '1' means the first keepalive request after the initial one, '2' the second, etc...; otherwise this is always 0 (indicating the initial request). Available in versions 2.2.11 and later. |
114
|
|
|
|
|
|
|
'%k' => '(?#=k)\d+(?#!k)', # keep alive requests |
115
|
|
|
|
|
|
|
'%keepalive' => '(?#=keepalive)\d+(?#!keepalive)', # keep alive requests |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# %l Remote logname (from identd, if supplied). This will return a dash unless mod_ident is present and IdentityCheck is set On. |
118
|
|
|
|
|
|
|
'%l' => '(?#=F)\S+(?#!F)', # logname |
119
|
|
|
|
|
|
|
'%logname' => '(?#=logname)\S+(?#!logname)', # logname |
120
|
|
|
|
|
|
|
'%rfc' => '(?#=rfc)\S+(?#!rfc)', # rfc931 |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# %m The request method |
123
|
|
|
|
|
|
|
'%m' => '(?#=F)\S+(?#!F)', # request method |
124
|
|
|
|
|
|
|
'%method' => '(?#=method)\S+(?#!method)', # request method |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# %p The canonical port of the server serving the request |
127
|
|
|
|
|
|
|
'%p' => '(?#=p)\d+(?#!p)', # port |
128
|
|
|
|
|
|
|
'%port' => '(?#=port)\d+(?#!port)', # port |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# %P The process ID of the child that serviced the request. |
131
|
|
|
|
|
|
|
'%P' => '(?#=P)\d+(?#!P)', # process id |
132
|
|
|
|
|
|
|
'%pid' => '(?#=pid)\d+(?#!pid)', # process id |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# %q The query string (prepended with a ? if a query string exists, otherwise an empty string) |
135
|
|
|
|
|
|
|
'%q' => '(?#=q)\".*?\"(?#!q)', # "query string" |
136
|
|
|
|
|
|
|
'%queryatring' => '(?#=queryatring)\"(?#=qs).*?(?#!qs)\"(?#!queryatring)', # "query string" |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# %r First line of request |
139
|
|
|
|
|
|
|
'%r' => '(?#=r)\".*?\"(?#!r)', # "request" |
140
|
|
|
|
|
|
|
'%request' => '(?#=request)\"(?#=req).*?(?#!req)\"(?#!request)', # "request" |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# %s Status. For requests that got internally redirected, this is the status of the *original* request --- %>s for the last. |
143
|
|
|
|
|
|
|
'%s' => '(?#=s)\d+(?#!s)', # status |
144
|
|
|
|
|
|
|
'%status' => '(?#=status)\d+(?#!status)', # status |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# %t Time the request was received (standard English format) |
147
|
|
|
|
|
|
|
'%t' => '(?#=t)\[\d{2}\/\w{3}\/\d{4}(?::\d{2}){3} [-+]\d{4}\](?#!t)', # [date] (see note 1) |
148
|
|
|
|
|
|
|
'%date' => '(?#=date)\[(?#=ts)\d{2}\/\w{3}\/\d{4}(?::\d{2}){3} [-+]\d{4}(?#!ts)\](?#!date)', # [date] (see note 1) |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# %T The time taken to serve the request, in seconds. |
151
|
|
|
|
|
|
|
'%T' => '(?#=T)\d+(?#!T)', # response time (in seconds) |
152
|
|
|
|
|
|
|
'%seconds' => '(?#=seconds)\d+(?#!seconds)', # response time (in seconds) |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# %u Remote user (from auth; may be bogus if return status (%s) is 401) |
155
|
|
|
|
|
|
|
'%u' => '(?#=u)\S+(?#!u)', # authuser |
156
|
|
|
|
|
|
|
'%authuser' => '(?#=authuser)\S+(?#!authuser)', # authuser |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# %U The URL path requested, not including any query string. |
159
|
|
|
|
|
|
|
'%U' => '(?#U)\".*?\"(?#!U)', # request |
160
|
|
|
|
|
|
|
'%request' => '(?#=request)\"(?#=req).*?(?#!req)\"(?#!request)', # "request" |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# %v The canonical ServerName of the server serving the request. |
163
|
|
|
|
|
|
|
# %V The server name according to the UseCanonicalName setting. |
164
|
|
|
|
|
|
|
'%v' => '(?#=v)\S+(?#!v)', # server name |
165
|
|
|
|
|
|
|
'%V' => '(?#=V)\S+(?#!V)', # server name |
166
|
|
|
|
|
|
|
'%servername' => '(?#=servername)\S+(?#!servername)', # server name |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# %X Connection status when response is completed: |
170
|
|
|
|
|
|
|
'%X' => '(?#=X)\S+(?#!X)', # connection status (X, + or -) |
171
|
|
|
|
|
|
|
'%connection' => '(?#=connection)\S+(?#!connection)', # connection status (X, + or -) |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# %I Bytes received, including request and headers, cannot be zero. You need to enable mod_logio to use this. |
174
|
|
|
|
|
|
|
# %O Bytes sent, including headers, cannot be zero. You need to enable mod_logio to use this. |
175
|
|
|
|
|
|
|
'%I' => '(?#=I)\S+(?#!I)', # Bytes received |
176
|
|
|
|
|
|
|
'%O' => '(?#=O)\S+(?#!O)', # Bytes sent |
177
|
|
|
|
|
|
|
); |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# note 1: date is in the format [01/Jan/1997:13:07:21 -0600] |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
1; |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
__END__ |