File Coverage

blib/lib/Regexp/Log/Common.pm
Criterion Covered Total %
statement 12 12 100.0
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 16 16 100.0


line stmt bran cond sub pod time code
1             package Regexp::Log::Common;
2              
3 6     6   116455 use warnings;
  6         14  
  6         317  
4 6     6   30 use strict;
  6         6  
  6         215  
5 6     6   29 use base qw( Regexp::Log );
  6         11  
  6         3695  
6 6     6   9263 use vars qw( $VERSION %DEFAULT %FORMAT %REGEXP );
  6         11  
  6         1606  
7              
8             $VERSION = '0.10';
9              
10             =head1 NAME
11              
12             Regexp::Log::Common - A regular expression parser for the Common Log Format
13              
14             =head1 SYNOPSIS
15              
16             my $foo = Regexp::Log::Common->new(
17             format => '%date %request',
18             capture => [qw( ts request )],
19             );
20              
21             # the format() and capture() methods can be used to set or get
22             $foo->format('%date %request %status %bytes');
23             $foo->capture(qw( ts req ));
24              
25             # this is necessary to know in which order
26             # we will receive the captured fields from the regexp
27             my @fields = $foo->capture;
28              
29             # the all-powerful capturing regexp :-)
30             my $re = $foo->regexp;
31              
32             while (<>) {
33             my %data;
34             @data{@fields} = /$re/; # no need for /o, it's a compiled regexp
35              
36             # now munge the fields
37             ...
38             }
39              
40             =head1 DESCRIPTION
41              
42             Regexp::Log::Common uses Regexp::Log as a base class, to generate regular
43             expressions for performing the usual data munging tasks on log files that
44             cannot be simply split().
45              
46             This specific module enables the computation of regular expressions for
47             parsing the log files created using the Common Log Format. An example of
48             this format is the log output generated by the httpd web server using the
49             keyword 'common'.
50              
51             The module also allows for the use of the Extended Common Log Format.
52              
53             For more information on how to use this module, please see Regexp::Log.
54              
55             =head1 ABSTRACT
56              
57             Enables simple parsing of log files created using the Common Log Format or the
58             Extended Common Log Format, such as the logs generated by the httpd/Apache web
59             server using the keyword 'common'.
60              
61             =cut
62              
63             # Defaults: the format string and the list of field names to capture.
64             %DEFAULT = (
65                 capture => [ qw( host rfc authuser date ts request req
66                                  status bytes referer ref useragent ua ) ],
67                 format  => '%host %rfc %authuser %date %request %status %bytes %referer %useragent',
68             );
69              
70             # Predefined format strings, each keyed by a ':name' shortcut.
71             %FORMAT = (
72                 ':common'   => join( ' ', qw( %host %rfc %authuser %date %request %status %bytes ) ),
73                 ':default'  => join( ' ', qw( %host %rfc %authuser %date %request %status %bytes ) ),
74                 ':extended' => join( ' ', qw( %host %rfc %authuser %date %request %status %bytes %referer %useragent ) ),
75             );
76              
77             # Regexp fragments for each format field, keyed by the Apache-style directive or its long name. Each fragment is wrapped in a (?#=name)...(?#!name) marker pair; presumably the Regexp::Log base class uses these to decide which groups capture (confirm against Regexp::Log), so the name must match the field it wraps.
78             %REGEXP = (
79                 # %a Remote IP-address
80                 # %A Local IP-address
81                 '%a' => '(?#=a)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?#!a)',
82                 '%A' => '(?#=A)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?#!A)',
83                 '%remoteip' => '(?#=remoteip)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?#!remoteip)',
84                 '%localip' => '(?#=localip)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?#!localip)',
85              
86                 # %B Size of response in bytes, excluding HTTP headers.
87                 # %b Size of response in bytes, excluding HTTP headers. In CLF format, i.e. a '-' rather than a 0 when no bytes are sent.
88                 '%B' => '(?#=B)\d+(?#!B)', # bytes (non-CLF format)
89                 '%b' => '(?#=b)-|\d+(?#!b)', # bytes (CLF format)
90                 '%bytes' => '(?#=bytes)-|\d+(?#!bytes)', # bytes (CLF and non-CLF format)
91              
92                 # %D The time taken to serve the request, in microseconds.
93                 '%D' => '(?#=D)\d+(?#!D)', # response time (in microseconds)
94                 '%time' => '(?#=time)\d+(?#!time)', # response time (in microseconds)
95              
96                 # %F Filename (Apache itself spells this directive %f; the key here is upper-case)
97                 '%F' => '(?#=F)\S+(?#!F)', # filename
98                 '%filename' => '(?#=filename)\S+(?#!filename)', # filename
99              
100                 # %h Remote host
101                 '%h' => '(?#=h)\S+(?#!h)', # numeric or name of remote host
102                 '%host' => '(?#=host)\S+(?#!host)', # numeric or name of remote host
103                 '%remotehost' => '(?#=remotehost)\S+(?#!remotehost)', # numeric or name of remote host
104              
105                 # %H The request protocol
106                 '%H' => '(?#=H)\S+(?#!H)', # protocol
107                 '%protocol' => '(?#=protocol)\S+(?#!protocol)', '%protcol' => '(?#=protocol)\S+(?#!protocol)', # protocol; the misspelled '%protcol' key is kept as an alias for existing callers
108              
109                 # %{Foobar}i The contents of Foobar: header line(s) in the request sent to the server.
110                 '%referer' => '(?#=referer)\"(?#=ref).*?(?#!ref)\"(?#!referer)', # "referer" from \"%{Referer}i\"
111                 '%useragent' => '(?#=useragent)\"(?#=ua).*?(?#!ua)\"(?#!useragent)', # "user_agent" from \"%{User-Agent}i\"
112              
113                 # %k Number of keepalive requests handled on this connection. Interesting if KeepAlive is being used, so that, for example, a '1' means the first keepalive request after the initial one, '2' the second, etc...; otherwise this is always 0 (indicating the initial request). Available in versions 2.2.11 and later.
114                 '%k' => '(?#=k)\d+(?#!k)', # keep alive requests
115                 '%keepalive' => '(?#=keepalive)\d+(?#!keepalive)', # keep alive requests
116              
117                 # %l Remote logname (from identd, if supplied). This will return a dash unless mod_ident is present and IdentityCheck is set On.
118                 '%l' => '(?#=l)\S+(?#!l)', # logname (marker was 'F', copied from the filename entry)
119                 '%logname' => '(?#=logname)\S+(?#!logname)', # logname
120                 '%rfc' => '(?#=rfc)\S+(?#!rfc)', # rfc931
121              
122                 # %m The request method
123                 '%m' => '(?#=m)\S+(?#!m)', # request method (marker was 'F', copied from the filename entry)
124                 '%method' => '(?#=method)\S+(?#!method)', # request method
125              
126                 # %p The canonical port of the server serving the request
127                 '%p' => '(?#=p)\d+(?#!p)', # port
128                 '%port' => '(?#=port)\d+(?#!port)', # port
129              
130                 # %P The process ID of the child that serviced the request.
131                 '%P' => '(?#=P)\d+(?#!P)', # process id
132                 '%pid' => '(?#=pid)\d+(?#!pid)', # process id
133              
134                 # %q The query string (prepended with a ? if a query string exists, otherwise an empty string)
135                 '%q' => '(?#=q)\".*?\"(?#!q)', # "query string"
136                 '%querystring' => '(?#=querystring)\"(?#=qs).*?(?#!qs)\"(?#!querystring)', '%queryatring' => '(?#=queryatring)\"(?#=qs).*?(?#!qs)\"(?#!queryatring)', # "query string"; the misspelled '%queryatring' key is kept unchanged as an alias
137              
138                 # %r First line of request
139                 '%r' => '(?#=r)\".*?\"(?#!r)', # "request"
140                 '%request' => '(?#=request)\"(?#=req).*?(?#!req)\"(?#!request)', # "request"
141              
142                 # %s Status. For requests that got internally redirected, this is the status of the *original* request --- %>s for the last.
143                 '%s' => '(?#=s)\d+(?#!s)', # status
144                 '%status' => '(?#=status)\d+(?#!status)', # status
145              
146                 # %t Time the request was received (standard English format)
147                 '%t' => '(?#=t)\[\d{2}\/\w{3}\/\d{4}(?::\d{2}){3} [-+]\d{4}\](?#!t)', # [date] (see note 1)
148                 '%date' => '(?#=date)\[(?#=ts)\d{2}\/\w{3}\/\d{4}(?::\d{2}){3} [-+]\d{4}(?#!ts)\](?#!date)', # [date] (see note 1)
149              
150                 # %T The time taken to serve the request, in seconds.
151                 '%T' => '(?#=T)\d+(?#!T)', # response time (in seconds)
152                 '%seconds' => '(?#=seconds)\d+(?#!seconds)', # response time (in seconds)
153              
154                 # %u Remote user (from auth; may be bogus if return status (%s) is 401)
155                 '%u' => '(?#=u)\S+(?#!u)', # authuser
156                 '%authuser' => '(?#=authuser)\S+(?#!authuser)', # authuser
157              
158                 # %U The URL path requested, not including any query string.
159                 '%U' => '(?#=U)\".*?\"(?#!U)', # request (opening marker was '(?#U)', missing the '=')
160                 # '%request' is defined once, next to '%r'; an identical duplicate entry used to sit here.
161              
162                 # %v The canonical ServerName of the server serving the request.
163                 # %V The server name according to the UseCanonicalName setting.
164                 '%v' => '(?#=v)\S+(?#!v)', # server name
165                 '%V' => '(?#=V)\S+(?#!V)', # server name
166                 '%servername' => '(?#=servername)\S+(?#!servername)', # server name
167              
168              
169                 # %X Connection status when response is completed:
170                 '%X' => '(?#=X)\S+(?#!X)', # connection status (X, + or -)
171                 '%connection' => '(?#=connection)\S+(?#!connection)', # connection status (X, + or -)
172              
173                 # %I Bytes received, including request and headers, cannot be zero. You need to enable mod_logio to use this.
174                 # %O Bytes sent, including headers, cannot be zero. You need to enable mod_logio to use this.
175                 '%I' => '(?#=I)\S+(?#!I)', # Bytes received
176                 '%O' => '(?#=O)\S+(?#!O)', # Bytes sent
177             );
178              
179             # note 1: date is in the format [01/Jan/1997:13:07:21 -0600]
180              
181             1;
182              
183             __END__