File Coverage

lib/Regexp/Log/Monster.pm
Criterion Covered Total %
statement 12 12 100.0
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 16 16 100.0


line stmt bran cond sub pod time code
1             package Regexp::Log::Monster;
2              
3 3     3   18 use strict;
  3         7  
  3         123  
4 3     3   16 use warnings;
  3         6  
  3         188  
5              
6 3     3   19 use base qw( Regexp::Log );
  3         28  
  3         349  
7 3     3   17 use vars qw( %DEFAULT %FORMAT %REGEXP );
  3         6  
  3         731  
8              
9             our $VERSION = 0.05;
10              
11             =head1 NAME
12              
13             Regexp::Log::Monster - A regexp parser for the Extended Log Format + vhost
14              
15             =head1 SYNOPSIS
16              
17             my $foo = Regexp::Log::Monster->new(
18             format => ':logmonster',
19             capture => [qw( ts request )],
20             );
21              
22             # the format() and capture() methods can be used to set or get
23             $foo->format('custom %date %request %status %bytes');
24             $foo->capture(qw( ts req ));
25              
26             # this is necessary to know in which order
27             # we will receive the captured fields from the regexp
28             my @fields = $foo->capture;
29              
30             # the all-powerful capturing regexp :-)
31             my $re = $foo->regexp;
32              
33             while (<>) {
34             my %data;
35             @data{@fields} = /$re/; # no need for /o, it's a compiled regexp
36              
37             # now munge the fields
38             ...
39             }
40              
41             =head1 DESCRIPTION
42              
43             Regexp::Log::Monster uses Regexp::Log as a base class, to generate regular
44             expressions for performing the usual data munging tasks on log files that
45             cannot be simply split().
46              
47             This specific module enables the computation of regular expressions for
48             parsing the log files created using the Monster Log Format. Examples of
49             this format are the logs generated by the httpd web server using the
50             keyword 'common'.
51              
52             The module also allows for the use of the Extended Monster Log Format.
53              
54             For more information on how to use this module, please see Regexp::Log.
55              
56             =head1 ABSTRACT
57              
58             Regexp::Log::Monster enables simple parsing of log files created using the
59             Extended Log Format, such as the logs generated by the httpd web server
60             using the keyword 'common'.
61              
62             =cut
63              
64             # default values
65             %DEFAULT = (
66             format => '%host %rfc %authuser %date %request %status %bytes %referer %useragent %vhost',
67             capture => [ 'host', 'rfc', 'authuser', 'date', 'ts', 'request', 'req',
68             'status', 'bytes', 'referer', 'ref', 'useragent', 'ua', 'vhost' ],
69             );
70              
71             # predefined format strings
72             %FORMAT = (
73             ':default' => '%host %rfc %authuser %date %request %status %bytes %referer %useragent %vhost',
74             ':common' => '%host %rfc %authuser %date %request %status %bytes',
75             ':extended' => '%host %rfc %authuser %date %request %status %bytes %referer %useragent',
76             ':logmonster' => '%host %rfc %authuser %date %request %status %bytes %referer %useragent %vhost',
77             );
78              
79             # the regexps that match the various fields
80             %REGEXP = (
81             # '%host' => '(?#=host)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?#!host)', # IPv4 only
82             '%host' => '(?#=host)\S+(?#!host)', # numeric or name of remote host
83             '%rfc' => '(?#=rfc).*?(?#!rfc)', # rfc931
84             '%authuser' => '(?#=authuser).*?(?#!authuser)', # authuser
85             '%date' => '(?#=date)\[(?#=ts)\d{2}\/\w{3}\/\d{4}(?::\d{2}){3} [-+]\d{4}(?#!ts)\](?#!date)',
86             # [date] (see note)
87             '%request' => '(?#=request)\"(?#=req).*?(?#!req)\"(?#!request)', # "request"
88             '%status' => '(?#=status)\d+(?#!status)', # status
89             '%bytes' => '(?#=bytes)-|\d+(?#!bytes)', # bytes
90             '%referer' => '(?#=referer)\"(?#=ref).*?(?#!ref)\"(?#!referer)', # "referer"
91             '%useragent'=> '(?#=useragent)\"(?#=ua).*?(?#!ua)\"(?#!useragent)', # "user_agent"
92             '%vhost' => '(?#=vhost)\S+(?#!vhost)', # name of local vhost
93             );
94              
95             # note: date is in the format [01/Jan/1997:13:07:21 -0600]
96              
97             1;
98              
99             =head1 SEE ALSO
100              
101             Regexp::Log
102              
103             =cut
104