File Coverage

blib/lib/AnyData/Format/Weblog.pm
Criterion Covered Total %
statement 32 32 100.0
branch 3 6 50.0
condition 4 8 50.0
subroutine 6 6 100.0
pod 0 2 0.0
total 45 54 83.3


line stmt bran cond sub pod time code
1             #########################################################
2             package AnyData::Format::Weblog;
3             #########################################################
4             # AnyData driver for "Common Log Format" web log files
5             # copyright (c) 2000, Jeff Zucker
6             #########################################################
7              
8             =head1 NAME
9              
10             AnyData::Format::Weblog - tiedhash & DBI/SQL access to HTTPD Logs
11              
12             =head1 SYNOPSIS
13              
14             use AnyData;
15             my $weblog = adTie( 'Weblog', $filename );
16             while (my $hit = each %$weblog) {
17             print $hit->{remotehost},"\n" if $hit->{request} =~ /mypage.html/;
18             }
19             # ... other tied hash operations
20              
21             OR
22              
23             use DBI;
24             my $dbh = DBI->connect('dbi:AnyData:');
25             $dbh->func('hits','Weblog','access_log','ad_catalog');
26             my $hits = $dbh->selectall_arrayref( qq{
27             SELECT remotehost FROM hits WHERE request LIKE '%mypage.html%'
28             });
29             # ... other DBI/SQL read operations
30              
31             =head1 DESCRIPTION
32              
33             This is a plug-in format parser for the AnyData and DBD::AnyData modules. You can gain read access to Common Log Format web server log files (e.g. NCSA or Apache) either through tied hashes or arrays or through SQL database queries.
34              
35             Fieldnames are taken from the W3 definitions found at
36              
37             http://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format
38              
39             remotehost
40             username
41             authuser
42             date
43             request
44             status
45             bytes
46              
47             In addition, two extra fields that may be present in extended format logfiles are:
48              
49             referer
50             client
51              
52             This module does not currently support writing to weblog files.
53              
54             Please refer to the documentation for AnyData.pm and DBD::AnyData.pm
55             for further details.
56              
57             =head1 AUTHOR & COPYRIGHT
58              
59             copyright 2000, Jeff Zucker
60             all rights reserved
61              
62             =cut
63              
64 1     1   5 use strict;
  1         1  
  1         36  
65 1     1   4 use warnings;
  1         2  
  1         30  
66 1     1   356 use AnyData::Format::Base;
  1         2  
  1         23  
67 1     1   4 use vars qw( @ISA $DEBUG $VERSION);
  1         2  
  1         254  
68             @AnyData::Format::Weblog::ISA = qw( AnyData::Format::Base );
69             $DEBUG = 0;
70              
71             $VERSION = '0.12';
72              
73             sub new {
74 1     1 0 3 my $class = shift;
75 1   50     4 my $self = shift || {};
76 1         3 $self->{col_names} =
77             'remotehost,username,authuser,date,request,status,bytes,referer,client';
78 1         2 $self->{record_sep} = "\n";
79 1         1 $self->{key} = 'datestamp';
80 1         2 $self->{keep_first_line} = 1;
81 1         6 return bless $self, $class;
82             }
83              
84             sub read_fields {
85 2 50   2 0 5 print "PARSE RECORD\n" if $DEBUG;
86 2         3 my $self = shift;
87 2   50     6 my $str = shift || return undef;
88 2         7 $str =~ s/^\s+//;
89 2         12 $str =~ s/\s+$//;
90 2 50       4 return undef unless $str;
91 2         25 my (@row) =
92             $str =~ /^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;
93 2 50       6 return undef unless defined $row[0];
94 2         12 my ( $referer, $client ) = $row[7] =~ /^(.*?)\s(.*)$/;
95 2   50     5 $client ||= '';
96 2   50     5 $referer ||= '';
97 2         4 ( $row[7], $row[8] ) = ( $referer, $client );
98              
99             # $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp
100 2         13 return @row;
101             }
102             1;
103