|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Logfile::EPrints::Hit;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Logfile::EPrints::Hit - Generic 'hit' object  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This object represents a single entry in a log file and doesn't proscribe any particular schema.  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This uses the 'AUTOLOAD' mechanism to allow any variable to be defined as a method.  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'Hit' objects are passed between filters that may add additional functionality (e.g. by subclassing the hit object).  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	use Logfile::EPrints::Hit;  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $hit = Logfile::EPrints::Hit->new;  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$hit->date( '2006-05-01 23:10:05' );  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	print $hit->date;  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 CLASS METHODS  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item Logfile::EPrints::Hit::Combined::load_country_db( FILENAME [, FLAGS ] )  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Load the Maxmind country database located at FILENAME.  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item Logfile::EPrints::Hit::Combined::load_org_db( FILENAME [, FLAGS ] )  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Load the Maxmind organisation database located at FILENAME.  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item address()  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 IP address (or hostname if IP address could not be found).  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item hostname()  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Hostname (undef if the address is an IP without a reverse DNS entry).  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item date()  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Apache formatted date/time.  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item datetime()  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Date/time formatted as yyyymmddHHMMSS.  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item userid_identd()  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item identd()  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item request()  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Request string.  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item code()  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 HTTP server code.  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item size()  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 HTTP server response size.  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item referrer()  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 User agent referrer.  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item agent()  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 User agent string.  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item method()  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Request method (GET, HEAD etc.).  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item page()  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Requested page - probably won't include the virtual host!  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item version()  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 HTTP version requested (HTTP/1.1 etc).  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item country()  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Country that the IP is probably in, must call load_country_db first.  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item organisation()  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Organisation that the IP belongs to, must call load_org_db first.  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item institution()  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the title from the homepage()  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item homepage()  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the homepage for the user's network.  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
36
 | 
 use strict;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
624
 | 
    | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
5857
 | 
 use POSIX qw/ strftime /;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
60111
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
    | 
| 
119
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
14155
 | 
 use Date::Parse;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58551
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
963
 | 
    | 
| 
120
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
58
 | 
 use Socket;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4693
 | 
    | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
40
 | 
 use vars qw( $AUTOLOAD %INST_CACHE );  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
373
 | 
    | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
29
 | 
 use vars qw( $GEO_IP_CLASS $ORG_DB $COUNTRY_DB );  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
644
 | 
    | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for(qw( Geo::IP Geo::IP::PurePerl ))  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
127
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
2574
 | 
 	eval "use $_";  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
0
 | 
    | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8897
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
240655
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
618
 | 
    | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	unless($@)  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$GEO_IP_CLASS = $_;  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		last;  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
135
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
30
 | 
 use vars qw( $UA );  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2219
 | 
    | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 require LWP::UserAgent;  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $UA = LWP::UserAgent->new();  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $UA->timeout(5);  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
142
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
0
  
 | 
7
 | 
 	my( $class, %args ) = @_;  | 
| 
143
 | 
1
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
40
 | 
 	return bless \%args, ref($class) || $class;  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub AUTOLOAD {  | 
| 
147
 | 
5376
 | 
 
 | 
 
 | 
  
5376
  
 | 
 
 | 
20819
 | 
 	$AUTOLOAD =~ s/.*:://;  | 
| 
148
 | 
5376
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12679
 | 
 	return if $AUTOLOAD =~ /^[A-Z]/;  | 
| 
149
 | 
5376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5902
 | 
 	my $self = shift;  | 
| 
150
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return ref($self->{$AUTOLOAD}) ?  | 
| 
151
 | 
5376
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
37739
 | 
 		&{$self->{$AUTOLOAD}}($self,@_) :   | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$self->{$AUTOLOAD};  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # should be to_string in Perl  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 *toString = \&to_string;  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub to_string {  | 
| 
158
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
 	my $self = shift;  | 
| 
159
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $str = "===Parsed Reference===\n";  | 
| 
160
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	while(my ($k,$v) = each %$self) {  | 
| 
161
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 		$str .= "$k=".($v||'n/a')."\n";  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
163
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$str;  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub load_country_db  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
168
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	my( $filename, $flags ) = @_;  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
170
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	Carp::croak "Requires Geo::IP or Geo::IP::PurePerl" unless $GEO_IP_CLASS;  | 
| 
171
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	Carp::croak "Missing filename argument" unless @_;  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
173
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$COUNTRY_DB = $GEO_IP_CLASS->open( @_ );  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
175
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
38
 | 
 	no warnings;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
908
 | 
    | 
| 
176
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	*country = \&_country;  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub load_org_db  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
181
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	my( $filename, $flags ) = @_;  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
183
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	Carp::croak "Requires Geo::IP or Geo::IP::PurePerl" unless $GEO_IP_CLASS;  | 
| 
184
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	Carp::croak "Missing filename argument" unless @_;  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$ORG_DB = $GEO_IP_CLASS->open( @_ );  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
188
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
31
 | 
 	no warnings;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6245
 | 
    | 
| 
189
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	*organisation = \&_organisation;  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _getipbyname  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
194
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
753
 | 
 	my( $name, $aliases, $addrtype, $length, @addrs ) = gethostbyname($_[0]);  | 
| 
195
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
29
 | 
 	return defined($addrs[0]) ? inet_ntoa($addrs[0]) : undef;  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub address  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
200
 | 
1203
 | 
 
 | 
  
 33
  
 | 
  
1203
  
 | 
  
1
  
 | 
24786
 | 
 	$_[0]->{address} ||= _getipbyname( $_[0]->{hostname} ) || $_[0]->{hostname};  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub country  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
205
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	Carp::croak "You must call ".__PACKAGE__."::load_country_db first";  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _country  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
210
 | 
0
 | 
 
 | 
  
  0
  
 | 
  
0
  
 | 
 
 | 
0
 | 
 	$_[0]->{country} ||= $COUNTRY_DB->country_code_by_addr($_[0]->address);  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub organisation  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
215
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	Carp::croak "You must call ".__PACKAGE__."::load_org_db first";  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _organisation  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
220
 | 
0
 | 
 
 | 
  
  0
  
 | 
  
0
  
 | 
 
 | 
0
 | 
 	$_[0]->{organisation} ||= Encode::decode('iso-8859-1', $ORG_DB->org_by_name($_[0]->address));  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub hostname  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
225
 | 
4
 | 
 
 | 
  
 66
  
 | 
  
4
  
 | 
  
1
  
 | 
1169
 | 
 	$_[0]->{hostname} ||= gethostbyaddr(inet_aton($_[0]->address), AF_INET);  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub utime  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
230
 | 
124
 | 
  
 50
  
 | 
  
 66
  
 | 
  
124
  
 | 
  
0
  
 | 
899
 | 
 	$_[0]->{'utime'} ||= Date::Parse::str2time($_[0]->{date})  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		or Carp::croak "Unrecognised or invalid date: $_[0]->{date}";  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub datetime  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
236
 | 
2
 | 
 
 | 
  
 33
  
 | 
  
2
  
 | 
  
1
  
 | 
30
 | 
 	$_[0]->{datetime} ||= _time2datetime($_[0]->utime);  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _time2datetime {  | 
| 
240
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
1188
 | 
 	strftime("%Y%m%d%H%M%S",gmtime($_[0]));  | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub institution  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
245
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	my( $self ) = @_;  | 
| 
246
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return $self->{_institution} if exists($self->{_institution});  | 
| 
247
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	@$self{qw(_institution _homepage)} = addr2institution($self->hostname);  | 
| 
248
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$self->{_institution};  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub homepage  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
253
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
21
 | 
 	my( $self ) = @_;  | 
| 
254
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	return $self->{_homepage} if exists($self->{_homepage});  | 
| 
255
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	@$self{qw(_institution _homepage)} = addr2institution($self->hostname);  | 
| 
256
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
 	$self->{_homepage};  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub addr2institution  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
261
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
0
  
 | 
5
 | 
 	my( $addr ) = @_;  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Can't do anything unless the address is defined  | 
| 
264
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 	return () unless defined $addr;  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Get the domain name  | 
| 
267
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return () unless $addr =~ /([^\.]+)\.([^\.]+)\.([^\.]+)$/;  | 
| 
268
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	my $uri = 'http://www.' . ((length($3) > 2 || length($2) > 3) ?  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		join('.', $2, $3) :  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		join('.', $1, $2, $3));  | 
| 
271
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$uri .= '/';  | 
| 
272
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return ($INST_CACHE{$uri},$uri) if defined $INST_CACHE{$uri};  | 
| 
273
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return () if exists($INST_CACHE{$uri});  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Retrieve the home page  | 
| 
276
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$UA->max_size( 2048 );  | 
| 
277
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $r = $UA->get($uri);  | 
| 
278
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$UA->max_size( undef );  | 
| 
279
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if( $r->is_error )  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
281
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		warn "Error retrieving homepage ($uri): " . $r->message;  | 
| 
282
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$INST_CACHE{$uri} = undef;  | 
| 
283
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return ();  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
286
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return () unless $r->content =~ /<\s*title[^>]*>([^<]+)<\s*\/\s*title\s*>/is;  | 
| 
287
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $title = $1;  | 
| 
288
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$title =~ s/\r\n/ /sg;  | 
| 
289
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$title =~ s/^\s+//;  | 
| 
290
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$title =~ s/(?:\-)?\s+$//;  | 
| 
291
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return ($INST_CACHE{$uri} = $title,$uri);  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Logfile::EPrints::Hit::Combined;  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Log file format is:  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ADDRESS IDENTD_USERID USER_ID [DATE TIMEZONE] "request" HTTP_CODE RESPONSE_SIZE "referrer" "agent"  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Logfile::EPrints::Hit::Combined - Parse combined format logs like those generated from Apache  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	use Logfile::EPrints::Hit;  | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $hit = Logfile::EPrints::Hit::Combined->new($line);  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	printf("%s requested %s\n",  | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$hit->hostname,  | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$hit->page);  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Tim Brody -   | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
321
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
42
 | 
 use strict;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
225
 | 
    | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
323
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
26
 | 
 use vars qw( @ISA );  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
326
 | 
    | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 @ISA = qw( Logfile::EPrints::Hit );  | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
326
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
32
 | 
 use vars qw( $AUTOLOAD $LINE_PARSER @FIELDS );  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
336
 | 
    | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
328
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
9318
 | 
 use Text::CSV_XS;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
91851
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2232
 | 
    | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $LINE_PARSER = Text::CSV_XS->new({  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	escape_char => '\\',  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	sep_char => ' ',  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 });  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Fields in a single log line (as split by Text::CSV)  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # !!! date is handled separately !!!  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 @FIELDS = qw(  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	address userid_identd userid   | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	request code size referrer agent  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new($$)  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
343
 | 
2255
 | 
 
 | 
 
 | 
  
2255
  
 | 
 
 | 
7857
 | 
 	my %self = ('raw'=>$_[1]);  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# The date is contained in square-brackets  | 
| 
346
 | 
2255
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
13084
 | 
 	if( $_[1] =~ s/\[([^\]]+)\]\s// ) {  | 
| 
347
 | 
2255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5969
 | 
 		$self{date} = $1;  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Change apache escaping back to URI escaping  | 
| 
350
 | 
2255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3874
 | 
 	$_[1] =~ s/\\x/\%/g;  | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Split the log file fields  | 
| 
353
 | 
2255
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6540
 | 
 	if($LINE_PARSER->parse($_[1])) {  | 
| 
354
 | 
2255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
63591
 | 
 		@self{@FIELDS} = $LINE_PARSER->fields;  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} else {  | 
| 
356
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		warn "Text::CSV_XS couldn't parse: " . $LINE_PARSER->error_input;  | 
| 
357
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Split the request  | 
| 
361
 | 
2255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41643
 | 
 	@self{qw(method page version)} = split / /, $self{'request'};  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Look up the IP if the log file contains hostnames  | 
| 
363
 | 
2255
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
10878
 | 
 	if( $self{'address'} !~ /\d$/ ) {  | 
| 
364
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		$self{'hostname'} = delete $self{'address'};  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  | 
| 
367
 | 
2255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10641
 | 
 	return bless \%self, $_[0];  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Logfile::EPrints::Hit::arXiv;  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Log file format is:  | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ADDRESS IDENTD_USERID USER_ID [DATE TIMEZONE] "request" HTTP_CODE RESPONSE_SIZE "referrer" "agent"  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # But can have unescaped quotes in the request or agent field (might be just uk mirror oddity)  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
376
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
72
 | 
 use strict;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
230
 | 
    | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
378
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
34
 | 
 use vars qw( @ISA );  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3203
 | 
    | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 @ISA = qw( Logfile::EPrints::Hit::Combined );  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
382
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
987
 | 
 	my ($class,$hit) = @_;  | 
| 
383
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my (%self, $rest);  | 
| 
384
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	$self{raw} = $hit;  | 
| 
385
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	(@self{qw(address userid_identd userid)},$rest) = split / /, $hit, 4;  | 
| 
386
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	$rest =~ s/^\[([^\]]+)\] //;  | 
| 
387
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	$self{date} = $1;  | 
| 
388
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	$rest =~ s/ (\d+) (\d+|-)(?= )//; # Chop code & size out of the middle  | 
| 
389
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	@self{qw(code size)} = ($1,$2);  | 
| 
390
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	$rest =~ s/^\"([A-Z]+) ([^ ]+) (HTTP\/1\.[01])\" //;  | 
| 
391
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	@self{qw(method page version)} = ($1,$2,$3);  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Apache replaces the % in URIs with \x  | 
| 
394
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	$self{page} =~ s/\\x/\%/g;  | 
| 
395
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	chop($self{page}) if substr($self{page},-1) eq '"';  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
397
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	$rest =~ s/^\"([^\"]+)\" \"(.+)\"$//;  | 
| 
398
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	@self{qw(referrer agent)} = ($1,$2);  | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Look up the IP if the log file contains hostnames  | 
| 
401
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	if( $self{'address'} !~ /\d$/ ) {  | 
| 
402
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 		$self{'hostname'} = delete $self{'address'};  | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
405
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	bless \%self, $class;  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Logfile::EPrints::Hit::Bracket;  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Logfile format is:  | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # host ident user_id [dd/mmm/yyyy:hh:mm:ss +zone] [User Agent|email?|?|referrer] "page" code size  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
414
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
38
 | 
 use strict;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1566
 | 
    | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @ISA = qw( Logfile::EPrints::Hit::Combined );  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
419
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
9
 | 
 	my( $class, $hit ) = @_;  | 
| 
420
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 	my %self = (raw => $hit);  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
422
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
164
 | 
 	@self{qw(  | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		hostname  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		userid_identd  | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		userid  | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		date  | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		agent  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		from  | 
| 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		process_time  | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		referrer  | 
| 
431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		method  | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		page  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		version  | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		code  | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		size  | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	)} = $hit =~ /([^ ]+) ([^ ]+) ([^ ]+) \[(.{26})\] \[(.+)\|([^\|]+)\|([^\|]+)\|([^\|]+)\] "([A-Z]+) ([^ ]+) (HTTP\/1\.[01])" (\d+) (\d+|-)/  | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		or return undef;  | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Is an IP address rather than hostname  | 
| 
440
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
21
 | 
 	if( $self{'hostname'} =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ ) {  | 
| 
441
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$self{'address'} = delete $self{'hostname'};  | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
444
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 	return bless \%self, $class;  | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  |