| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Regexp::Log::BlueCoat; |
|
2
|
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
157945
|
use strict; |
|
|
5
|
|
|
|
|
40
|
|
|
|
5
|
|
|
|
|
195
|
|
|
4
|
5
|
|
|
5
|
|
29
|
use Carp; |
|
|
5
|
|
|
|
|
10
|
|
|
|
5
|
|
|
|
|
479
|
|
|
5
|
5
|
|
|
5
|
|
5034
|
use Regexp::Log 0.01; |
|
|
5
|
|
|
|
|
8594
|
|
|
|
5
|
|
|
|
|
173
|
|
|
6
|
5
|
|
|
5
|
|
35
|
use base qw( Regexp::Log ); |
|
|
5
|
|
|
|
|
9
|
|
|
|
5
|
|
|
|
|
716
|
|
|
7
|
5
|
|
|
5
|
|
24
|
use vars qw( $VERSION %DEFAULT %FORMAT %REGEXP %UFS ); |
|
|
5
|
|
|
|
|
8
|
|
|
|
5
|
|
|
|
|
13026
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
$VERSION = 0.03; |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Regexp::Log::BlueCoat - A regexp builder to parse BlueCoat log files |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my $blue = Regexp::Log::BlueCoat->new( |
|
18
|
|
|
|
|
|
|
format => '%g %e %a %w/%s %b %m %i %u %H/%d %c', |
|
19
|
|
|
|
|
|
|
capture => [qw( host code )], |
|
20
|
|
|
|
|
|
|
); |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# the format() and capture() methods can be used to set or get |
|
23
|
|
|
|
|
|
|
$blue->format('%g %e %a %w/%s %b %m %i %u %H/%d %c %f %A'); |
|
24
|
|
|
|
|
|
|
$blue->capture(qw( host code )); |
|
25
|
|
|
|
|
|
|
$blue->ufs( 'smartfilter' ); |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# this is necessary to know in which order |
|
28
|
|
|
|
|
|
|
# we will receive the captured fields from the regex |
|
29
|
|
|
|
|
|
|
my @fields = $blue->capture; |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# the all-powerful capturing regex :-) |
|
32
|
|
|
|
|
|
|
my $re = $blue->regex; |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
while (<>) { |
|
35
|
|
|
|
|
|
|
my %data; |
|
36
|
|
|
|
|
|
|
@data{@fields} = /$re/; |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# do something with the fields |
|
39
|
|
|
|
|
|
|
} |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Regexp::Log::BlueCoat is a module that computes custom regular |
|
44
|
|
|
|
|
|
|
expressions to parse log files generated by the BlueCoat Systems |
|
45
|
|
|
|
|
|
|
I<Port 80 Security Appliance>. |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
See the Regexp::Log documentation for a description of the standard |
|
48
|
|
|
|
|
|
|
Regexp::Log interface. |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head2 Streaming media logs |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
This version of Regexp::Log::BlueCoat does not support streaming |
|
53
|
|
|
|
|
|
|
related logs. You will have to add the following line at the beginning |
|
54
|
|
|
|
|
|
|
of the log parsing loop in your scripts, if your BlueCoat appliance |
|
55
|
|
|
|
|
|
|
is configured to log those events. |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
next if /^(?:Windows_Media|<RealMedia>)/; |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
This may or may not be faster than having the regular expression generated |
|
60
|
|
|
|
|
|
|
by the regexp() method fail on each streaming log line. |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=cut |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# Building blocks shared by several of the field patterns below.
my $IP   = '\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}';
my $HOST = '[-.\\S]+';

# define the BlueCoat specific stuff
#
# %REGEXP maps each %-escape usable in a format string to the regular
# expression fragment that matches the corresponding log field.
# The (?#=name) ... (?#!name) comment pairs bracket a named field;
# presumably Regexp::Log turns them into capturing or non-capturing
# groups depending on capture() -- see the Regexp::Log documentation.
# An empty string marks an escape that Blue Coat documents as unused.
# Escapes that are documented but have no entry here are unsupported.
%REGEXP = (

    # %% - Denotes '%' character -
    '%%' => '%',

    # %a c-ip Client IP address. Yes
    '%a' => "(?#=c-ip)$IP(?#!c-ip)",

    # %b sc-bytes Number of bytes returned by the server (or the Cache). Yes
    '%b' => '(?#=sc-bytes)-|\\d+(?#!sc-bytes)',

    # %c cs (content-type) The type of object. Usually the MIME-type. No
    '%c' => '(?#=cs-content-type)-|UNKNOWN|\\S+(?:/\\S+)?(?#!cs-content-type)',

    # %d cs-supplier-name SUPPLIER NAME - Name or IP address of the server/cache from which the object was received. Yes
    '%d' => "(?#=cs-supplier-name)-|$HOST(?#!cs-supplier-name)",

    # %e time-taken Number of milliseconds request took to process. Yes
    '%e' => '(?#=time-taken)\\d+(?#!time-taken)',

    # %f sc-filter-category Filtering reason. Why it was denied (such as sex or business) No
    # this is handled in _postprocess()
    '%f' => '(?#=sc-filter-category)%f(?#!sc-filter-category)',

    # %g timestamp UNIX type timestamp. Yes
    '%g' => '(?#=timestamp)\\d+\\.\\d+(?#!timestamp)',

    # %h c-ip Client Hostname (uses IP to avoid reverse DNS) - same as %a Yes
    '%h' => "(?#=c-hostname)-|$HOST(?#!c-hostname)",

    # %i cs-uri The requested URI. Note: Web trends expects this to be only cs-uri-stem + cs-uri-query No
    '%i' => '(?#=cs-uri)-|\\S+://\\S+|.*?(?#!cs-uri)',

    # %j - [Not used.] -
    '%j' => '',

    # %l - Client Identification string. (User Login name remote). - always '-' Yes
    # %m cs-method HTTP method. HTTP methods include GET, PUT, POST, and so on. Yes
    '%m' =>
      '(?#=cs-method)-|OPTIONS|GET|HEAD|POST|PUT|DELETE|TRACE|CONNECT(?#!cs-method)',

    # %n - [Not used.] -
    '%n' => '',

    # %o - [Not used.] -
    '%o' => '',

    # %p r-port Port fetched from on host - origin server port Yes
    '%p' => '(?#=r-port)\\d+(?#!r-port)',

    # %q - [Not used.] -
    '%q' => '',

    # %r cs-request-line First line of the request No
    # %s sc-status The code returned by the cache to the client (HTTP code). Yes
    '%s' => '(?#=sc-status)\\d{1,4}(?#!sc-status)',

    # %t gmttime GMT date and time of the user request, in the format [DD/MM/YYYY:hh:mm:ss GMT] Yes
    # (the minute and second fields must each match two digits, like the
    # hour field; empty fields would never match a real timestamp)
    '%t' =>
      '(?#=gmttime)-|\\[(?#=gmtday)\\d\\d(?#!gmtday)/(?#=gmtmonth)\\d\\d(?#!gmtmonth)/(?#=gmtyear)\\d\\d\\d\\d(?#!gmtyear):(?#=gmthour)\\d\\d(?#!gmthour):(?#=gmtminute)\\d\\d(?#!gmtminute):(?#=gmtsecond)\\d\\d(?#!gmtsecond) GMT\\](?#!gmttime)',

    # %u cs-username Authenticated user ID. Yes
    # _preprocess() rewrites %u to %u-<login> when a login type is set.
    # NOTE(review): the unbalanced ')' looks deliberate -- it would make the
    # final regex fail to compile and show this message when no login type
    # was configured; confirm before "fixing" it.
    '%u' => '(?### You must define \'login\' to use %u in format ###))',

    # %v cs-host Name of host sourcing the object. Yes
    # %w s-action What type of action did the CM take to process this request. NOTE: 'cached' is used by ELFF but has int value. Yes
    '%w' =>
      '(?#=s-action)(?:TCP_(?:CLIENT_REFRESH|DENIED|ERR_MISS|HIT|M(?:EM_HIT|ISS)|NC_MISS|PARTIAL_MISS|REFRESH_(?:HIT|MISS)|S(?:PLASHED|WAPFAIL)|TUNNELED)?|UDP_(?:DENIED|HIT|INVALID|MISS(?:_NOFETCH)?)?)(?#!s-action)',

    # %x date Date in YYYY-MM-DD format Yes
    '%x' =>
      '(?#=date)(?#=year)\\d\\d\\d\\d(?#!year)-(?#=month)\\d\\d(?#!month)-(?#=day)\\d\\d(?#!day)(?#!date)',

    # %y time GMT time in HH:MM:SS format No
    '%y' =>
      '(?#=time)(?#=hour)\\d\\d(?#!hour):(?#=minute)\\d\\d(?#!minute):(?#=second)\\d\\d(?#!second)(?#!time)',

    # %z - [Not used.] -
    '%z' => '',

    # %A cs (user-agent) User agent No
    '%A' => '(?#=user-agent).*(?#!user-agent)',

    # %B cs-bytes The number of bytes received by the server Yes
    # (this key must be the upper-case '%B': a second '%b' entry would
    # silently replace the sc-bytes pattern above and leave %B undefined)
    '%B' => '(?#=cs-bytes)\\d+(?#!cs-bytes)',

    # %C cs (cookie) Cookie data No
    # %D s-supplier-ip SUPPLIER IP - IP address of server/cache from which the object was received. Yes
    # %E s-Policy-Message Policy enforcement message Yes
    # %F - [Not used.] -
    '%F' => '',

    # %G - [Not used.] -
    '%G' => '',

    # %H s-hierarchy How and where the object was retrieved from the cache hierarchy (DIRECT from the server, PARENT_HIT = from the parent cache, and so on) No
    '%H' =>
      '(?#=s-hierarchy)DIRECT|NONE|(?:PARENT|SIBLING)_HIT|FIRST_PARENT_MISS(?#!s-hierarchy)',

    # %I s-ip Server IP, the IP address of the server on which the log entry was generated Yes
    # %J - [Not used.] -
    '%J' => '',

    # %K - [Not used.] -
    '%K' => '',

    # %L localtime Local date and time of the user request in format: [DD/MMM/YYYY:hh:mm:ss +nnnn] Yes
    # NOTE(review): the documented format has a three-letter month (MMM) and
    # a signed offset, but this pattern only accepts a two-digit month and
    # a '+' offset -- confirm against real logs.
    '%L' =>
      '\\[(?#=localtime)(?#=localday)\\d\\d(?#!localday)/(?#=localmonth)\\d\\d(?#!localmonth)/(?#=localyear)\\d\\d\\d\\d(?#!localyear):(?#=localhour)\\d\\d(?#!localhour):(?#=localminute)\\d\\d(?#!localminute):(?#=localsecond)\\d\\d(?#!localsecond) \\+\\d\\d\\d\\d(?#!localtime)\\]',

    # %M - [Not used.] -
    '%M' => '',

    # %N s-computername Server name, the name of the server on which the log entry was generated Yes
    '%N' => "(?#=s-computername)$HOST(?#!s-computername)",

    # %O - [Not used.] -
    '%O' => '',

    # %P s-port Server port, the port number the client is connected to. Yes
    '%P' => '(?#=s-port)\\d+(?#!s-port)',

    # %Q cs-uri-query The URI query portion of the URL No
    # %R cs (Referer) Request referrer No
    # %S s-sitename Internet service and instance number running on client computer Yes
    # %T duration Elapsed time, seconds Yes
    '%T' => '(?#=duration)\\d+(?#!duration)',

    # %U cs-uri-stem Object path from request URL Yes
    # %V cs-version The protocol (HTTP, FTP) version used by the client. Yes
    # %W sc-filter-result UFS event (May differ between Websense or SmartFilter or others). No
    # this is handled in _postprocess() and is unsupported yet
    '%W' => '',

    # %X cs (X-Forwarded-For) The IP address of the device which sent the HTTP request. No
    # %Y - [Not used.] -
    '%Y' => '',

    # %Z - [Not used.] -
    '%Z' => '',

    # UFS specific
    # Smartfilter

    # Login specific
    '%u-username' => '(?#=cs-username)[-.\\w]+(?#!cs-username)',

    # every component of the DN, including the last one, may have an
    # attribute name of one or more letters (cn=..., dc=..., o=...)
    '%u-ldap' =>
      '(?#=cs-username)-|(?:[A-Za-z]+=[^,]*,)*[A-Za-z]+=[^,]*?(?#!cs-username)',
);
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=head1 METHODS |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
Regexp::Log::BlueCoat is a standard Regexp::Log object, and therefore |
|
221
|
|
|
|
|
|
|
supports all the standard Regexp::Log methods. |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Regexp::Log::BlueCoat's constructor accepts several BlueCoat specific |
|
224
|
|
|
|
|
|
|
arguments: |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
ufs - URL Filtering Service |
|
227
|
|
|
|
|
|
|
login - The type of username information |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
Note: Though BlueCoat supports SmartFilter, Websense and others, |
|
230
|
|
|
|
|
|
|
Regexp::Log::BlueCoat only supports the I<SmartFilter> UFS in this version. |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
The appropriate accessors are defined for them (if used to set, they |
|
233
|
|
|
|
|
|
|
return the new value for the attribute). |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=over 4 |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=item ufs( [$ufs] ) |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
Get/set the URL Filter System type (C<%f> and C<%W>). |
|
240
|
|
|
|
|
|
|
Only C<smartfilter> is supported in this version. |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=cut |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# Get/set accessor for the URL Filtering Service name stored in the
# object's 'ufs' slot.  With an argument, stores it first; always
# returns the (possibly updated) stored value.
sub ufs {
    my ( $self, @args ) = @_;

    if (@args) {
        $self->{ufs} = $args[0];
    }

    return $self->{ufs};
}
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=item ufs_category( category => string, [...] ) |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
This method lets you override the default category names in your UFS. |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
For example, I<SmartFilter> allows you to configure the names of the |
|
255
|
|
|
|
|
|
|
categories; Regexp::Log::BlueCoat supports the default category names, |
|
256
|
|
|
|
|
|
|
but lets you override them if needed. |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
The changes are applied to the object's current C<ufs>. |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
$log->ufs('smartfilter'); |
|
261
|
|
|
|
|
|
|
$log->ufs_category( hm => 'FunStuff' ); # change the Humor category |
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
See L</"URL FILTERING SYSTEMS"> for details about the category names. |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
When called without arguments, ufs_category() will return the whole |
|
266
|
|
|
|
|
|
|
category list for the instance. |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=item ufs_category( ufs_name, category => string, [...] ) |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
This method can also be called as a class method. |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
If you'd rather change the UFS category names for every |
|
273
|
|
|
|
|
|
|
Regexp::Log::BlueCoat that will be created, you can use the |
|
274
|
|
|
|
|
|
|
method as a class method. |
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
You'll need to tell ufs_category() on which UFS to apply these |
|
277
|
|
|
|
|
|
|
modifications. |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
Regexp::Log::BlueCoat->ufs_category( |
|
280
|
|
|
|
|
|
|
'smartfilter', |
|
281
|
|
|
|
|
|
|
hm => 'Fun', # change the Humor category |
|
282
|
|
|
|
|
|
|
mp => 'Music', # change the MP3 category |
|
283
|
|
|
|
|
|
|
); |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
These changes will be in effect for any new Regexp::Log::BlueCoat object you create. |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
When called with a single argument, ufs_category() will return the whole |
|
288
|
|
|
|
|
|
|
category list for the specified UFS for the class. |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=cut |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
# ufs_category - read or override UFS category names.
#
# Instance method:  $log->ufs_category( [ key => name, ... ] )
#   Works on the object's currently selected UFS ($self->{ufs}).
#   With key/name pairs, stores them as per-object overrides in
#   $self->{_ufs}; without arguments, returns the class-wide names from
#   %UFS overlaid with this object's overrides, as a key/name list.
#   Croaks when no UFS has been selected yet: the overrides would
#   otherwise be filed under an empty name and silently never be used.
#
# Class method:  CLASS->ufs_category( $ufs, [ key => name, ... ] )
#   With key/name pairs, changes the class-wide names in %UFS for $ufs;
#   with only $ufs, returns that UFS' key/name list (empty for a UFS
#   nothing has been defined for).  Croaks when $ufs is missing.
#
# The setter forms return the result of the hash-slice assignment, as
# they always did.
sub ufs_category {
    my $self = shift;

    # instance method
    if ( ref $self ) {
        my $ufs = $self->{ufs};
        croak "ufs_category(): no UFS selected, call ufs() first"
          unless defined $ufs and $ufs ne '';

        # getter: '|| {}' keeps an unknown UFS from dying on an undefined
        # hash reference, and from creating entries as a side effect
        return ( %{ $UFS{$ufs} || {} }, %{ $self->{_ufs}{$ufs} || {} } )
          unless @_;

        my %names = @_;
        return ( @{ $self->{_ufs}{$ufs} }{ keys %names } = values %names );
    }

    # class method
    my $ufs = shift;
    croak "ufs_category(): a UFS name is required when called as a class method"
      unless defined $ufs and $ufs ne '';

    return %{ $UFS{$ufs} || {} } unless @_;

    my %names = @_;
    return ( @{ $UFS{$ufs} }{ keys %names } = values %names );
}
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=item login() |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
Get/set the user login type (C<%u>). |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
This version supports C<username> (standard bareword) and C<ldap> |
|
321
|
|
|
|
|
|
|
(standard C<attr=value,...> form). |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=cut |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
# Get/set accessor for the login type stored in the object's 'login'
# slot (used by _preprocess() to pick the %u pattern).  With an
# argument, stores it first; always returns the stored value.
sub login {
    my ( $self, @args ) = @_;

    if (@args) {
        $self->{login} = $args[0];
    }

    return $self->{login};
}
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=back |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=head1 PREDEFINED FORMATS |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
Regexp::Log::BlueCoat supports several standard log formats. |
|
336
|
|
|
|
|
|
|
These can be set up by using their short name as the format string, |
|
337
|
|
|
|
|
|
|
with the format() method. |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
Description Name Format String |
|
340
|
|
|
|
|
|
|
----------- ---- ------------- |
|
341
|
|
|
|
|
|
|
Squid log format :squid %g %e %a %w/%s %b %m %i %u %H/%d %c |
|
342
|
|
|
|
|
|
|
NCSA common log format :clf %h %l %u %t "%r" %s %b |
|
343
|
|
|
|
|
|
|
NCSA extended log format :elf %h %l %u %L "%r" %s %b "%R" "%A" |
|
344
|
|
|
|
|
|
|
Microsoft IIS format :iis %a, -, %x, %y, %S, %N, %I, %e, %b, %B, %s, 0, %m, %U, - |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
=cut |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# Predefined log formats: maps the short name that can be given as a
# format string to the full %-escape format string it stands for.
%FORMAT = (
    q{:squid} => q{%g %e %a %w/%s %b %m %i %u %H/%d %c},
    q{:clf}   => q{%h %l %u %t "%r" %s %b},
    q{:elf}   => q{%h %l %u %L "%r" %s %b "%R" "%A"},
    q{:iis}   => q{%a, -, %x, %y, %S, %N, %I, %e, %b, %B, %s, 0, %m, %U, -},
);
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=head1 FIELDS |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=head2 Blue Coat custom format |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
Not all C<%>-escapes are supported in this version of Regexp::Log::BlueCoat. |
|
360
|
|
|
|
|
|
|
ELFF is not supported yet. |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
Multiple consecutive spaces in the format string are compressed to |
|
363
|
|
|
|
|
|
|
a single space. |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
The following list is straight from Blue Coat's documentation. |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
Name ELFF Description |
|
368
|
|
|
|
|
|
|
---- ---- ----------- |
|
369
|
|
|
|
|
|
|
% - Denotes an expansion field. |
|
370
|
|
|
|
|
|
|
%% - Denotes '%' character. |
|
371
|
|
|
|
|
|
|
%a c-ip Client IP address. |
|
372
|
|
|
|
|
|
|
%b sc-bytes Number of bytes returned by the server |
|
373
|
|
|
|
|
|
|
(or the Cache). |
|
374
|
|
|
|
|
|
|
%c cs (content-type) The type of object. Usually the MIME-type. |
|
375
|
|
|
|
|
|
|
%d cs-supplier-name SUPPLIER NAME - Name or IP address of the |
|
376
|
|
|
|
|
|
|
server/cache from which the object was received. |
|
377
|
|
|
|
|
|
|
%e time-taken Number of milliseconds request took to process. |
|
378
|
|
|
|
|
|
|
%f sc-filter-category Filtering reason. Why it was denied (such as |
|
379
|
|
|
|
|
|
|
sex or business) |
|
380
|
|
|
|
|
|
|
%g timestamp UNIX type timestamp. |
|
381
|
|
|
|
|
|
|
%h c-ip Client Hostname (uses IP to avoid reverse DNS) |
|
382
|
|
|
|
|
|
|
- same as %a |
|
383
|
|
|
|
|
|
|
%i cs-uri The requested URI. Note: Web trends expects |
|
384
|
|
|
|
|
|
|
this to be only cs-uri-stem + cs-uri-query |
|
385
|
|
|
|
|
|
|
%j - [Not used.] |
|
386
|
|
|
|
|
|
|
%l - Client Identification string. |
|
387
|
|
|
|
|
|
|
(User Login name remote). - always '-' |
|
388
|
|
|
|
|
|
|
%m cs-method HTTP method. HTTP methods include GET, PUT, |
|
389
|
|
|
|
|
|
|
POST, and so on. |
|
390
|
|
|
|
|
|
|
%n - [Not used.] |
|
391
|
|
|
|
|
|
|
%o - [Not used.] |
|
392
|
|
|
|
|
|
|
%p r-port Port fetched from on host - origin server port |
|
393
|
|
|
|
|
|
|
%q - [Not used.] |
|
394
|
|
|
|
|
|
|
%r cs-request-line First line of the request |
|
395
|
|
|
|
|
|
|
%s sc-status The code returned by the cache to the client |
|
396
|
|
|
|
|
|
|
(HTTP code). |
|
397
|
|
|
|
|
|
|
%t gmttime GMT date and time of the user request, in |
|
398
|
|
|
|
|
|
|
the format [DD/MM/YYYY:hh:mm:ss GMT] |
|
399
|
|
|
|
|
|
|
%u cs-username Authenticated user ID. |
|
400
|
|
|
|
|
|
|
%v cs-host Name of host sourcing the object. |
|
401
|
|
|
|
|
|
|
%w s-action What type of action did the CM take to process |
|
402
|
|
|
|
|
|
|
this request. NOTE: 'cached' is used by ELFF |
|
403
|
|
|
|
|
|
|
but has int value. |
|
404
|
|
|
|
|
|
|
%x date Date in YYYY-MM-DD format |
|
405
|
|
|
|
|
|
|
%y time GMT time in HH:MM:SS format |
|
406
|
|
|
|
|
|
|
%z - [Not used.] |
|
407
|
|
|
|
|
|
|
%A cs (user-agent) User agent |
|
408
|
|
|
|
|
|
|
%B cs-bytes The number of bytes received by the server |
|
409
|
|
|
|
|
|
|
%C cs (cookie) Cookie data |
|
410
|
|
|
|
|
|
|
%D s-supplier-ip SUPPLIER IP - IP address of server/cache from |
|
411
|
|
|
|
|
|
|
which the object was received. |
|
412
|
|
|
|
|
|
|
%E s-Policy-Message Policy enforcement message |
|
413
|
|
|
|
|
|
|
%F - [Not used.] |
|
414
|
|
|
|
|
|
|
%G - [Not used.] |
|
415
|
|
|
|
|
|
|
%H s-hierarchy How and where the object was retrieved from the |
|
416
|
|
|
|
|
|
|
cache hierarchy (DIRECT from the server, |
|
417
|
|
|
|
|
|
|
PARENT_HIT = from the parent cache, and so on) |
|
418
|
|
|
|
|
|
|
%I s-ip Server IP, the IP address of the server on which |
|
419
|
|
|
|
|
|
|
the log entry was generated |
|
420
|
|
|
|
|
|
|
%J - [Not used.] |
|
421
|
|
|
|
|
|
|
%K - [Not used.] |
|
422
|
|
|
|
|
|
|
%L localtime Local date and time of the user request in |
|
423
|
|
|
|
|
|
|
format: [DD/MMM/YYYY:hh:mm:ss +nnnn] |
|
424
|
|
|
|
|
|
|
%M - [Not used.] |
|
425
|
|
|
|
|
|
|
%N s-computername Server name, the name of the server on which |
|
426
|
|
|
|
|
|
|
the log entry was generated |
|
427
|
|
|
|
|
|
|
%O - [Not used.] |
|
428
|
|
|
|
|
|
|
%P s-port Server port, the port number the client is |
|
429
|
|
|
|
|
|
|
connected to. |
|
430
|
|
|
|
|
|
|
%Q cs-uri-query The URI query portion of the URL |
|
431
|
|
|
|
|
|
|
%R cs (Referer) Request referrer |
|
432
|
|
|
|
|
|
|
%S s-sitename Internet service and instance number running |
|
433
|
|
|
|
|
|
|
on client computer |
|
434
|
|
|
|
|
|
|
%T duration Elapsed time, seconds |
|
435
|
|
|
|
|
|
|
%U cs-uri-stem Object path from request URL |
|
436
|
|
|
|
|
|
|
%V cs-version The protocol (HTTP, FTP) version used by |
|
437
|
|
|
|
|
|
|
the client. |
|
438
|
|
|
|
|
|
|
%W sc-filter-result UFS event (May differ between Websense or |
|
439
|
|
|
|
|
|
|
SmartFilter or others). |
|
440
|
|
|
|
|
|
|
%X cs (X-Forwarded-For) The IP address of the device which sent |
|
441
|
|
|
|
|
|
|
the HTTP request. |
|
442
|
|
|
|
|
|
|
%Y - [Not used.] |
|
443
|
|
|
|
|
|
|
%Z - [Not used.] |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=head1 URL FILTERING SYSTEMS |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
The BlueCoat Systems Port 80 Security Appliance supports two URL Filtering |
|
448
|
|
|
|
|
|
|
Systems (UFS): I<SmartFilter> and I<Websense>. |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
Since I only had access to log files generated with a BlueCoat + SmartFilter |
|
451
|
|
|
|
|
|
|
combination, this version of Regexp::Log::BlueCoat only supports the I<SmartFilter> UFS. |
|
452
|
|
|
|
|
|
|
Patches welcome! |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=head2 SmartFilter |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
When C<ufs> is set to C<smartfilter>, the computed regular expression |
|
457
|
|
|
|
|
|
|
matches the default SmartFilter category names. These can be changed |
|
458
|
|
|
|
|
|
|
in SmartFilter's configuration (furthermore one can create one's own |
|
459
|
|
|
|
|
|
|
categories, with user-defined names). |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
So we need to be able to modify the category names, either in an |
|
462
|
|
|
|
|
|
|
object instance, or in class data (shared by all instances). |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
To compute a regular expression that matches your specific fields, there |
|
465
|
|
|
|
|
|
|
are several possibilities: |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=over 4 |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=item Make the changes in your object instance |
|
470
|
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
The method ufs_category() lets you replace any standard category by |
|
472
|
|
|
|
|
|
|
your own, and even add new "categories" (text that will be matched by |
|
473
|
|
|
|
|
|
|
the C<%f> fields). |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
These changes are valid for the object only. |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
See ufs_category() for details. |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=item Change the Regexp::Log::BlueCoat class itself |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
ufs_category() can be used as a class method. |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
One can also be adventurous and access %Regexp::Log::BlueCoat::UFS directly, |
|
484
|
|
|
|
|
|
|
but you'll need to read the source to understand the details. |
|
485
|
|
|
|
|
|
|
Here's an example: |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
$Regexp::Log::BlueCoat::UFS{smartfilter} = { simple => '[-\\w]+' }; |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=back |
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
I<SmartFilter> default categories are: |
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
Key Default value Category |
|
494
|
|
|
|
|
|
|
--- ------------- -------- |
|
495
|
|
|
|
|
|
|
sx "sex" Sex |
|
496
|
|
|
|
|
|
|
dr "drugs" Drugs |
|
497
|
|
|
|
|
|
|
hs "hate speech" Hate Speech |
|
498
|
|
|
|
|
|
|
cs "crim. skills" Criminal Skills |
|
499
|
|
|
|
|
|
|
nd "nudity" Nudity |
|
500
|
|
|
|
|
|
|
os "on-line sales" Online Sales |
|
501
|
|
|
|
|
|
|
gb "gambling" Gambling |
|
502
|
|
|
|
|
|
|
pp "personal pages" Personal Pages |
|
503
|
|
|
|
|
|
|
js "job search" Job Search |
|
504
|
|
|
|
|
|
|
sp "sports" Sports |
|
505
|
|
|
|
|
|
|
gm "games" Games |
|
506
|
|
|
|
|
|
|
hm "humor" Humor |
|
507
|
|
|
|
|
|
|
mp "MP3 sites" MP3 Sites |
|
508
|
|
|
|
|
|
|
et "entertainment" Entertainment |
|
509
|
|
|
|
|
|
|
ls "lifestyle" Lifestyle |
|
510
|
|
|
|
|
|
|
ex "extreme" Extreme |
|
511
|
|
|
|
|
|
|
ch "chat" Chat |
|
512
|
|
|
|
|
|
|
in "investing" Investing |
|
513
|
|
|
|
|
|
|
nw "general news" General News |
|
514
|
|
|
|
|
|
|
po "politics, opinion, religion" Politics, Opinion, Religion |
|
515
|
|
|
|
|
|
|
mm "dating" Dating |
|
516
|
|
|
|
|
|
|
ac "art/culture" Art/Culture |
|
517
|
|
|
|
|
|
|
na "usenet news access" Usenet News Access |
|
518
|
|
|
|
|
|
|
oc "cults/occult" Cults/Occult |
|
519
|
|
|
|
|
|
|
na "Usenet News" Usenet News |
|
520
|
|
|
|
|
|
|
sh "self help" Self-Help |
|
521
|
|
|
|
|
|
|
tr "travel" Travel |
|
522
|
|
|
|
|
|
|
mt "mature" Mature |
|
523
|
|
|
|
|
|
|
wm "webmail" Webmail |
|
524
|
|
|
|
|
|
|
ps "portal sites" Portal Sites |
|
525
|
|
|
|
|
|
|
an "anonymizer/translator" Anonymizer/Translator |
|
526
|
|
|
|
|
|
|
u0 "user defined category 0" First User-defined Category |
|
527
|
|
|
|
|
|
|
u1 "user defined category 1" Second User-defined Category |
|
528
|
|
|
|
|
|
|
u2 "user defined category 2" Third User-defined Category |
|
529
|
|
|
|
|
|
|
u3 "user defined category 3" Fourth User-defined Category |
|
530
|
|
|
|
|
|
|
u4 "user defined category 4" Fifth User-defined Category |
|
531
|
|
|
|
|
|
|
u5 "user defined category 5" Sixth User-defined Category |
|
532
|
|
|
|
|
|
|
u6 "user defined category 6" Seventh User-defined Category |
|
533
|
|
|
|
|
|
|
u7 "user defined category 7" Eighth User-defined Category |
|
534
|
|
|
|
|
|
|
u8 "user defined category 8" Ninth User-defined Category |
|
535
|
|
|
|
|
|
|
u9 "user defined category 9" Tenth User-defined Category |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Regexp::Log::BlueCoat adds the following three categories: |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
Key Default value Category |
|
540
|
|
|
|
|
|
|
--- ------------- -------- |
|
541
|
|
|
|
|
|
|
none "-" None |
|
542
|
|
|
|
|
|
|
uncategorized "uncategorized" Uncategorized |
|
543
|
|
|
|
|
|
|
filter_not_applied "content_filter_not_applied" Filter not applied |
|
544
|
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=head2 Websense |
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
I<Websense> is not supported yet. Patches and log file excerpts are |
|
548
|
|
|
|
|
|
|
welcome. |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=cut |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
# Class-wide category names, per URL Filtering Service.  Each inner hash
# maps a short category key to the text matched for that category;
# _postprocess() joins the values into the %f pattern and
# ufs_category() reads and updates them.
%UFS = (
    smartfilter => {

        # categories added by this module
        none               => '-',
        uncategorized      => 'uncategorized',
        filter_not_applied => 'content_filter_not_applied',

        # SmartFilter default categories
        sx => 'sex',
        dr => 'drugs',
        hs => 'hate speech',
        cs => 'crim. skills',
        nd => 'nudity',
        os => 'on-line sales',
        gb => 'gambling',
        pp => 'personal pages',
        js => 'job search',
        sp => 'sports',
        gm => 'games',
        hm => 'humor',
        mp => 'MP3 sites',
        et => 'entertainment',
        ls => 'lifestyle',
        ex => 'extreme',
        ch => 'chat',
        in => 'investing',
        nw => 'general news',
        po => 'politics, opinion, religion',
        mm => 'dating',
        ac => 'art/culture',
        oc => 'cults/occult',

        # NOTE(review): the key 'na' used to be listed twice, first as
        # 'usenet news access' and then as 'Usenet News'.  Only the later
        # value ever took effect, so only that one is kept here; if both
        # texts occur in logs, one of them needs a key of its own.
        na => 'Usenet News',

        sh => 'self help',
        tr => 'travel',
        mt => 'mature',
        wm => 'webmail',
        ps => 'portal sites',
        an => 'anonymizer/translator',

        # user-defined categories
        u0 => 'user defined category 0',
        u1 => 'user defined category 1',
        u2 => 'user defined category 2',
        u3 => 'user defined category 3',
        u4 => 'user defined category 4',
        u5 => 'user defined category 5',
        u6 => 'user defined category 6',
        u7 => 'user defined category 7',
        u8 => 'user defined category 8',
        u9 => 'user defined category 9',
    },

    # no category names known yet
    websense => {},
);

# Default attribute values: an empty format, nothing captured, no UFS,
# no login type, and one empty override table per UFS known to %UFS.
# NOTE(review): presumably Regexp::Log's constructor copies these into
# each new object; if that copy is shallow, the capture array and the
# _ufs tables are shared between objects -- verify in Regexp::Log.
%DEFAULT = (
    format  => '',
    capture => [],
    ufs     => '',
    login   => '',
    _ufs    => { map { ; $_ => {} } keys %UFS },
);
|
609
|
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
# _preprocess - adjust the format string held in $self->{_regexp}.
#
# When the object's login type is 'ldap' or 'username', every %u escape
# is rewritten to %u-ldap / %u-username, the keys of the login-specific
# patterns in %REGEXP.  Runs of spaces are then collapsed to one space.
# Any other login value (including none) leaves %u untouched.
#
# The last statement is still the space-collapsing substitution, so the
# value returned is unchanged.
sub _preprocess {
    my $self  = shift;
    my $login = $self->{login};

    # Login specific regexps
    # \A ... \z rather than ^ ... $: '$' also matches before a trailing
    # newline, which would let "ldap\n" through and inject the newline
    # into the format string.
    $self->{_regexp} =~ s/%u/%u-$login/g
      if defined $login && $login =~ /\A(?:ldap|username)\z/;

    # Multiple consecutive spaces are compressed to a single space
    $self->{_regexp} =~ s/ +/ /g;
}
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
# _postprocess - expand the %f placeholder left in $self->{_regexp}.
#
# When a UFS is selected, the class-wide category names from %UFS are
# overlaid with the object's own overrides from $self->{_ufs}, and every
# %f is replaced by the sorted names joined with '|'.  The names are
# inserted as they are, so a name may itself be a regexp fragment.
# Without a selected UFS the regexp is left alone.
#
# Croaks when the selected UFS is known neither to the class nor to the
# object; this used to die on an undefined hash reference instead.
sub _postprocess {
    my $self = shift;
    my $ufs  = $self->{ufs};

    # UFS specific regexps
    if ( defined $ufs and $ufs ne '' ) {
        my $class_names  = $UFS{$ufs};
        my $object_names = $self->{_ufs}{$ufs};
        croak "Unknown UFS '$ufs'"
          unless $class_names or $object_names;

        # later pairs win, so the object's overrides replace class names
        my %categories = ( %{ $class_names || {} }, %{ $object_names || {} } );
        my $categories = join '|', sort values %categories;
        $self->{_regexp} =~ s/%f/$categories/g;
    }
}
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
=head1 TODO |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
Support streaming logs: Windows Media and RealMedia. |
|
637
|
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
Support the W3C Extended Log File Format (ELFF), which is a subset of |
|
639
|
|
|
|
|
|
|
the Blue Coat format where each field is described using a text string. |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
Have a look at the entries that produce multi-line logs. |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
=head1 BUGS |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
Most of the development was done while I was trying to process |
|
646
|
|
|
|
|
|
|
logs created with the following format: |
|
647
|
|
|
|
|
|
|
C<%g %e %a %w/%s %b %m %i %u %H/%d %c %f %A>. |
|
648
|
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
Which means that the regular expressions that this module produces do not |
|
650
|
|
|
|
|
|
|
cover every possible format. |
|
651
|
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
If Regexp::Log::BlueCoat's regular expressions do not match some of the |
|
653
|
|
|
|
|
|
|
log that you are trying to munge, please use the F script |
|
654
|
|
|
|
|
|
|
and send the resulting file to me. |
|
655
|
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=head1 REFERENCES |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
Blue Coat Systems Port 80 Security Appliance, I<Configuration and Management
|
|
659
|
|
|
|
|
|
|
Guide>: http://www.bluecoat.com/downloads/manuals/BC_Config_Mgmt_Guide.pdf |
|
660
|
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
Secure Computing Smartfilter, I, |
|
662
|
|
|
|
|
|
|
version 3.1.2: http://www.securecomputing.com/pdf/SFConfig312_IC_RevE.pdf |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
=head1 THANKS |
|
665
|
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
Thanks to Jarkko Hietaniemi for Regex::PreSuf. |
|
667
|
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
=head1 AUTHOR |
|
669
|
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
Philippe 'BooK' Bruhat E<lt>book@cpan.orgE<gt>. |
|
671
|
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
=head1 LICENCE |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
This module is free software; you can redistribute it or modify it under |
|
675
|
|
|
|
|
|
|
the same terms as Perl itself. |
|
676
|
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=cut |
|
678
|
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
1; |