line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
package Mail::Graph; |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# Read mail mbox files (compressed or uncompressed), and generate a |
5
|
|
|
|
|
|
|
# statistic from it |
6
|
|
|
|
|
|
|
# (c) by Tels 2002. See http://bloodgate.com/spams/ for an example. |
7
|
|
|
|
|
|
|
|
8
|
2
|
|
|
2
|
|
37319
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
103
|
|
9
|
2
|
|
|
2
|
|
2333
|
use GD::Graph::lines; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
use GD::Graph::bars; |
11
|
|
|
|
|
|
|
use GD::Graph::colour; |
12
|
|
|
|
|
|
|
use GD::Graph::Data; |
13
|
|
|
|
|
|
|
use GD::Graph::Error; |
14
|
|
|
|
|
|
|
use Date::Calc |
15
|
|
|
|
|
|
|
qw/Delta_Days Date_to_Days Today_and_Now Today check_date |
16
|
|
|
|
|
|
|
Delta_YMDHMS Add_Delta_Days |
17
|
|
|
|
|
|
|
/; |
18
|
|
|
|
|
|
|
use Math::BigFloat lib => 'GMP'; |
19
|
|
|
|
|
|
|
use File::Spec; |
20
|
|
|
|
|
|
|
use Compress::Zlib; # for gzip file support |
21
|
|
|
|
|
|
|
use Time::HiRes; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
use vars qw/$VERSION/; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
$VERSION = '0.14'; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
BEGIN |
28
|
|
|
|
|
|
|
{ |
29
|
|
|
|
|
|
|
$| = 1; # buffer off |
30
|
|
|
|
|
|
|
} |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
my ($month_table,$dow_table); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub new |
35
|
|
|
|
|
|
|
{ |
36
|
|
|
|
|
|
|
my $class = shift; |
37
|
|
|
|
|
|
|
my $self = {}; |
38
|
|
|
|
|
|
|
bless $self, $class; |
39
|
|
|
|
|
|
|
$self->_init(@_); |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
sub _init |
43
|
|
|
|
|
|
|
{ |
44
|
|
|
|
|
|
|
my $self = shift; |
45
|
|
|
|
|
|
|
my $options = $_[0]; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
$options = { @_ } unless ref $options eq 'HASH'; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
$self->{_options} = $options; |
50
|
|
|
|
|
|
|
my $def = { |
51
|
|
|
|
|
|
|
input => 'archives', |
52
|
|
|
|
|
|
|
output => 'spams', |
53
|
|
|
|
|
|
|
items => 'spams', |
54
|
|
|
|
|
|
|
index => 'index/', |
55
|
|
|
|
|
|
|
height => 200, |
56
|
|
|
|
|
|
|
template => 'index.tpl', |
57
|
|
|
|
|
|
|
no_title => 0, |
58
|
|
|
|
|
|
|
filter_domains => [ ], |
59
|
|
|
|
|
|
|
filter_target => [ ], |
60
|
|
|
|
|
|
|
average => 7, |
61
|
|
|
|
|
|
|
average_daily => 14, |
62
|
|
|
|
|
|
|
graph_ext => 'png', |
63
|
|
|
|
|
|
|
first_date => undef, |
64
|
|
|
|
|
|
|
last_date => undef, |
65
|
|
|
|
|
|
|
valid_forwarders => undef, |
66
|
|
|
|
|
|
|
generate => { |
67
|
|
|
|
|
|
|
month => 1, |
68
|
|
|
|
|
|
|
yearly => 1, |
69
|
|
|
|
|
|
|
day => 1, |
70
|
|
|
|
|
|
|
daily => 1, |
71
|
|
|
|
|
|
|
dow => 1, |
72
|
|
|
|
|
|
|
monthly => 1, |
73
|
|
|
|
|
|
|
hour => 1, |
74
|
|
|
|
|
|
|
toplevel => 1, |
75
|
|
|
|
|
|
|
rule => 1, |
76
|
|
|
|
|
|
|
target => 1, |
77
|
|
|
|
|
|
|
domain => 1, |
78
|
|
|
|
|
|
|
last_x_days => 30, |
79
|
|
|
|
|
|
|
score_histogram => 5, |
80
|
|
|
|
|
|
|
score_daily => 60, |
81
|
|
|
|
|
|
|
score_scatter => 6, # limit is 6 |
82
|
|
|
|
|
|
|
}, |
83
|
|
|
|
|
|
|
}; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
foreach my $k (keys %$def) |
86
|
|
|
|
|
|
|
{ |
87
|
|
|
|
|
|
|
$options->{$k} = $def->{$k} unless exists $options->{$k}; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
# accept only valid options |
90
|
|
|
|
|
|
|
foreach my $k (keys %$options) |
91
|
|
|
|
|
|
|
{ |
92
|
|
|
|
|
|
|
die ("Unknown option '$k'") if !exists $def->{$k}; |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# try to create the output directory |
96
|
|
|
|
|
|
|
mkdir $options->{output} unless -d $options->{output}; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
$options->{input} .= '/' |
99
|
|
|
|
|
|
|
if -d $options->{input} && $options->{input} !~ /\/$/; |
100
|
|
|
|
|
|
|
$self->{error} = undef; |
101
|
|
|
|
|
|
|
$self->{error} = "input '$options->{input}' is neither directory nor file" |
102
|
|
|
|
|
|
|
if ((! -d $options->{input}) && (!-f $options->{input})); |
103
|
|
|
|
|
|
|
$self->{error} = "output '$options->{output}' is not a directory" |
104
|
|
|
|
|
|
|
if (! -d $options->{output}); |
105
|
|
|
|
|
|
|
return $self; |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub error |
109
|
|
|
|
|
|
|
{ |
110
|
|
|
|
|
|
|
my $self = shift; |
111
|
|
|
|
|
|
|
return $self->{error}; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub _process_mail |
115
|
|
|
|
|
|
|
{ |
116
|
|
|
|
|
|
|
# takes one mail text and processes it |
117
|
|
|
|
|
|
|
# It will take it apart and store it in an index cache, which can be written |
118
|
|
|
|
|
|
|
# out to an index file, which later can be reread |
119
|
|
|
|
|
|
|
my ($self,$mail) = @_; |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
my $cur = { |
122
|
|
|
|
|
|
|
target => 'unknown', |
123
|
|
|
|
|
|
|
domain => 'unknown', |
124
|
|
|
|
|
|
|
size => $mail->{size}, |
125
|
|
|
|
|
|
|
}; |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# split "From blah@bar.baz Datestring" |
128
|
|
|
|
|
|
|
if (!defined $mail->{header}->[0]) |
129
|
|
|
|
|
|
|
{ |
130
|
|
|
|
|
|
|
$cur->{invalid} = 'no_mail_header'; |
131
|
|
|
|
|
|
|
return $cur; |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
# skip replies of the mailer-daemon to non-existant addresses |
134
|
|
|
|
|
|
|
if ($mail->{header}->[0] =~ /MAILER-DAEMON/i) |
135
|
|
|
|
|
|
|
{ |
136
|
|
|
|
|
|
|
$cur->{invalid} = 'from_mailer_daemon'; |
137
|
|
|
|
|
|
|
return $cur; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
my ($a,$b,$c,$d); |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
if ($mail->{header}->[0] =~ |
143
|
|
|
|
|
|
|
/^From [<]?(.+?\@)([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})[>]? (.*)/) |
144
|
|
|
|
|
|
|
{ |
145
|
|
|
|
|
|
|
$cur->{from} = $1.$2; |
146
|
|
|
|
|
|
|
$cur->{toplevel} = 'undef'; |
147
|
|
|
|
|
|
|
$cur->{date} = $3; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
else |
150
|
|
|
|
|
|
|
{ |
151
|
|
|
|
|
|
|
$mail->{header}->[0] =~ /^From [<]?(.+?\@)([a-zA-Z0-9\-\.]+?)(\.[a-zA-Z]{2,4})[>]? (.*)/; |
152
|
|
|
|
|
|
|
$a = $1 || 'undef'; |
153
|
|
|
|
|
|
|
$b = $2 || 'undef'; |
154
|
|
|
|
|
|
|
$c = $3 || 'undef'; |
155
|
|
|
|
|
|
|
$d = $4 || 'undef'; |
156
|
|
|
|
|
|
|
$cur->{from} = $a.$b.$c; |
157
|
|
|
|
|
|
|
$cur->{date} = $d; |
158
|
|
|
|
|
|
|
$cur->{toplevel} = lc($c); |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
if (!defined $cur->{date}) |
161
|
|
|
|
|
|
|
{ |
162
|
|
|
|
|
|
|
$cur->{invalid} = 'invalid_date'; |
163
|
|
|
|
|
|
|
return $cur; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
($cur->{day},$cur->{month},$cur->{year}, |
167
|
|
|
|
|
|
|
$cur->{dow},$cur->{hour},$cur->{minute},$cur->{second},$cur->{offset}) |
168
|
|
|
|
|
|
|
= $self->_parse_date($cur->{date}); |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
if ((!defined $cur->{month}) || ($cur->{month} == 0)) |
171
|
|
|
|
|
|
|
{ |
172
|
|
|
|
|
|
|
$cur->{invalid} = 'invalid_month'; |
173
|
|
|
|
|
|
|
return $cur; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
if (! check_date($cur->{year},$cur->{month},$cur->{day})) |
176
|
|
|
|
|
|
|
{ |
177
|
|
|
|
|
|
|
$cur->{invalid} = 'invalid_date_check'; |
178
|
|
|
|
|
|
|
return $cur; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# Mktime() doesn't like these (they are probably forged, anyway) |
182
|
|
|
|
|
|
|
if ($cur->{year} < 1970 || $cur->{year} > 2038) |
183
|
|
|
|
|
|
|
{ |
184
|
|
|
|
|
|
|
$cur->{invalid} = 'before_1970_or_after_2038'; |
185
|
|
|
|
|
|
|
return $cur; |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# extract the filter rule that matched and also the SpamAssassin score |
189
|
|
|
|
|
|
|
my $filter_rule = $self->{_options}->{filter_rule} || 'X-Spamblock:'; |
190
|
|
|
|
|
|
|
foreach my $line (@{$mail->{header}}) |
191
|
|
|
|
|
|
|
{ |
192
|
|
|
|
|
|
|
chomp($line); |
193
|
|
|
|
|
|
|
if ($line =~ /^$filter_rule/i) |
194
|
|
|
|
|
|
|
{ |
195
|
|
|
|
|
|
|
my $rule = lc($line); $rule =~ s/^[A-Za-z0-9:\s-]+//; |
196
|
|
|
|
|
|
|
$rule =~ s/^(kill|bounce), //; |
197
|
|
|
|
|
|
|
$rule =~ s/^, caught by //; |
198
|
|
|
|
|
|
|
$rule =~ s/^by //; |
199
|
|
|
|
|
|
|
$rule =~ s/^rule //; |
200
|
|
|
|
|
|
|
$rule =~ s/^, //; |
201
|
|
|
|
|
|
|
push @{$cur->{rule}}, $rule if $rule !~ /^\s*$/; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
else |
204
|
|
|
|
|
|
|
{ |
205
|
|
|
|
|
|
|
next if $line !~ /^X-Spam-Status:/i; |
206
|
|
|
|
|
|
|
$line =~ /, hits=([0-9.]+)/; |
207
|
|
|
|
|
|
|
$cur->{score} = $1 || 0; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
($cur->{target}, $cur->{domain}) = |
212
|
|
|
|
|
|
|
$self->_extract_target($mail->{header}); |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
$cur; |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub _clear_index |
218
|
|
|
|
|
|
|
{ |
219
|
|
|
|
|
|
|
my $self = shift; |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
$self->{_index} = []; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
sub _index_mail |
225
|
|
|
|
|
|
|
{ |
226
|
|
|
|
|
|
|
my ($self,$cur) = @_; |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
push @{$self->{_index}}, $cur; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
sub _write_index |
232
|
|
|
|
|
|
|
{ |
233
|
|
|
|
|
|
|
# write the index file for archive $file |
234
|
|
|
|
|
|
|
my ($self,$file,$stats) = @_; |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
my $invalid = 0; |
237
|
|
|
|
|
|
|
# gather count of skipped mails |
238
|
|
|
|
|
|
|
foreach my $mail (@{$self->{_index}}) |
239
|
|
|
|
|
|
|
{ |
240
|
|
|
|
|
|
|
$invalid ++ if exists $mail->{invalid}; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
# get the filename alone, without directory et all |
244
|
|
|
|
|
|
|
my ($volume,$directories,$filename) = File::Spec->splitpath( $file ); |
245
|
|
|
|
|
|
|
my $index_file = |
246
|
|
|
|
|
|
|
File::Spec->catfile($self->{_options}->{index},$filename.'.idx.gz'); |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
unlink $index_file; # delete old version |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
my $gz = gzopen($index_file, "wb") |
251
|
|
|
|
|
|
|
or die "Cannot open $index_file: $gzerrno\n" ; |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
$gz->gzwrite( |
254
|
|
|
|
|
|
|
"# Mail::Graph mail index file\n" |
255
|
|
|
|
|
|
|
."# Automatically created on " |
256
|
|
|
|
|
|
|
. scalar localtime() . " by Mail::Graph v$VERSION\n" |
257
|
|
|
|
|
|
|
. "# To force re-indexing of $filename, delete this file.\n" |
258
|
|
|
|
|
|
|
. "items_skipped=$invalid\n" |
259
|
|
|
|
|
|
|
. "size_compressed=$stats->{stats}->{current_size_compressed}\n\n" ); |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
my $doc = ""; |
262
|
|
|
|
|
|
|
foreach my $mail (@{$self->{_index}}) |
263
|
|
|
|
|
|
|
{ |
264
|
|
|
|
|
|
|
# don't include invalid mail |
265
|
|
|
|
|
|
|
next if exists $mail->{invalid}; |
266
|
|
|
|
|
|
|
my $m = ""; |
267
|
|
|
|
|
|
|
foreach my $key (qw/ |
268
|
|
|
|
|
|
|
target size rule from score/) |
269
|
|
|
|
|
|
|
{ |
270
|
|
|
|
|
|
|
if (ref($mail->{$key}) eq 'ARRAY') |
271
|
|
|
|
|
|
|
{ |
272
|
|
|
|
|
|
|
foreach (@{$mail->{$key}}) |
273
|
|
|
|
|
|
|
{ |
274
|
|
|
|
|
|
|
$m .= "$key=$_\n" if ($_||'') ne ''; |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
else |
278
|
|
|
|
|
|
|
{ |
279
|
|
|
|
|
|
|
# $mail->{$key} = '' unless defined $mail->{$key}; |
280
|
|
|
|
|
|
|
$m .= "$key=$mail->{$key}\n" if ($mail->{$key} || '') ne ''; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
if (($mail->{invalid} || 0) == 0) |
284
|
|
|
|
|
|
|
{ |
285
|
|
|
|
|
|
|
eval { |
286
|
|
|
|
|
|
|
$m .= "date=" . Date::Calc::Mktime( |
287
|
|
|
|
|
|
|
$mail->{year}, |
288
|
|
|
|
|
|
|
$mail->{month}, |
289
|
|
|
|
|
|
|
$mail->{day}, |
290
|
|
|
|
|
|
|
$mail->{hour}, |
291
|
|
|
|
|
|
|
$mail->{minute}, |
292
|
|
|
|
|
|
|
$mail->{second}) . "\n"; |
293
|
|
|
|
|
|
|
}; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
# else |
296
|
|
|
|
|
|
|
# { |
297
|
|
|
|
|
|
|
# print join(' ', $mail->{year}, |
298
|
|
|
|
|
|
|
# $mail->{month}, |
299
|
|
|
|
|
|
|
# $mail->{day}, |
300
|
|
|
|
|
|
|
# $mail->{hour}, |
301
|
|
|
|
|
|
|
# $mail->{minute}, |
302
|
|
|
|
|
|
|
# $mail->{second}) . "\n"; |
303
|
|
|
|
|
|
|
# require Data::Dumper; print Data::Dumper::Dumper($mail),"\n"; |
304
|
|
|
|
|
|
|
# } |
305
|
|
|
|
|
|
|
if ($@ ne '') |
306
|
|
|
|
|
|
|
{ |
307
|
|
|
|
|
|
|
require Data::Dumper; print Data::Dumper::Dumper($mail),"\n"; |
308
|
|
|
|
|
|
|
die ($@); |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
$doc .= "$m\n"; |
311
|
|
|
|
|
|
|
if (length($doc) > 8192) |
312
|
|
|
|
|
|
|
{ |
313
|
|
|
|
|
|
|
$gz->gzwrite ( $doc ); $doc = ""; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
$gz->gzwrite ( $doc ) if $doc ne ''; |
317
|
|
|
|
|
|
|
$gz->gzclose(); |
318
|
|
|
|
|
|
|
$self; |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
sub _read_index |
322
|
|
|
|
|
|
|
{ |
323
|
|
|
|
|
|
|
# read index file $index (or for archive $file) and return list of indexed |
324
|
|
|
|
|
|
|
# mails; also reads global counts and applies (adds) them to $stats |
325
|
|
|
|
|
|
|
my ($self,$file,$stats) = @_; |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
$file .= '.idx' if $file !~ /\.idx(\.gz)?$/; |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
my $index_file = |
330
|
|
|
|
|
|
|
File::Spec->catfile($self->{_options}->{index},$file); |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
$index_file .= '.gz' if -f "$index_file.gz"; # prefer compressed version |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
# might be a bit slow to read in everything at once, but better than reading |
335
|
|
|
|
|
|
|
# the entire mail archive at once |
336
|
|
|
|
|
|
|
my $index = $self->_read_file($file); |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
my @lines = @{ _split ($index); }; |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
if ($lines[0] !~ /^# Mail::Graph mail index file/) |
341
|
|
|
|
|
|
|
{ |
342
|
|
|
|
|
|
|
warn ("$index_file doesn't look like a mail index, skipping"); |
343
|
|
|
|
|
|
|
return (); |
344
|
|
|
|
|
|
|
} |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
# read the "header" lines, e.g. the lines with global parameters |
347
|
|
|
|
|
|
|
my $line_nr = 0; |
348
|
|
|
|
|
|
|
foreach my $line (@lines) |
349
|
|
|
|
|
|
|
{ |
350
|
|
|
|
|
|
|
$line_nr++; |
351
|
|
|
|
|
|
|
chomp($line); |
352
|
|
|
|
|
|
|
next if $line =~ /^#/; # skip comments |
353
|
|
|
|
|
|
|
last if $line =~ /^\s*$/; # end at first empty line |
354
|
|
|
|
|
|
|
if ($line !~ /^([A-Za-z0-9_-]+)=([0-9]+)\s*/) |
355
|
|
|
|
|
|
|
{ |
356
|
|
|
|
|
|
|
warn ("malformed header line in index $index_file at line $line_nr"); |
357
|
|
|
|
|
|
|
return (); |
358
|
|
|
|
|
|
|
} |
359
|
|
|
|
|
|
|
my $name = $1; |
360
|
|
|
|
|
|
|
my $value = $2; |
361
|
|
|
|
|
|
|
$stats->{stats}->{$name} += $value; |
362
|
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
splice @lines, 0, $line_nr; # remove first N lines |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
my $cur = {}; |
367
|
|
|
|
|
|
|
foreach my $line (@lines) |
368
|
|
|
|
|
|
|
{ |
369
|
|
|
|
|
|
|
$line_nr++; |
370
|
|
|
|
|
|
|
chomp($line); |
371
|
|
|
|
|
|
|
next if $line =~ /^#/; # skip comments |
372
|
|
|
|
|
|
|
if ($line =~ /^\s*$/) # next mail at empty line |
373
|
|
|
|
|
|
|
{ |
374
|
|
|
|
|
|
|
# disassemble the date field into the parts again |
375
|
|
|
|
|
|
|
($cur->{year},$cur->{month},$cur->{day}, |
376
|
|
|
|
|
|
|
$cur->{hour},$cur->{minute},$cur->{second}, |
377
|
|
|
|
|
|
|
$cur->{doy},$cur->{dow},$cur->{dst}) = |
378
|
|
|
|
|
|
|
Date::Calc::Localtime($cur->{date}); |
379
|
|
|
|
|
|
|
# extract the target domain from the target field |
380
|
|
|
|
|
|
|
$cur->{domain} = $cur->{target}; |
381
|
|
|
|
|
|
|
$cur->{domain} =~ /\@((.+?)\.(.+))$/; $cur->{domain} = $1 || 'unknown'; |
382
|
|
|
|
|
|
|
# get the toplevel from target |
383
|
|
|
|
|
|
|
$cur->{toplevel} = $cur->{target}; |
384
|
|
|
|
|
|
|
$cur->{toplevel} =~ /(\.[^.]+)$/; |
385
|
|
|
|
|
|
|
$cur->{toplevel} = $1 || 'unknown'; |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# remember this mail and move to the next one |
388
|
|
|
|
|
|
|
push @{$self->{_index}}, $cur; |
389
|
|
|
|
|
|
|
$cur = {}; |
390
|
|
|
|
|
|
|
next; |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
if ($line !~ /^([A-Za-z0-9_-]+)=(.*)\s*/) |
393
|
|
|
|
|
|
|
{ |
394
|
|
|
|
|
|
|
warn ("malformed line in index $index_file at line $line_nr"); |
395
|
|
|
|
|
|
|
warn ("line '$line'"); |
396
|
|
|
|
|
|
|
return (); |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
my $name = $1; my $value = $2 || ''; |
399
|
|
|
|
|
|
|
if ($name eq 'rule') |
400
|
|
|
|
|
|
|
{ |
401
|
|
|
|
|
|
|
# create array, but don't push empty values |
402
|
|
|
|
|
|
|
$cur->{rule} = [] unless exists $cur->{rule}; |
403
|
|
|
|
|
|
|
push @{$cur->{rule}}, $value if $value ne ''; |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
else |
406
|
|
|
|
|
|
|
{ |
407
|
|
|
|
|
|
|
$cur->{$1} = $2 || ''; |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
return $self; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
sub _merge_mail |
415
|
|
|
|
|
|
|
{ |
416
|
|
|
|
|
|
|
# take on mail in HASH format (read from index or processed from mail text) |
417
|
|
|
|
|
|
|
# and merge it in into $stats. $first is an optional first date, anything |
418
|
|
|
|
|
|
|
# earlier is discarded as invalid. |
419
|
|
|
|
|
|
|
my ($self,$cur,$stats,$now,$first) = @_; |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
$cur->{invalid} = $cur->{invalid} || ''; |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
if ($cur->{invalid} ne '') |
424
|
|
|
|
|
|
|
{ |
425
|
|
|
|
|
|
|
$stats->{reasons}->{$cur->{invalid}}++; |
426
|
|
|
|
|
|
|
$stats->{stats}->{items_skipped}++; return; |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
# shortcut |
430
|
|
|
|
|
|
|
my ($year,$month,$day) = ($cur->{year}, $cur->{month}, $cur->{day}); |
431
|
|
|
|
|
|
|
my ($hour,$minute,$second) = ($cur->{hour}, $cur->{minute}, $cur->{second}); |
432
|
|
|
|
|
|
|
my ($dow) = $cur->{dow}; |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
if (!defined $year || !defined $month || !defined $day) |
435
|
|
|
|
|
|
|
{ |
436
|
|
|
|
|
|
|
# huh? |
437
|
|
|
|
|
|
|
$stats->{reasons}->{invalid_date}++; |
438
|
|
|
|
|
|
|
$stats->{stats}->{items_skipped}++; |
439
|
|
|
|
|
|
|
return; |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
# mail is earlier than first_date? |
443
|
|
|
|
|
|
|
if (defined $first) |
444
|
|
|
|
|
|
|
{ |
445
|
|
|
|
|
|
|
my $delta = |
446
|
|
|
|
|
|
|
Delta_Days($first->[0],$first->[1],$first->[2],$year,$month,$day); |
447
|
|
|
|
|
|
|
if ($delta < 0) |
448
|
|
|
|
|
|
|
{ |
449
|
|
|
|
|
|
|
# too early |
450
|
|
|
|
|
|
|
$stats->{reasons}->{too_early}++; |
451
|
|
|
|
|
|
|
$stats->{stats}->{items_skipped}++; |
452
|
|
|
|
|
|
|
return; |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
# mail is newer than last_date (or today)? |
457
|
|
|
|
|
|
|
my $delta = Delta_Days($year,$month,$day,$now->[0],$now->[1],$now->[2]); |
458
|
|
|
|
|
|
|
if ($delta < 0) |
459
|
|
|
|
|
|
|
{ |
460
|
|
|
|
|
|
|
# mail newer |
461
|
|
|
|
|
|
|
$stats->{stats}->{items_skipped}++; |
462
|
|
|
|
|
|
|
$stats->{reasons}->{too_new}++; |
463
|
|
|
|
|
|
|
return; |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
$stats->{stats}->{items_processed}++; |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
$stats->{target}->{$cur->{target}}++; |
469
|
|
|
|
|
|
|
$stats->{domain}->{$cur->{domain}}++; |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
# XXX TODO include check for valid target domain |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
my ($D_y,$D_m,$D_d, $Dh,$Dm,$Ds) = |
474
|
|
|
|
|
|
|
Delta_YMDHMS($year,$month,$day,$hour,$minute,$second, @$now); |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
$stats->{stats}->{last_24_hours}++ |
477
|
|
|
|
|
|
|
if ($D_y == 0 && $D_m == 0 && $D_d == 0 && $Dh < 24); |
478
|
|
|
|
|
|
|
$stats->{stats}->{last_7_days}++ if $delta <= 7; |
479
|
|
|
|
|
|
|
$stats->{stats}->{last_30_days}++ if $delta <= 30; |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
$stats->{month}->{$year}->[$month-1]++; |
482
|
|
|
|
|
|
|
$stats->{hour}->{$year}->[$hour]++ if $hour >= 0 && $hour <= 23; |
483
|
|
|
|
|
|
|
$stats->{dow}->{$year}->[$dow-1]++; |
484
|
|
|
|
|
|
|
$stats->{day}->{$year}->[$day-1]++; |
485
|
|
|
|
|
|
|
$stats->{yearly}->{$year}++; |
486
|
|
|
|
|
|
|
$stats->{monthly}->{"$month/$year"}++; |
487
|
|
|
|
|
|
|
$stats->{daily}->{"$day/$month/$year"}++; |
488
|
|
|
|
|
|
|
my $l = $self->{_options}->{generate}->{last_x_days} || 0; |
489
|
|
|
|
|
|
|
if ($l > 0 && $delta <= $l && $delta > 0) |
490
|
|
|
|
|
|
|
{ |
491
|
|
|
|
|
|
|
$stats->{last_x_days}->{"$day/$month/$year"}++; |
492
|
|
|
|
|
|
|
} |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
foreach my $rule (@{$cur->{rule}}) |
495
|
|
|
|
|
|
|
{ |
496
|
|
|
|
|
|
|
$stats->{rule}->{$rule}++; |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
# SpamAssassing or other score |
500
|
|
|
|
|
|
|
$cur->{score} = 0 if !defined $cur->{score}; |
501
|
|
|
|
|
|
|
# for scatter diagram (score_daily is just a limited scatter diagram) |
502
|
|
|
|
|
|
|
$stats->{score_daily}->{"$day/$month/$year"}->{$cur->{score}}++; |
503
|
|
|
|
|
|
|
# for histogram |
504
|
|
|
|
|
|
|
my $s = $self->{_options}->{generate}->{score_histogram}; |
505
|
|
|
|
|
|
|
if ($s > 0) |
506
|
|
|
|
|
|
|
{ |
507
|
|
|
|
|
|
|
$cur->{score} = $cur->{score} || 0; |
508
|
|
|
|
|
|
|
$cur->{score} = 10000 if $cur->{score} > 10000; # hard limit |
509
|
|
|
|
|
|
|
if ($cur->{score} > 0) # uh? |
510
|
|
|
|
|
|
|
{ |
511
|
|
|
|
|
|
|
my $s = int($cur->{score} / int($s)) * int($s); # normalize to steps |
512
|
|
|
|
|
|
|
$stats->{score_histogram}->{$s} ++; |
513
|
|
|
|
|
|
|
$stats->{stats}->{max_score} = $s if $s > $stats->{stats}->{max_score}; |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
$stats->{stats}->{size_uncompressed} += $cur->{size}; |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
$stats->{toplevel}->{$cur->{toplevel}}++; |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
sub generate |
523
|
|
|
|
|
|
|
{ |
524
|
|
|
|
|
|
|
my $self = shift; |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
return $self if defined $self->{error}; |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
# for stats: |
529
|
|
|
|
|
|
|
my $stats = { |
530
|
|
|
|
|
|
|
reasons => {} , # reasons for invalid skips |
531
|
|
|
|
|
|
|
start_time => Time::HiRes::time() }; |
532
|
|
|
|
|
|
|
foreach my $k ( |
533
|
|
|
|
|
|
|
qw/toplevel date month dow day yearly monthly daily rule target domain |
534
|
|
|
|
|
|
|
hour score_histogram score_daily score_scatter/) |
535
|
|
|
|
|
|
|
{ |
536
|
|
|
|
|
|
|
$stats->{$k} = {}; |
537
|
|
|
|
|
|
|
} |
538
|
|
|
|
|
|
|
foreach my $k (qw/ |
539
|
|
|
|
|
|
|
items_proccessed items_skipped last_30_days last_7_days last_24_hours |
540
|
|
|
|
|
|
|
size_compressed size_uncompressed max_score |
541
|
|
|
|
|
|
|
/) |
542
|
|
|
|
|
|
|
{ |
543
|
|
|
|
|
|
|
$stats->{stats}->{$k} = 0; |
544
|
|
|
|
|
|
|
} |
545
|
|
|
|
|
|
|
my @files = $self->_gather_files($stats); |
546
|
|
|
|
|
|
|
my $id = 0; my @mails; |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
my $first; |
549
|
|
|
|
|
|
|
my $now = [ Today_and_Now() ]; # [year,month,day,...] |
550
|
|
|
|
|
|
|
if (defined $self->{_options}->{last_date}) |
551
|
|
|
|
|
|
|
{ |
552
|
|
|
|
|
|
|
($now->[0],$now->[1],$now->[2]) = split '-',$self->{_options}->{last_date}; |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
print "Last valid date is $now->[0]",'-',$now->[1],'-',$now->[2],"\n"; |
555
|
|
|
|
|
|
|
if (defined $self->{_options}->{first_date}) |
556
|
|
|
|
|
|
|
{ |
557
|
|
|
|
|
|
|
$first = [ split ('-',$self->{_options}->{first_date}) ]; |
558
|
|
|
|
|
|
|
print "First date is $first->[0]",'-',$first->[1],'-',$first->[2],"\n"; |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
foreach my $file (sort @files) |
562
|
|
|
|
|
|
|
{ |
563
|
|
|
|
|
|
|
print "At file $file\n"; |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
# if index file exists, use it. Otherwise process archive and create index |
566
|
|
|
|
|
|
|
# at the same time |
567
|
|
|
|
|
|
|
$self->_clear_index(); # empty internal index |
568
|
|
|
|
|
|
|
if ($file =~ /\.(idx|idx\.gz)$/) |
569
|
|
|
|
|
|
|
{ |
570
|
|
|
|
|
|
|
$self->_read_index($file,$stats); |
571
|
|
|
|
|
|
|
foreach my $cur (@{$self->{_index}}) |
572
|
|
|
|
|
|
|
{ |
573
|
|
|
|
|
|
|
$self->_merge_mail($cur,$stats,$now,$first); # merge into $stats |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
} |
576
|
|
|
|
|
|
|
else |
577
|
|
|
|
|
|
|
{ |
578
|
|
|
|
|
|
|
# gather and merge mails into the current stats |
579
|
|
|
|
|
|
|
$self->_gather_mails($file,\$id,$stats,$now,$first); |
580
|
|
|
|
|
|
|
$self->_write_index($file,$stats); # write index for that archive |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
$self->_clear_index(); # empty to save mem |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
my $what = $self->{_options}->{items}; |
586
|
|
|
|
|
|
|
my $h = $self->{_options}->{height}; |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
# adjust the width of the toplevel stat, so that it doesn't look to broad |
589
|
|
|
|
|
|
|
my $w = (scalar keys %{$stats->{toplevel}}) * 30; $w = 1020 if $w > 1020; |
590
|
|
|
|
|
|
|
$self->_graph ($stats,'toplevel', $w, $h, { |
591
|
|
|
|
|
|
|
title => "$what/top-level domain", |
592
|
|
|
|
|
|
|
x_label => 'top-level domain', |
593
|
|
|
|
|
|
|
bar_spacing => 3, |
594
|
|
|
|
|
|
|
show_values => 1, |
595
|
|
|
|
|
|
|
values_vertical => 1, |
596
|
|
|
|
|
|
|
}, |
597
|
|
|
|
|
|
|
undef,0,$now, |
598
|
|
|
|
|
|
|
); |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
$self->_graph ($stats,'month', 400, $h, { |
601
|
|
|
|
|
|
|
title => "$what/month", |
602
|
|
|
|
|
|
|
x_label => 'month', |
603
|
|
|
|
|
|
|
x_labels_vertical => 0, |
604
|
|
|
|
|
|
|
bar_spacing => 6, |
605
|
|
|
|
|
|
|
cumulate => 1, |
606
|
|
|
|
|
|
|
}, |
607
|
|
|
|
|
|
|
\&_num_to_month, |
608
|
|
|
|
|
|
|
0,$now, |
609
|
|
|
|
|
|
|
); |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
$self->_graph ($stats,'hour', 800, $h, { |
612
|
|
|
|
|
|
|
title => "$what/hour", |
613
|
|
|
|
|
|
|
x_label => 'hour', |
614
|
|
|
|
|
|
|
x_labels_vertical => 0, |
615
|
|
|
|
|
|
|
bar_spacing => 6, |
616
|
|
|
|
|
|
|
cumulate => 1, |
617
|
|
|
|
|
|
|
}, |
618
|
|
|
|
|
|
|
undef,0,$now, |
619
|
|
|
|
|
|
|
); |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
$self->_graph ($stats,'dow', 300, $h, { |
622
|
|
|
|
|
|
|
title => "$what/day", |
623
|
|
|
|
|
|
|
x_label => 'day of the week', |
624
|
|
|
|
|
|
|
x_labels_vertical => 0, |
625
|
|
|
|
|
|
|
bar_spacing => 6, |
626
|
|
|
|
|
|
|
cumulate => 1, |
627
|
|
|
|
|
|
|
}, |
628
|
|
|
|
|
|
|
\&_num_to_dow, |
629
|
|
|
|
|
|
|
0,$now, |
630
|
|
|
|
|
|
|
); |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
$self->_graph ($stats,'day', 800, $h, { |
633
|
|
|
|
|
|
|
title => "$what/day", |
634
|
|
|
|
|
|
|
x_label => 'day of the month', |
635
|
|
|
|
|
|
|
x_labels_vertical => 0, |
636
|
|
|
|
|
|
|
bar_spacing => 4, |
637
|
|
|
|
|
|
|
cumulate => 1, |
638
|
|
|
|
|
|
|
}, |
639
|
|
|
|
|
|
|
undef,0,$now, |
640
|
|
|
|
|
|
|
); |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
# adjust the width of the yearly stat, so that it doesn't look to broad |
643
|
|
|
|
|
|
|
$w = (scalar keys %{$stats->{yearly}}) * 50; $w = 600 if $w > 600; |
644
|
|
|
|
|
|
|
$self->_graph ($stats,'yearly', $w, $h, { |
645
|
|
|
|
|
|
|
title => "$what/year", |
646
|
|
|
|
|
|
|
x_label => 'year', |
647
|
|
|
|
|
|
|
x_labels_vertical => 0, |
648
|
|
|
|
|
|
|
bar_spacing => 8, |
649
|
|
|
|
|
|
|
show_values => 1, |
650
|
|
|
|
|
|
|
}, |
651
|
|
|
|
|
|
|
undef, |
652
|
|
|
|
|
|
|
2, # do linear plus last 60 days prediction |
653
|
|
|
|
|
|
|
$now, |
654
|
|
|
|
|
|
|
); |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
# adjust the width of the monthly stat, so that it doesn't look to broad |
657
|
|
|
|
|
|
|
$w = (scalar keys %{$stats->{monthly}}) * 30; |
658
|
|
|
|
|
|
|
$w = 800 if $w > 800; |
659
|
|
|
|
|
|
|
$w = 160 if $w < 160; # min width due to long "prediction for this month" txt |
660
|
|
|
|
|
|
|
$self->_graph ($stats,'monthly', $w, $h, { |
661
|
|
|
|
|
|
|
title => "$what/month", |
662
|
|
|
|
|
|
|
x_label => 'month', |
663
|
|
|
|
|
|
|
x_labels_vertical => 1, |
664
|
|
|
|
|
|
|
bar_spacing => 2, |
665
|
|
|
|
|
|
|
}, |
666
|
|
|
|
|
|
|
\&_year_month_to_num, |
667
|
|
|
|
|
|
|
1, # do prediction |
668
|
|
|
|
|
|
|
$now, |
669
|
|
|
|
|
|
|
); |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
# adjust the width of the rule stat, so that it doesn't look to broad |
672
|
|
|
|
|
|
|
$w = (scalar keys %{$stats->{rule}}) * 30; $w = 800 if $w > 800; |
673
|
|
|
|
|
|
|
# go trough the rule data and create a percentage |
674
|
|
|
|
|
|
|
$self->_add_percentage($stats,'rule'); |
675
|
|
|
|
|
|
|
# need more height for long rule names |
676
|
|
|
|
|
|
|
$self->_graph ($stats,'rule', $w, $h + 200, { |
677
|
|
|
|
|
|
|
title => "$what/rule", |
678
|
|
|
|
|
|
|
x_label => 'rule', |
679
|
|
|
|
|
|
|
x_labels_vertical => 1, |
680
|
|
|
|
|
|
|
bar_spacing => 2, |
681
|
|
|
|
|
|
|
show_values => 1, |
682
|
|
|
|
|
|
|
values_vertical => 1, |
683
|
|
|
|
|
|
|
}, |
684
|
|
|
|
|
|
|
undef, |
685
|
|
|
|
|
|
|
undef,0,$now, |
686
|
|
|
|
|
|
|
); |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
# adjust the width of the target stat, so that it doesn't look to broad |
689
|
|
|
|
|
|
|
$w = (scalar keys %{$stats->{target}}) * 30; $w = 800 if $w > 800; |
690
|
|
|
|
|
|
|
$self->_add_percentage($stats,'target'); |
691
|
|
|
|
|
|
|
# need more height for long target names |
692
|
|
|
|
|
|
|
$self->_graph ($stats, 'target', $w, $h + 320, { |
693
|
|
|
|
|
|
|
title => "$what/address", |
694
|
|
|
|
|
|
|
x_label => 'target address', |
695
|
|
|
|
|
|
|
x_labels_vertical => 1, |
696
|
|
|
|
|
|
|
bar_spacing => 2, |
697
|
|
|
|
|
|
|
show_values => 1, |
698
|
|
|
|
|
|
|
values_vertical => 1, |
699
|
|
|
|
|
|
|
}, |
700
|
|
|
|
|
|
|
undef,0,$now, |
701
|
|
|
|
|
|
|
); |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
# adjust the width of the domain stat, so that it doesn't look to broad |
704
|
|
|
|
|
|
|
$w = (scalar keys %{$stats->{domain}}) * 50; $w = 800 if $w > 800; |
705
|
|
|
|
|
|
|
$self->_add_percentage($stats,'domain'); |
706
|
|
|
|
|
|
|
# need more height for long domain names |
707
|
|
|
|
|
|
|
$self->_graph ($stats, 'domain', $w, $h + 120, { |
708
|
|
|
|
|
|
|
title => "$what/domain", |
709
|
|
|
|
|
|
|
x_label => 'target domain', |
710
|
|
|
|
|
|
|
x_labels_vertical => 1, |
711
|
|
|
|
|
|
|
bar_spacing => 4, |
712
|
|
|
|
|
|
|
show_values => 1, |
713
|
|
|
|
|
|
|
values_vertical => 1, |
714
|
|
|
|
|
|
|
long_ticks => 0, |
715
|
|
|
|
|
|
|
}, |
716
|
|
|
|
|
|
|
undef,0,$now, |
717
|
|
|
|
|
|
|
); |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
my $l = $self->{_options}->{generate}->{last_x_days} || 0; |
720
|
|
|
|
|
|
|
if ($l > 0) |
721
|
|
|
|
|
|
|
{ |
722
|
|
|
|
|
|
|
$stats->{last_x_days} = $self->_average($stats->{last_x_days}); |
723
|
|
|
|
|
|
|
# adjust the width of the stat, so that it doesn't look to broad |
724
|
|
|
|
|
|
|
$w = $l * 50; $w = 800 if $w > 800; |
725
|
|
|
|
|
|
|
$self->_graph ($stats, ['last_x_days','daily'], $w, $h, { |
726
|
|
|
|
|
|
|
title => "$what/day", |
727
|
|
|
|
|
|
|
x_label => 'day', |
728
|
|
|
|
|
|
|
x_labels_vertical => 1, |
729
|
|
|
|
|
|
|
bar_spacing => 4, |
730
|
|
|
|
|
|
|
long_ticks => 0, |
731
|
|
|
|
|
|
|
type => 'lines', |
732
|
|
|
|
|
|
|
}, |
733
|
|
|
|
|
|
|
\&_year_month_day_to_num, |
734
|
|
|
|
|
|
|
0,$now, |
735
|
|
|
|
|
|
|
); |
736
|
|
|
|
|
|
|
} |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
# calculate how many entries we must skip to have a sensible amount of them |
739
|
|
|
|
|
|
|
my $skip = scalar keys %{$stats->{daily}}; |
740
|
|
|
|
|
|
|
$skip = int($skip / 82); $skip = 1 if $skip < 1; |
741
|
|
|
|
|
|
|
$stats->{daily} = $self->_average($stats->{daily}, |
742
|
|
|
|
|
|
|
$self->{_options}->{average_daily}); |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
$self->_graph ($stats,'daily', 900, $h + 50, { |
745
|
|
|
|
|
|
|
title => "$what/day", |
746
|
|
|
|
|
|
|
x_label => 'date', |
747
|
|
|
|
|
|
|
x_labels_vertical => 1, |
748
|
|
|
|
|
|
|
x_label_skip => $skip, |
749
|
|
|
|
|
|
|
type => 'lines', |
750
|
|
|
|
|
|
|
}, |
751
|
|
|
|
|
|
|
\&_year_month_day_to_num, |
752
|
|
|
|
|
|
|
0,$now, |
753
|
|
|
|
|
|
|
); |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
$l = $self->{_options}->{generate}->{score_histogram} || 0; |
756
|
|
|
|
|
|
|
if ($l > 0) |
757
|
|
|
|
|
|
|
{ |
758
|
|
|
|
|
|
|
$w = ($stats->{stats}->{max_score} || 0) * 50; |
759
|
|
|
|
|
|
|
if ($w > 0) |
760
|
|
|
|
|
|
|
{ |
761
|
|
|
|
|
|
|
$w = 800 if $w > 800; |
762
|
|
|
|
|
|
|
# for each undefined between first defined and last, set to 0 |
763
|
|
|
|
|
|
|
for (my $i = 0; $i < $stats->{stats}->{max_score}; $i += $l) |
764
|
|
|
|
|
|
|
{ |
765
|
|
|
|
|
|
|
$stats->{score_histogram}->{$i} ||= 0; |
766
|
|
|
|
|
|
|
} |
767
|
|
|
|
|
|
|
} |
768
|
|
|
|
|
|
|
} |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
$self->_graph ($stats,'score_histogram', $w, $h + 50, { |
771
|
|
|
|
|
|
|
title => "SpamAssassin score histogram", |
772
|
|
|
|
|
|
|
x_label => 'score', |
773
|
|
|
|
|
|
|
x_labels_vertical => 0, |
774
|
|
|
|
|
|
|
y_label => $self->{_options}->{items}, |
775
|
|
|
|
|
|
|
bar_spacing => 2, |
776
|
|
|
|
|
|
|
}, |
777
|
|
|
|
|
|
|
undef, 0,$now, |
778
|
|
|
|
|
|
|
); |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
# calculate how many entries we must skip to have a sensible amount of them |
782
|
|
|
|
|
|
|
$skip = scalar keys %{$stats->{score_daily}}; |
783
|
|
|
|
|
|
|
$skip = int($skip / 82); $skip = 1 if $skip < 1; |
784
|
|
|
|
|
|
|
$stats->{score_daily} = $self->_average($stats->{score_daily}, |
785
|
|
|
|
|
|
|
$self->{_options}->{average_score_daily}); |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
$self->_graph ($stats,'score_daily', 900, $h + 50, { |
788
|
|
|
|
|
|
|
title => "SpamAssassin score", |
789
|
|
|
|
|
|
|
x_label => 'date', |
790
|
|
|
|
|
|
|
x_labels_vertical => 1, |
791
|
|
|
|
|
|
|
x_label_skip => $skip, |
792
|
|
|
|
|
|
|
type => 'points', |
793
|
|
|
|
|
|
|
}, |
794
|
|
|
|
|
|
|
\&_year_month_day_to_num, |
795
|
|
|
|
|
|
|
); |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
$l = $self->{_options}->{generate}->{score_daily} || 0; |
798
|
|
|
|
|
|
|
if ($l > 0) |
799
|
|
|
|
|
|
|
{ |
800
|
|
|
|
|
|
|
$stats->{score_daily} = $self->_average($stats->{score_daily}); |
801
|
|
|
|
|
|
|
# adjust the width of the stat, so that it doesn't look to broad |
802
|
|
|
|
|
|
|
$w = $l * 50; $w = 800 if $w > 800; |
803
|
|
|
|
|
|
|
$self->_graph ($stats, ['last_x_days','daily'], $w, $h, { |
804
|
|
|
|
|
|
|
title => "$what/day", |
805
|
|
|
|
|
|
|
x_label => 'day', |
806
|
|
|
|
|
|
|
x_labels_vertical => 1, |
807
|
|
|
|
|
|
|
bar_spacing => 4, |
808
|
|
|
|
|
|
|
long_ticks => 0, |
809
|
|
|
|
|
|
|
type => 'lines', |
810
|
|
|
|
|
|
|
}, |
811
|
|
|
|
|
|
|
\&_year_month_day_to_num, |
812
|
|
|
|
|
|
|
undef,0,$now, |
813
|
|
|
|
|
|
|
); |
814
|
|
|
|
|
|
|
} |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
require Data::Dumper; print Data::Dumper::Dumper($stats->{reasons}); |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
# calculate how many entries we must skip to have a sensible amount of them |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
$self->_fill_template($stats); |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
############################################################################### |
824
|
|
|
|
|
|
|
# private methods |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
sub _add_percentage |
827
|
|
|
|
|
|
|
{ |
828
|
|
|
|
|
|
|
# given the single numbers for a certain statistics, chnages the values |
829
|
|
|
|
|
|
|
# from "xyz" to "xyz (u%)" |
830
|
|
|
|
|
|
|
my ($self,$stats,$what) = @_; |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
my $sum = 0; |
833
|
|
|
|
|
|
|
my $s = $stats->{$what}; |
834
|
|
|
|
|
|
|
# sum them all up |
835
|
|
|
|
|
|
|
foreach my $k (keys %$s) |
836
|
|
|
|
|
|
|
{ |
837
|
|
|
|
|
|
|
$sum += $s->{$k}; |
838
|
|
|
|
|
|
|
} |
839
|
|
|
|
|
|
|
# calculate the percantage value |
840
|
|
|
|
|
|
|
$sum = Math::BigInt->new($sum); |
841
|
|
|
|
|
|
|
foreach my $k (keys %$s) |
842
|
|
|
|
|
|
|
{ |
843
|
|
|
|
|
|
|
# 12 / 100 => 0.12 * 100 => 12% |
844
|
|
|
|
|
|
|
# round to 1 digit after dot |
845
|
|
|
|
|
|
|
my $p = |
846
|
|
|
|
|
|
|
Math::BigFloat->new($s->{$k} * 100)->bdiv($sum,undef,-1); |
847
|
|
|
|
|
|
|
$p->precision(undef); # no pading with 0's |
848
|
|
|
|
|
|
|
$s->{$k} = "$s->{$k}, $p%" if $p > 0; # don't add "(0%)" |
849
|
|
|
|
|
|
|
} |
850
|
|
|
|
|
|
|
$self; |
851
|
|
|
|
|
|
|
} |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
sub _average |
854
|
|
|
|
|
|
|
{ |
855
|
|
|
|
|
|
|
my ($self,$stats,$average) = @_; |
856
|
|
|
|
|
|
|
# calculate a rolling average over the last x day |
857
|
|
|
|
|
|
|
my $avrg = {}; |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
my $back = $average || $self->{_options}->{average} || 7; |
860
|
|
|
|
|
|
|
foreach my $thisday (keys %$stats) |
861
|
|
|
|
|
|
|
{ |
862
|
|
|
|
|
|
|
my $sum = $stats->{$thisday}; |
863
|
|
|
|
|
|
|
my ($day,$month,$year) = split /\//,$thisday; |
864
|
|
|
|
|
|
|
my ($d,$m,$y); |
865
|
|
|
|
|
|
|
for (my $i = 1; $i < $back; $i++) |
866
|
|
|
|
|
|
|
{ |
867
|
|
|
|
|
|
|
($y,$m,$d) = Add_Delta_Days($year,$month,$day,-$i); |
868
|
|
|
|
|
|
|
my $this = "$d/$m/$y"; |
869
|
|
|
|
|
|
|
$sum += $stats->{$this}||0; # non-existant => 0 |
870
|
|
|
|
|
|
|
} |
871
|
|
|
|
|
|
|
$avrg->{$thisday} = [ $stats->{$thisday}, int($sum / $back) ]; |
872
|
|
|
|
|
|
|
} |
873
|
|
|
|
|
|
|
return $avrg; |
874
|
|
|
|
|
|
|
} |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
sub _fill_template |
877
|
|
|
|
|
|
|
{ |
878
|
|
|
|
|
|
|
my ($self,$stats) = @_; |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
# read in |
881
|
|
|
|
|
|
|
my $file = $self->{_options}->{template}; |
882
|
|
|
|
|
|
|
my $tpl = ''; |
883
|
|
|
|
|
|
|
open FILE, "$file" or die ("Cannot read $file: $!"); |
884
|
|
|
|
|
|
|
while () { $tpl .= $_; } |
885
|
|
|
|
|
|
|
close FILE; |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
# replace placeholders |
888
|
|
|
|
|
|
|
$tpl =~ s/##generated##/scalar localtime();/eg; |
889
|
|
|
|
|
|
|
$tpl =~ s/##version##/$VERSION/g; |
890
|
|
|
|
|
|
|
$tpl =~ s/##items##/lc($self->{_options}->{items})/eg; |
891
|
|
|
|
|
|
|
$tpl =~ s/##Items##/ucfirst($self->{_options}->{items})/eg; |
892
|
|
|
|
|
|
|
$tpl =~ s/##ITEMS##/uc($self->{_options}->{items})/eg; |
893
|
|
|
|
|
|
|
my $time = sprintf("%0.2f",Time::HiRes::time() - $stats->{start_time}); |
894
|
|
|
|
|
|
|
$tpl =~ s/##took##/$time/g; |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
foreach my $t (qw/ |
897
|
|
|
|
|
|
|
items_processed items_skipped last_7_days last_30_days last_24_hours |
898
|
|
|
|
|
|
|
/) |
899
|
|
|
|
|
|
|
{ |
900
|
|
|
|
|
|
|
print "at $t\n"; |
901
|
|
|
|
|
|
|
$tpl =~ s/##$t##/$stats->{stats}->{$t}/g; |
902
|
|
|
|
|
|
|
} |
903
|
|
|
|
|
|
|
foreach (qw/ |
904
|
|
|
|
|
|
|
size_compressed size_uncompressed |
905
|
|
|
|
|
|
|
/) |
906
|
|
|
|
|
|
|
{ |
907
|
|
|
|
|
|
|
# in MByte |
908
|
|
|
|
|
|
|
$stats->{stats}->{$_} = |
909
|
|
|
|
|
|
|
int(($stats->{stats}->{$_} * 10) / (1024*1024)) / 10; |
910
|
|
|
|
|
|
|
$tpl =~ s/##$_##/$stats->{stats}->{$_}/g; |
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
# write out |
914
|
|
|
|
|
|
|
$file =~ s/\.tpl/.html/; |
915
|
|
|
|
|
|
|
$file = File::Spec->catfile($self->{_options}->{output},$file); |
916
|
|
|
|
|
|
|
open FILE, ">$file" or die ("Cannot write $file: $!"); |
917
|
|
|
|
|
|
|
print FILE $tpl; |
918
|
|
|
|
|
|
|
close FILE; |
919
|
|
|
|
|
|
|
return $self; |
920
|
|
|
|
|
|
|
} |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
BEGIN |
923
|
|
|
|
|
|
|
{ |
924
|
|
|
|
|
|
|
$month_table = { jan => 1, feb => 2, mar => 3, apr => 4, may => 5, jun => 6, |
925
|
|
|
|
|
|
|
jul => 7, aug => 8, sep => 9, oct => 10, nov => 11, dec => 12 }; |
926
|
|
|
|
|
|
|
$dow_table = { mon => 1, tue => 2, wed => 3, thu => 4, fri => 5, |
927
|
|
|
|
|
|
|
sat => 6, sun => 7, }; |
928
|
|
|
|
|
|
|
} |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
sub _month_to_num |
931
|
|
|
|
|
|
|
{ |
932
|
|
|
|
|
|
|
my $m = lc(shift || 0); |
933
|
|
|
|
|
|
|
return $month_table->{$m} || 0; |
934
|
|
|
|
|
|
|
} |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
sub _year_month_to_num |
937
|
|
|
|
|
|
|
{ |
938
|
|
|
|
|
|
|
my $m = shift; |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
my ($month,$year) = split /\//,$m; |
941
|
|
|
|
|
|
|
$year * 12+$month; |
942
|
|
|
|
|
|
|
} |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
sub _year_month_day_to_num |
945
|
|
|
|
|
|
|
{ |
946
|
|
|
|
|
|
|
my $m = shift; |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
my ($day,$month,$year) = split /\//,$m; |
949
|
|
|
|
|
|
|
return Date_to_Days($year,$month,$day); |
950
|
|
|
|
|
|
|
} |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
sub _dow_to_num |
953
|
|
|
|
|
|
|
{ |
954
|
|
|
|
|
|
|
my $d = lc(shift); |
955
|
|
|
|
|
|
|
return $dow_table->{$d} || 0; |
956
|
|
|
|
|
|
|
} |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
sub _num_to_dow |
959
|
|
|
|
|
|
|
{ |
960
|
|
|
|
|
|
|
my $d = shift; |
961
|
|
|
|
|
|
|
foreach my $k (keys %$dow_table) |
962
|
|
|
|
|
|
|
{ |
963
|
|
|
|
|
|
|
return $k if $dow_table->{$k} eq $d; |
964
|
|
|
|
|
|
|
} |
965
|
|
|
|
|
|
|
return 'unknown dow $d'; |
966
|
|
|
|
|
|
|
} |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
sub _num_to_month |
969
|
|
|
|
|
|
|
{ |
970
|
|
|
|
|
|
|
my $d = shift; |
971
|
|
|
|
|
|
|
foreach my $k (keys %$month_table) |
972
|
|
|
|
|
|
|
{ |
973
|
|
|
|
|
|
|
return $k if $month_table->{$k} eq $d; |
974
|
|
|
|
|
|
|
} |
975
|
|
|
|
|
|
|
return 'unknown month $d'; |
976
|
|
|
|
|
|
|
} |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
sub _parse_date |
979
|
|
|
|
|
|
|
{ |
980
|
|
|
|
|
|
|
my ($self,$date) = @_; |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
return (0,0,0,0,0,0,0,0) if !defined $date; |
983
|
|
|
|
|
|
|
|
984
|
|
|
|
|
|
|
my ($day,$month,$year,$dow,$hour,$minute,$seconds,$offset); |
985
|
|
|
|
|
|
|
if ($date =~ /,/) |
986
|
|
|
|
|
|
|
{ |
987
|
|
|
|
|
|
|
# Sun, 19 Jul 1998 23:49:16 +0200 |
988
|
|
|
|
|
|
|
# Sun, 19 Jul 03 23:49:16 +0200 |
989
|
|
|
|
|
|
|
$date =~ /([A-Za-z]+),\s+(\d+)\s([A-Za-z]+)\s(\d+)\s(\d+):(\d+):(\d+)\s(.*)/; |
990
|
|
|
|
|
|
|
$day = int($2 || 0); |
991
|
|
|
|
|
|
|
$month = _month_to_num($3); |
992
|
|
|
|
|
|
|
$year = int($4 || 0); |
993
|
|
|
|
|
|
|
$dow = _dow_to_num($1 || 0); |
994
|
|
|
|
|
|
|
$hour = $5 || 0; |
995
|
|
|
|
|
|
|
$minute = $6 || 0; |
996
|
|
|
|
|
|
|
$seconds = $7 || 0; |
997
|
|
|
|
|
|
|
$offset = $8 || 0; |
998
|
|
|
|
|
|
|
} |
999
|
|
|
|
|
|
|
elsif ($date =~ /([A-Za-z]+)\s([A-Za-z]+)\s+(\d+)\s(\d+):(\d+):(\d+)\s(\d+)/) |
1000
|
|
|
|
|
|
|
{ |
1001
|
|
|
|
|
|
|
# Tue Oct 27 18:38:52 1998 |
1002
|
|
|
|
|
|
|
$date =~ /([A-Za-z]+)\s([A-Za-z]+)\s+(\d+)\s(\d+):(\d+):(\d+)\s(\d+)/; |
1003
|
|
|
|
|
|
|
$day = int($3 || 0); |
1004
|
|
|
|
|
|
|
$month = _month_to_num($2); |
1005
|
|
|
|
|
|
|
$year = int($7 || 0); |
1006
|
|
|
|
|
|
|
$dow = _dow_to_num($1 || 0); |
1007
|
|
|
|
|
|
|
$hour = $4 || 0; $minute = $5 || 0; $seconds = $6 || 0; $offset = 0; |
1008
|
|
|
|
|
|
|
my $dow2 = Date::Calc::Day_of_Week($year,$month,$day); |
1009
|
|
|
|
|
|
|
# wrong Day Of Week? Shouldn't happen unless date is forged |
1010
|
|
|
|
|
|
|
return (0,0,0,0,0,0,0,0) |
1011
|
|
|
|
|
|
|
if ($dow2 ne $dow); |
1012
|
|
|
|
|
|
|
} |
1013
|
|
|
|
|
|
|
elsif ($date =~ /(\d{2})\s([A-Za-z]+)\s(\d+)\s(\d+):(\d+):(\d+)\s([-+]?\d+)/) |
1014
|
|
|
|
|
|
|
{ |
1015
|
|
|
|
|
|
|
# 18 Oct 2003 23:45:29 -0000 |
1016
|
|
|
|
|
|
|
$day = int($1 || 0); |
1017
|
|
|
|
|
|
|
$month = _month_to_num($2 || 0); |
1018
|
|
|
|
|
|
|
$year = int($3 || 0); |
1019
|
|
|
|
|
|
|
$hour = $4 || 0; $minute = $5 || 0; $seconds = $6 || 0; $offset = $7 || 0; |
1020
|
|
|
|
|
|
|
$dow = Date::Calc::Day_of_Week($year,$month,$day); |
1021
|
|
|
|
|
|
|
} |
1022
|
|
|
|
|
|
|
else |
1023
|
|
|
|
|
|
|
{ |
1024
|
|
|
|
|
|
|
$month = 0; |
1025
|
|
|
|
|
|
|
$day = 0; |
1026
|
|
|
|
|
|
|
$year = 0; |
1027
|
|
|
|
|
|
|
$dow = 0; |
1028
|
|
|
|
|
|
|
$hour = 0; |
1029
|
|
|
|
|
|
|
$seconds = 0; |
1030
|
|
|
|
|
|
|
$minute = 0; |
1031
|
|
|
|
|
|
|
$offset = 0; |
1032
|
|
|
|
|
|
|
} |
1033
|
|
|
|
|
|
|
$year += 1900 if $year < 100 && $year >= 70; |
1034
|
|
|
|
|
|
|
$year += 2000 if $year < 70 && $year > 0; |
1035
|
|
|
|
|
|
|
return ($day,$month,$year,$dow,$hour,$minute,$seconds,$offset); |
1036
|
|
|
|
|
|
|
} |
1037
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
sub _graph |
1039
|
|
|
|
|
|
|
{ |
1040
|
|
|
|
|
|
|
my ($self,$stats,$stat,$w,$h,$options,$map,$predict,$now) = @_; |
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
$predict = $predict || 0; |
1043
|
|
|
|
|
|
|
my $label = $stat; |
1044
|
|
|
|
|
|
|
if (ref($stat) eq 'ARRAY') |
1045
|
|
|
|
|
|
|
{ |
1046
|
|
|
|
|
|
|
$label = $stat->[1]; |
1047
|
|
|
|
|
|
|
$stat = $stat->[0]; |
1048
|
|
|
|
|
|
|
} |
1049
|
|
|
|
|
|
|
return if ($self->{_options}->{generate}->{$stat}||0) == 0; # skip this |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
print "Making graph $stat...\n"; |
1052
|
|
|
|
|
|
|
my $max = 0; |
1053
|
|
|
|
|
|
|
$map = sub { $_[0]; } if !defined $map; |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
# sort the data so that it can be processed by GD::Graph |
1056
|
|
|
|
|
|
|
my @legend = (); my @data; |
1057
|
|
|
|
|
|
|
my $k = []; my $v = []; |
1058
|
|
|
|
|
|
|
if (defined $options->{cumulate}) |
1059
|
|
|
|
|
|
|
{ |
1060
|
|
|
|
|
|
|
my $make_k = 0; # only once |
1061
|
|
|
|
|
|
|
foreach my $key (sort keys %{$stats->{$stat}}) |
1062
|
|
|
|
|
|
|
{ |
1063
|
|
|
|
|
|
|
#print "at key $key\n"; |
1064
|
|
|
|
|
|
|
push @legend, $key; |
1065
|
|
|
|
|
|
|
$v = []; my $i = 1; |
1066
|
|
|
|
|
|
|
foreach my $kkey (@{$stats->{$stat}->{$key}}) |
1067
|
|
|
|
|
|
|
{ |
1068
|
|
|
|
|
|
|
$kkey = 0 if !defined $kkey; |
1069
|
|
|
|
|
|
|
push @$k, &$map($i) if $make_k == 0; $i++; |
1070
|
|
|
|
|
|
|
push @$v, $kkey; |
1071
|
|
|
|
|
|
|
} |
1072
|
|
|
|
|
|
|
$make_k = 1; |
1073
|
|
|
|
|
|
|
push @data, $v; |
1074
|
|
|
|
|
|
|
} |
1075
|
|
|
|
|
|
|
} |
1076
|
|
|
|
|
|
|
elsif ($options->{type}||'' eq 'lines') |
1077
|
|
|
|
|
|
|
{ |
1078
|
|
|
|
|
|
|
my $av = 'average'; $av .= '_daily' if $stat eq 'daily'; |
1079
|
|
|
|
|
|
|
push @legend, $label, |
1080
|
|
|
|
|
|
|
"average over last ".$self->{_options}->{$av}." days"; |
1081
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
foreach my $key (sort { |
1083
|
|
|
|
|
|
|
my $aa = &$map($a); my $bb = &$map($b); |
1084
|
|
|
|
|
|
|
if (($aa =~ /^[0-9\.]+$/) && ($bb =~ /^[0-9\.]+$/)) |
1085
|
|
|
|
|
|
|
{ |
1086
|
|
|
|
|
|
|
return $aa <=> $bb; |
1087
|
|
|
|
|
|
|
} |
1088
|
|
|
|
|
|
|
$aa cmp $bb; |
1089
|
|
|
|
|
|
|
} keys %{$stats->{$stat}}) |
1090
|
|
|
|
|
|
|
{ |
1091
|
|
|
|
|
|
|
push @$k, $key; |
1092
|
|
|
|
|
|
|
my $i = 0; |
1093
|
|
|
|
|
|
|
foreach my $j (@{$stats->{$stat}->{$key}}) |
1094
|
|
|
|
|
|
|
{ |
1095
|
|
|
|
|
|
|
push @{$v->[$i]}, $j; $i++; |
1096
|
|
|
|
|
|
|
} |
1097
|
|
|
|
|
|
|
} |
1098
|
|
|
|
|
|
|
foreach my $j (@$v) |
1099
|
|
|
|
|
|
|
{ |
1100
|
|
|
|
|
|
|
push @data, $j; |
1101
|
|
|
|
|
|
|
} |
1102
|
|
|
|
|
|
|
} |
1103
|
|
|
|
|
|
|
else |
1104
|
|
|
|
|
|
|
{ |
1105
|
|
|
|
|
|
|
foreach my $key (sort { |
1106
|
|
|
|
|
|
|
my $aa = &$map($a); my $bb = &$map($b); |
1107
|
|
|
|
|
|
|
if (($aa =~ /^[0-9\.]+$/) && ($bb =~ /^[0-9\.]+$/)) |
1108
|
|
|
|
|
|
|
{ |
1109
|
|
|
|
|
|
|
return $aa <=> $bb; |
1110
|
|
|
|
|
|
|
} |
1111
|
|
|
|
|
|
|
$aa cmp $bb; |
1112
|
|
|
|
|
|
|
} keys %{$stats->{$stat}}) |
1113
|
|
|
|
|
|
|
{ |
1114
|
|
|
|
|
|
|
push @$k,$key; |
1115
|
|
|
|
|
|
|
push @$v, $stats->{$stat}->{$key}; |
1116
|
|
|
|
|
|
|
} |
1117
|
|
|
|
|
|
|
push @data, $v; |
1118
|
|
|
|
|
|
|
} |
1119
|
|
|
|
|
|
|
# end sort data |
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
if ($predict) |
1122
|
|
|
|
|
|
|
{ |
1123
|
|
|
|
|
|
|
my $t = 1; # month |
1124
|
|
|
|
|
|
|
$t = 0 if $stat eq 'yearly'; |
1125
|
|
|
|
|
|
|
unshift @data, $self->_prediction($stats, $t, scalar @{$data[0]}, $now); |
1126
|
|
|
|
|
|
|
$t = $stat; $t =~ s/ly//; |
1127
|
|
|
|
|
|
|
# legend only if we did prediction |
1128
|
|
|
|
|
|
|
if ($predict != 1) |
1129
|
|
|
|
|
|
|
{ |
1130
|
|
|
|
|
|
|
# based on last 60 days |
1131
|
|
|
|
|
|
|
$predict = 1; # 2 colors |
1132
|
|
|
|
|
|
|
# if under 80 days in the current year, don't make this (to have a |
1133
|
|
|
|
|
|
|
# difference between the two) |
1134
|
|
|
|
|
|
|
if (Delta_Days($now->[0],1,1, $now->[0], $now->[1], $now->[2]) > 80) |
1135
|
|
|
|
|
|
|
{ |
1136
|
|
|
|
|
|
|
unshift @data, $self->_prediction($stats, 2, scalar @{$data[0]}, $now); |
1137
|
|
|
|
|
|
|
push @legend, "based on last 60 days" if defined $data[0]->[-1]; |
1138
|
|
|
|
|
|
|
$predict = 2; # 3 colors |
1139
|
|
|
|
|
|
|
} |
1140
|
|
|
|
|
|
|
push @legend, "linear prediction" if defined $data[0]->[-1]; |
1141
|
|
|
|
|
|
|
} |
1142
|
|
|
|
|
|
|
else |
1143
|
|
|
|
|
|
|
{ |
1144
|
|
|
|
|
|
|
push @legend, "prediction for this $t" if defined $data[0]->[-1]; |
1145
|
|
|
|
|
|
|
} |
1146
|
|
|
|
|
|
|
$options->{overwrite} = 1; |
1147
|
|
|
|
|
|
|
} |
1148
|
|
|
|
|
|
|
# calculate maximum value |
1149
|
|
|
|
|
|
|
my @sum; |
1150
|
|
|
|
|
|
|
if (defined $options->{cumulate}) |
1151
|
|
|
|
|
|
|
{ |
1152
|
|
|
|
|
|
|
foreach my $r ( @data ) |
1153
|
|
|
|
|
|
|
{ |
1154
|
|
|
|
|
|
|
my $i = 0; my $j; |
1155
|
|
|
|
|
|
|
foreach my $h ( @$r ) |
1156
|
|
|
|
|
|
|
{ |
1157
|
|
|
|
|
|
|
$j = $h || 0; $j =~ s/,.*//; # "12, 12%" => 12 |
1158
|
|
|
|
|
|
|
$sum[$i++] += $j || 0; |
1159
|
|
|
|
|
|
|
} |
1160
|
|
|
|
|
|
|
} |
1161
|
|
|
|
|
|
|
} |
1162
|
|
|
|
|
|
|
else |
1163
|
|
|
|
|
|
|
{ |
1164
|
|
|
|
|
|
|
foreach my $r ( @data ) |
1165
|
|
|
|
|
|
|
{ |
1166
|
|
|
|
|
|
|
my $i = 0; my $j; |
1167
|
|
|
|
|
|
|
foreach my $h ( @$r ) |
1168
|
|
|
|
|
|
|
{ |
1169
|
|
|
|
|
|
|
$j = $h || 0; $j =~ s/,.*//; # "12, 12%" => 12 |
1170
|
|
|
|
|
|
|
$sum[$i] = $j if ($j || 0) >= ($sum[$i] || 0); $i++; |
1171
|
|
|
|
|
|
|
} |
1172
|
|
|
|
|
|
|
} |
1173
|
|
|
|
|
|
|
} |
1174
|
|
|
|
|
|
|
foreach my $r ( @sum ) |
1175
|
|
|
|
|
|
|
{ |
1176
|
|
|
|
|
|
|
$max = $r if $r > $max; |
1177
|
|
|
|
|
|
|
} |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
my $data = GD::Graph::Data->new([$k, @data]) or die GD::Graph::Data->error; |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
# This is hackery, replace it with something more clean |
1182
|
|
|
|
|
|
|
my $grow = 1.05; |
1183
|
|
|
|
|
|
|
$grow = 1.15 if defined $options->{show_values}; |
1184
|
|
|
|
|
|
|
$grow = 1.25 if defined $options->{values_vertical}; |
1185
|
|
|
|
|
|
|
$grow = 1.15 if defined $options->{values_vertical} && |
1186
|
|
|
|
|
|
|
$options->{x_label} eq 'target address'; |
1187
|
|
|
|
|
|
|
$grow = 1.6 if $stat =~ /^(rule)$/; # percentages |
1188
|
|
|
|
|
|
|
$grow = 1.4 if $stat =~ /^(domain|target)$/; # percentages |
1189
|
|
|
|
|
|
|
if (int($max * $grow) == $max) # increase by at least 1 |
1190
|
|
|
|
|
|
|
{ |
1191
|
|
|
|
|
|
|
$max++; |
1192
|
|
|
|
|
|
|
} |
1193
|
|
|
|
|
|
|
else |
1194
|
|
|
|
|
|
|
{ |
1195
|
|
|
|
|
|
|
$max = int($max*$grow); # + x percent |
1196
|
|
|
|
|
|
|
} |
1197
|
|
|
|
|
|
|
my $defaults = { |
1198
|
|
|
|
|
|
|
x_label => $self->{_options}->{items}, |
1199
|
|
|
|
|
|
|
y_label => 'count', |
1200
|
|
|
|
|
|
|
title => $self->{_options}->{items} . '/day', |
1201
|
|
|
|
|
|
|
y_max_value => $max, |
1202
|
|
|
|
|
|
|
y_tick_number => 8, |
1203
|
|
|
|
|
|
|
bar_spacing => 4, |
1204
|
|
|
|
|
|
|
y_number_format => '%i', |
1205
|
|
|
|
|
|
|
x_labels_vertical => 1, |
1206
|
|
|
|
|
|
|
transparent => 1, |
1207
|
|
|
|
|
|
|
# gridclr => 'lgray', # to be compatible w/ old GD::Graph |
1208
|
|
|
|
|
|
|
y_long_ticks => 2, |
1209
|
|
|
|
|
|
|
values_space => 6, |
1210
|
|
|
|
|
|
|
}; |
1211
|
|
|
|
|
|
|
my @opt = (); |
1212
|
|
|
|
|
|
|
foreach my $k (keys %$options, keys %$defaults) |
1213
|
|
|
|
|
|
|
{ |
1214
|
|
|
|
|
|
|
next if $k eq 'title' && $self->{_options}->{no_title} != 0; |
1215
|
|
|
|
|
|
|
next if $k eq 'type'; |
1216
|
|
|
|
|
|
|
$options->{$k} = $defaults->{$k} if !defined $options->{$k}; |
1217
|
|
|
|
|
|
|
push @opt, $k, $options->{$k}; |
1218
|
|
|
|
|
|
|
} |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
############################################################################# |
1221
|
|
|
|
|
|
|
# retry to make a graph until it fits |
1222
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
$w = 120 if $w < 120; # minimum width |
1224
|
|
|
|
|
|
|
my $redo = 0; |
1225
|
|
|
|
|
|
|
while ($redo == 0) |
1226
|
|
|
|
|
|
|
{ |
1227
|
|
|
|
|
|
|
my $my_graph; |
1228
|
|
|
|
|
|
|
if (($options->{type} || '') eq 'lines') |
1229
|
|
|
|
|
|
|
{ |
1230
|
|
|
|
|
|
|
$my_graph = GD::Graph::lines->new( $w, $h ); |
1231
|
|
|
|
|
|
|
$my_graph->set( dclrs => [ '#9090e0','#ff6040' ] ); |
1232
|
|
|
|
|
|
|
} |
1233
|
|
|
|
|
|
|
else |
1234
|
|
|
|
|
|
|
{ |
1235
|
|
|
|
|
|
|
$my_graph = GD::Graph::bars->new( $w, $h ); |
1236
|
|
|
|
|
|
|
if ($predict == 2) |
1237
|
|
|
|
|
|
|
{ |
1238
|
|
|
|
|
|
|
$my_graph->set( dclrs => [ '#f8e8e8', '#e0c8c8', '#ff2060' ] ); |
1239
|
|
|
|
|
|
|
} |
1240
|
|
|
|
|
|
|
elsif ($predict) |
1241
|
|
|
|
|
|
|
{ |
1242
|
|
|
|
|
|
|
$my_graph->set( dclrs => [ '#e0d0d0', '#ff2060' ] ); |
1243
|
|
|
|
|
|
|
} |
1244
|
|
|
|
|
|
|
else |
1245
|
|
|
|
|
|
|
{ |
1246
|
|
|
|
|
|
|
$my_graph->set( dclrs => |
1247
|
|
|
|
|
|
|
[ '#ff2060','#60ff80','#6080ff','#ffff00','#f060f0', |
1248
|
|
|
|
|
|
|
'#209020','#d0d0f0','#f0a060','#ffd0d0','#b0ffb0' ] ); |
1249
|
|
|
|
|
|
|
} |
1250
|
|
|
|
|
|
|
} |
1251
|
|
|
|
|
|
|
$my_graph->set_legend(@legend) if @legend != 0; |
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
$my_graph->set( @opt ) or warn $my_graph->error(); |
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
print " Making $w x $h\n"; |
1256
|
|
|
|
|
|
|
$my_graph->clear_errors(); |
1257
|
|
|
|
|
|
|
$my_graph->plot($data); |
1258
|
|
|
|
|
|
|
$redo = 1; |
1259
|
|
|
|
|
|
|
if (($my_graph->error()||'') =~ /Horizontal size too small/) |
1260
|
|
|
|
|
|
|
{ |
1261
|
|
|
|
|
|
|
$w += 32; $redo = 0; |
1262
|
|
|
|
|
|
|
} |
1263
|
|
|
|
|
|
|
if (($my_graph->error()||'') =~ /Vertical size too small/) |
1264
|
|
|
|
|
|
|
{ |
1265
|
|
|
|
|
|
|
$h += 64; $redo = 0; |
1266
|
|
|
|
|
|
|
} |
1267
|
|
|
|
|
|
|
if (!$my_graph->error()) |
1268
|
|
|
|
|
|
|
{ |
1269
|
|
|
|
|
|
|
$self->_save_chart($my_graph, |
1270
|
|
|
|
|
|
|
File::Spec->catfile($self->{_options}->{output},$stat)); |
1271
|
|
|
|
|
|
|
print "Saved\n"; |
1272
|
|
|
|
|
|
|
last; |
1273
|
|
|
|
|
|
|
} |
1274
|
|
|
|
|
|
|
elsif ($redo != 0) |
1275
|
|
|
|
|
|
|
{ |
1276
|
|
|
|
|
|
|
print $my_graph->error(),"\n"; |
1277
|
|
|
|
|
|
|
} |
1278
|
|
|
|
|
|
|
} |
1279
|
|
|
|
|
|
|
return $self; |
1280
|
|
|
|
|
|
|
} |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
sub _prediction |
1283
|
|
|
|
|
|
|
{ |
1284
|
|
|
|
|
|
|
# from item count per day calculate an average for the given timeframe, |
1285
|
|
|
|
|
|
|
# then interpolate how many items will occur this month/year |
1286
|
|
|
|
|
|
|
my ($self, $stats, $m, $needed_samples, $now ) = @_; |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
my $max = undef; |
1289
|
|
|
|
|
|
|
my ($month,$year) = ($now->[1],$now->[0]); |
1290
|
|
|
|
|
|
|
my $day = 1; my $days; |
1291
|
|
|
|
|
|
|
if ($m == 1) |
1292
|
|
|
|
|
|
|
{ |
1293
|
|
|
|
|
|
|
# good enough? |
1294
|
|
|
|
|
|
|
$days = 28 if $month == 2; |
1295
|
|
|
|
|
|
|
$days = 30 if $month != 2; |
1296
|
|
|
|
|
|
|
$days = 31 if $now->[2] == 31; |
1297
|
|
|
|
|
|
|
} |
1298
|
|
|
|
|
|
|
elsif ($m == 2) |
1299
|
|
|
|
|
|
|
{ |
1300
|
|
|
|
|
|
|
# prediction for year based on last 60 days |
1301
|
|
|
|
|
|
|
($year,$month,$day) = @$now; |
1302
|
|
|
|
|
|
|
($year,$month,$day) = Add_Delta_Days($year,$month,$day, -60); |
1303
|
|
|
|
|
|
|
$days = 365; # good enough? |
1304
|
|
|
|
|
|
|
} |
1305
|
|
|
|
|
|
|
else |
1306
|
|
|
|
|
|
|
{ |
1307
|
|
|
|
|
|
|
$month = 1; |
1308
|
|
|
|
|
|
|
$days = 365; # good enough? |
1309
|
|
|
|
|
|
|
} |
1310
|
|
|
|
|
|
|
my $delta = Delta_Days($year,$month,$day, $now->[0], $now->[1], $now->[2]); |
1311
|
|
|
|
|
|
|
# sum up all items for each day since start of timeframe |
1312
|
|
|
|
|
|
|
my $sum = 0; |
1313
|
|
|
|
|
|
|
for (my $i = 0; $i < $delta; $i++) |
1314
|
|
|
|
|
|
|
{ |
1315
|
|
|
|
|
|
|
$sum += $stats->{daily}->{"$day/$month/$year"} || 0; |
1316
|
|
|
|
|
|
|
($year,$month,$day) = Add_Delta_Days($year,$month,$day, 1); |
1317
|
|
|
|
|
|
|
} |
1318
|
|
|
|
|
|
|
if ($delta != 0) |
1319
|
|
|
|
|
|
|
{ |
1320
|
|
|
|
|
|
|
$max = int($days * $sum / $delta); |
1321
|
|
|
|
|
|
|
} |
1322
|
|
|
|
|
|
|
my @samples; |
1323
|
|
|
|
|
|
|
for (my $i = 1; $i < $needed_samples; $i++) |
1324
|
|
|
|
|
|
|
{ |
1325
|
|
|
|
|
|
|
push @samples, undef; |
1326
|
|
|
|
|
|
|
} |
1327
|
|
|
|
|
|
|
push @samples, $max; |
1328
|
|
|
|
|
|
|
\@samples; |
1329
|
|
|
|
|
|
|
} |
1330
|
|
|
|
|
|
|
|
1331
|
|
|
|
|
|
|
sub _extract_target |
1332
|
|
|
|
|
|
|
{ |
1333
|
|
|
|
|
|
|
my ($self,$header) = @_; |
1334
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
my ($target,$domain) = ''; |
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
# ignore target in "From target@target-host.com datestring" and |
1338
|
|
|
|
|
|
|
# try to extract target from defined valid forwardes, since X-Envelope-To |
1339
|
|
|
|
|
|
|
# will probably point to the forwarded address endpoint |
1340
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
foreach my $line (@$header) |
1342
|
|
|
|
|
|
|
{ |
1343
|
|
|
|
|
|
|
foreach my $for (@{$self->{_options}->{valid_forwarders}}) |
1344
|
|
|
|
|
|
|
{ |
1345
|
|
|
|
|
|
|
if (($line =~ /^Received:/) && |
1346
|
|
|
|
|
|
|
($line =~ /by [^\s]*?$for.*? for <([^>]+)>/)) |
1347
|
|
|
|
|
|
|
{ |
1348
|
|
|
|
|
|
|
$target = $1 || 'unknown'; last; |
1349
|
|
|
|
|
|
|
} |
1350
|
|
|
|
|
|
|
} |
1351
|
|
|
|
|
|
|
last if $target ne ''; |
1352
|
|
|
|
|
|
|
} |
1353
|
|
|
|
|
|
|
$target ||= 'unknown'; |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
if ($target eq 'unknown') |
1356
|
|
|
|
|
|
|
{ |
1357
|
|
|
|
|
|
|
# try to extract the target address from X-Envelope-To; |
1358
|
|
|
|
|
|
|
foreach my $line (@$header) |
1359
|
|
|
|
|
|
|
{ |
1360
|
|
|
|
|
|
|
if ($line =~ /^X-Envelope-To:/i) |
1361
|
|
|
|
|
|
|
{ |
1362
|
|
|
|
|
|
|
$target = $line; $target =~ s/^[A-Za-z-]+: //; last; |
1363
|
|
|
|
|
|
|
} |
1364
|
|
|
|
|
|
|
} |
1365
|
|
|
|
|
|
|
} |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
# no X-Envelope-To:, no valid forwarder? So try "From " |
1368
|
|
|
|
|
|
|
if ($target eq 'unknown') |
1369
|
|
|
|
|
|
|
{ |
1370
|
|
|
|
|
|
|
my $line = $header->[0] || ''; |
1371
|
|
|
|
|
|
|
$line =~ /^From ([^\s]+)/; |
1372
|
|
|
|
|
|
|
$target = $1 || 'unknown'; |
1373
|
|
|
|
|
|
|
} |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
# if still not defined, try 'received for' in Received: header lines |
1376
|
|
|
|
|
|
|
if ($target eq 'unknown') |
1377
|
|
|
|
|
|
|
{ |
1378
|
|
|
|
|
|
|
foreach my $line (@$header) |
1379
|
|
|
|
|
|
|
{ |
1380
|
|
|
|
|
|
|
if (($line =~ /^Received:/) && |
1381
|
|
|
|
|
|
|
($line =~ /received for <([^>]+)>:/)) |
1382
|
|
|
|
|
|
|
{ |
1383
|
|
|
|
|
|
|
$target = $1 || 'unknown'; last; |
1384
|
|
|
|
|
|
|
} |
1385
|
|
|
|
|
|
|
} |
1386
|
|
|
|
|
|
|
} |
1387
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
$target = lc($target); # normalize |
1389
|
|
|
|
|
|
|
$target =~ s/^\".+?\"\s+//; # throw away comment/name |
1390
|
|
|
|
|
|
|
$target =~ s/[<>]//g; |
1391
|
|
|
|
|
|
|
$target = substr($target,0,64) if length($target) > 64; |
1392
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
foreach my $dom (@{$self->{_options}->{filter_domains}}) |
1394
|
|
|
|
|
|
|
{ |
1395
|
|
|
|
|
|
|
$target = 'unknown' if $target =~ /\@.*$dom/i; |
1396
|
|
|
|
|
|
|
} |
1397
|
|
|
|
|
|
|
foreach my $dom (@{$self->{_options}->{filter_target}}) |
1398
|
|
|
|
|
|
|
{ |
1399
|
|
|
|
|
|
|
$target = 'unknown' if $target =~ /$dom/i; |
1400
|
|
|
|
|
|
|
} |
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
$domain = $target; $domain =~ /\@(.+)$/; $domain = $1 || 'unknown'; |
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
$target = 'unknown' if $target eq ''; |
1405
|
|
|
|
|
|
|
$domain = 'unknown' if $target eq 'unknown'; |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
($target,$domain); |
1408
|
|
|
|
|
|
|
} |
1409
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
sub _gather_files |
1411
|
|
|
|
|
|
|
{ |
1412
|
|
|
|
|
|
|
my ($self,$stats) = @_; |
1413
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
my $dir = $self->{_options}->{input}; |
1415
|
|
|
|
|
|
|
# if input is a single file, use only this (does not look for an index yet) |
1416
|
|
|
|
|
|
|
if (-f $dir) |
1417
|
|
|
|
|
|
|
{ |
1418
|
|
|
|
|
|
|
$stats->{stats}->{size_compressed} += -s $dir; |
1419
|
|
|
|
|
|
|
return ($dir); |
1420
|
|
|
|
|
|
|
} |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
############################################################################ |
1423
|
|
|
|
|
|
|
# open the input/archive directory |
1424
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
opendir my $DIR, $dir or die "Cannot open dir $dir: $!"; |
1426
|
|
|
|
|
|
|
my @files = readdir $DIR; |
1427
|
|
|
|
|
|
|
closedir $DIR; |
1428
|
|
|
|
|
|
|
|
1429
|
|
|
|
|
|
|
############################################################################ |
1430
|
|
|
|
|
|
|
# open the index directory |
1431
|
|
|
|
|
|
|
my $index_dir = $self->{_options}->{index}; |
1432
|
|
|
|
|
|
|
opendir $DIR, $index_dir or die "Cannot open dir $index_dir: $!"; |
1433
|
|
|
|
|
|
|
my @index = readdir $DIR; |
1434
|
|
|
|
|
|
|
closedir $DIR; |
1435
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
# for each archive file, see if we have an index file. If yes, use that |
1437
|
|
|
|
|
|
|
# instead and also prefer gzipped (.idx.gz) index files over the normal |
1438
|
|
|
|
|
|
|
# ones (.idx) |
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
my @ret = (); |
1441
|
|
|
|
|
|
|
foreach my $file (@files) |
1442
|
|
|
|
|
|
|
{ |
1443
|
|
|
|
|
|
|
next if $file =~ /^\.\.?\z/; # skip '..', '.' etc |
1444
|
|
|
|
|
|
|
print "Evaluating file '$file' ... "; |
1445
|
|
|
|
|
|
|
my $archive = File::Spec->catfile ($dir,$file); |
1446
|
|
|
|
|
|
|
my $index = File::Spec->catfile ($index_dir,$file.'idx'); |
1447
|
|
|
|
|
|
|
my $index_gz = File::Spec->catfile ($index_dir,$file.'.idx.gz'); |
1448
|
|
|
|
|
|
|
|
1449
|
|
|
|
|
|
|
# compressed size is stored in index file |
1450
|
|
|
|
|
|
|
if (-f $index_gz) |
1451
|
|
|
|
|
|
|
{ |
1452
|
|
|
|
|
|
|
print "found gzipped index.\n"; |
1453
|
|
|
|
|
|
|
push @ret, $index_gz; |
1454
|
|
|
|
|
|
|
} |
1455
|
|
|
|
|
|
|
elsif (-f $index) |
1456
|
|
|
|
|
|
|
{ |
1457
|
|
|
|
|
|
|
print "found index.\n"; |
1458
|
|
|
|
|
|
|
push @ret, $index; |
1459
|
|
|
|
|
|
|
} |
1460
|
|
|
|
|
|
|
elsif (-f $archive) |
1461
|
|
|
|
|
|
|
{ |
1462
|
|
|
|
|
|
|
print "found no index at all, will re-index.\n"; |
1463
|
|
|
|
|
|
|
push @ret, $archive; |
1464
|
|
|
|
|
|
|
$stats->{stats}->{size_compressed} += -s $archive; |
1465
|
|
|
|
|
|
|
$stats->{stats}->{current_size_compressed} = -s $archive; |
1466
|
|
|
|
|
|
|
} |
1467
|
|
|
|
|
|
|
# everything else (directories etc) is ignored |
1468
|
|
|
|
|
|
|
} |
1469
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
# also, for all (gzipped) index files without an archive file, add these |
1471
|
|
|
|
|
|
|
# too, so that you can safey remove the archives |
1472
|
|
|
|
|
|
|
foreach my $file (@index) |
1473
|
|
|
|
|
|
|
{ |
1474
|
|
|
|
|
|
|
my $index = File::Spec->catfile ($index_dir,$file); |
1475
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
my $archive = File::Spec->catfile ($dir,$file); |
1477
|
|
|
|
|
|
|
$archive =~ s/\.idx.gz$//; |
1478
|
|
|
|
|
|
|
$archive =~ s/\.idx$//; |
1479
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
if ((-f $index) && (!-f $archive)) |
1481
|
|
|
|
|
|
|
{ |
1482
|
|
|
|
|
|
|
print "Will also use index '$index' w/o archive.\n"; |
1483
|
|
|
|
|
|
|
push @ret, $index; |
1484
|
|
|
|
|
|
|
} |
1485
|
|
|
|
|
|
|
} |
1486
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
return @ret; |
1488
|
|
|
|
|
|
|
} |
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
sub _open_file |
1491
|
|
|
|
|
|
|
{ |
1492
|
|
|
|
|
|
|
my ($file) = @_; |
1493
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
# try as .gz file first |
1495
|
|
|
|
|
|
|
my $FILE; |
1496
|
|
|
|
|
|
|
if ($file =~ /\.(gz|zip|gzip)$/) |
1497
|
|
|
|
|
|
|
{ |
1498
|
|
|
|
|
|
|
$FILE = gzopen($file, "r") or die "Cannot open $file: $gzerrno\n"; |
1499
|
|
|
|
|
|
|
} |
1500
|
|
|
|
|
|
|
else |
1501
|
|
|
|
|
|
|
{ |
1502
|
|
|
|
|
|
|
open ($FILE, $file) or die "Cannot open $file: $!\n"; |
1503
|
|
|
|
|
|
|
} |
1504
|
|
|
|
|
|
|
$FILE; |
1505
|
|
|
|
|
|
|
} |
1506
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
sub _read_line |
1508
|
|
|
|
|
|
|
{ |
1509
|
|
|
|
|
|
|
my ($file) = @_; |
1510
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
if (ref($file) eq 'GLOB') |
1512
|
|
|
|
|
|
|
{ |
1513
|
|
|
|
|
|
|
return <$file>; |
1514
|
|
|
|
|
|
|
} |
1515
|
|
|
|
|
|
|
my $line; |
1516
|
|
|
|
|
|
|
$file->gzreadline($line); |
1517
|
|
|
|
|
|
|
return if $gzerrno != 0; |
1518
|
|
|
|
|
|
|
$line; |
1519
|
|
|
|
|
|
|
} |
1520
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
sub _close_file |
1522
|
|
|
|
|
|
|
{ |
1523
|
|
|
|
|
|
|
my ($file) = shift; |
1524
|
|
|
|
|
|
|
|
1525
|
|
|
|
|
|
|
if (ref($file) ne 'GLOB') |
1526
|
|
|
|
|
|
|
{ |
1527
|
|
|
|
|
|
|
die "Error reading from $file: ", $file->gzerror(),"\n" |
1528
|
|
|
|
|
|
|
if $file->gzerror != Z_STREAM_END; |
1529
|
|
|
|
|
|
|
$file->gzclose(); |
1530
|
|
|
|
|
|
|
} |
1531
|
|
|
|
|
|
|
else |
1532
|
|
|
|
|
|
|
{ |
1533
|
|
|
|
|
|
|
close $file; |
1534
|
|
|
|
|
|
|
} |
1535
|
|
|
|
|
|
|
} |
1536
|
|
|
|
|
|
|
|
1537
|
|
|
|
|
|
|
sub _read_file |
1538
|
|
|
|
|
|
|
{ |
1539
|
|
|
|
|
|
|
# read file (but prefer the gzipped version) in one go and return a ref to |
1540
|
|
|
|
|
|
|
# the contents |
1541
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
my ($self,$file) = @_; |
1543
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
# that is a bit inefficient, sucking in anything at a time... |
1545
|
|
|
|
|
|
|
my $doc; |
1546
|
|
|
|
|
|
|
if ($file =~ /\.gz$/) |
1547
|
|
|
|
|
|
|
{ |
1548
|
|
|
|
|
|
|
return $self->_read_compressed_file($file); |
1549
|
|
|
|
|
|
|
} |
1550
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
open FILE, "$file" or die ("Cannot read $file: $!"); |
1552
|
|
|
|
|
|
|
while () |
1553
|
|
|
|
|
|
|
{ |
1554
|
|
|
|
|
|
|
$doc .= $_; |
1555
|
|
|
|
|
|
|
} |
1556
|
|
|
|
|
|
|
close FILE; |
1557
|
|
|
|
|
|
|
\$doc; |
1558
|
|
|
|
|
|
|
} |
1559
|
|
|
|
|
|
|
|
1560
|
|
|
|
|
|
|
sub _read_compressed_file |
1561
|
|
|
|
|
|
|
{ |
1562
|
|
|
|
|
|
|
my ($self,$file) = @_; |
1563
|
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
|
my $gz = gzopen($file, "rb") or die "Cannot open $file: $gzerrno\n"; |
1565
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
my ($line, $doc); |
1567
|
|
|
|
|
|
|
while ($gz->gzreadline($line) > 0) |
1568
|
|
|
|
|
|
|
{ |
1569
|
|
|
|
|
|
|
$doc .= $line; |
1570
|
|
|
|
|
|
|
} |
1571
|
|
|
|
|
|
|
die "Error reading from $file: $gzerrno\n" if $gzerrno != Z_STREAM_END; |
1572
|
|
|
|
|
|
|
$gz->gzclose(); |
1573
|
|
|
|
|
|
|
\$doc; |
1574
|
|
|
|
|
|
|
} |
1575
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
sub _split |
1577
|
|
|
|
|
|
|
{ |
1578
|
|
|
|
|
|
|
my $doc = shift; |
1579
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
my $l = [ split(/\n/, $$doc) ]; |
1581
|
|
|
|
|
|
|
$l; |
1582
|
|
|
|
|
|
|
} |
1583
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
sub _gather_mails |
1585
|
|
|
|
|
|
|
{ |
1586
|
|
|
|
|
|
|
my ($self,$file,$id,$stats,$now,$first) = @_; |
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
my $FILE = _open_file($file); |
1589
|
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
my $header = 0; # in header or body? |
1591
|
|
|
|
|
|
|
my @header_lines = (); # current header |
1592
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
my $cur_size = 0; |
1594
|
|
|
|
|
|
|
my $line; |
1595
|
|
|
|
|
|
|
my $lines = 0; |
1596
|
|
|
|
|
|
|
# endless loop until done |
1597
|
|
|
|
|
|
|
while ( 3 < 5 ) |
1598
|
|
|
|
|
|
|
{ |
1599
|
|
|
|
|
|
|
if (ref($FILE) eq 'GLOB') |
1600
|
|
|
|
|
|
|
{ |
1601
|
|
|
|
|
|
|
$line = <$FILE>; |
1602
|
|
|
|
|
|
|
} |
1603
|
|
|
|
|
|
|
else |
1604
|
|
|
|
|
|
|
{ |
1605
|
|
|
|
|
|
|
$FILE->gzreadline($line); |
1606
|
|
|
|
|
|
|
$line = undef if $gzerrno == Z_STREAM_END; |
1607
|
|
|
|
|
|
|
if ($FILE->gzerror()) |
1608
|
|
|
|
|
|
|
{ |
1609
|
|
|
|
|
|
|
$line = undef; |
1610
|
|
|
|
|
|
|
print "Compress:Zip error: ", $FILE->gzerror(), "\n" |
1611
|
|
|
|
|
|
|
if $FILE->gzerror() != Z_STREAM_END; |
1612
|
|
|
|
|
|
|
} |
1613
|
|
|
|
|
|
|
} |
1614
|
|
|
|
|
|
|
last if !defined $line; |
1615
|
|
|
|
|
|
|
$lines++; |
1616
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
$cur_size += length($line); |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
if ($line =~ /^From .*\d+/) |
1620
|
|
|
|
|
|
|
{ |
1621
|
|
|
|
|
|
|
$header = 1; |
1622
|
|
|
|
|
|
|
if (@header_lines > 0) |
1623
|
|
|
|
|
|
|
{ |
1624
|
|
|
|
|
|
|
# had a mail before with header? |
1625
|
|
|
|
|
|
|
my $cur = $self->_process_mail( |
1626
|
|
|
|
|
|
|
{ header => [ @header_lines ], |
1627
|
|
|
|
|
|
|
size => $cur_size, |
1628
|
|
|
|
|
|
|
id => $$id, |
1629
|
|
|
|
|
|
|
}, $now); |
1630
|
|
|
|
|
|
|
$self->_index_mail($cur); |
1631
|
|
|
|
|
|
|
$self->_merge_mail($cur,$stats,$now,$first); # merge into $stats |
1632
|
|
|
|
|
|
|
$$id ++; |
1633
|
|
|
|
|
|
|
@header_lines = (); |
1634
|
|
|
|
|
|
|
$cur_size = 0; |
1635
|
|
|
|
|
|
|
} |
1636
|
|
|
|
|
|
|
} |
1637
|
|
|
|
|
|
|
$header = 0 if $header == 1 && $line =~ /^\n$/; # now in body? |
1638
|
|
|
|
|
|
|
push @header_lines, $line if $header == 1; |
1639
|
|
|
|
|
|
|
} |
1640
|
|
|
|
|
|
|
# process last mail |
1641
|
|
|
|
|
|
|
if (@header_lines > 0) |
1642
|
|
|
|
|
|
|
{ |
1643
|
|
|
|
|
|
|
# was a valid mail? so get it's size (because we throw away the body) |
1644
|
|
|
|
|
|
|
my $cur = $self->_process_mail( |
1645
|
|
|
|
|
|
|
{ header => [ @header_lines ], |
1646
|
|
|
|
|
|
|
size => $cur_size, |
1647
|
|
|
|
|
|
|
id => $$id, |
1648
|
|
|
|
|
|
|
}, $now); |
1649
|
|
|
|
|
|
|
$self->_index_mail($cur); |
1650
|
|
|
|
|
|
|
$self->_merge_mail($cur,$stats,$now,$first); # merge into $stats |
1651
|
|
|
|
|
|
|
} |
1652
|
|
|
|
|
|
|
$$id ++; |
1653
|
|
|
|
|
|
|
_close_file($FILE); |
1654
|
|
|
|
|
|
|
return; |
1655
|
|
|
|
|
|
|
} |
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
sub _save_chart |
1658
|
|
|
|
|
|
|
{ |
1659
|
|
|
|
|
|
|
my $self = shift; |
1660
|
|
|
|
|
|
|
my $chart = shift or die "Need a chart!"; |
1661
|
|
|
|
|
|
|
my $name = shift or die "Need a name!"; |
1662
|
|
|
|
|
|
|
local(*OUT); |
1663
|
|
|
|
|
|
|
|
1664
|
|
|
|
|
|
|
my $ext = $self->{_options}->{graph_ext} || $chart->export_format(); |
1665
|
|
|
|
|
|
|
|
1666
|
|
|
|
|
|
|
open(OUT, ">$name.$ext") or |
1667
|
|
|
|
|
|
|
die "Cannot open $name.$ext for write: $!"; |
1668
|
|
|
|
|
|
|
binmode OUT; |
1669
|
|
|
|
|
|
|
print OUT $chart->gd->$ext(); |
1670
|
|
|
|
|
|
|
close OUT; |
1671
|
|
|
|
|
|
|
} |
1672
|
|
|
|
|
|
|
|
1673
|
|
|
|
|
|
|
1; |
1674
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
__END__ |