|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # <@LICENSE>  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Licensed to the Apache Software Foundation (ASF) under one or more  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # contributor license agreements.  See the NOTICE file distributed with  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # this work for additional information regarding copyright ownership.  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # The ASF licenses this file to you under the Apache License, Version 2.0  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # (the "License"); you may not use this file except in compliance with  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # the License.  You may obtain a copy of the License at:  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #   | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     http://www.apache.org/licenses/LICENSE-2.0  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #   | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Unless required by applicable law or agreed to in writing, software  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # distributed under the License is distributed on an "AS IS" BASIS,  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # See the License for the specific language governing permissions and  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # limitations under the License.  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # </@LICENSE>  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Mail::SpamAssassin::Plugin::AutoLearnThreshold - threshold-based discriminator for Bayes auto-learning  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   loadplugin     Mail::SpamAssassin::Plugin::AutoLearnThreshold  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This plugin implements the threshold-based auto-learning discriminator  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for SpamAssassin's Bayes subsystem.  Auto-learning is a mechanism  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 whereby high-scoring mails (or low-scoring mails, for non-spam) are fed  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 into its learning systems without user intervention, during scanning.  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note that certain tests are ignored when determining whether a message  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 should be trained upon:  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * rules with tflags set to 'learn' (the Bayesian rules)  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * rules with tflags set to 'userconf' (user configuration)  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * rules with tflags set to 'noautolearn'  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Also note that auto-learning occurs using scores from either scoreset 0  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or 1, depending on what scoreset is used during message check.  It is  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 likely that the message check and auto-learn scores will be different.  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Mail::SpamAssassin::Plugin::AutoLearnThreshold;  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
22
 | 
 
 | 
 
 | 
  
22
  
 | 
 
 | 
166
 | 
 use Mail::SpamAssassin::Plugin;  | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
    | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
725
 | 
    | 
| 
55
 | 
22
 | 
 
 | 
 
 | 
  
22
  
 | 
 
 | 
126
 | 
 use Mail::SpamAssassin::Logger;  | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
    | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1475
 | 
    | 
| 
56
 | 
22
 | 
 
 | 
 
 | 
  
22
  
 | 
 
 | 
180
 | 
 use strict;  | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
    | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
569
 | 
    | 
| 
57
 | 
22
 | 
 
 | 
 
 | 
  
22
  
 | 
 
 | 
122
 | 
 use warnings;  | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
76
 | 
    | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
865
 | 
    | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # use bytes;  | 
| 
59
 | 
22
 | 
 
 | 
 
 | 
  
22
  
 | 
 
 | 
161
 | 
 use re 'taint';  | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
    | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18062
 | 
    | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @ISA = qw(Mail::SpamAssassin::Plugin);  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
64
 | 
63
 | 
 
 | 
 
 | 
  
63
  
 | 
  
1
  
 | 
222
 | 
   my $class = shift;  | 
| 
65
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
153
 | 
   my $mailsaobject = shift;  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
67
 | 
63
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
405
 | 
   $class = ref($class) || $class;  | 
| 
68
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
358
 | 
   my $self = $class->SUPER::new($mailsaobject);  | 
| 
69
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
167
 | 
   bless ($self, $class);  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
71
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
344
 | 
   $self->set_config($mailsaobject->{conf});  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
73
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
574
 | 
   return $self;  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub set_config {  | 
| 
77
 | 
63
 | 
 
 | 
 
 | 
  
63
  
 | 
  
0
  
 | 
189
 | 
   my($self, $conf) = @_;  | 
| 
78
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
159
 | 
   my @cmds;  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 USER OPTIONS  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The following configuration settings are used to control auto-learning:  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item bayes_auto_learn_threshold_nonspam n.nn   (default: 0.1)  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The score threshold below which a mail has to score, to be fed into  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 SpamAssassin's learning systems automatically as a non-spam message.  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
93
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
374
 | 
   push (@cmds, {  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     setting => 'bayes_auto_learn_threshold_nonspam',  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     default => 0.1,  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   });  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item bayes_auto_learn_threshold_spam n.nn      (default: 12.0)  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The score threshold above which a mail has to score, to be fed into  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 SpamAssassin's learning systems automatically as a spam message.  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note: SpamAssassin requires at least 3 points from the header, and 3  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 points from the body to auto-learn as spam.  Therefore, the minimum  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 working value for this option is 6.  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If the test option autolearn_force is set, the minimum value will   | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 remain at 6 points but there is no requirement that the points come  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 from body and header rules.  This option is useful for autolearning  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with rules that are considered to be extremely safe indicators of   | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the spaminess of a message.  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
358
 | 
   push (@cmds, {  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     setting => 'bayes_auto_learn_threshold_spam',  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     default => 12.0,  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   });  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item bayes_auto_learn_on_error (0 | 1)        (default: 0)  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 With C<bayes_auto_learn_on_error> off, autolearning will be performed  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 even if bayes classifier already agrees with the new classification (i.e.  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 yielded BAYES_00 for what we are now trying to teach it as ham, or yielded  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 BAYES_99 for spam). This is a traditional setting, the default was chosen  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to retain backward compatibility.  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 With C<bayes_auto_learn_on_error> turned on, autolearning will be performed  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 only when a bayes classifier had a different opinion from what the autolearner  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is now trying to teach it (i.e. it made an error in judgement). This strategy  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 may or may not produce better future classifications, but usually works  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 very well, while also preventing unnecessary overlearning and slows down  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 database growth.  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
139
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
279
 | 
   push (@cmds, {  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     setting => 'bayes_auto_learn_on_error',  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     default => 0,  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   });  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
322
 | 
   $conf->{parser}->register_commands(\@cmds);  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub autolearn_discriminator {  | 
| 
149
 | 
12
 | 
 
 | 
 
 | 
  
12
  
 | 
  
1
  
 | 
35
 | 
   my ($self, $params) = @_;  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
151
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
   my $scan = $params->{permsgstatus};  | 
| 
152
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
   my $conf = $scan->{conf};  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Figure out min/max for autolearning.  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Default to specified auto_learn_threshold settings  | 
| 
156
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
   my $min = $conf->{bayes_auto_learn_threshold_nonspam};  | 
| 
157
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
   my $max = $conf->{bayes_auto_learn_threshold_spam};  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Find out what score we should consider this message to have ...  | 
| 
160
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
   my $score = $scan->get_autolearn_points();  | 
| 
161
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
73
 | 
   my $body_only_points = $scan->get_body_only_points();  | 
| 
162
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55
 | 
   my $head_only_points = $scan->get_head_only_points();  | 
| 
163
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
62
 | 
   my $learned_points = $scan->get_learned_points();  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # find out if any of the tests added an autolearn_force status  | 
| 
166
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
   my $force_autolearn = $scan->get_autolearn_force_status();  | 
| 
167
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
   my $force_autolearn_names = $scan->get_autolearn_force_names();  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
169
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
139
 | 
   dbg("learn: auto-learn? ham=$min, spam=$max, ".  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 "body-points=".$body_only_points.", ".  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 "head-points=".$head_only_points.", ".  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 "learned-points=".$learned_points);  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
174
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
   my $isspam;  | 
| 
175
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
64
 | 
   if ($score < $min) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
176
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     $isspam = 0;  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ($score >= $max) {  | 
| 
178
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $isspam = 1;  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
180
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
     dbg("learn: auto-learn? no: inside auto-learn thresholds, not considered ham or spam");  | 
| 
181
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     return;  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
184
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
   my $learner_said_ham_points = -1.0;  | 
| 
185
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   my $learner_said_spam_points = 1.0;  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
187
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
   if ($isspam) {  | 
| 
188
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $required_body_points = 3;  | 
| 
189
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $required_head_points = 3;  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #Set a lower threshold of "just has to be spam" if autolearn_force was set on a rule  | 
| 
192
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($force_autolearn) {  | 
| 
193
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $required_body_points = -99;  | 
| 
194
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $required_head_points = -99;  | 
| 
195
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       dbg("learn: auto-learn: autolearn_force flagged for a rule.  Removing seperate body and head point threshold.  Body Only Points: $body_only_points ($required_body_points req'd) / Head Only Points: $head_only_points ($required_head_points req'd)");  | 
| 
196
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       dbg("learn: auto-learn: autolearn_force flagged because of rule(s): $force_autolearn_names");  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
198
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       dbg("learn: auto-learn: autolearn_force not flagged for a rule. Body Only Points: $body_only_points ($required_body_points req'd) / Head Only Points: $head_only_points ($required_head_points req'd)");  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
201
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($body_only_points < $required_body_points) {  | 
| 
202
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       dbg("learn: auto-learn? no: scored as spam but too few body points (".  | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $body_only_points." < ".$required_body_points.")");  | 
| 
204
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return;  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
206
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($head_only_points < $required_head_points) {  | 
| 
207
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       dbg("learn: auto-learn? no: scored as spam but too few head points (".  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $head_only_points." < ".$required_head_points.")");  | 
| 
209
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return;  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
211
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($learned_points < $learner_said_ham_points) {  | 
| 
212
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       dbg("learn: auto-learn? no: scored as spam but learner indicated ham (".  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $learned_points." < ".$learner_said_ham_points.")");  | 
| 
214
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return;  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
217
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if (!$scan->is_spam()) {  | 
| 
218
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       dbg("learn: auto-learn? no: scored as ham but autolearn wanted spam");  | 
| 
219
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return;  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
223
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     if ($learned_points > $learner_said_spam_points) {  | 
| 
224
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       dbg("learn: auto-learn? no: scored as ham but learner indicated spam (".  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           $learned_points." > ".$learner_said_spam_points.")");  | 
| 
226
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return;  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
229
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     if ($scan->is_spam()) {  | 
| 
230
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
       dbg("learn: auto-learn? no: scored as spam but autolearn wanted ham");  | 
| 
231
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
       return;  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
235
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
   if ($conf->{bayes_auto_learn_on_error}) {  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # learn-on-error strategy chosen:  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # only allow learning if the autolearning classifier was unsure or  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # had a different opinion from what we are trying to make it learn  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  | 
| 
240
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $tests = $scan->get_tag('TESTS');  | 
| 
241
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
     if (defined $tests && $tests ne 'none') {  | 
| 
242
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my %t = map { ($_,1) } split(/,/, $tests);  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
243
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
       if ($isspam && $t{'BAYES_99'} || !$isspam && $t{'BAYES_00'}) {  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
244
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         dbg("learn: auto-learn? no: learn-on-error, %s, already classified ".  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             "as such",  $isspam ? 'spam' : 'ham');  | 
| 
246
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return;  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
251
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
   dbg("learn: auto-learn? yes, ".($isspam?"spam ($score > $max)":"ham ($score < $min)")." autolearn_force=".($force_autolearn?"yes":"no"));  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #Return an array reference because call_plugins only carry's one return value   | 
| 
254
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
   return [$isspam, $force_autolearn, $force_autolearn_names];  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  |