line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# <@LICENSE> |
2
|
|
|
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more |
3
|
|
|
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with |
4
|
|
|
|
|
|
|
# this work for additional information regarding copyright ownership. |
5
|
|
|
|
|
|
|
# The ASF licenses this file to you under the Apache License, Version 2.0 |
6
|
|
|
|
|
|
|
# (the "License"); you may not use this file except in compliance with |
7
|
|
|
|
|
|
|
# the License. You may obtain a copy of the License at: |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software |
12
|
|
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
13
|
|
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14
|
|
|
|
|
|
|
# See the License for the specific language governing permissions and |
15
|
|
|
|
|
|
|
# limitations under the License. |
16
|
|
|
|
|
|
|
# </@LICENSE> |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
package Mail::SpamAssassin::Plugin::HeaderEval; |
19
|
|
|
|
|
|
|
|
20
|
21
|
|
|
21
|
|
152
|
use strict; |
|
21
|
|
|
|
|
58
|
|
|
21
|
|
|
|
|
781
|
|
21
|
21
|
|
|
21
|
|
133
|
use warnings; |
|
21
|
|
|
|
|
65
|
|
|
21
|
|
|
|
|
721
|
|
22
|
|
|
|
|
|
|
# use bytes; |
23
|
21
|
|
|
21
|
|
139
|
use re 'taint'; |
|
21
|
|
|
|
|
49
|
|
|
21
|
|
|
|
|
765
|
|
24
|
21
|
|
|
21
|
|
167
|
use Errno qw(EBADF); |
|
21
|
|
|
|
|
64
|
|
|
21
|
|
|
|
|
1123
|
|
25
|
|
|
|
|
|
|
|
26
|
21
|
|
|
21
|
|
141
|
use Mail::SpamAssassin::Plugin; |
|
21
|
|
|
|
|
60
|
|
|
21
|
|
|
|
|
554
|
|
27
|
21
|
|
|
21
|
|
150
|
use Mail::SpamAssassin::Locales; |
|
21
|
|
|
|
|
61
|
|
|
21
|
|
|
|
|
670
|
|
28
|
21
|
|
|
21
|
|
142
|
use Mail::SpamAssassin::Util qw(get_my_locales parse_rfc822_date); |
|
21
|
|
|
|
|
59
|
|
|
21
|
|
|
|
|
1500
|
|
29
|
21
|
|
|
21
|
|
152
|
use Mail::SpamAssassin::Logger; |
|
21
|
|
|
|
|
49
|
|
|
21
|
|
|
|
|
1273
|
|
30
|
21
|
|
|
21
|
|
146
|
use Mail::SpamAssassin::Constants qw(:sa :ip); |
|
21
|
|
|
|
|
54
|
|
|
21
|
|
|
|
|
77459
|
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
our @ISA = qw(Mail::SpamAssassin::Plugin); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# constructor: register the eval rule |
35
|
|
|
|
|
|
|
sub new { |
36
|
62
|
|
|
62
|
1
|
281
|
my $class = shift; |
37
|
62
|
|
|
|
|
202
|
my $mailsaobject = shift; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# some boilerplate... |
40
|
62
|
|
33
|
|
|
513
|
$class = ref($class) || $class; |
41
|
62
|
|
|
|
|
400
|
my $self = $class->SUPER::new($mailsaobject); |
42
|
62
|
|
|
|
|
523
|
bless ($self, $class); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# the important bit! |
45
|
62
|
|
|
|
|
309
|
$self->register_eval_rule("check_for_fake_aol_relay_in_rcvd"); |
46
|
62
|
|
|
|
|
258
|
$self->register_eval_rule("check_for_faraway_charset_in_headers"); |
47
|
62
|
|
|
|
|
243
|
$self->register_eval_rule("check_for_unique_subject_id"); |
48
|
62
|
|
|
|
|
286
|
$self->register_eval_rule("check_illegal_chars"); |
49
|
62
|
|
|
|
|
233
|
$self->register_eval_rule("check_for_forged_hotmail_received_headers"); |
50
|
62
|
|
|
|
|
218
|
$self->register_eval_rule("check_for_no_hotmail_received_headers"); |
51
|
62
|
|
|
|
|
244
|
$self->register_eval_rule("check_for_msn_groups_headers"); |
52
|
62
|
|
|
|
|
218
|
$self->register_eval_rule("check_for_forged_eudoramail_received_headers"); |
53
|
62
|
|
|
|
|
231
|
$self->register_eval_rule("check_for_forged_yahoo_received_headers"); |
54
|
62
|
|
|
|
|
260
|
$self->register_eval_rule("check_for_forged_juno_received_headers"); |
55
|
62
|
|
|
|
|
235
|
$self->register_eval_rule("check_for_forged_gmail_received_headers"); |
56
|
62
|
|
|
|
|
253
|
$self->register_eval_rule("check_for_matching_env_and_hdr_from"); |
57
|
62
|
|
|
|
|
254
|
$self->register_eval_rule("sorted_recipients"); |
58
|
62
|
|
|
|
|
252
|
$self->register_eval_rule("similar_recipients"); |
59
|
62
|
|
|
|
|
212
|
$self->register_eval_rule("check_for_missing_to_header"); |
60
|
62
|
|
|
|
|
206
|
$self->register_eval_rule("check_for_forged_gw05_received_headers"); |
61
|
62
|
|
|
|
|
242
|
$self->register_eval_rule("check_for_shifted_date"); |
62
|
62
|
|
|
|
|
236
|
$self->register_eval_rule("subject_is_all_caps"); |
63
|
62
|
|
|
|
|
228
|
$self->register_eval_rule("check_for_to_in_subject"); |
64
|
62
|
|
|
|
|
547
|
$self->register_eval_rule("check_outlook_message_id"); |
65
|
62
|
|
|
|
|
234
|
$self->register_eval_rule("check_messageid_not_usable"); |
66
|
62
|
|
|
|
|
220
|
$self->register_eval_rule("check_header_count_range"); |
67
|
62
|
|
|
|
|
208
|
$self->register_eval_rule("check_unresolved_template"); |
68
|
62
|
|
|
|
|
201
|
$self->register_eval_rule("check_ratware_name_id"); |
69
|
62
|
|
|
|
|
250
|
$self->register_eval_rule("check_ratware_envelope_from"); |
70
|
62
|
|
|
|
|
207
|
$self->register_eval_rule("gated_through_received_hdr_remover"); |
71
|
62
|
|
|
|
|
221
|
$self->register_eval_rule("received_within_months"); |
72
|
62
|
|
|
|
|
216
|
$self->register_eval_rule("check_equal_from_domains"); |
73
|
|
|
|
|
|
|
|
74
|
62
|
|
|
|
|
600
|
return $self; |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# load triplets.txt into memory |
78
|
|
|
|
|
|
|
sub compile_now_start { |
79
|
2
|
|
|
2
|
1
|
6
|
my ($self) = @_; |
80
|
|
|
|
|
|
|
|
81
|
2
|
|
|
|
|
11
|
$self->word_is_in_dictionary("aba"); |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub check_for_fake_aol_relay_in_rcvd { |
85
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
86
|
0
|
|
|
|
|
0
|
local ($_); |
87
|
|
|
|
|
|
|
|
88
|
0
|
|
|
|
|
0
|
$_ = $pms->get('Received'); |
89
|
0
|
|
|
|
|
0
|
s/\s/ /gs; |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# this is the hostname format used by AOL for their relays. Spammers love |
92
|
|
|
|
|
|
|
# forging it. Don't make it more specific to match aol.com only, though -- |
93
|
|
|
|
|
|
|
# there's another set of spammers who generate fake hostnames to go with |
94
|
|
|
|
|
|
|
# it! |
95
|
0
|
0
|
|
|
|
0
|
if (/ rly-[a-z][a-z]\d\d\./i) { |
96
|
0
|
0
|
|
|
|
0
|
return 0 if /\/AOL-\d+\.\d+\.\d+\)/; # via AOL mail relay |
97
|
0
|
0
|
|
|
|
0
|
return 0 if /ESMTP id (?:RELAY|MAILRELAY|MAILIN)/; # AOLish |
98
|
0
|
|
|
|
|
0
|
return 1; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# spam: Received: from unknown (HELO mta05bw.bigpond.com) (80.71.176.130) by |
102
|
|
|
|
|
|
|
# rly-xw01.mx.aol.com with QMQP; Sat, 15 Jun 2002 23:37:16 -0000 |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# non: Received: from rly-xj02.mx.aol.com (rly-xj02.mail.aol.com [172.20.116.39]) by |
105
|
|
|
|
|
|
|
# omr-r05.mx.aol.com (v83.35) with ESMTP id RELAYIN7-0501132011; Wed, 01 |
106
|
|
|
|
|
|
|
# May 2002 13:20:11 -0400 |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# non: Received: from logs-tr.proxy.aol.com (logs-tr.proxy.aol.com [152.163.201.132]) |
109
|
|
|
|
|
|
|
# by rly-ip01.mx.aol.com (8.8.8/8.8.8/AOL-5.0.0) |
110
|
|
|
|
|
|
|
# with ESMTP id NAA08955 for <sapient-alumni@yahoogroups.com>; |
111
|
|
|
|
|
|
|
# Thu, 4 Apr 2002 13:11:20 -0500 (EST) |
112
|
|
|
|
|
|
|
|
113
|
0
|
|
|
|
|
0
|
return 0; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub check_for_faraway_charset_in_headers { |
117
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
118
|
0
|
|
|
|
|
0
|
my $hdr; |
119
|
|
|
|
|
|
|
|
120
|
0
|
|
|
|
|
0
|
my @locales = get_my_locales($self->{main}->{conf}->{ok_locales}); |
121
|
|
|
|
|
|
|
|
122
|
0
|
0
|
|
|
|
0
|
return 0 if grep { $_ eq "all" } @locales; |
|
0
|
|
|
|
|
0
|
|
123
|
|
|
|
|
|
|
|
124
|
0
|
|
|
|
|
0
|
for my $h (qw(From Subject)) { |
125
|
0
|
|
|
|
|
0
|
my @hdrs = $pms->get("$h:raw"); # ??? get() returns a scalar ??? |
126
|
0
|
0
|
|
|
|
0
|
if ($#hdrs >= 0) { |
127
|
0
|
|
|
|
|
0
|
$hdr = join(" ", @hdrs); |
128
|
|
|
|
|
|
|
} else { |
129
|
0
|
|
|
|
|
0
|
$hdr = ''; |
130
|
|
|
|
|
|
|
} |
131
|
0
|
|
|
|
|
0
|
while ($hdr =~ /=\?(.+?)\?.\?.*?\?=/g) { |
132
|
0
|
0
|
|
|
|
0
|
Mail::SpamAssassin::Locales::is_charset_ok_for_locales($1, @locales) |
133
|
|
|
|
|
|
|
or return 1; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
} |
136
|
0
|
|
|
|
|
0
|
0; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub check_for_unique_subject_id { |
140
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
141
|
0
|
|
|
|
|
0
|
local ($_); |
142
|
0
|
|
|
|
|
0
|
$_ = lc $pms->get('Subject'); |
143
|
0
|
|
|
|
|
0
|
study; # study is a no-op since perl 5.16.0, eliminating related bugs |
144
|
|
|
|
|
|
|
|
145
|
0
|
|
|
|
|
0
|
my $id = 0; |
146
|
0
|
0
|
0
|
|
|
0
|
if (/[-_\.\s]{7,}([-a-z0-9]{4,})$/ |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
147
|
|
|
|
|
|
|
|| /\s{10,}(?:\S\s)?(\S+)$/ |
148
|
|
|
|
|
|
|
|| /\s{3,}[-:\#\(\[]+([-a-z0-9]{4,})[\]\)]+$/ |
149
|
|
|
|
|
|
|
|| /\s{3,}[:\#\(\[]*([a-f0-9]{4,})[\]\)]*$/ |
150
|
|
|
|
|
|
|
|| /\s{3,}[-:\#]([a-z0-9]{5,})$/ |
151
|
|
|
|
|
|
|
|| /[\s._]{3,}([^0\s._]\d{3,})$/ |
152
|
|
|
|
|
|
|
|| /[\s._]{3,}\[(\S+)\]$/ |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# (7217vPhZ0-478TLdy5829qicU9-0@26) and similar |
155
|
|
|
|
|
|
|
|| /\(([-\w]{7,}\@\d+)\)$/ |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# Seven or more digits at the end of a subject is almost certainly a id |
158
|
|
|
|
|
|
|
|| /\b(\d{7,})\s*$/ |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# stuff at end of line after "!" or "?" is usually an id |
161
|
|
|
|
|
|
|
|| /[!\?]\s*(\d{4,}|\w+(-\w+)+)\s*$/ |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# 9095IPZK7-095wsvp8715rJgY8-286-28 and similar |
164
|
|
|
|
|
|
|
# excluding 'Re:', etc and the first word |
165
|
|
|
|
|
|
|
|| /(?:\w{2,3}:\s)?\w+\s+(\w{7,}-\w{7,}(-\w+)*)\s*$/ |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# #30D7 and similar |
168
|
|
|
|
|
|
|
|| /\s#\s*([a-f0-9]{4,})\s*$/ |
169
|
|
|
|
|
|
|
) |
170
|
|
|
|
|
|
|
{ |
171
|
0
|
|
|
|
|
0
|
$id = $1; |
172
|
|
|
|
|
|
|
# exempt online purchases |
173
|
0
|
0
|
0
|
|
|
0
|
if ($id =~ /\d{5,}/ |
174
|
|
|
|
|
|
|
&& /(?:item|invoice|order|number|confirmation).{1,6}\Q$id\E\s*$/) |
175
|
|
|
|
|
|
|
{ |
176
|
0
|
|
|
|
|
0
|
$id = 0; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# for the "foo-bar-baz" case, otherwise it won't |
180
|
|
|
|
|
|
|
# be found in the dict: |
181
|
0
|
|
|
|
|
0
|
$id =~ s/-//; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
0
|
|
0
|
|
|
0
|
return ($id && !$self->word_is_in_dictionary($id)); |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# word_is_in_dictionary() |
188
|
|
|
|
|
|
|
# |
189
|
|
|
|
|
|
|
# See if the word looks like an English word, by checking if each triplet |
190
|
|
|
|
|
|
|
# of letters it contains is one that can be found in the English language. |
191
|
|
|
|
|
|
|
# Does not include triplets only found in proper names, or in the Latin |
192
|
|
|
|
|
|
|
# and Greek terms that might be found in a larger dictionary |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
my %triplets; |
195
|
|
|
|
|
|
|
my $triplets_loaded = 0; |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
sub word_is_in_dictionary { |
198
|
2
|
|
|
2
|
0
|
6
|
my ($self, $word) = @_; |
199
|
2
|
|
|
|
|
5
|
local ($_); |
200
|
2
|
|
|
|
|
9
|
local $/ = "\n"; # Ensure $/ is set appropriately |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# $word =~ tr/A-Z/a-z/; # already done by this stage |
203
|
2
|
|
|
|
|
9
|
$word =~ s/^\s+//; |
204
|
2
|
|
|
|
|
9
|
$word =~ s/\s+$//; |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# If it contains a digit, dash, etc, it's not a valid word. |
207
|
|
|
|
|
|
|
# Don't reject words like "can't" and "I'll" |
208
|
2
|
50
|
|
|
|
10
|
return 0 if ($word =~ /[^a-z\']/); |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# handle a few common "blah blah blah (comment)" styles |
211
|
2
|
50
|
|
|
|
14
|
return 1 if ($word eq "ot"); # off-topic |
212
|
2
|
50
|
|
|
|
12
|
return 1 if ($word =~ /(?:linux|nix|bsd)/); # not in most dicts |
213
|
2
|
50
|
|
|
|
12
|
return 1 if ($word =~ /(?:whew|phew|attn|tha?nx)/); # not in most dicts |
214
|
|
|
|
|
|
|
|
215
|
2
|
|
|
|
|
6
|
my $word_len = length($word); |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
# Unique IDs probably aren't going to be only one or two letters long |
218
|
2
|
50
|
|
|
|
9
|
return 1 if ($word_len < 3); |
219
|
|
|
|
|
|
|
|
220
|
2
|
50
|
|
|
|
7
|
if (!$triplets_loaded) { |
221
|
|
|
|
|
|
|
# take a copy to avoid modifying the real one |
222
|
2
|
|
|
|
|
12
|
my @default_triplets_path = @Mail::SpamAssassin::default_rules_path; |
223
|
2
|
|
|
|
|
28
|
s{$}{/triplets.txt} for @default_triplets_path; |
224
|
2
|
|
|
|
|
14
|
my $filename = $self->{main}->first_existing_path (@default_triplets_path); |
225
|
|
|
|
|
|
|
|
226
|
2
|
50
|
|
|
|
9
|
if (!defined $filename) { |
227
|
2
|
|
|
|
|
18
|
dbg("eval: failed to locate the triplets.txt file"); |
228
|
2
|
|
|
|
|
13
|
return 1; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
0
|
|
|
|
|
0
|
local *TRIPLETS; |
232
|
0
|
0
|
|
|
|
0
|
if (!open (TRIPLETS, "<$filename")) { |
233
|
0
|
|
|
|
|
0
|
dbg("eval: failed to open '$filename', cannot check dictionary: $!"); |
234
|
0
|
|
|
|
|
0
|
return 1; |
235
|
|
|
|
|
|
|
} |
236
|
0
|
|
|
|
|
0
|
for($!=0; <TRIPLETS>; $!=0) { |
237
|
0
|
|
|
|
|
0
|
chomp; |
238
|
0
|
|
|
|
|
0
|
$triplets{$_} = 1; |
239
|
|
|
|
|
|
|
} |
240
|
0
|
0
|
0
|
|
|
0
|
defined $_ || $!==0 or |
|
|
0
|
|
|
|
|
|
241
|
|
|
|
|
|
|
$!==EBADF ? dbg("eval: error reading from $filename: $!") |
242
|
|
|
|
|
|
|
: die "error reading from $filename: $!"; |
243
|
0
|
0
|
|
|
|
0
|
close(TRIPLETS) or die "error closing $filename: $!"; |
244
|
|
|
|
|
|
|
|
245
|
0
|
|
|
|
|
0
|
$triplets_loaded = 1; |
246
|
|
|
|
|
|
|
} # if (!$triplets_loaded) |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
|
249
|
0
|
|
|
|
|
0
|
my $i; |
250
|
|
|
|
|
|
|
|
251
|
0
|
|
|
|
|
0
|
for ($i = 0; $i < ($word_len - 2); $i++) { |
252
|
0
|
|
|
|
|
0
|
my $triplet = substr($word, $i, 3); |
253
|
0
|
0
|
|
|
|
0
|
if (!$triplets{$triplet}) { |
254
|
0
|
|
|
|
|
0
|
dbg("eval: unique ID: letter triplet '$triplet' from word '$word' not valid"); |
255
|
0
|
|
|
|
|
0
|
return 0; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
} # for ($i = 0; $i < ($word_len - 2); $i++) |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
# All letter triplets in word were found to be valid |
260
|
0
|
|
|
|
|
0
|
return 1; |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# look for 8-bit and other illegal characters that should be MIME |
264
|
|
|
|
|
|
|
# encoded, these might want to exempt languages that do not use |
265
|
|
|
|
|
|
|
# Latin-based alphabets, but only if the user wants it that way |
266
|
|
|
|
|
|
|
sub check_illegal_chars { |
267
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms, $header, $ratio, $count) = @_; |
268
|
|
|
|
|
|
|
|
269
|
0
|
0
|
|
|
|
0
|
$header .= ":raw" unless $header =~ /:raw$/; |
270
|
0
|
|
|
|
|
0
|
my $str = $pms->get($header); |
271
|
0
|
0
|
0
|
|
|
0
|
return 0 if !defined $str || $str eq ''; |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
# count illegal substrings (RFC 2045) |
274
|
|
|
|
|
|
|
# (non-ASCII + C0 controls except TAB, NL, CR) |
275
|
0
|
|
|
|
|
0
|
my $illegal = $str =~ tr/\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff//; |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# minor exemptions for Subject |
278
|
0
|
0
|
0
|
|
|
0
|
if ($illegal > 0 && lc $header eq "subject:raw") { |
279
|
|
|
|
|
|
|
# only exempt a single cent sign, pound sign, or registered sign |
280
|
0
|
|
|
|
|
0
|
my $exempt = $str =~ tr/\xa2\xa3\xae//; |
281
|
0
|
0
|
|
|
|
0
|
$illegal-- if $exempt == 1; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
0
|
0
|
|
|
|
0
|
return 0 if $str eq ''; |
285
|
0
|
|
0
|
|
|
0
|
return (($illegal / length($str)) >= $ratio && $illegal >= $count); |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
# ezmlm has a very bad habit of removing Received: headers! bad ezmlm. |
289
|
|
|
|
|
|
|
# |
290
|
|
|
|
|
|
|
sub gated_through_received_hdr_remover { |
291
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
292
|
|
|
|
|
|
|
|
293
|
0
|
|
|
|
|
0
|
my $txt = $pms->get("Mailing-List",undef); |
294
|
0
|
0
|
0
|
|
|
0
|
if (defined $txt && $txt =~ /^contact \S+\@\S+\; run by ezmlm$/) { |
295
|
0
|
|
|
|
|
0
|
my $dlto = $pms->get("Delivered-To"); |
296
|
0
|
|
|
|
|
0
|
my $rcvd = $pms->get("Received"); |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# ensure we have other indicative headers too |
299
|
0
|
0
|
0
|
|
|
0
|
if ($dlto =~ /^mailing list \S+\@\S+/ && |
300
|
|
|
|
|
|
|
$rcvd =~ /qmail \d+ invoked (?:from network|by .{3,20})\); \d+ ... \d+/) |
301
|
|
|
|
|
|
|
{ |
302
|
0
|
|
|
|
|
0
|
return 1; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
0
|
|
|
|
|
0
|
my $rcvd = $pms->get("Received",undef); |
307
|
0
|
0
|
|
|
|
0
|
if (!defined $rcvd) { |
308
|
|
|
|
|
|
|
# we have no Received headers! These tests cannot run in that case |
309
|
0
|
|
|
|
|
0
|
return 1; |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
# MSN groups removes Received lines. thanks MSN |
313
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ /from groups\.msn\.com \(\S+\.msn\.com /) { |
314
|
0
|
|
|
|
|
0
|
return 1; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
0
|
|
|
|
|
0
|
return 0; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# FORGED_HOTMAIL_RCVD |
321
|
|
|
|
|
|
|
sub _check_for_forged_hotmail_received_headers { |
322
|
0
|
|
|
0
|
|
0
|
my ($self, $pms) = @_; |
323
|
|
|
|
|
|
|
|
324
|
0
|
0
|
|
|
|
0
|
if (defined $pms->{hotmail_addr_but_no_hotmail_received}) { return; } |
|
0
|
|
|
|
|
0
|
|
325
|
|
|
|
|
|
|
|
326
|
0
|
|
|
|
|
0
|
$pms->{hotmail_addr_with_forged_hotmail_received} = 0; |
327
|
0
|
|
|
|
|
0
|
$pms->{hotmail_addr_but_no_hotmail_received} = 0; |
328
|
|
|
|
|
|
|
|
329
|
0
|
|
|
|
|
0
|
my $rcvd = $pms->get('Received'); |
330
|
0
|
|
|
|
|
0
|
$rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp |
331
|
|
|
|
|
|
|
|
332
|
0
|
0
|
|
|
|
0
|
return if ($rcvd =~ |
333
|
|
|
|
|
|
|
/from mail pickup service by hotmail\.com with Microsoft SMTPSVC;/); |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
# Microsoft passes Hotmail mail directly to MSN Group servers. |
336
|
0
|
0
|
|
|
|
0
|
return if $self->check_for_msn_groups_headers($pms); |
337
|
|
|
|
|
|
|
|
338
|
0
|
|
|
|
|
0
|
my $ip = $pms->get('X-Originating-Ip',undef); |
339
|
0
|
|
|
|
|
0
|
my $IP_ADDRESS = IP_ADDRESS; |
340
|
0
|
|
|
|
|
0
|
my $orig = $pms->get('X-OriginatorOrg',undef); |
341
|
0
|
|
|
|
|
0
|
my $ORIGINATOR = 'hotmail.com'; |
342
|
|
|
|
|
|
|
|
343
|
0
|
0
|
0
|
|
|
0
|
if (defined $ip && $ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
344
|
0
|
0
|
0
|
|
|
0
|
if (defined $orig && $orig =~ /$ORIGINATOR/) { $orig = 1; } else { $orig = 0; } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
# Hotmail formats its received headers like this: |
347
|
|
|
|
|
|
|
# Received: from hotmail.com (f135.law8.hotmail.com [216.33.241.135]) |
348
|
|
|
|
|
|
|
# or like |
349
|
|
|
|
|
|
|
# Received: from EUR01-VE1-obe.outbound.protection.outlook.com (mail-oln040092066056.outbound.protection.outlook.com [40.92.66.56]) |
350
|
|
|
|
|
|
|
# spammers do not ;) |
351
|
|
|
|
|
|
|
|
352
|
0
|
0
|
|
|
|
0
|
if ($self->gated_through_received_hdr_remover($pms)) { return; } |
|
0
|
|
|
|
|
0
|
|
353
|
|
|
|
|
|
|
|
354
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /from (?:\S*\.)?hotmail.com \(\S+\.hotmail(?:\.msn)?\.com[ \)]/ && $ip) |
355
|
0
|
|
|
|
|
0
|
{ return; } |
356
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /from \S*\.outbound\.protection\.outlook\.com \(\S+\.outbound\.protection\.outlook\.com[ \)]/ && $orig) |
357
|
0
|
|
|
|
|
0
|
{ return; } |
358
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /from \S*\.hotmail.com \(\[$IP_ADDRESS\][ \):]/ && $ip) |
359
|
0
|
|
|
|
|
0
|
{ return; } |
360
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /from \S+ by \S+\.hotmail(?:\.msn)?\.com with HTTP\;/ && $ip) |
361
|
0
|
|
|
|
|
0
|
{ return; } |
362
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /from \[66\.218.\S+\] by \S+\.yahoo\.com/ && $ip) |
363
|
0
|
|
|
|
|
0
|
{ return; } |
364
|
|
|
|
|
|
|
|
365
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ /(?:from |HELO |helo=)\S*hotmail\.com\b/) { |
366
|
|
|
|
|
|
|
# HELO'd as hotmail.com, despite not being hotmail |
367
|
0
|
|
|
|
|
0
|
$pms->{hotmail_addr_with_forged_hotmail_received} = 1; |
368
|
|
|
|
|
|
|
} else { |
369
|
|
|
|
|
|
|
# check to see if From claimed to be @hotmail.com |
370
|
0
|
|
|
|
|
0
|
my $from = $pms->get('From:addr'); |
371
|
0
|
0
|
|
|
|
0
|
if ($from !~ /\bhotmail\.com$/i) { return; } |
|
0
|
|
|
|
|
0
|
|
372
|
0
|
|
|
|
|
0
|
$pms->{hotmail_addr_but_no_hotmail_received} = 1; |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
# FORGED_HOTMAIL_RCVD |
377
|
|
|
|
|
|
|
sub check_for_forged_hotmail_received_headers { |
378
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
379
|
0
|
|
|
|
|
0
|
$self->_check_for_forged_hotmail_received_headers($pms); |
380
|
0
|
|
|
|
|
0
|
return $pms->{hotmail_addr_with_forged_hotmail_received}; |
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# SEMIFORGED_HOTMAIL_RCVD |
384
|
|
|
|
|
|
|
sub check_for_no_hotmail_received_headers { |
385
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
386
|
0
|
|
|
|
|
0
|
$self->_check_for_forged_hotmail_received_headers($pms); |
387
|
0
|
|
|
|
|
0
|
return $pms->{hotmail_addr_but_no_hotmail_received}; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
# MSN_GROUPS |
391
|
|
|
|
|
|
|
sub check_for_msn_groups_headers { |
392
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
393
|
|
|
|
|
|
|
|
394
|
0
|
|
|
|
|
0
|
my $to = $pms->get('To'); |
395
|
0
|
0
|
|
|
|
0
|
return 0 unless $to =~ /<(\S+)\@groups\.msn\.com>/i; |
396
|
0
|
|
|
|
|
0
|
my $listname = $1; |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
# from Theo Van Dinter, see bug 591 |
399
|
|
|
|
|
|
|
# Updated by DOS, based on messages from Bob Menschel, bug 4301 |
400
|
|
|
|
|
|
|
|
401
|
0
|
0
|
|
|
|
0
|
return 0 unless $pms->get('Received') =~ |
402
|
|
|
|
|
|
|
/from mail pickup service by ((?:p\d\d\.)groups\.msn\.com)\b/; |
403
|
0
|
|
|
|
|
0
|
my $server = $1; |
404
|
|
|
|
|
|
|
|
405
|
0
|
0
|
|
|
|
0
|
if ($listname =~ /^notifications$/) { |
406
|
0
|
0
|
|
|
|
0
|
return 0 unless $pms->get('Message-Id') =~ /^<\S+\@$server>/; |
407
|
|
|
|
|
|
|
} else { |
408
|
0
|
0
|
|
|
|
0
|
return 0 unless $pms->get('Message-Id') =~ /^<$listname-\S+\@groups\.msn\.com>/; |
409
|
0
|
0
|
|
|
|
0
|
return 0 unless $pms->get('EnvelopeFrom:addr') =~ /$listname-bounce\@groups\.msn\.com/; |
410
|
|
|
|
|
|
|
} |
411
|
0
|
|
|
|
|
0
|
return 1; |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
# MSN Groups |
414
|
|
|
|
|
|
|
# Return-path: <ListName-bounce@groups.msn.com> |
415
|
|
|
|
|
|
|
# Received: from groups.msn.com (tk2dcpuba02.msn.com [65.54.195.210]) by |
416
|
|
|
|
|
|
|
# dogma.slashnull.org (8.11.6/8.11.6) with ESMTP id g72K35v10457 for |
417
|
|
|
|
|
|
|
# <zzzzzzzzzzzz@jmason.org>; Fri, 2 Aug 2002 21:03:05 +0100 |
418
|
|
|
|
|
|
|
# Received: from mail pickup service by groups.msn.com with Microsoft |
419
|
|
|
|
|
|
|
# SMTPSVC; Fri, 2 Aug 2002 13:01:30 -0700 |
420
|
|
|
|
|
|
|
# Message-id: <ListName-1392@groups.msn.com> |
421
|
|
|
|
|
|
|
# X-loop: notifications@groups.msn.com |
422
|
|
|
|
|
|
|
# Reply-to: "List Full Name" <ListName@groups.msn.com> |
423
|
|
|
|
|
|
|
# To: "List Full Name" <ListName@groups.msn.com> |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
# Return-path: <ListName-bounce@groups.msn.com> |
426
|
|
|
|
|
|
|
# Received: from p04.groups.msn.com ([65.54.195.216]) etc... |
427
|
|
|
|
|
|
|
# Received: from mail pickup service by p04.groups.msn.com with Microsoft SMTPSVC; |
428
|
|
|
|
|
|
|
# Thu, 5 May 2005 20:30:37 -0700 |
429
|
|
|
|
|
|
|
# X-Originating-Ip: 207.68.170.30 |
430
|
|
|
|
|
|
|
# From: =?iso-8859-1?B?IqSj4/D9pEbzeN9s9vLw6qQiIA==?=<zzzzzzzz@hotmail.com> |
431
|
|
|
|
|
|
|
# To: "Managers of List Name" <notifications@groups.msn.com> |
432
|
|
|
|
|
|
|
# Subject: =?iso-8859-1?Q?APPROVAL_NEEDED:_=A4=A3=E3=F0=FD=A4F=F3x=DFl?= |
433
|
|
|
|
|
|
|
# =?iso-8859-1?Q?=F6=F2=F0=EA=A4_applied_to_join_List_Name=2C?= |
434
|
|
|
|
|
|
|
# =?iso-8859-1?Q?_an_MSN_Group?= |
435
|
|
|
|
|
|
|
# Date: Thu, 5 May 2005 20:30:37 -0700 |
436
|
|
|
|
|
|
|
# MIME-Version: 1.0 |
437
|
|
|
|
|
|
|
# Content-Type: multipart/alternative; |
438
|
|
|
|
|
|
|
# boundary="----=_NextPart_000_333944_01C551B1.4BBA02B0" |
439
|
|
|
|
|
|
|
# X-MimeOLE: Produced By Microsoft MimeOLE V5.50.4927.1200 |
440
|
|
|
|
|
|
|
# Message-ID: <TK2DCPUBA042cv0aGlt00020aa3@p04.groups.msn.com> |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
# Return-path: <ListName-bounce@groups.msn.com> |
443
|
|
|
|
|
|
|
# Received: from [65.54.208.83] (helo=p05.groups.msn.com) etc... |
444
|
|
|
|
|
|
|
# Received: from mail pickup service by p05.groups.msn.com with Microsoft SMTPSVC; |
445
|
|
|
|
|
|
|
# Fri, 6 May 2005 14:59:25 -0700 |
446
|
|
|
|
|
|
|
# X-Originating-Ip: 207.68.170.30 |
447
|
|
|
|
|
|
|
# Message-Id: <ListName-101@groups.msn.com> |
448
|
|
|
|
|
|
|
# Reply-To: "List Name" <ListName@groups.msn.com> |
449
|
|
|
|
|
|
|
# From: "whoever" <zzzzzzzzzz@hotmail.com> |
450
|
|
|
|
|
|
|
# To: "List Name" <ListName@groups.msn.com> |
451
|
|
|
|
|
|
|
# Subject: whatever |
452
|
|
|
|
|
|
|
# Date: Fri, 6 May 2005 14:59:25 -0700 |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
########################################################################### |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
sub check_for_forged_eudoramail_received_headers { |
459
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
460
|
|
|
|
|
|
|
|
461
|
0
|
|
|
|
|
0
|
my $from = $pms->get('From:addr'); |
462
|
0
|
0
|
|
|
|
0
|
if ($from !~ /\beudoramail\.com$/i) { return 0; } |
|
0
|
|
|
|
|
0
|
|
463
|
|
|
|
|
|
|
|
464
|
0
|
|
|
|
|
0
|
my $rcvd = $pms->get('Received'); |
465
|
0
|
|
|
|
|
0
|
$rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp |
466
|
|
|
|
|
|
|
|
467
|
0
|
|
|
|
|
0
|
my $ip = $pms->get('X-Sender-Ip',undef); |
468
|
0
|
|
|
|
|
0
|
my $IP_ADDRESS = IP_ADDRESS; |
469
|
0
|
0
|
0
|
|
|
0
|
if (defined $ip && $ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
# Eudoramail formats its received headers like this: |
472
|
|
|
|
|
|
|
# Received: from Unknown/Local ([?.?.?.?]) by shared1-mail.whowhere.com; |
473
|
|
|
|
|
|
|
# Thu Nov 29 13:44:25 2001 |
474
|
|
|
|
|
|
|
# Message-Id: <JGDHDEHPPJECDAAA@shared1-mail.whowhere.com> |
475
|
|
|
|
|
|
|
# Organization: QUALCOMM Eudora Web-Mail (http://www.eudoramail.com:80) |
476
|
|
|
|
|
|
|
# X-Sender-Ip: 192.175.21.146 |
477
|
|
|
|
|
|
|
# X-Mailer: MailCity Service |
478
|
|
|
|
|
|
|
|
479
|
0
|
0
|
|
|
|
0
|
if ($self->gated_through_received_hdr_remover($pms)) { return 0; } |
|
0
|
|
|
|
|
0
|
|
480
|
|
|
|
|
|
|
|
481
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /by \S*whowhere.com\;/ && $ip) { return 0; } |
|
0
|
|
|
|
|
0
|
|
482
|
|
|
|
|
|
|
|
483
|
0
|
|
|
|
|
0
|
return 1; |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
########################################################################### |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
sub check_for_forged_yahoo_received_headers { |
489
|
81
|
|
|
81
|
0
|
240
|
my ($self, $pms) = @_; |
490
|
|
|
|
|
|
|
|
491
|
81
|
|
|
|
|
324
|
my $from = $pms->get('From:addr'); |
492
|
81
|
50
|
|
|
|
432
|
if ($from !~ /\byahoo\.com$/i) { return 0; } |
|
81
|
|
|
|
|
1389
|
|
493
|
|
|
|
|
|
|
|
494
|
0
|
|
|
|
|
0
|
my $rcvd = $pms->get('Received'); |
495
|
|
|
|
|
|
|
|
496
|
0
|
0
|
0
|
|
|
0
|
if ($pms->get("Resent-From") ne '' && $pms->get("Resent-To") ne '') { |
497
|
0
|
|
|
|
|
0
|
my $xrcvd = $pms->get("X-Received"); |
498
|
0
|
0
|
|
|
|
0
|
$rcvd = $xrcvd if $xrcvd ne ''; |
499
|
|
|
|
|
|
|
} |
500
|
0
|
|
|
|
|
0
|
$rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
# not sure about this |
503
|
|
|
|
|
|
|
#if ($rcvd !~ /from \S*yahoo\.com/) { return 0; } |
504
|
|
|
|
|
|
|
|
505
|
0
|
0
|
|
|
|
0
|
if ($self->gated_through_received_hdr_remover($pms)) { return 0; } |
|
0
|
|
|
|
|
0
|
|
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
# bug 3740: ignore bounces from Yahoo!. only honoured if the |
508
|
|
|
|
|
|
|
# correct rDNS shows up in the trusted relay list, or first untrusted relay |
509
|
|
|
|
|
|
|
# |
510
|
|
|
|
|
|
|
# bug 4528: [ ip=68.142.202.54 rdns=mta122.mail.mud.yahoo.com |
511
|
|
|
|
|
|
|
# helo=mta122.mail.mud.yahoo.com by=eclectic.kluge.net ident= |
512
|
|
|
|
|
|
|
# envfrom= intl=0 id=49F2EAF13B auth= ] |
513
|
|
|
|
|
|
|
# |
514
|
0
|
0
|
0
|
|
|
0
|
if ($pms->{relays_trusted_str} =~ / rdns=\S+\.yahoo\.com / |
515
|
|
|
|
|
|
|
|| $pms->{relays_untrusted_str} =~ /^[^\]]+ rdns=\S+\.yahoo\.com /) |
516
|
0
|
|
|
|
|
0
|
{ return 0; } |
517
|
|
|
|
|
|
|
|
518
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ /by web\S+\.mail\S*\.yahoo\.com via HTTP/) { return 0; } |
|
0
|
|
|
|
|
0
|
|
519
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ /by sonic\S+\.consmr\.mail\S*\.yahoo\.com with HTTP/) { return 0; } |
|
0
|
|
|
|
|
0
|
|
520
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ /by smtp\S+\.yahoo\.com with SMTP/) { return 0; } |
|
0
|
|
|
|
|
0
|
|
521
|
0
|
|
|
|
|
0
|
my $IP_ADDRESS = IP_ADDRESS; |
522
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ |
523
|
|
|
|
|
|
|
/from \[$IP_ADDRESS\] by \S+\.(?:groups|scd|dcn)\.yahoo\.com with NNFMP/) { |
524
|
0
|
|
|
|
|
0
|
return 0; |
525
|
|
|
|
|
|
|
} |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
# used in "forward this news item to a friend" links. There's no better |
528
|
|
|
|
|
|
|
# received hdrs to match on, unfortunately. I'm not sure if the next test is |
529
|
|
|
|
|
|
|
# still useful, as a result. |
530
|
|
|
|
|
|
|
# |
531
|
|
|
|
|
|
|
# search for msgid <20020929140301.451A92940A9@xent.com>, subject "Yahoo! |
532
|
|
|
|
|
|
|
# News Story - Top Stories", date Sep 29 2002 on |
533
|
|
|
|
|
|
|
# <http://xent.com/pipermail/fork/> for an example. |
534
|
|
|
|
|
|
|
# |
535
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /\bmailer\d+\.bulk\.scd\.yahoo\.com\b/ |
536
|
0
|
|
|
|
|
0
|
&& $from =~ /\@reply\.yahoo\.com$/i) { return 0; } |
537
|
|
|
|
|
|
|
|
538
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ /by \w+\.\w+\.yahoo\.com \(\d+\.\d+\.\d+\/\d+\.\d+\.\d+\)(?: with ESMTP)? id \w+/) { |
539
|
|
|
|
|
|
|
# possibly sent from "mail this story to a friend" |
540
|
0
|
|
|
|
|
0
|
return 0; |
541
|
|
|
|
|
|
|
} |
542
|
|
|
|
|
|
|
|
543
|
0
|
|
|
|
|
0
|
return 1; |
544
|
|
|
|
|
|
|
} |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
sub check_for_forged_juno_received_headers { |
547
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
548
|
|
|
|
|
|
|
|
549
|
0
|
|
|
|
|
0
|
my $from = $pms->get('From:addr'); |
550
|
0
|
0
|
|
|
|
0
|
if ($from !~ /\bjuno\.com$/i) { return 0; } |
|
0
|
|
|
|
|
0
|
|
551
|
|
|
|
|
|
|
|
552
|
0
|
0
|
|
|
|
0
|
if ($self->gated_through_received_hdr_remover($pms)) { return 0; } |
|
0
|
|
|
|
|
0
|
|
553
|
|
|
|
|
|
|
|
554
|
0
|
|
|
|
|
0
|
my $xorig = $pms->get('X-Originating-IP'); |
555
|
0
|
|
|
|
|
0
|
my $xmailer = $pms->get('X-Mailer'); |
556
|
0
|
|
|
|
|
0
|
my $rcvd = $pms->get('Received'); |
557
|
0
|
|
|
|
|
0
|
my $IP_ADDRESS = IP_ADDRESS; |
558
|
|
|
|
|
|
|
|
559
|
0
|
0
|
|
|
|
0
|
if ($xorig ne '') { |
560
|
|
|
|
|
|
|
# New style Juno has no X-Originating-IP header, and other changes |
561
|
0
|
0
|
0
|
|
|
0
|
if($rcvd !~ /from.*\b(?:juno|untd)\.com.*[\[\(]$IP_ADDRESS[\]\)].*by/ |
562
|
0
|
|
|
|
|
0
|
&& $rcvd !~ / cookie\.(?:juno|untd)\.com /) { return 1; } |
563
|
0
|
0
|
|
|
|
0
|
if($xmailer !~ /Juno /) { return 1; } |
|
0
|
|
|
|
|
0
|
|
564
|
|
|
|
|
|
|
} else { |
565
|
0
|
0
|
|
|
|
0
|
if($rcvd =~ /from.*\bmail\.com.*\[$IP_ADDRESS\].*by/) { |
|
|
0
|
|
|
|
|
|
566
|
0
|
0
|
|
|
|
0
|
if($xmailer !~ /\bmail\.com/) { return 1; } |
|
0
|
|
|
|
|
0
|
|
567
|
|
|
|
|
|
|
} elsif($rcvd =~ /from (webmail\S+\.untd\.com) \(\1 \[$IP_ADDRESS\]\) by/) { |
568
|
0
|
0
|
|
|
|
0
|
if($xmailer !~ /^Webmail Version \d/) { return 1; } |
|
0
|
|
|
|
|
0
|
|
569
|
|
|
|
|
|
|
} else { |
570
|
0
|
|
|
|
|
0
|
return 1; |
571
|
|
|
|
|
|
|
} |
572
|
0
|
0
|
|
|
|
0
|
if($xorig !~ /$IP_ADDRESS/) { return 1; } |
|
0
|
|
|
|
|
0
|
|
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
|
575
|
0
|
|
|
|
|
0
|
return 0; |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
sub check_for_forged_gmail_received_headers { |
579
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
580
|
21
|
|
|
21
|
|
209
|
use constant GOOGLE_MESSAGE_STATE_LENGTH_MIN => 60; |
|
21
|
|
|
|
|
59
|
|
|
21
|
|
|
|
|
1670
|
|
581
|
21
|
|
|
21
|
|
163
|
use constant GOOGLE_SMTP_SOURCE_LENGTH_MIN => 60; |
|
21
|
|
|
|
|
46
|
|
|
21
|
|
|
|
|
8948
|
|
582
|
|
|
|
|
|
|
|
583
|
0
|
|
|
|
|
0
|
my $from = $pms->get('From:addr'); |
584
|
0
|
0
|
|
|
|
0
|
if ($from !~ /\bgmail\.com$/i) { return 0; } |
|
0
|
|
|
|
|
0
|
|
585
|
|
|
|
|
|
|
|
586
|
0
|
|
|
|
|
0
|
my $xgms = $pms->get('X-Gm-Message-State'); |
587
|
0
|
|
|
|
|
0
|
my $xss = $pms->get('X-Google-Smtp-Source'); |
588
|
0
|
|
|
|
|
0
|
my $xreceived = $pms->get('X-Received'); |
589
|
0
|
|
|
|
|
0
|
my $received = $pms->get('Received'); |
590
|
|
|
|
|
|
|
|
591
|
0
|
0
|
|
|
|
0
|
if ($xreceived =~ /by 10\.\S+ with SMTP id \S+/) { return 0; } |
|
0
|
|
|
|
|
0
|
|
592
|
0
|
0
|
|
|
|
0
|
if ($xreceived =~ /by 2002\:a\d\d\:\w+\:\S+ with SMTP id \S+/) { return 0; } |
|
0
|
|
|
|
|
0
|
|
593
|
0
|
0
|
|
|
|
0
|
if ($received =~ /by smtp\.googlemail\.com with ESMTPSA id \S+/) { |
594
|
0
|
|
|
|
|
0
|
return 0; |
595
|
|
|
|
|
|
|
} |
596
|
0
|
0
|
0
|
|
|
0
|
if ( (length($xgms) >= GOOGLE_MESSAGE_STATE_LENGTH_MIN) && |
597
|
|
|
|
|
|
|
(length($xss) >= GOOGLE_SMTP_SOURCE_LENGTH_MIN)) { |
598
|
0
|
|
|
|
|
0
|
return 0; |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
|
601
|
0
|
|
|
|
|
0
|
return 1; |
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
sub check_for_matching_env_and_hdr_from { |
605
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) =@_; |
606
|
|
|
|
|
|
|
# two blank headers match so don't bother checking |
607
|
0
|
|
|
|
|
0
|
return (lc $pms->get('EnvelopeFrom:addr') eq lc $pms->get('From:addr')); |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
sub sorted_recipients { |
611
|
81
|
|
|
81
|
0
|
312
|
my ($self, $pms) = @_; |
612
|
|
|
|
|
|
|
|
613
|
81
|
100
|
|
|
|
283
|
if (!exists $pms->{tocc_sorted}) { |
614
|
39
|
|
|
|
|
215
|
$self->_check_recipients($pms); |
615
|
|
|
|
|
|
|
} |
616
|
81
|
|
|
|
|
1556
|
return $pms->{tocc_sorted}; |
617
|
|
|
|
|
|
|
} |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
sub similar_recipients { |
620
|
81
|
|
|
81
|
0
|
473
|
my ($self, $pms, $min, $max) = @_; |
621
|
|
|
|
|
|
|
|
622
|
81
|
100
|
|
|
|
333
|
if (!exists $pms->{tocc_similar}) { |
623
|
42
|
|
|
|
|
209
|
$self->_check_recipients($pms); |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
return (($min eq 'undef' || $pms->{tocc_similar} >= $min) && |
626
|
81
|
|
33
|
|
|
2424
|
($max eq 'undef' || $pms->{tocc_similar} < $max)); |
627
|
|
|
|
|
|
|
} |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
# best experimentally derived values |
630
|
21
|
|
|
21
|
|
181
|
use constant TOCC_SORTED_COUNT => 7; |
|
21
|
|
|
|
|
52
|
|
|
21
|
|
|
|
|
1325
|
|
631
|
21
|
|
|
21
|
|
161
|
use constant TOCC_SIMILAR_COUNT => 5; |
|
21
|
|
|
|
|
51
|
|
|
21
|
|
|
|
|
1182
|
|
632
|
21
|
|
|
21
|
|
141
|
use constant TOCC_SIMILAR_LENGTH => 2; |
|
21
|
|
|
|
|
50
|
|
|
21
|
|
|
|
|
73674
|
|
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
sub _check_recipients { |
635
|
81
|
|
|
81
|
|
202
|
my ($self, $pms) = @_; |
636
|
|
|
|
|
|
|
|
637
|
81
|
|
|
|
|
180
|
my @inputs; |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
# ToCc: pseudo-header works best, but sometimes Bcc: is better |
640
|
81
|
|
|
|
|
224
|
for ('ToCc', 'Bcc') { |
641
|
161
|
|
|
|
|
455
|
my $to = $pms->get($_); # get recipients |
642
|
161
|
|
|
|
|
400
|
$to =~ s/\(.*?\)//g; # strip out the (comments) |
643
|
161
|
|
|
|
|
518
|
push(@inputs, ($to =~ m/([\w.=-]+\@\w+(?:[\w.-]+\.)+\w+)/g)); |
644
|
161
|
100
|
|
|
|
625
|
last if scalar(@inputs) >= TOCC_SIMILAR_COUNT; |
645
|
|
|
|
|
|
|
} |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
# remove duplicate addresses only when they appear next to each other |
648
|
81
|
|
|
|
|
238
|
my @address; |
649
|
81
|
|
|
|
|
211
|
my $previous = ''; |
650
|
81
|
|
|
|
|
337
|
while (my $current = shift @inputs) { |
651
|
55
|
50
|
|
|
|
171
|
push(@address, ($previous = $current)) if lc($current) ne lc($previous); |
652
|
55
|
50
|
|
|
|
221
|
last if @address == 256; |
653
|
|
|
|
|
|
|
} |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# ideas that had both poor S/O ratios and poor hit rates: |
656
|
|
|
|
|
|
|
# - testing for reverse sorted recipient lists |
657
|
|
|
|
|
|
|
# - testing To: and Cc: headers separately |
658
|
81
|
|
66
|
|
|
479
|
$pms->{tocc_sorted} = (scalar(@address) >= TOCC_SORTED_COUNT && |
659
|
|
|
|
|
|
|
join(',', @address) eq (join(',', sort @address))); |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
# a good S/O ratio and hit rate is achieved by comparing 2-byte |
662
|
|
|
|
|
|
|
# substrings and requiring 5 or more addresses |
663
|
81
|
|
|
|
|
305
|
$pms->{tocc_similar} = 0; |
664
|
81
|
100
|
|
|
|
376
|
if (scalar (@address) >= TOCC_SIMILAR_COUNT) { |
665
|
1
|
|
|
|
|
3
|
my @user = map { substr($_,0,TOCC_SIMILAR_LENGTH) } @address; |
|
48
|
|
|
|
|
85
|
|
666
|
1
|
|
|
|
|
6
|
my @fqhn = map { m/\@(.*)/ } @address; |
|
48
|
|
|
|
|
152
|
|
667
|
1
|
|
|
|
|
6
|
my @host = map { substr($_,0,TOCC_SIMILAR_LENGTH) } @fqhn; |
|
48
|
|
|
|
|
92
|
|
668
|
1
|
|
|
|
|
6
|
my $hits = 0; |
669
|
1
|
|
|
|
|
3
|
my $combinations = 0; |
670
|
1
|
|
|
|
|
4
|
for (my $i = 0; $i <= $#address; $i++) { |
671
|
48
|
|
|
|
|
71
|
for (my $j = $i+1; $j <= $#address; $j++) { |
672
|
1128
|
100
|
|
|
|
2114
|
$hits++ if $user[$i] eq $user[$j]; |
673
|
1128
|
100
|
100
|
|
|
2779
|
$hits++ if $host[$i] eq $host[$j] && $fqhn[$i] ne $fqhn[$j]; |
674
|
1128
|
|
|
|
|
2096
|
$combinations++; |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
} |
677
|
1
|
|
|
|
|
21
|
$pms->{tocc_similar} = $hits / $combinations; |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
} |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
sub check_for_missing_to_header { |
682
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
683
|
|
|
|
|
|
|
|
684
|
0
|
|
|
|
|
0
|
my $hdr = $pms->get('To'); |
685
|
0
|
0
|
|
|
|
0
|
$hdr = $pms->get('Apparently-To') if $hdr eq ''; |
686
|
0
|
0
|
|
|
|
0
|
return 1 if $hdr eq ''; |
687
|
|
|
|
|
|
|
|
688
|
0
|
|
|
|
|
0
|
return 0; |
689
|
|
|
|
|
|
|
} |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
sub check_for_forged_gw05_received_headers { |
692
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
693
|
0
|
|
|
|
|
0
|
local ($_); |
694
|
|
|
|
|
|
|
|
695
|
0
|
|
|
|
|
0
|
my $rcv = $pms->get('Received'); |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
# e.g. |
698
|
|
|
|
|
|
|
# Received: from mail3.icytundra.com by gw05 with ESMTP; Thu, 21 Jun 2001 02:28:32 -0400 |
699
|
0
|
|
|
|
|
0
|
my ($h1, $h2) = ($rcv =~ |
700
|
|
|
|
|
|
|
m/\nfrom\s(\S+)\sby\s(\S+)\swith\sESMTP\;\s+\S\S\S,\s+\d+\s+\S\S\S\s+ |
701
|
|
|
|
|
|
|
\d{4}\s+\d\d:\d\d:\d\d\s+[-+]*\d{4}\n$/xs); |
702
|
|
|
|
|
|
|
|
703
|
0
|
0
|
0
|
|
|
0
|
if (defined ($h1) && defined ($h2) && $h2 !~ /\./) { |
|
|
|
0
|
|
|
|
|
704
|
0
|
|
|
|
|
0
|
return 1; |
705
|
|
|
|
|
|
|
} |
706
|
|
|
|
|
|
|
|
707
|
0
|
|
|
|
|
0
|
0; |
708
|
|
|
|
|
|
|
} |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
########################################################################### |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
sub check_for_shifted_date { |
713
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms, $min, $max) = @_; |
714
|
|
|
|
|
|
|
|
715
|
0
|
0
|
|
|
|
0
|
if (!exists $pms->{date_diff}) { |
716
|
0
|
|
|
|
|
0
|
$self->_check_date_diff($pms); |
717
|
|
|
|
|
|
|
} |
718
|
|
|
|
|
|
|
return (($min eq 'undef' || $pms->{date_diff} >= (3600 * $min)) && |
719
|
0
|
|
0
|
|
|
0
|
($max eq 'undef' || $pms->{date_diff} < (3600 * $max))); |
720
|
|
|
|
|
|
|
} |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
# filters out some false positives in old corpus mail - Allen |
723
|
|
|
|
|
|
|
sub received_within_months { |
724
|
0
|
|
|
0
|
0
|
0
|
my ($self,$pms,$min,$max) = @_; |
725
|
|
|
|
|
|
|
|
726
|
0
|
0
|
|
|
|
0
|
if (!exists($pms->{date_received})) { |
727
|
0
|
|
|
|
|
0
|
$self->_check_date_received($pms); |
728
|
|
|
|
|
|
|
} |
729
|
0
|
|
|
|
|
0
|
my $diff = time() - $pms->{date_received}; |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
# 365.2425 * 24 * 60 * 60 = 31556952 = seconds in year (including leap) |
732
|
|
|
|
|
|
|
|
733
|
0
|
0
|
0
|
|
|
0
|
if (((! defined($min)) || ($min eq 'undef') || |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
734
|
|
|
|
|
|
|
($diff >= (31556952 * ($min/12)))) && |
735
|
|
|
|
|
|
|
((! defined($max)) || ($max eq 'undef') || |
736
|
|
|
|
|
|
|
($diff < (31556952 * ($max/12))))) { |
737
|
0
|
|
|
|
|
0
|
return 1; |
738
|
|
|
|
|
|
|
} else { |
739
|
0
|
|
|
|
|
0
|
return 0; |
740
|
|
|
|
|
|
|
} |
741
|
|
|
|
|
|
|
} |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
sub _get_date_header_time { |
744
|
0
|
|
|
0
|
|
0
|
my ($self, $pms) = @_; |
745
|
|
|
|
|
|
|
|
746
|
0
|
|
|
|
|
0
|
my $time; |
747
|
|
|
|
|
|
|
# a Resent-Date: header takes precedence over any Date: header |
748
|
0
|
|
|
|
|
0
|
DATE: for my $header ('Resent-Date', 'Date') { |
749
|
0
|
|
|
|
|
0
|
my @dates = $pms->{msg}->get_header($header); |
750
|
0
|
|
|
|
|
0
|
for my $date (@dates) { |
751
|
0
|
0
|
0
|
|
|
0
|
if (defined($date) && length($date)) { |
752
|
0
|
|
|
|
|
0
|
chomp($date); |
753
|
0
|
|
|
|
|
0
|
$time = parse_rfc822_date($date); |
754
|
|
|
|
|
|
|
} |
755
|
0
|
0
|
|
|
|
0
|
last DATE if defined($time); |
756
|
|
|
|
|
|
|
} |
757
|
|
|
|
|
|
|
} |
758
|
0
|
0
|
|
|
|
0
|
if (defined($time)) { |
759
|
0
|
|
|
|
|
0
|
$pms->{date_header_time} = $time; |
760
|
|
|
|
|
|
|
} |
761
|
|
|
|
|
|
|
else { |
762
|
0
|
|
|
|
|
0
|
$pms->{date_header_time} = undef; |
763
|
|
|
|
|
|
|
} |
764
|
|
|
|
|
|
|
} |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
sub _get_received_header_times { |
767
|
0
|
|
|
0
|
|
0
|
my ($self, $pms) = @_; |
768
|
|
|
|
|
|
|
|
769
|
0
|
|
|
|
|
0
|
$pms->{received_header_times} = [ () ]; |
770
|
0
|
|
|
|
|
0
|
$pms->{received_fetchmail_time} = undef; |
771
|
|
|
|
|
|
|
|
772
|
0
|
|
|
|
|
0
|
my (@received); |
773
|
0
|
|
|
|
|
0
|
my $received = $pms->get('Received'); |
774
|
0
|
0
|
|
|
|
0
|
if ($received ne '') { |
775
|
0
|
|
|
|
|
0
|
@received = grep {$_ =~ m/\S/} (split(/\n/,$received)); |
|
0
|
|
|
|
|
0
|
|
776
|
|
|
|
|
|
|
} |
777
|
|
|
|
|
|
|
# if we have no Received: headers, chances are we're archived mail |
778
|
|
|
|
|
|
|
# with a limited set of headers |
779
|
0
|
0
|
|
|
|
0
|
if (!scalar(@received)) { |
780
|
0
|
|
|
|
|
0
|
return; |
781
|
|
|
|
|
|
|
} |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
# handle fetchmail headers |
784
|
0
|
|
|
|
|
0
|
my (@local); |
785
|
0
|
0
|
0
|
|
|
0
|
if (($received[0] =~ |
786
|
|
|
|
|
|
|
m/\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/) || |
787
|
|
|
|
|
|
|
($received[0] =~ m/qmail \d+ invoked by uid \d+/)) { |
788
|
0
|
|
|
|
|
0
|
push @local, (shift @received); |
789
|
|
|
|
|
|
|
} |
790
|
0
|
0
|
0
|
|
|
0
|
if (scalar(@received) && |
|
|
0
|
|
|
|
|
|
791
|
|
|
|
|
|
|
($received[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) { |
792
|
0
|
|
|
|
|
0
|
push @local, (shift @received); |
793
|
|
|
|
|
|
|
} |
794
|
|
|
|
|
|
|
elsif (scalar(@local)) { |
795
|
0
|
|
|
|
|
0
|
unshift @received, (shift @local); |
796
|
|
|
|
|
|
|
} |
797
|
|
|
|
|
|
|
|
798
|
0
|
0
|
|
|
|
0
|
if (scalar(@local)) { |
799
|
0
|
|
|
|
|
0
|
my (@fetchmail_times); |
800
|
0
|
|
|
|
|
0
|
foreach my $rcvd (@local) { |
801
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) { |
802
|
0
|
|
|
|
|
0
|
my $date = $1; |
803
|
0
|
|
|
|
|
0
|
dbg2("eval: trying Received fetchmail header date for real time: $date"); |
804
|
0
|
|
|
|
|
0
|
my $time = parse_rfc822_date($date); |
805
|
0
|
0
|
0
|
|
|
0
|
if (defined($time) && (time() >= $time)) { |
806
|
0
|
|
|
|
|
0
|
dbg2("eval: time_t from date=$time, rcvd=$date"); |
807
|
0
|
|
|
|
|
0
|
push @fetchmail_times, $time; |
808
|
|
|
|
|
|
|
} |
809
|
|
|
|
|
|
|
} |
810
|
|
|
|
|
|
|
} |
811
|
0
|
0
|
|
|
|
0
|
if (scalar(@fetchmail_times) > 1) { |
|
|
0
|
|
|
|
|
|
812
|
|
|
|
|
|
|
$pms->{received_fetchmail_time} = |
813
|
0
|
|
|
|
|
0
|
(sort {$b <=> $a} (@fetchmail_times))[0]; |
|
0
|
|
|
|
|
0
|
|
814
|
|
|
|
|
|
|
} elsif (scalar(@fetchmail_times)) { |
815
|
0
|
|
|
|
|
0
|
$pms->{received_fetchmail_time} = $fetchmail_times[0]; |
816
|
|
|
|
|
|
|
} |
817
|
|
|
|
|
|
|
} |
818
|
|
|
|
|
|
|
|
819
|
0
|
|
|
|
|
0
|
my (@header_times); |
820
|
0
|
|
|
|
|
0
|
foreach my $rcvd (@received) { |
821
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) { |
822
|
0
|
|
|
|
|
0
|
my $date = $1; |
823
|
0
|
|
|
|
|
0
|
dbg2("eval: trying Received header date for real time: $date"); |
824
|
0
|
|
|
|
|
0
|
my $time = parse_rfc822_date($date); |
825
|
0
|
0
|
|
|
|
0
|
if (defined($time)) { |
826
|
0
|
|
|
|
|
0
|
dbg2("eval: time_t from date=$time, rcvd=$date"); |
827
|
0
|
|
|
|
|
0
|
push @header_times, $time; |
828
|
|
|
|
|
|
|
} |
829
|
|
|
|
|
|
|
} |
830
|
|
|
|
|
|
|
} |
831
|
|
|
|
|
|
|
|
832
|
0
|
0
|
|
|
|
0
|
if (scalar(@header_times)) { |
833
|
0
|
|
|
|
|
0
|
$pms->{received_header_times} = [ @header_times ]; |
834
|
|
|
|
|
|
|
} else { |
835
|
0
|
|
|
|
|
0
|
dbg("eval: no dates found in Received headers"); |
836
|
|
|
|
|
|
|
} |
837
|
|
|
|
|
|
|
} |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
sub _check_date_received { |
840
|
0
|
|
|
0
|
|
0
|
my ($self, $pms) = @_; |
841
|
|
|
|
|
|
|
|
842
|
0
|
|
|
|
|
0
|
my (@dates_poss); |
843
|
|
|
|
|
|
|
|
844
|
0
|
|
|
|
|
0
|
$pms->{date_received} = 0; |
845
|
|
|
|
|
|
|
|
846
|
0
|
0
|
|
|
|
0
|
if (!exists($pms->{date_header_time})) { |
847
|
0
|
|
|
|
|
0
|
$self->_get_date_header_time($pms); |
848
|
|
|
|
|
|
|
} |
849
|
|
|
|
|
|
|
|
850
|
0
|
0
|
|
|
|
0
|
if (defined($pms->{date_header_time})) { |
851
|
0
|
|
|
|
|
0
|
push @dates_poss, $pms->{date_header_time}; |
852
|
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
|
|
854
|
0
|
0
|
|
|
|
0
|
if (!exists($pms->{received_header_times})) { |
855
|
0
|
|
|
|
|
0
|
$self->_get_received_header_times($pms); |
856
|
|
|
|
|
|
|
} |
857
|
0
|
|
|
|
|
0
|
my (@received_header_times) = @{ $pms->{received_header_times} }; |
|
0
|
|
|
|
|
0
|
|
858
|
0
|
0
|
|
|
|
0
|
if (scalar(@received_header_times)) { |
859
|
0
|
|
|
|
|
0
|
push @dates_poss, $received_header_times[0]; |
860
|
|
|
|
|
|
|
} |
861
|
0
|
0
|
|
|
|
0
|
if (defined($pms->{received_fetchmail_time})) { |
862
|
0
|
|
|
|
|
0
|
push @dates_poss, $pms->{received_fetchmail_time}; |
863
|
|
|
|
|
|
|
} |
864
|
|
|
|
|
|
|
|
865
|
0
|
0
|
0
|
|
|
0
|
if (defined($pms->{date_header_time}) && scalar(@received_header_times)) { |
866
|
0
|
0
|
|
|
|
0
|
if (!exists($pms->{date_diff})) { |
867
|
0
|
|
|
|
|
0
|
$self->_check_date_diff($pms); |
868
|
|
|
|
|
|
|
} |
869
|
0
|
|
|
|
|
0
|
push @dates_poss, $pms->{date_header_time} - $pms->{date_diff}; |
870
|
|
|
|
|
|
|
} |
871
|
|
|
|
|
|
|
|
872
|
0
|
0
|
|
|
|
0
|
if (scalar(@dates_poss)) { # use median |
873
|
0
|
|
|
|
|
0
|
$pms->{date_received} = (sort {$b <=> $a} |
|
0
|
|
|
|
|
0
|
|
874
|
|
|
|
|
|
|
(@dates_poss))[int($#dates_poss/2)]; |
875
|
|
|
|
|
|
|
dbg("eval: date chosen from message: " . |
876
|
0
|
|
|
|
|
0
|
scalar(localtime($pms->{date_received}))); |
877
|
|
|
|
|
|
|
} else { |
878
|
0
|
|
|
|
|
0
|
dbg("eval: no dates found in message"); |
879
|
|
|
|
|
|
|
} |
880
|
|
|
|
|
|
|
} |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
sub _check_date_diff { |
883
|
0
|
|
|
0
|
|
0
|
my ($self, $pms) = @_; |
884
|
|
|
|
|
|
|
|
885
|
0
|
|
|
|
|
0
|
$pms->{date_diff} = 0; |
886
|
|
|
|
|
|
|
|
887
|
0
|
0
|
|
|
|
0
|
if (!exists($pms->{date_header_time})) { |
888
|
0
|
|
|
|
|
0
|
$self->_get_date_header_time($pms); |
889
|
|
|
|
|
|
|
} |
890
|
|
|
|
|
|
|
|
891
|
0
|
0
|
|
|
|
0
|
if (!defined($pms->{date_header_time})) { |
892
|
0
|
|
|
|
|
0
|
return; # already have tests for this |
893
|
|
|
|
|
|
|
} |
894
|
|
|
|
|
|
|
|
895
|
0
|
0
|
|
|
|
0
|
if (!exists($pms->{received_header_times})) { |
896
|
0
|
|
|
|
|
0
|
$self->_get_received_header_times($pms); |
897
|
|
|
|
|
|
|
} |
898
|
0
|
|
|
|
|
0
|
my (@header_times) = @{ $pms->{received_header_times} }; |
|
0
|
|
|
|
|
0
|
|
899
|
|
|
|
|
|
|
|
900
|
0
|
0
|
|
|
|
0
|
if (!scalar(@header_times)) { |
901
|
0
|
|
|
|
|
0
|
return; # archived mail? |
902
|
|
|
|
|
|
|
} |
903
|
|
|
|
|
|
|
|
904
|
0
|
|
|
|
|
0
|
my (@diffs) = map {$pms->{date_header_time} - $_} (@header_times); |
|
0
|
|
|
|
|
0
|
|
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
# if the last Received: header has no difference, then we choose to |
907
|
|
|
|
|
|
|
# exclude it |
908
|
0
|
0
|
0
|
|
|
0
|
if ($#diffs > 0 && $diffs[$#diffs] == 0) { |
909
|
0
|
|
|
|
|
0
|
pop(@diffs); |
910
|
|
|
|
|
|
|
} |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
# use the date with the smallest absolute difference |
913
|
|
|
|
|
|
|
# (experimentally, this results in the fewest false positives) |
914
|
0
|
|
|
|
|
0
|
@diffs = sort { abs($a) <=> abs($b) } @diffs; |
|
0
|
|
|
|
|
0
|
|
915
|
0
|
|
|
|
|
0
|
$pms->{date_diff} = $diffs[0]; |
916
|
|
|
|
|
|
|
} |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
sub subject_is_all_caps { |
920
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
921
|
0
|
|
|
|
|
0
|
my $subject = $pms->get('Subject'); |
922
|
|
|
|
|
|
|
|
923
|
0
|
|
|
|
|
0
|
$subject =~ s/^\s+//; |
924
|
0
|
|
|
|
|
0
|
$subject =~ s/\s+$//; |
925
|
0
|
|
|
|
|
0
|
$subject =~ s/^(?:(?:Re|Fwd|Fw|Aw|Antwort|Sv|VS):\s*)+//i; # Bug 6805 |
926
|
0
|
0
|
|
|
|
0
|
return 0 if $subject !~ /\s/; # don't match one word subjects |
927
|
0
|
0
|
|
|
|
0
|
return 0 if (length $subject < 10); # don't match short subjects |
928
|
0
|
|
|
|
|
0
|
$subject =~ s/[^a-zA-Z]//g; # only look at letters |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
# now, check to see if the subject is encoded using a non-ASCII charset. |
931
|
|
|
|
|
|
|
# If so, punt on this test to avoid FPs. We just list the known charsets |
932
|
|
|
|
|
|
|
# this test will FP on, here. |
933
|
0
|
|
|
|
|
0
|
my $subjraw = $pms->get('Subject:raw'); |
934
|
0
|
|
|
|
|
0
|
my $CLTFAC = Mail::SpamAssassin::Constants::CHARSETS_LIKELY_TO_FP_AS_CAPS; |
935
|
0
|
0
|
|
|
|
0
|
if ($subjraw =~ /=\?${CLTFAC}\?/i) { |
936
|
0
|
|
|
|
|
0
|
return 0; |
937
|
|
|
|
|
|
|
} |
938
|
|
|
|
|
|
|
|
939
|
0
|
|
0
|
|
|
0
|
return length($subject) && ($subject eq uc($subject)); |
940
|
|
|
|
|
|
|
} |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
sub check_for_to_in_subject { |
943
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms, $test) = @_; |
944
|
|
|
|
|
|
|
|
945
|
0
|
|
|
|
|
0
|
my $full_to = $pms->get('To:addr'); |
946
|
0
|
0
|
|
|
|
0
|
return 0 unless $full_to ne ''; |
947
|
|
|
|
|
|
|
|
948
|
0
|
|
|
|
|
0
|
my $subject = $pms->get('Subject'); |
949
|
|
|
|
|
|
|
|
950
|
0
|
0
|
|
|
|
0
|
if ($test eq "address") { |
|
|
0
|
|
|
|
|
|
951
|
0
|
|
|
|
|
0
|
return $subject =~ /\b\Q$full_to\E\b/i; # "user@domain.com" |
952
|
|
|
|
|
|
|
} |
953
|
|
|
|
|
|
|
elsif ($test eq "user") { |
954
|
0
|
|
|
|
|
0
|
my $to = $full_to; |
955
|
0
|
|
|
|
|
0
|
$to =~ s/\@.*//; |
956
|
0
|
|
|
|
|
0
|
my $subj = $subject; |
957
|
0
|
|
|
|
|
0
|
$subj =~ s/^\s+//; |
958
|
0
|
|
|
|
|
0
|
$subj =~ s/\s+$//; |
959
|
|
|
|
|
|
|
|
960
|
0
|
|
|
|
|
0
|
return $subject =~ /^(?: |
961
|
|
|
|
|
|
|
(?:re|fw):\s*(?:\w+\s+)?\Q$to\E$ |
962
|
|
|
|
|
|
|
|(?-i:\Q$to\E)\s*[,:;!?-](?:$|\s) |
963
|
|
|
|
|
|
|
|\Q$to\E$ |
964
|
|
|
|
|
|
|
|,\s*\Q$to\E[,:;!?-]$ |
965
|
|
|
|
|
|
|
)/ix; |
966
|
|
|
|
|
|
|
} |
967
|
0
|
|
|
|
|
0
|
return 0; |
968
|
|
|
|
|
|
|
} |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
sub check_outlook_message_id { |
971
|
81
|
|
|
81
|
0
|
244
|
my ($self, $pms) = @_; |
972
|
81
|
|
|
|
|
173
|
local ($_); |
973
|
|
|
|
|
|
|
|
974
|
81
|
|
|
|
|
251
|
my $id = $pms->get('MESSAGEID'); |
975
|
81
|
100
|
|
|
|
1572
|
return 0 if $id !~ /^<[0-9a-f]{4}([0-9a-f]{8})\$[0-9a-f]{8}\$[0-9a-f]{8}\@/; |
976
|
|
|
|
|
|
|
|
977
|
1
|
|
|
|
|
4
|
my $timetoken = hex($1); |
978
|
1
|
|
|
|
|
4
|
my $x = 0.0023283064365387; |
979
|
1
|
|
|
|
|
3
|
my $y = 27111902.8329849; |
980
|
|
|
|
|
|
|
|
981
|
1
|
|
|
|
|
2
|
my $fudge = 250; |
982
|
|
|
|
|
|
|
|
983
|
1
|
|
|
|
|
3
|
$_ = $pms->get('Date'); |
984
|
1
|
|
50
|
|
|
8
|
$_ = parse_rfc822_date($_) || 0; |
985
|
1
|
|
|
|
|
5
|
my $expected = int (($_ * $x) + $y); |
986
|
1
|
|
|
|
|
3
|
my $diff = $timetoken - $expected; |
987
|
1
|
50
|
|
|
|
4
|
return 0 if (abs($diff) < $fudge); |
988
|
|
|
|
|
|
|
|
989
|
1
|
|
|
|
|
4
|
$_ = $pms->get('Received'); |
990
|
1
|
|
|
|
|
37
|
/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+).*?$/; |
991
|
1
|
|
50
|
|
|
10
|
$_ = parse_rfc822_date($_) || 0; |
992
|
1
|
|
|
|
|
4
|
$expected = int(($_ * $x) + $y); |
993
|
1
|
|
|
|
|
3
|
$diff = $timetoken - $expected; |
994
|
|
|
|
|
|
|
|
995
|
1
|
|
|
|
|
21
|
return (abs($diff) >= $fudge); |
996
|
|
|
|
|
|
|
} |
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
sub check_messageid_not_usable { |
999
|
0
|
|
|
0
|
0
|
|
my ($self, $pms) = @_; |
1000
|
0
|
|
|
|
|
|
local ($_); |
1001
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
# Lyris eats message-ids. also some ezmlm, I think :( |
1003
|
0
|
|
|
|
|
|
$_ = $pms->get("List-Unsubscribe"); |
1004
|
0
|
0
|
|
|
|
|
return 1 if (/<mailto:(?:leave-\S+|\S+-unsubscribe)\@\S+>$/i); |
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
# ezmlm again |
1007
|
0
|
0
|
|
|
|
|
if($self->gated_through_received_hdr_remover($pms)) { return 1; } |
|
0
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
# Allen notes this as 'Wacky sendmail version?' |
1010
|
0
|
|
|
|
|
|
$_ = $pms->get("Received"); |
1011
|
0
|
0
|
|
|
|
|
return 1 if /\/CWT\/DCE\)/; |
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
# Apr 2 2003 jm: iPlanet rewrites lots of stuff, including Message-IDs |
1014
|
0
|
0
|
|
|
|
|
return 1 if /iPlanet Messaging Server/; |
1015
|
|
|
|
|
|
|
|
1016
|
0
|
|
|
|
|
|
return 0; |
1017
|
|
|
|
|
|
|
} |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
# Return true if the count of $hdr headers are within the given range |
1020
|
|
|
|
|
|
|
sub check_header_count_range { |
1021
|
0
|
|
|
0
|
0
|
|
my ($self, $pms, $hdr, $min, $max) = @_; |
1022
|
0
|
|
|
|
|
|
my %uniq; |
1023
|
0
|
|
|
|
|
|
my @hdrs = grep(!$uniq{$_}++, $pms->{msg}->get_header ($hdr)); |
1024
|
0
|
|
0
|
|
|
|
return (scalar @hdrs >= $min && scalar @hdrs <= $max); |
1025
|
|
|
|
|
|
|
} |
1026
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
sub check_unresolved_template { |
1028
|
0
|
|
|
0
|
0
|
|
my ($self, $pms) = @_; |
1029
|
|
|
|
|
|
|
|
1030
|
0
|
|
|
|
|
|
my $all = $pms->get('ALL'); # cached access |
1031
|
|
|
|
|
|
|
|
1032
|
0
|
|
|
|
|
|
for my $header (split(/\n/, $all)) { |
1033
|
|
|
|
|
|
|
# slightly faster to test in this order |
1034
|
0
|
0
|
0
|
|
|
|
if ($header =~ /%[A-Z][A-Z_-]/ && |
1035
|
|
|
|
|
|
|
$header !~ /^(?:X-VMS-To|X-UIDL|X-Face|To|Cc|From|Subject|References|In-Reply-To|(?:X-|Resent-|X-Original-)?Message-Id):/i) |
1036
|
|
|
|
|
|
|
{ |
1037
|
0
|
|
|
|
|
|
return 1; |
1038
|
|
|
|
|
|
|
} |
1039
|
|
|
|
|
|
|
} |
1040
|
0
|
|
|
|
|
|
return 0; |
1041
|
|
|
|
|
|
|
} |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
sub check_ratware_name_id { |
1044
|
0
|
|
|
0
|
0
|
|
my ($self, $pms) = @_; |
1045
|
|
|
|
|
|
|
|
1046
|
0
|
|
|
|
|
|
my $mid = $pms->get('MESSAGEID'); |
1047
|
0
|
|
|
|
|
|
my $from = $pms->get('From'); |
1048
|
0
|
0
|
|
|
|
|
if ($mid =~ m/<[A-Z]{28}\.([^>]+?)>/) { |
1049
|
0
|
0
|
|
|
|
|
if ($from =~ m/\"[^\"]+\"\s*<\Q$1\E>/) { |
1050
|
0
|
|
|
|
|
|
return 1; |
1051
|
|
|
|
|
|
|
} |
1052
|
|
|
|
|
|
|
} |
1053
|
0
|
|
|
|
|
|
return 0; |
1054
|
|
|
|
|
|
|
} |
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
sub check_ratware_envelope_from { |
1057
|
0
|
|
|
0
|
0
|
|
my ($self, $pms) = @_; |
1058
|
|
|
|
|
|
|
|
1059
|
0
|
|
|
|
|
|
my $to = $pms->get('To:addr'); |
1060
|
0
|
|
|
|
|
|
my $from = $pms->get('EnvelopeFrom:addr'); |
1061
|
|
|
|
|
|
|
|
1062
|
0
|
0
|
0
|
|
|
|
return 0 if $from eq '' || $to eq ''; |
1063
|
0
|
0
|
|
|
|
|
return 0 if $from =~ /^SRS\d[=+-]/i; |
1064
|
|
|
|
|
|
|
|
1065
|
0
|
0
|
|
|
|
|
if ($to =~ /^([^@]+)\@(.+)$/) { |
1066
|
0
|
|
|
|
|
|
my($user,$dom) = ($1,$2); |
1067
|
0
|
|
|
|
|
|
$dom = $self->{main}->{registryboundaries}->trim_domain($dom); |
1068
|
|
|
|
|
|
|
return unless |
1069
|
0
|
0
|
|
|
|
|
($self->{main}->{registryboundaries}->is_domain_valid($dom)); |
1070
|
|
|
|
|
|
|
|
1071
|
0
|
0
|
|
|
|
|
return 1 if ($from =~ /\b\Q$dom\E.\Q$user\E@/i); |
1072
|
|
|
|
|
|
|
} |
1073
|
|
|
|
|
|
|
|
1074
|
0
|
|
|
|
|
|
return 0; |
1075
|
|
|
|
|
|
|
} |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
# ADDED FROM BUG 6487 |
1078
|
|
|
|
|
|
|
sub check_equal_from_domains { |
1079
|
0
|
|
|
0
|
0
|
|
my ($self, $pms) = @_; |
1080
|
|
|
|
|
|
|
|
1081
|
0
|
|
|
|
|
|
my $from = $pms->get('From:addr'); |
1082
|
0
|
|
|
|
|
|
my $envfrom = $pms->get('EnvelopeFrom:addr'); |
1083
|
|
|
|
|
|
|
|
1084
|
0
|
|
|
|
|
|
local $1; |
1085
|
0
|
|
|
|
|
|
my $fromdomain = ''; |
1086
|
|
|
|
|
|
|
#Revised regexp from 6487 comment 3 |
1087
|
0
|
0
|
|
|
|
|
$fromdomain = $1 if $from =~ /\@([^@]*)\z/; |
1088
|
0
|
|
|
|
|
|
$fromdomain =~ s/^.+\.([^\.]+\.[^\.]+)$/$1/; |
1089
|
0
|
0
|
|
|
|
|
return 0 if $fromdomain eq ''; |
1090
|
|
|
|
|
|
|
|
1091
|
0
|
|
|
|
|
|
my $envfromdomain = ''; |
1092
|
0
|
0
|
|
|
|
|
$envfromdomain = $1 if $envfrom =~ /\@([^@]*)\z/; |
1093
|
0
|
|
|
|
|
|
$envfromdomain =~ s/^.+\.([^\.]+\.[^\.]+)$/$1/; |
1094
|
0
|
0
|
|
|
|
|
return 0 if $envfromdomain eq ''; |
1095
|
|
|
|
|
|
|
|
1096
|
0
|
|
|
|
|
|
dbg("eval: From 2nd level domain: $fromdomain, EnvelopeFrom 2nd level domain: $envfromdomain"); |
1097
|
|
|
|
|
|
|
|
1098
|
0
|
0
|
|
|
|
|
return 1 if lc($fromdomain) ne lc($envfromdomain); |
1099
|
|
|
|
|
|
|
|
1100
|
0
|
|
|
|
|
|
return 0; |
1101
|
|
|
|
|
|
|
} |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
########################################################################### |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
# support eval-test verbose debugs using "-Deval" |
1107
|
|
|
|
|
|
|
sub dbg2 { |
1108
|
0
|
0
|
|
0
|
0
|
|
if (would_log('dbg', 'eval') == 2) { |
1109
|
0
|
|
|
|
|
|
dbg(@_); |
1110
|
|
|
|
|
|
|
} |
1111
|
|
|
|
|
|
|
} |
1112
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
1; |