line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# <@LICENSE> |
2
|
|
|
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more |
3
|
|
|
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with |
4
|
|
|
|
|
|
|
# this work for additional information regarding copyright ownership. |
5
|
|
|
|
|
|
|
# The ASF licenses this file to you under the Apache License, Version 2.0 |
6
|
|
|
|
|
|
|
# (the "License"); you may not use this file except in compliance with |
7
|
|
|
|
|
|
|
# the License. You may obtain a copy of the License at: |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software |
12
|
|
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
13
|
|
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14
|
|
|
|
|
|
|
# See the License for the specific language governing permissions and |
15
|
|
|
|
|
|
|
# limitations under the License. |
16
|
|
|
|
|
|
|
# </@LICENSE> |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
package Mail::SpamAssassin::Plugin::HeaderEval; |
19
|
|
|
|
|
|
|
|
20
|
22
|
|
|
22
|
|
144
|
use strict; |
|
22
|
|
|
|
|
52
|
|
|
22
|
|
|
|
|
710
|
|
21
|
22
|
|
|
22
|
|
134
|
use warnings; |
|
22
|
|
|
|
|
51
|
|
|
22
|
|
|
|
|
734
|
|
22
|
|
|
|
|
|
|
# use bytes; |
23
|
22
|
|
|
22
|
|
149
|
use re 'taint'; |
|
22
|
|
|
|
|
50
|
|
|
22
|
|
|
|
|
748
|
|
24
|
22
|
|
|
22
|
|
140
|
use Errno qw(EBADF); |
|
22
|
|
|
|
|
54
|
|
|
22
|
|
|
|
|
1166
|
|
25
|
|
|
|
|
|
|
|
26
|
22
|
|
|
22
|
|
140
|
use Mail::SpamAssassin::Plugin; |
|
22
|
|
|
|
|
52
|
|
|
22
|
|
|
|
|
523
|
|
27
|
22
|
|
|
22
|
|
115
|
use Mail::SpamAssassin::Locales; |
|
22
|
|
|
|
|
67
|
|
|
22
|
|
|
|
|
596
|
|
28
|
22
|
|
|
22
|
|
138
|
use Mail::SpamAssassin::Util qw(get_my_locales parse_rfc822_date); |
|
22
|
|
|
|
|
59
|
|
|
22
|
|
|
|
|
1439
|
|
29
|
22
|
|
|
22
|
|
157
|
use Mail::SpamAssassin::Logger; |
|
22
|
|
|
|
|
45
|
|
|
22
|
|
|
|
|
1253
|
|
30
|
22
|
|
|
22
|
|
146
|
use Mail::SpamAssassin::Constants qw(:sa :ip); |
|
22
|
|
|
|
|
73
|
|
|
22
|
|
|
|
|
78332
|
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
our @ISA = qw(Mail::SpamAssassin::Plugin); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# constructor: register the eval rule |
35
|
|
|
|
|
|
|
sub new { |
36
|
63
|
|
|
63
|
1
|
219
|
my $class = shift; |
37
|
63
|
|
|
|
|
215
|
my $mailsaobject = shift; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# some boilerplate... |
40
|
63
|
|
33
|
|
|
492
|
$class = ref($class) || $class; |
41
|
63
|
|
|
|
|
416
|
my $self = $class->SUPER::new($mailsaobject); |
42
|
63
|
|
|
|
|
193
|
bless ($self, $class); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# the important bit! |
45
|
63
|
|
|
|
|
326
|
$self->register_eval_rule("check_for_fake_aol_relay_in_rcvd"); |
46
|
63
|
|
|
|
|
243
|
$self->register_eval_rule("check_for_faraway_charset_in_headers"); |
47
|
63
|
|
|
|
|
240
|
$self->register_eval_rule("check_for_unique_subject_id"); |
48
|
63
|
|
|
|
|
232
|
$self->register_eval_rule("check_illegal_chars"); |
49
|
63
|
|
|
|
|
234
|
$self->register_eval_rule("check_for_forged_hotmail_received_headers"); |
50
|
63
|
|
|
|
|
216
|
$self->register_eval_rule("check_for_no_hotmail_received_headers"); |
51
|
63
|
|
|
|
|
268
|
$self->register_eval_rule("check_for_msn_groups_headers"); |
52
|
63
|
|
|
|
|
288
|
$self->register_eval_rule("check_for_forged_eudoramail_received_headers"); |
53
|
63
|
|
|
|
|
203
|
$self->register_eval_rule("check_for_forged_yahoo_received_headers"); |
54
|
63
|
|
|
|
|
215
|
$self->register_eval_rule("check_for_forged_juno_received_headers"); |
55
|
63
|
|
|
|
|
208
|
$self->register_eval_rule("check_for_forged_gmail_received_headers"); |
56
|
63
|
|
|
|
|
219
|
$self->register_eval_rule("check_for_matching_env_and_hdr_from"); |
57
|
63
|
|
|
|
|
212
|
$self->register_eval_rule("sorted_recipients"); |
58
|
63
|
|
|
|
|
253
|
$self->register_eval_rule("similar_recipients"); |
59
|
63
|
|
|
|
|
249
|
$self->register_eval_rule("check_for_missing_to_header"); |
60
|
63
|
|
|
|
|
211
|
$self->register_eval_rule("check_for_forged_gw05_received_headers"); |
61
|
63
|
|
|
|
|
242
|
$self->register_eval_rule("check_for_shifted_date"); |
62
|
63
|
|
|
|
|
211
|
$self->register_eval_rule("subject_is_all_caps"); |
63
|
63
|
|
|
|
|
218
|
$self->register_eval_rule("check_for_to_in_subject"); |
64
|
63
|
|
|
|
|
234
|
$self->register_eval_rule("check_outlook_message_id"); |
65
|
63
|
|
|
|
|
236
|
$self->register_eval_rule("check_messageid_not_usable"); |
66
|
63
|
|
|
|
|
221
|
$self->register_eval_rule("check_header_count_range"); |
67
|
63
|
|
|
|
|
175
|
$self->register_eval_rule("check_unresolved_template"); |
68
|
63
|
|
|
|
|
217
|
$self->register_eval_rule("check_ratware_name_id"); |
69
|
63
|
|
|
|
|
212
|
$self->register_eval_rule("check_ratware_envelope_from"); |
70
|
63
|
|
|
|
|
206
|
$self->register_eval_rule("gated_through_received_hdr_remover"); |
71
|
63
|
|
|
|
|
207
|
$self->register_eval_rule("received_within_months"); |
72
|
63
|
|
|
|
|
209
|
$self->register_eval_rule("check_equal_from_domains"); |
73
|
|
|
|
|
|
|
|
74
|
63
|
|
|
|
|
585
|
return $self; |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# load triplets.txt into memory |
78
|
|
|
|
|
|
|
sub compile_now_start { |
79
|
2
|
|
|
2
|
1
|
7
|
my ($self) = @_; |
80
|
|
|
|
|
|
|
|
81
|
2
|
|
|
|
|
10
|
$self->word_is_in_dictionary("aba"); |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub check_for_fake_aol_relay_in_rcvd { |
85
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
86
|
0
|
|
|
|
|
0
|
local ($_); |
87
|
|
|
|
|
|
|
|
88
|
0
|
|
|
|
|
0
|
$_ = $pms->get('Received'); |
89
|
0
|
|
|
|
|
0
|
s/\s/ /gs; |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# this is the hostname format used by AOL for their relays. Spammers love |
92
|
|
|
|
|
|
|
# forging it. Don't make it more specific to match aol.com only, though -- |
93
|
|
|
|
|
|
|
# there's another set of spammers who generate fake hostnames to go with |
94
|
|
|
|
|
|
|
# it! |
95
|
0
|
0
|
|
|
|
0
|
if (/ rly-[a-z][a-z]\d\d\./i) { |
96
|
0
|
0
|
|
|
|
0
|
return 0 if /\/AOL-\d+\.\d+\.\d+\)/; # via AOL mail relay |
97
|
0
|
0
|
|
|
|
0
|
return 0 if /ESMTP id (?:RELAY|MAILRELAY|MAILIN)/; # AOLish |
98
|
0
|
|
|
|
|
0
|
return 1; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# spam: Received: from unknown (HELO mta05bw.bigpond.com) (80.71.176.130) by |
102
|
|
|
|
|
|
|
# rly-xw01.mx.aol.com with QMQP; Sat, 15 Jun 2002 23:37:16 -0000 |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# non: Received: from rly-xj02.mx.aol.com (rly-xj02.mail.aol.com [172.20.116.39]) by |
105
|
|
|
|
|
|
|
# omr-r05.mx.aol.com (v83.35) with ESMTP id RELAYIN7-0501132011; Wed, 01 |
106
|
|
|
|
|
|
|
# May 2002 13:20:11 -0400 |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# non: Received: from logs-tr.proxy.aol.com (logs-tr.proxy.aol.com [152.163.201.132]) |
109
|
|
|
|
|
|
|
# by rly-ip01.mx.aol.com (8.8.8/8.8.8/AOL-5.0.0) |
110
|
|
|
|
|
|
|
# with ESMTP id NAA08955 for <sapient-alumni@yahoogroups.com>; |
111
|
|
|
|
|
|
|
# Thu, 4 Apr 2002 13:11:20 -0500 (EST) |
112
|
|
|
|
|
|
|
|
113
|
0
|
|
|
|
|
0
|
return 0; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub check_for_faraway_charset_in_headers { |
117
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
118
|
0
|
|
|
|
|
0
|
my $hdr; |
119
|
|
|
|
|
|
|
|
120
|
0
|
|
|
|
|
0
|
my @locales = get_my_locales($self->{main}->{conf}->{ok_locales}); |
121
|
|
|
|
|
|
|
|
122
|
0
|
0
|
|
|
|
0
|
return 0 if grep { $_ eq "all" } @locales; |
|
0
|
|
|
|
|
0
|
|
123
|
|
|
|
|
|
|
|
124
|
0
|
|
|
|
|
0
|
for my $h (qw(From Subject)) { |
125
|
0
|
|
|
|
|
0
|
my @hdrs = $pms->get("$h:raw"); # ??? get() returns a scalar ??? |
126
|
0
|
0
|
|
|
|
0
|
if ($#hdrs >= 0) { |
127
|
0
|
|
|
|
|
0
|
$hdr = join(" ", @hdrs); |
128
|
|
|
|
|
|
|
} else { |
129
|
0
|
|
|
|
|
0
|
$hdr = ''; |
130
|
|
|
|
|
|
|
} |
131
|
0
|
|
|
|
|
0
|
while ($hdr =~ /=\?(.+?)\?.\?.*?\?=/g) { |
132
|
0
|
0
|
|
|
|
0
|
Mail::SpamAssassin::Locales::is_charset_ok_for_locales($1, @locales) |
133
|
|
|
|
|
|
|
or return 1; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
} |
136
|
0
|
|
|
|
|
0
|
0; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub check_for_unique_subject_id { |
140
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
141
|
0
|
|
|
|
|
0
|
local ($_); |
142
|
0
|
|
|
|
|
0
|
$_ = lc $pms->get('Subject'); |
143
|
0
|
|
|
|
|
0
|
study; # study is a no-op since perl 5.16.0, eliminating related bugs |
144
|
|
|
|
|
|
|
|
145
|
0
|
|
|
|
|
0
|
my $id = 0; |
146
|
0
|
0
|
0
|
|
|
0
|
if (/[-_\.\s]{7,}([-a-z0-9]{4,})$/ |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
147
|
|
|
|
|
|
|
|| /\s{10,}(?:\S\s)?(\S+)$/ |
148
|
|
|
|
|
|
|
|| /\s{3,}[-:\#\(\[]+([-a-z0-9]{4,})[\]\)]+$/ |
149
|
|
|
|
|
|
|
|| /\s{3,}[:\#\(\[]*([a-f0-9]{4,})[\]\)]*$/ |
150
|
|
|
|
|
|
|
|| /\s{3,}[-:\#]([a-z0-9]{5,})$/ |
151
|
|
|
|
|
|
|
|| /[\s._]{3,}([^0\s._]\d{3,})$/ |
152
|
|
|
|
|
|
|
|| /[\s._]{3,}\[(\S+)\]$/ |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# (7217vPhZ0-478TLdy5829qicU9-0@26) and similar |
155
|
|
|
|
|
|
|
|| /\(([-\w]{7,}\@\d+)\)$/ |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# Seven or more digits at the end of a subject is almost certainly a id |
158
|
|
|
|
|
|
|
|| /\b(\d{7,})\s*$/ |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# stuff at end of line after "!" or "?" is usually an id |
161
|
|
|
|
|
|
|
|| /[!\?]\s*(\d{4,}|\w+(-\w+)+)\s*$/ |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# 9095IPZK7-095wsvp8715rJgY8-286-28 and similar |
164
|
|
|
|
|
|
|
# excluding 'Re:', etc and the first word |
165
|
|
|
|
|
|
|
|| /(?:\w{2,3}:\s)?\w+\s+(\w{7,}-\w{7,}(-\w+)*)\s*$/ |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# #30D7 and similar |
168
|
|
|
|
|
|
|
|| /\s#\s*([a-f0-9]{4,})\s*$/ |
169
|
|
|
|
|
|
|
) |
170
|
|
|
|
|
|
|
{ |
171
|
0
|
|
|
|
|
0
|
$id = $1; |
172
|
|
|
|
|
|
|
# exempt online purchases |
173
|
0
|
0
|
0
|
|
|
0
|
if ($id =~ /\d{5,}/ |
174
|
|
|
|
|
|
|
&& /(?:item|invoice|order|number|confirmation).{1,6}\Q$id\E\s*$/) |
175
|
|
|
|
|
|
|
{ |
176
|
0
|
|
|
|
|
0
|
$id = 0; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# for the "foo-bar-baz" case, otherwise it won't |
180
|
|
|
|
|
|
|
# be found in the dict: |
181
|
0
|
|
|
|
|
0
|
$id =~ s/-//; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
0
|
|
0
|
|
|
0
|
return ($id && !$self->word_is_in_dictionary($id)); |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# word_is_in_dictionary() |
188
|
|
|
|
|
|
|
# |
189
|
|
|
|
|
|
|
# See if the word looks like an English word, by checking if each triplet |
190
|
|
|
|
|
|
|
# of letters it contains is one that can be found in the English language. |
191
|
|
|
|
|
|
|
# Does not include triplets only found in proper names, or in the Latin |
192
|
|
|
|
|
|
|
# and Greek terms that might be found in a larger dictionary |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
my %triplets; |
195
|
|
|
|
|
|
|
my $triplets_loaded = 0; |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
sub word_is_in_dictionary { |
198
|
2
|
|
|
2
|
0
|
8
|
my ($self, $word) = @_; |
199
|
2
|
|
|
|
|
4
|
local ($_); |
200
|
2
|
|
|
|
|
11
|
local $/ = "\n"; # Ensure $/ is set appropriately |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# $word =~ tr/A-Z/a-z/; # already done by this stage |
203
|
2
|
|
|
|
|
9
|
$word =~ s/^\s+//; |
204
|
2
|
|
|
|
|
8
|
$word =~ s/\s+$//; |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# If it contains a digit, dash, etc, it's not a valid word. |
207
|
|
|
|
|
|
|
# Don't reject words like "can't" and "I'll" |
208
|
2
|
50
|
|
|
|
21
|
return 0 if ($word =~ /[^a-z\']/); |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# handle a few common "blah blah blah (comment)" styles |
211
|
2
|
50
|
|
|
|
10
|
return 1 if ($word eq "ot"); # off-topic |
212
|
2
|
50
|
|
|
|
67
|
return 1 if ($word =~ /(?:linux|nix|bsd)/); # not in most dicts |
213
|
2
|
50
|
|
|
|
12
|
return 1 if ($word =~ /(?:whew|phew|attn|tha?nx)/); # not in most dicts |
214
|
|
|
|
|
|
|
|
215
|
2
|
|
|
|
|
14
|
my $word_len = length($word); |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
# Unique IDs probably aren't going to be only one or two letters long |
218
|
2
|
50
|
|
|
|
15
|
return 1 if ($word_len < 3); |
219
|
|
|
|
|
|
|
|
220
|
2
|
50
|
|
|
|
9
|
if (!$triplets_loaded) { |
221
|
|
|
|
|
|
|
# take a copy to avoid modifying the real one |
222
|
2
|
|
|
|
|
12
|
my @default_triplets_path = @Mail::SpamAssassin::default_rules_path; |
223
|
2
|
|
|
|
|
29
|
s{$}{/triplets.txt} for @default_triplets_path; |
224
|
2
|
|
|
|
|
17
|
my $filename = $self->{main}->first_existing_path (@default_triplets_path); |
225
|
|
|
|
|
|
|
|
226
|
2
|
50
|
|
|
|
17
|
if (!defined $filename) { |
227
|
2
|
|
|
|
|
12
|
dbg("eval: failed to locate the triplets.txt file"); |
228
|
2
|
|
|
|
|
12
|
return 1; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
0
|
|
|
|
|
0
|
local *TRIPLETS; |
232
|
0
|
0
|
|
|
|
0
|
if (!open (TRIPLETS, "<$filename")) { |
233
|
0
|
|
|
|
|
0
|
dbg("eval: failed to open '$filename', cannot check dictionary: $!"); |
234
|
0
|
|
|
|
|
0
|
return 1; |
235
|
|
|
|
|
|
|
} |
236
|
0
|
|
|
|
|
0
|
for($!=0; <TRIPLETS>; $!=0) { |
237
|
0
|
|
|
|
|
0
|
chomp; |
238
|
0
|
|
|
|
|
0
|
$triplets{$_} = 1; |
239
|
|
|
|
|
|
|
} |
240
|
0
|
0
|
0
|
|
|
0
|
defined $_ || $!==0 or |
|
|
0
|
|
|
|
|
|
241
|
|
|
|
|
|
|
$!==EBADF ? dbg("eval: error reading from $filename: $!") |
242
|
|
|
|
|
|
|
: die "error reading from $filename: $!"; |
243
|
0
|
0
|
|
|
|
0
|
close(TRIPLETS) or die "error closing $filename: $!"; |
244
|
|
|
|
|
|
|
|
245
|
0
|
|
|
|
|
0
|
$triplets_loaded = 1; |
246
|
|
|
|
|
|
|
} # if (!$triplets_loaded) |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
|
249
|
0
|
|
|
|
|
0
|
my $i; |
250
|
|
|
|
|
|
|
|
251
|
0
|
|
|
|
|
0
|
for ($i = 0; $i < ($word_len - 2); $i++) { |
252
|
0
|
|
|
|
|
0
|
my $triplet = substr($word, $i, 3); |
253
|
0
|
0
|
|
|
|
0
|
if (!$triplets{$triplet}) { |
254
|
0
|
|
|
|
|
0
|
dbg("eval: unique ID: letter triplet '$triplet' from word '$word' not valid"); |
255
|
0
|
|
|
|
|
0
|
return 0; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
} # for ($i = 0; $i < ($word_len - 2); $i++) |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
# All letter triplets in word were found to be valid |
260
|
0
|
|
|
|
|
0
|
return 1; |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# look for 8-bit and other illegal characters that should be MIME |
264
|
|
|
|
|
|
|
# encoded, these might want to exempt languages that do not use |
265
|
|
|
|
|
|
|
# Latin-based alphabets, but only if the user wants it that way |
266
|
|
|
|
|
|
|
sub check_illegal_chars { |
267
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms, $header, $ratio, $count) = @_; |
268
|
|
|
|
|
|
|
|
269
|
0
|
0
|
0
|
|
|
0
|
$header .= ":raw" unless ($header eq "ALL" || $header =~ /:raw$/); |
270
|
0
|
|
|
|
|
0
|
my $str = $pms->get($header); |
271
|
0
|
0
|
0
|
|
|
0
|
return 0 if !defined $str || $str eq ''; |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
# avoid overlap between tests |
274
|
0
|
0
|
|
|
|
0
|
if ($header eq "ALL") { |
275
|
|
|
|
|
|
|
# fix continuation lines, then remove Subject and From |
276
|
0
|
|
|
|
|
0
|
$str =~ s/\n[ \t]+/ /gs; |
277
|
0
|
|
|
|
|
0
|
$str =~ s/^(?:Subject|From):.*$//gmi; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# count illegal substrings (RFC 2045) |
281
|
|
|
|
|
|
|
# (non-ASCII + C0 controls except TAB, NL, CR) |
282
|
0
|
|
|
|
|
0
|
my $illegal = $str =~ tr/\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff//; |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
# minor exemptions for Subject |
285
|
0
|
0
|
0
|
|
|
0
|
if ($illegal > 0 && lc $header eq "subject:raw") { |
286
|
|
|
|
|
|
|
# only exempt a single cent sign, pound sign, or registered sign |
287
|
0
|
|
|
|
|
0
|
my $exempt = $str =~ tr/\xa2\xa3\xae//; |
288
|
0
|
0
|
|
|
|
0
|
$illegal-- if $exempt == 1; |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
0
|
0
|
|
|
|
0
|
return 0 if $str eq ''; |
292
|
0
|
|
0
|
|
|
0
|
return (($illegal / length($str)) >= $ratio && $illegal >= $count); |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
# ezmlm has a very bad habit of removing Received: headers! bad ezmlm. |
296
|
|
|
|
|
|
|
# |
297
|
|
|
|
|
|
|
sub gated_through_received_hdr_remover { |
298
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
299
|
|
|
|
|
|
|
|
300
|
0
|
|
|
|
|
0
|
my $txt = $pms->get("Mailing-List",undef); |
301
|
0
|
0
|
0
|
|
|
0
|
if (defined $txt && $txt =~ /^contact \S+\@\S+\; run by ezmlm$/) { |
302
|
0
|
|
|
|
|
0
|
my $dlto = $pms->get("Delivered-To"); |
303
|
0
|
|
|
|
|
0
|
my $rcvd = $pms->get("Received"); |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# ensure we have other indicative headers too |
306
|
0
|
0
|
0
|
|
|
0
|
if ($dlto =~ /^mailing list \S+\@\S+/ && |
307
|
|
|
|
|
|
|
$rcvd =~ /qmail \d+ invoked (?:from network|by .{3,20})\); \d+ ... \d+/) |
308
|
|
|
|
|
|
|
{ |
309
|
0
|
|
|
|
|
0
|
return 1; |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
0
|
|
|
|
|
0
|
my $rcvd = $pms->get("Received",undef); |
314
|
0
|
0
|
|
|
|
0
|
if (!defined $rcvd) { |
315
|
|
|
|
|
|
|
# we have no Received headers! These tests cannot run in that case |
316
|
0
|
|
|
|
|
0
|
return 1; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# MSN groups removes Received lines. thanks MSN |
320
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ /from groups\.msn\.com \(\S+\.msn\.com /) { |
321
|
0
|
|
|
|
|
0
|
return 1; |
322
|
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
|
|
324
|
0
|
|
|
|
|
0
|
return 0; |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# FORGED_HOTMAIL_RCVD |
328
|
|
|
|
|
|
|
sub _check_for_forged_hotmail_received_headers { |
329
|
0
|
|
|
0
|
|
0
|
my ($self, $pms) = @_; |
330
|
|
|
|
|
|
|
|
331
|
0
|
0
|
|
|
|
0
|
if (defined $pms->{hotmail_addr_but_no_hotmail_received}) { return; } |
|
0
|
|
|
|
|
0
|
|
332
|
|
|
|
|
|
|
|
333
|
0
|
|
|
|
|
0
|
$pms->{hotmail_addr_with_forged_hotmail_received} = 0; |
334
|
0
|
|
|
|
|
0
|
$pms->{hotmail_addr_but_no_hotmail_received} = 0; |
335
|
|
|
|
|
|
|
|
336
|
0
|
|
|
|
|
0
|
my $rcvd = $pms->get('Received'); |
337
|
0
|
|
|
|
|
0
|
$rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp |
338
|
|
|
|
|
|
|
|
339
|
0
|
0
|
|
|
|
0
|
return if ($rcvd =~ |
340
|
|
|
|
|
|
|
/from mail pickup service by hotmail\.com with Microsoft SMTPSVC;/); |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
# Microsoft passes Hotmail mail directly to MSN Group servers. |
343
|
0
|
0
|
|
|
|
0
|
return if $self->check_for_msn_groups_headers($pms); |
344
|
|
|
|
|
|
|
|
345
|
0
|
|
|
|
|
0
|
my $ip = $pms->get('X-Originating-Ip',undef); |
346
|
0
|
|
|
|
|
0
|
my $IP_ADDRESS = IP_ADDRESS; |
347
|
0
|
|
|
|
|
0
|
my $orig = $pms->get('X-OriginatorOrg',undef); |
348
|
0
|
|
|
|
|
0
|
my $ORIGINATOR = 'hotmail.com'; |
349
|
|
|
|
|
|
|
|
350
|
0
|
0
|
0
|
|
|
0
|
if (defined $ip && $ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
351
|
0
|
0
|
0
|
|
|
0
|
if (defined $orig && $orig =~ /$ORIGINATOR/) { $orig = 1; } else { $orig = 0; } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
# Hotmail formats its received headers like this: |
354
|
|
|
|
|
|
|
# Received: from hotmail.com (f135.law8.hotmail.com [216.33.241.135]) |
355
|
|
|
|
|
|
|
# or like |
356
|
|
|
|
|
|
|
# Received: from EUR01-VE1-obe.outbound.protection.outlook.com (mail-oln040092066056.outbound.protection.outlook.com [40.92.66.56]) |
357
|
|
|
|
|
|
|
# spammers do not ;) |
358
|
|
|
|
|
|
|
|
359
|
0
|
0
|
|
|
|
0
|
if ($self->gated_through_received_hdr_remover($pms)) { return; } |
|
0
|
|
|
|
|
0
|
|
360
|
|
|
|
|
|
|
|
361
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /from (?:\S*\.)?hotmail.com \(\S+\.hotmail(?:\.msn)?\.com[ \)]/ && $ip) |
362
|
0
|
|
|
|
|
0
|
{ return; } |
363
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /from \S*\.outbound\.protection\.outlook\.com \(\S+\.outbound\.protection\.outlook\.com[ \)]/ && $orig) |
364
|
0
|
|
|
|
|
0
|
{ return; } |
365
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /from \S*\.hotmail.com \(\[$IP_ADDRESS\][ \):]/ && $ip) |
366
|
0
|
|
|
|
|
0
|
{ return; } |
367
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /from \S+ by \S+\.hotmail(?:\.msn)?\.com with HTTP\;/ && $ip) |
368
|
0
|
|
|
|
|
0
|
{ return; } |
369
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /from \[66\.218.\S+\] by \S+\.yahoo\.com/ && $ip) |
370
|
0
|
|
|
|
|
0
|
{ return; } |
371
|
|
|
|
|
|
|
|
372
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ /(?:from |HELO |helo=)\S*hotmail\.com\b/) { |
373
|
|
|
|
|
|
|
# HELO'd as hotmail.com, despite not being hotmail |
374
|
0
|
|
|
|
|
0
|
$pms->{hotmail_addr_with_forged_hotmail_received} = 1; |
375
|
|
|
|
|
|
|
} else { |
376
|
|
|
|
|
|
|
# check to see if From claimed to be @hotmail.com |
377
|
0
|
|
|
|
|
0
|
my $from = $pms->get('From:addr'); |
378
|
0
|
0
|
|
|
|
0
|
if ($from !~ /\bhotmail\.com$/i) { return; } |
|
0
|
|
|
|
|
0
|
|
379
|
0
|
|
|
|
|
0
|
$pms->{hotmail_addr_but_no_hotmail_received} = 1; |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# FORGED_HOTMAIL_RCVD |
384
|
|
|
|
|
|
|
sub check_for_forged_hotmail_received_headers { |
385
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
386
|
0
|
|
|
|
|
0
|
$self->_check_for_forged_hotmail_received_headers($pms); |
387
|
0
|
|
|
|
|
0
|
return $pms->{hotmail_addr_with_forged_hotmail_received}; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
# SEMIFORGED_HOTMAIL_RCVD |
391
|
|
|
|
|
|
|
sub check_for_no_hotmail_received_headers { |
392
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
393
|
0
|
|
|
|
|
0
|
$self->_check_for_forged_hotmail_received_headers($pms); |
394
|
0
|
|
|
|
|
0
|
return $pms->{hotmail_addr_but_no_hotmail_received}; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# MSN_GROUPS |
398
|
|
|
|
|
|
|
sub check_for_msn_groups_headers { |
399
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
400
|
|
|
|
|
|
|
|
401
|
0
|
|
|
|
|
0
|
my $to = $pms->get('To'); |
402
|
0
|
0
|
|
|
|
0
|
return 0 unless $to =~ /<(\S+)\@groups\.msn\.com>/i; |
403
|
0
|
|
|
|
|
0
|
my $listname = $1; |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# from Theo Van Dinter, see bug 591 |
406
|
|
|
|
|
|
|
# Updated by DOS, based on messages from Bob Menschel, bug 4301 |
407
|
|
|
|
|
|
|
|
408
|
0
|
0
|
|
|
|
0
|
return 0 unless $pms->get('Received') =~ |
409
|
|
|
|
|
|
|
/from mail pickup service by ((?:p\d\d\.)groups\.msn\.com)\b/; |
410
|
0
|
|
|
|
|
0
|
my $server = $1; |
411
|
|
|
|
|
|
|
|
412
|
0
|
0
|
|
|
|
0
|
if ($listname =~ /^notifications$/) { |
413
|
0
|
0
|
|
|
|
0
|
return 0 unless $pms->get('Message-Id') =~ /^<\S+\@$server>/; |
414
|
|
|
|
|
|
|
} else { |
415
|
0
|
0
|
|
|
|
0
|
return 0 unless $pms->get('Message-Id') =~ /^<$listname-\S+\@groups\.msn\.com>/; |
416
|
0
|
0
|
|
|
|
0
|
return 0 unless $pms->get('EnvelopeFrom:addr') =~ /$listname-bounce\@groups\.msn\.com/; |
417
|
|
|
|
|
|
|
} |
418
|
0
|
|
|
|
|
0
|
return 1; |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
# MSN Groups |
421
|
|
|
|
|
|
|
# Return-path: <ListName-bounce@groups.msn.com> |
422
|
|
|
|
|
|
|
# Received: from groups.msn.com (tk2dcpuba02.msn.com [65.54.195.210]) by |
423
|
|
|
|
|
|
|
# dogma.slashnull.org (8.11.6/8.11.6) with ESMTP id g72K35v10457 for |
424
|
|
|
|
|
|
|
# <zzzzzzzzzzzz@jmason.org>; Fri, 2 Aug 2002 21:03:05 +0100 |
425
|
|
|
|
|
|
|
# Received: from mail pickup service by groups.msn.com with Microsoft |
426
|
|
|
|
|
|
|
# SMTPSVC; Fri, 2 Aug 2002 13:01:30 -0700 |
427
|
|
|
|
|
|
|
# Message-id: <ListName-1392@groups.msn.com> |
428
|
|
|
|
|
|
|
# X-loop: notifications@groups.msn.com |
429
|
|
|
|
|
|
|
# Reply-to: "List Full Name" <ListName@groups.msn.com> |
430
|
|
|
|
|
|
|
# To: "List Full Name" <ListName@groups.msn.com> |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# Return-path: <ListName-bounce@groups.msn.com> |
433
|
|
|
|
|
|
|
# Received: from p04.groups.msn.com ([65.54.195.216]) etc... |
434
|
|
|
|
|
|
|
# Received: from mail pickup service by p04.groups.msn.com with Microsoft SMTPSVC; |
435
|
|
|
|
|
|
|
# Thu, 5 May 2005 20:30:37 -0700 |
436
|
|
|
|
|
|
|
# X-Originating-Ip: 207.68.170.30 |
437
|
|
|
|
|
|
|
# From: =?iso-8859-1?B?IqSj4/D9pEbzeN9s9vLw6qQiIA==?=<zzzzzzzz@hotmail.com> |
438
|
|
|
|
|
|
|
# To: "Managers of List Name" <notifications@groups.msn.com> |
439
|
|
|
|
|
|
|
# Subject: =?iso-8859-1?Q?APPROVAL_NEEDED:_=A4=A3=E3=F0=FD=A4F=F3x=DFl?= |
440
|
|
|
|
|
|
|
# =?iso-8859-1?Q?=F6=F2=F0=EA=A4_applied_to_join_List_Name=2C?= |
441
|
|
|
|
|
|
|
# =?iso-8859-1?Q?_an_MSN_Group?= |
442
|
|
|
|
|
|
|
# Date: Thu, 5 May 2005 20:30:37 -0700 |
443
|
|
|
|
|
|
|
# MIME-Version: 1.0 |
444
|
|
|
|
|
|
|
# Content-Type: multipart/alternative; |
445
|
|
|
|
|
|
|
# boundary="----=_NextPart_000_333944_01C551B1.4BBA02B0" |
446
|
|
|
|
|
|
|
# X-MimeOLE: Produced By Microsoft MimeOLE V5.50.4927.1200 |
447
|
|
|
|
|
|
|
# Message-ID: <TK2DCPUBA042cv0aGlt00020aa3@p04.groups.msn.com> |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
# Return-path: <ListName-bounce@groups.msn.com> |
450
|
|
|
|
|
|
|
# Received: from [65.54.208.83] (helo=p05.groups.msn.com) etc... |
451
|
|
|
|
|
|
|
# Received: from mail pickup service by p05.groups.msn.com with Microsoft SMTPSVC; |
452
|
|
|
|
|
|
|
# Fri, 6 May 2005 14:59:25 -0700 |
453
|
|
|
|
|
|
|
# X-Originating-Ip: 207.68.170.30 |
454
|
|
|
|
|
|
|
# Message-Id: <ListName-101@groups.msn.com> |
455
|
|
|
|
|
|
|
# Reply-To: "List Name" <ListName@groups.msn.com> |
456
|
|
|
|
|
|
|
# From: "whoever" <zzzzzzzzzz@hotmail.com> |
457
|
|
|
|
|
|
|
# To: "List Name" <ListName@groups.msn.com> |
458
|
|
|
|
|
|
|
# Subject: whatever |
459
|
|
|
|
|
|
|
# Date: Fri, 6 May 2005 14:59:25 -0700 |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
########################################################################### |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
sub check_for_forged_eudoramail_received_headers { |
466
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
467
|
|
|
|
|
|
|
|
468
|
0
|
|
|
|
|
0
|
my $from = $pms->get('From:addr'); |
469
|
0
|
0
|
|
|
|
0
|
if ($from !~ /\beudoramail\.com$/i) { return 0; } |
|
0
|
|
|
|
|
0
|
|
470
|
|
|
|
|
|
|
|
471
|
0
|
|
|
|
|
0
|
my $rcvd = $pms->get('Received'); |
472
|
0
|
|
|
|
|
0
|
$rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp |
473
|
|
|
|
|
|
|
|
474
|
0
|
|
|
|
|
0
|
my $ip = $pms->get('X-Sender-Ip',undef); |
475
|
0
|
|
|
|
|
0
|
my $IP_ADDRESS = IP_ADDRESS; |
476
|
0
|
0
|
0
|
|
|
0
|
if (defined $ip && $ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
# Eudoramail formats its received headers like this: |
479
|
|
|
|
|
|
|
# Received: from Unknown/Local ([?.?.?.?]) by shared1-mail.whowhere.com; |
480
|
|
|
|
|
|
|
# Thu Nov 29 13:44:25 2001 |
481
|
|
|
|
|
|
|
# Message-Id: <JGDHDEHPPJECDAAA@shared1-mail.whowhere.com> |
482
|
|
|
|
|
|
|
# Organization: QUALCOMM Eudora Web-Mail (http://www.eudoramail.com:80) |
483
|
|
|
|
|
|
|
# X-Sender-Ip: 192.175.21.146 |
484
|
|
|
|
|
|
|
# X-Mailer: MailCity Service |
485
|
|
|
|
|
|
|
|
486
|
0
|
0
|
|
|
|
0
|
if ($self->gated_through_received_hdr_remover($pms)) { return 0; } |
|
0
|
|
|
|
|
0
|
|
487
|
|
|
|
|
|
|
|
488
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /by \S*whowhere.com\;/ && $ip) { return 0; } |
|
0
|
|
|
|
|
0
|
|
489
|
|
|
|
|
|
|
|
490
|
0
|
|
|
|
|
0
|
return 1; |
491
|
|
|
|
|
|
|
} |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
########################################################################### |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
sub check_for_forged_yahoo_received_headers { |
496
|
77
|
|
|
77
|
0
|
258
|
my ($self, $pms) = @_; |
497
|
|
|
|
|
|
|
|
498
|
77
|
|
|
|
|
262
|
my $from = $pms->get('From:addr'); |
499
|
77
|
50
|
|
|
|
427
|
if ($from !~ /\byahoo\.com$/i) { return 0; } |
|
77
|
|
|
|
|
1183
|
|
500
|
|
|
|
|
|
|
|
501
|
0
|
|
|
|
|
0
|
my $rcvd = $pms->get('Received'); |
502
|
|
|
|
|
|
|
|
503
|
0
|
0
|
0
|
|
|
0
|
if ($pms->get("Resent-From") ne '' && $pms->get("Resent-To") ne '') { |
504
|
0
|
|
|
|
|
0
|
my $xrcvd = $pms->get("X-Received"); |
505
|
0
|
0
|
|
|
|
0
|
$rcvd = $xrcvd if $xrcvd ne ''; |
506
|
|
|
|
|
|
|
} |
507
|
0
|
|
|
|
|
0
|
$rcvd =~ s/\s+/ /gs; # just spaces, simplify the regexp |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
# not sure about this |
510
|
|
|
|
|
|
|
#if ($rcvd !~ /from \S*yahoo\.com/) { return 0; } |
511
|
|
|
|
|
|
|
|
512
|
0
|
0
|
|
|
|
0
|
if ($self->gated_through_received_hdr_remover($pms)) { return 0; } |
|
0
|
|
|
|
|
0
|
|
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
# bug 3740: ignore bounces from Yahoo!. only honoured if the |
515
|
|
|
|
|
|
|
# correct rDNS shows up in the trusted relay list, or first untrusted relay |
516
|
|
|
|
|
|
|
# |
517
|
|
|
|
|
|
|
# bug 4528: [ ip=68.142.202.54 rdns=mta122.mail.mud.yahoo.com |
518
|
|
|
|
|
|
|
# helo=mta122.mail.mud.yahoo.com by=eclectic.kluge.net ident= |
519
|
|
|
|
|
|
|
# envfrom= intl=0 id=49F2EAF13B auth= ] |
520
|
|
|
|
|
|
|
# |
521
|
0
|
0
|
0
|
|
|
0
|
if ($pms->{relays_trusted_str} =~ / rdns=\S+\.yahoo\.com / |
522
|
|
|
|
|
|
|
|| $pms->{relays_untrusted_str} =~ /^[^\]]+ rdns=\S+\.yahoo\.com /) |
523
|
0
|
|
|
|
|
0
|
{ return 0; } |
524
|
|
|
|
|
|
|
|
525
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ /by web\S+\.mail\S*\.yahoo\.com via HTTP/) { return 0; } |
|
0
|
|
|
|
|
0
|
|
526
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ /by smtp\S+\.yahoo\.com with SMTP/) { return 0; } |
|
0
|
|
|
|
|
0
|
|
527
|
0
|
|
|
|
|
0
|
my $IP_ADDRESS = IP_ADDRESS; |
528
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ |
529
|
|
|
|
|
|
|
/from \[$IP_ADDRESS\] by \S+\.(?:groups|scd|dcn)\.yahoo\.com with NNFMP/) { |
530
|
0
|
|
|
|
|
0
|
return 0; |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
# used in "forward this news item to a friend" links. There's no better |
534
|
|
|
|
|
|
|
# received hdrs to match on, unfortunately. I'm not sure if the next test is |
535
|
|
|
|
|
|
|
# still useful, as a result. |
536
|
|
|
|
|
|
|
# |
537
|
|
|
|
|
|
|
# search for msgid <20020929140301.451A92940A9@xent.com>, subject "Yahoo! |
538
|
|
|
|
|
|
|
# News Story - Top Stories", date Sep 29 2002 on |
539
|
|
|
|
|
|
|
# <http://xent.com/pipermail/fork/> for an example. |
540
|
|
|
|
|
|
|
# |
541
|
0
|
0
|
0
|
|
|
0
|
if ($rcvd =~ /\bmailer\d+\.bulk\.scd\.yahoo\.com\b/ |
542
|
0
|
|
|
|
|
0
|
&& $from =~ /\@reply\.yahoo\.com$/i) { return 0; } |
543
|
|
|
|
|
|
|
|
544
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ /by \w+\.\w+\.yahoo\.com \(\d+\.\d+\.\d+\/\d+\.\d+\.\d+\)(?: with ESMTP)? id \w+/) { |
545
|
|
|
|
|
|
|
# possibly sent from "mail this story to a friend" |
546
|
0
|
|
|
|
|
0
|
return 0; |
547
|
|
|
|
|
|
|
} |
548
|
|
|
|
|
|
|
|
549
|
0
|
|
|
|
|
0
|
return 1; |
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
sub check_for_forged_juno_received_headers { |
553
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
554
|
|
|
|
|
|
|
|
555
|
0
|
|
|
|
|
0
|
my $from = $pms->get('From:addr'); |
556
|
0
|
0
|
|
|
|
0
|
if ($from !~ /\bjuno\.com$/i) { return 0; } |
|
0
|
|
|
|
|
0
|
|
557
|
|
|
|
|
|
|
|
558
|
0
|
0
|
|
|
|
0
|
if ($self->gated_through_received_hdr_remover($pms)) { return 0; } |
|
0
|
|
|
|
|
0
|
|
559
|
|
|
|
|
|
|
|
560
|
0
|
|
|
|
|
0
|
my $xorig = $pms->get('X-Originating-IP'); |
561
|
0
|
|
|
|
|
0
|
my $xmailer = $pms->get('X-Mailer'); |
562
|
0
|
|
|
|
|
0
|
my $rcvd = $pms->get('Received'); |
563
|
0
|
|
|
|
|
0
|
my $IP_ADDRESS = IP_ADDRESS; |
564
|
|
|
|
|
|
|
|
565
|
0
|
0
|
|
|
|
0
|
if ($xorig ne '') { |
566
|
|
|
|
|
|
|
# New style Juno has no X-Originating-IP header, and other changes |
567
|
0
|
0
|
0
|
|
|
0
|
if($rcvd !~ /from.*\b(?:juno|untd)\.com.*[\[\(]$IP_ADDRESS[\]\)].*by/ |
568
|
0
|
|
|
|
|
0
|
&& $rcvd !~ / cookie\.(?:juno|untd)\.com /) { return 1; } |
569
|
0
|
0
|
|
|
|
0
|
if($xmailer !~ /Juno /) { return 1; } |
|
0
|
|
|
|
|
0
|
|
570
|
|
|
|
|
|
|
} else { |
571
|
0
|
0
|
|
|
|
0
|
if($rcvd =~ /from.*\bmail\.com.*\[$IP_ADDRESS\].*by/) { |
|
|
0
|
|
|
|
|
|
572
|
0
|
0
|
|
|
|
0
|
if($xmailer !~ /\bmail\.com/) { return 1; } |
|
0
|
|
|
|
|
0
|
|
573
|
|
|
|
|
|
|
} elsif($rcvd =~ /from (webmail\S+\.untd\.com) \(\1 \[$IP_ADDRESS\]\) by/) { |
574
|
0
|
0
|
|
|
|
0
|
if($xmailer !~ /^Webmail Version \d/) { return 1; } |
|
0
|
|
|
|
|
0
|
|
575
|
|
|
|
|
|
|
} else { |
576
|
0
|
|
|
|
|
0
|
return 1; |
577
|
|
|
|
|
|
|
} |
578
|
0
|
0
|
|
|
|
0
|
if($xorig !~ /$IP_ADDRESS/) { return 1; } |
|
0
|
|
|
|
|
0
|
|
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
|
581
|
0
|
|
|
|
|
0
|
return 0; |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
sub check_for_forged_gmail_received_headers { |
585
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
586
|
22
|
|
|
22
|
|
213
|
use constant GOOGLE_MESSAGE_STATE_LENGTH_MIN => 60; |
|
22
|
|
|
|
|
58
|
|
|
22
|
|
|
|
|
1854
|
|
587
|
22
|
|
|
22
|
|
162
|
use constant GOOGLE_SMTP_SOURCE_LENGTH_MIN => 60; |
|
22
|
|
|
|
|
52
|
|
|
22
|
|
|
|
|
8835
|
|
588
|
|
|
|
|
|
|
|
589
|
0
|
|
|
|
|
0
|
my $from = $pms->get('From:addr'); |
590
|
0
|
0
|
|
|
|
0
|
if ($from !~ /\bgmail\.com$/i) { return 0; } |
|
0
|
|
|
|
|
0
|
|
591
|
|
|
|
|
|
|
|
592
|
0
|
|
|
|
|
0
|
my $xgms = $pms->get('X-Gm-Message-State'); |
593
|
0
|
|
|
|
|
0
|
my $xss = $pms->get('X-Google-Smtp-Source'); |
594
|
0
|
|
|
|
|
0
|
my $xreceived = $pms->get('X-Received'); |
595
|
0
|
|
|
|
|
0
|
my $received = $pms->get('Received'); |
596
|
|
|
|
|
|
|
|
597
|
0
|
0
|
|
|
|
0
|
if ($xreceived =~ /by 10\.\S+ with SMTP id \S+/) { return 0; } |
|
0
|
|
|
|
|
0
|
|
598
|
0
|
0
|
|
|
|
0
|
if ($xreceived =~ /by 2002\:\w\:\S+ with SMTP id \S+/) { return 0; } |
|
0
|
|
|
|
|
0
|
|
599
|
0
|
0
|
|
|
|
0
|
if ($received =~ /by smtp\.googlemail\.com with ESMTPSA id \S+/) { |
600
|
0
|
|
|
|
|
0
|
return 0; |
601
|
|
|
|
|
|
|
} |
602
|
0
|
0
|
0
|
|
|
0
|
if ( (length($xgms) >= GOOGLE_MESSAGE_STATE_LENGTH_MIN) && |
603
|
|
|
|
|
|
|
(length($xss) >= GOOGLE_SMTP_SOURCE_LENGTH_MIN)) { |
604
|
0
|
|
|
|
|
0
|
return 0; |
605
|
|
|
|
|
|
|
} |
606
|
|
|
|
|
|
|
|
607
|
0
|
|
|
|
|
0
|
return 1; |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
sub check_for_matching_env_and_hdr_from { |
611
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) =@_; |
612
|
|
|
|
|
|
|
# two blank headers match so don't bother checking |
613
|
0
|
|
|
|
|
0
|
return (lc $pms->get('EnvelopeFrom:addr') eq lc $pms->get('From:addr')); |
614
|
|
|
|
|
|
|
} |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
sub sorted_recipients { |
617
|
77
|
|
|
77
|
0
|
248
|
my ($self, $pms) = @_; |
618
|
|
|
|
|
|
|
|
619
|
77
|
100
|
|
|
|
282
|
if (!exists $pms->{tocc_sorted}) { |
620
|
36
|
|
|
|
|
182
|
$self->_check_recipients($pms); |
621
|
|
|
|
|
|
|
} |
622
|
77
|
|
|
|
|
1354
|
return $pms->{tocc_sorted}; |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
sub similar_recipients { |
626
|
77
|
|
|
77
|
0
|
276
|
my ($self, $pms, $min, $max) = @_; |
627
|
|
|
|
|
|
|
|
628
|
77
|
100
|
|
|
|
267
|
if (!exists $pms->{tocc_similar}) { |
629
|
41
|
|
|
|
|
191
|
$self->_check_recipients($pms); |
630
|
|
|
|
|
|
|
} |
631
|
|
|
|
|
|
|
return (($min eq 'undef' || $pms->{tocc_similar} >= $min) && |
632
|
77
|
|
33
|
|
|
2030
|
($max eq 'undef' || $pms->{tocc_similar} < $max)); |
633
|
|
|
|
|
|
|
} |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# best experimentally derived values |
636
|
22
|
|
|
22
|
|
197
|
use constant TOCC_SORTED_COUNT => 7; |
|
22
|
|
|
|
|
67
|
|
|
22
|
|
|
|
|
1314
|
|
637
|
22
|
|
|
22
|
|
148
|
use constant TOCC_SIMILAR_COUNT => 5; |
|
22
|
|
|
|
|
54
|
|
|
22
|
|
|
|
|
1168
|
|
638
|
22
|
|
|
22
|
|
147
|
use constant TOCC_SIMILAR_LENGTH => 2; |
|
22
|
|
|
|
|
48
|
|
|
22
|
|
|
|
|
73831
|
|
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
sub _check_recipients { |
641
|
77
|
|
|
77
|
|
222
|
my ($self, $pms) = @_; |
642
|
|
|
|
|
|
|
|
643
|
77
|
|
|
|
|
172
|
my @inputs; |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
# ToCc: pseudo-header works best, but sometimes Bcc: is better |
646
|
77
|
|
|
|
|
274
|
for ('ToCc', 'Bcc') { |
647
|
153
|
|
|
|
|
390
|
my $to = $pms->get($_); # get recipients |
648
|
153
|
|
|
|
|
308
|
$to =~ s/\(.*?\)//g; # strip out the (comments) |
649
|
153
|
|
|
|
|
471
|
push(@inputs, ($to =~ m/([\w.=-]+\@\w+(?:[\w.-]+\.)+\w+)/g)); |
650
|
153
|
100
|
|
|
|
564
|
last if scalar(@inputs) >= TOCC_SIMILAR_COUNT; |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
# remove duplicate addresses only when they appear next to each other |
654
|
77
|
|
|
|
|
161
|
my @address; |
655
|
77
|
|
|
|
|
242
|
my $previous = ''; |
656
|
77
|
|
|
|
|
278
|
while (my $current = shift @inputs) { |
657
|
52
|
50
|
|
|
|
94
|
push(@address, ($previous = $current)) if lc($current) ne lc($previous); |
658
|
52
|
50
|
|
|
|
104
|
last if @address == 256; |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
# ideas that had both poor S/O ratios and poor hit rates: |
662
|
|
|
|
|
|
|
# - testing for reverse sorted recipient lists |
663
|
|
|
|
|
|
|
# - testing To: and Cc: headers separately |
664
|
77
|
|
66
|
|
|
408
|
$pms->{tocc_sorted} = (scalar(@address) >= TOCC_SORTED_COUNT && |
665
|
|
|
|
|
|
|
join(',', @address) eq (join(',', sort @address))); |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
# a good S/O ratio and hit rate is achieved by comparing 2-byte |
668
|
|
|
|
|
|
|
# substrings and requiring 5 or more addresses |
669
|
77
|
|
|
|
|
284
|
$pms->{tocc_similar} = 0; |
670
|
77
|
100
|
|
|
|
303
|
if (scalar (@address) >= TOCC_SIMILAR_COUNT) { |
671
|
1
|
|
|
|
|
2
|
my @user = map { substr($_,0,TOCC_SIMILAR_LENGTH) } @address; |
|
48
|
|
|
|
|
59
|
|
672
|
1
|
|
|
|
|
1
|
my @fqhn = map { m/\@(.*)/ } @address; |
|
48
|
|
|
|
|
99
|
|
673
|
1
|
|
|
|
|
2
|
my @host = map { substr($_,0,TOCC_SIMILAR_LENGTH) } @fqhn; |
|
48
|
|
|
|
|
62
|
|
674
|
1
|
|
|
|
|
2
|
my $hits = 0; |
675
|
1
|
|
|
|
|
2
|
my $combinations = 0; |
676
|
1
|
|
|
|
|
9
|
for (my $i = 0; $i <= $#address; $i++) { |
677
|
48
|
|
|
|
|
68
|
for (my $j = $i+1; $j <= $#address; $j++) { |
678
|
1128
|
100
|
|
|
|
1378
|
$hits++ if $user[$i] eq $user[$j]; |
679
|
1128
|
100
|
100
|
|
|
1870
|
$hits++ if $host[$i] eq $host[$j] && $fqhn[$i] ne $fqhn[$j]; |
680
|
1128
|
|
|
|
|
1402
|
$combinations++; |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
} |
683
|
1
|
|
|
|
|
13
|
$pms->{tocc_similar} = $hits / $combinations; |
684
|
|
|
|
|
|
|
} |
685
|
|
|
|
|
|
|
} |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
sub check_for_missing_to_header { |
688
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
689
|
|
|
|
|
|
|
|
690
|
0
|
|
|
|
|
0
|
my $hdr = $pms->get('To'); |
691
|
0
|
0
|
|
|
|
0
|
$hdr = $pms->get('Apparently-To') if $hdr eq ''; |
692
|
0
|
0
|
|
|
|
0
|
return 1 if $hdr eq ''; |
693
|
|
|
|
|
|
|
|
694
|
0
|
|
|
|
|
0
|
return 0; |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
sub check_for_forged_gw05_received_headers { |
698
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
699
|
0
|
|
|
|
|
0
|
local ($_); |
700
|
|
|
|
|
|
|
|
701
|
0
|
|
|
|
|
0
|
my $rcv = $pms->get('Received'); |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
# e.g. |
704
|
|
|
|
|
|
|
# Received: from mail3.icytundra.com by gw05 with ESMTP; Thu, 21 Jun 2001 02:28:32 -0400 |
705
|
0
|
|
|
|
|
0
|
my ($h1, $h2) = ($rcv =~ |
706
|
|
|
|
|
|
|
m/\nfrom\s(\S+)\sby\s(\S+)\swith\sESMTP\;\s+\S\S\S,\s+\d+\s+\S\S\S\s+ |
707
|
|
|
|
|
|
|
\d{4}\s+\d\d:\d\d:\d\d\s+[-+]*\d{4}\n$/xs); |
708
|
|
|
|
|
|
|
|
709
|
0
|
0
|
0
|
|
|
0
|
if (defined ($h1) && defined ($h2) && $h2 !~ /\./) { |
|
|
|
0
|
|
|
|
|
710
|
0
|
|
|
|
|
0
|
return 1; |
711
|
|
|
|
|
|
|
} |
712
|
|
|
|
|
|
|
|
713
|
0
|
|
|
|
|
0
|
0; |
714
|
|
|
|
|
|
|
} |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
########################################################################### |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
sub check_for_shifted_date { |
719
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms, $min, $max) = @_; |
720
|
|
|
|
|
|
|
|
721
|
0
|
0
|
|
|
|
0
|
if (!exists $pms->{date_diff}) { |
722
|
0
|
|
|
|
|
0
|
$self->_check_date_diff($pms); |
723
|
|
|
|
|
|
|
} |
724
|
|
|
|
|
|
|
return (($min eq 'undef' || $pms->{date_diff} >= (3600 * $min)) && |
725
|
0
|
|
0
|
|
|
0
|
($max eq 'undef' || $pms->{date_diff} < (3600 * $max))); |
726
|
|
|
|
|
|
|
} |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
# filters out some false positives in old corpus mail - Allen |
729
|
|
|
|
|
|
|
sub received_within_months { |
730
|
0
|
|
|
0
|
0
|
0
|
my ($self,$pms,$min,$max) = @_; |
731
|
|
|
|
|
|
|
|
732
|
0
|
0
|
|
|
|
0
|
if (!exists($pms->{date_received})) { |
733
|
0
|
|
|
|
|
0
|
$self->_check_date_received($pms); |
734
|
|
|
|
|
|
|
} |
735
|
0
|
|
|
|
|
0
|
my $diff = time() - $pms->{date_received}; |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
# 365.2425 * 24 * 60 * 60 = 31556952 = seconds in year (including leap) |
738
|
|
|
|
|
|
|
|
739
|
0
|
0
|
0
|
|
|
0
|
if (((! defined($min)) || ($min eq 'undef') || |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
740
|
|
|
|
|
|
|
($diff >= (31556952 * ($min/12)))) && |
741
|
|
|
|
|
|
|
((! defined($max)) || ($max eq 'undef') || |
742
|
|
|
|
|
|
|
($diff < (31556952 * ($max/12))))) { |
743
|
0
|
|
|
|
|
0
|
return 1; |
744
|
|
|
|
|
|
|
} else { |
745
|
0
|
|
|
|
|
0
|
return 0; |
746
|
|
|
|
|
|
|
} |
747
|
|
|
|
|
|
|
} |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
sub _get_date_header_time { |
750
|
0
|
|
|
0
|
|
0
|
my ($self, $pms) = @_; |
751
|
|
|
|
|
|
|
|
752
|
0
|
|
|
|
|
0
|
my $time; |
753
|
|
|
|
|
|
|
# a Resent-Date: header takes precedence over any Date: header |
754
|
0
|
|
|
|
|
0
|
DATE: for my $header ('Resent-Date', 'Date') { |
755
|
0
|
|
|
|
|
0
|
my @dates = $pms->{msg}->get_header($header); |
756
|
0
|
|
|
|
|
0
|
for my $date (@dates) { |
757
|
0
|
0
|
0
|
|
|
0
|
if (defined($date) && length($date)) { |
758
|
0
|
|
|
|
|
0
|
chomp($date); |
759
|
0
|
|
|
|
|
0
|
$time = parse_rfc822_date($date); |
760
|
|
|
|
|
|
|
} |
761
|
0
|
0
|
|
|
|
0
|
last DATE if defined($time); |
762
|
|
|
|
|
|
|
} |
763
|
|
|
|
|
|
|
} |
764
|
0
|
0
|
|
|
|
0
|
if (defined($time)) { |
765
|
0
|
|
|
|
|
0
|
$pms->{date_header_time} = $time; |
766
|
|
|
|
|
|
|
} |
767
|
|
|
|
|
|
|
else { |
768
|
0
|
|
|
|
|
0
|
$pms->{date_header_time} = undef; |
769
|
|
|
|
|
|
|
} |
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
sub _get_received_header_times { |
773
|
0
|
|
|
0
|
|
0
|
my ($self, $pms) = @_; |
774
|
|
|
|
|
|
|
|
775
|
0
|
|
|
|
|
0
|
$pms->{received_header_times} = [ () ]; |
776
|
0
|
|
|
|
|
0
|
$pms->{received_fetchmail_time} = undef; |
777
|
|
|
|
|
|
|
|
778
|
0
|
|
|
|
|
0
|
my (@received); |
779
|
0
|
|
|
|
|
0
|
my $received = $pms->get('Received'); |
780
|
0
|
0
|
|
|
|
0
|
if ($received ne '') { |
781
|
0
|
|
|
|
|
0
|
@received = grep {$_ =~ m/\S/} (split(/\n/,$received)); |
|
0
|
|
|
|
|
0
|
|
782
|
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
# if we have no Received: headers, chances are we're archived mail |
784
|
|
|
|
|
|
|
# with a limited set of headers |
785
|
0
|
0
|
|
|
|
0
|
if (!scalar(@received)) { |
786
|
0
|
|
|
|
|
0
|
return; |
787
|
|
|
|
|
|
|
} |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
# handle fetchmail headers |
790
|
0
|
|
|
|
|
0
|
my (@local); |
791
|
0
|
0
|
0
|
|
|
0
|
if (($received[0] =~ |
792
|
|
|
|
|
|
|
m/\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/) || |
793
|
|
|
|
|
|
|
($received[0] =~ m/qmail \d+ invoked by uid \d+/)) { |
794
|
0
|
|
|
|
|
0
|
push @local, (shift @received); |
795
|
|
|
|
|
|
|
} |
796
|
0
|
0
|
0
|
|
|
0
|
if (scalar(@received) && |
|
|
0
|
|
|
|
|
|
797
|
|
|
|
|
|
|
($received[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) { |
798
|
0
|
|
|
|
|
0
|
push @local, (shift @received); |
799
|
|
|
|
|
|
|
} |
800
|
|
|
|
|
|
|
elsif (scalar(@local)) { |
801
|
0
|
|
|
|
|
0
|
unshift @received, (shift @local); |
802
|
|
|
|
|
|
|
} |
803
|
|
|
|
|
|
|
|
804
|
0
|
0
|
|
|
|
0
|
if (scalar(@local)) { |
805
|
0
|
|
|
|
|
0
|
my (@fetchmail_times); |
806
|
0
|
|
|
|
|
0
|
foreach my $rcvd (@local) { |
807
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) { |
808
|
0
|
|
|
|
|
0
|
my $date = $1; |
809
|
0
|
|
|
|
|
0
|
dbg2("eval: trying Received fetchmail header date for real time: $date"); |
810
|
0
|
|
|
|
|
0
|
my $time = parse_rfc822_date($date); |
811
|
0
|
0
|
0
|
|
|
0
|
if (defined($time) && (time() >= $time)) { |
812
|
0
|
|
|
|
|
0
|
dbg2("eval: time_t from date=$time, rcvd=$date"); |
813
|
0
|
|
|
|
|
0
|
push @fetchmail_times, $time; |
814
|
|
|
|
|
|
|
} |
815
|
|
|
|
|
|
|
} |
816
|
|
|
|
|
|
|
} |
817
|
0
|
0
|
|
|
|
0
|
if (scalar(@fetchmail_times) > 1) { |
|
|
0
|
|
|
|
|
|
818
|
|
|
|
|
|
|
$pms->{received_fetchmail_time} = |
819
|
0
|
|
|
|
|
0
|
(sort {$b <=> $a} (@fetchmail_times))[0]; |
|
0
|
|
|
|
|
0
|
|
820
|
|
|
|
|
|
|
} elsif (scalar(@fetchmail_times)) { |
821
|
0
|
|
|
|
|
0
|
$pms->{received_fetchmail_time} = $fetchmail_times[0]; |
822
|
|
|
|
|
|
|
} |
823
|
|
|
|
|
|
|
} |
824
|
|
|
|
|
|
|
|
825
|
0
|
|
|
|
|
0
|
my (@header_times); |
826
|
0
|
|
|
|
|
0
|
foreach my $rcvd (@received) { |
827
|
0
|
0
|
|
|
|
0
|
if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) { |
828
|
0
|
|
|
|
|
0
|
my $date = $1; |
829
|
0
|
|
|
|
|
0
|
dbg2("eval: trying Received header date for real time: $date"); |
830
|
0
|
|
|
|
|
0
|
my $time = parse_rfc822_date($date); |
831
|
0
|
0
|
|
|
|
0
|
if (defined($time)) { |
832
|
0
|
|
|
|
|
0
|
dbg2("eval: time_t from date=$time, rcvd=$date"); |
833
|
0
|
|
|
|
|
0
|
push @header_times, $time; |
834
|
|
|
|
|
|
|
} |
835
|
|
|
|
|
|
|
} |
836
|
|
|
|
|
|
|
} |
837
|
|
|
|
|
|
|
|
838
|
0
|
0
|
|
|
|
0
|
if (scalar(@header_times)) { |
839
|
0
|
|
|
|
|
0
|
$pms->{received_header_times} = [ @header_times ]; |
840
|
|
|
|
|
|
|
} else { |
841
|
0
|
|
|
|
|
0
|
dbg("eval: no dates found in Received headers"); |
842
|
|
|
|
|
|
|
} |
843
|
|
|
|
|
|
|
} |
844
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
sub _check_date_received { |
846
|
0
|
|
|
0
|
|
0
|
my ($self, $pms) = @_; |
847
|
|
|
|
|
|
|
|
848
|
0
|
|
|
|
|
0
|
my (@dates_poss); |
849
|
|
|
|
|
|
|
|
850
|
0
|
|
|
|
|
0
|
$pms->{date_received} = 0; |
851
|
|
|
|
|
|
|
|
852
|
0
|
0
|
|
|
|
0
|
if (!exists($pms->{date_header_time})) { |
853
|
0
|
|
|
|
|
0
|
$self->_get_date_header_time($pms); |
854
|
|
|
|
|
|
|
} |
855
|
|
|
|
|
|
|
|
856
|
0
|
0
|
|
|
|
0
|
if (defined($pms->{date_header_time})) { |
857
|
0
|
|
|
|
|
0
|
push @dates_poss, $pms->{date_header_time}; |
858
|
|
|
|
|
|
|
} |
859
|
|
|
|
|
|
|
|
860
|
0
|
0
|
|
|
|
0
|
if (!exists($pms->{received_header_times})) { |
861
|
0
|
|
|
|
|
0
|
$self->_get_received_header_times($pms); |
862
|
|
|
|
|
|
|
} |
863
|
0
|
|
|
|
|
0
|
my (@received_header_times) = @{ $pms->{received_header_times} }; |
|
0
|
|
|
|
|
0
|
|
864
|
0
|
0
|
|
|
|
0
|
if (scalar(@received_header_times)) { |
865
|
0
|
|
|
|
|
0
|
push @dates_poss, $received_header_times[0]; |
866
|
|
|
|
|
|
|
} |
867
|
0
|
0
|
|
|
|
0
|
if (defined($pms->{received_fetchmail_time})) { |
868
|
0
|
|
|
|
|
0
|
push @dates_poss, $pms->{received_fetchmail_time}; |
869
|
|
|
|
|
|
|
} |
870
|
|
|
|
|
|
|
|
871
|
0
|
0
|
0
|
|
|
0
|
if (defined($pms->{date_header_time}) && scalar(@received_header_times)) { |
872
|
0
|
0
|
|
|
|
0
|
if (!exists($pms->{date_diff})) { |
873
|
0
|
|
|
|
|
0
|
$self->_check_date_diff($pms); |
874
|
|
|
|
|
|
|
} |
875
|
0
|
|
|
|
|
0
|
push @dates_poss, $pms->{date_header_time} - $pms->{date_diff}; |
876
|
|
|
|
|
|
|
} |
877
|
|
|
|
|
|
|
|
878
|
0
|
0
|
|
|
|
0
|
if (scalar(@dates_poss)) { # use median |
879
|
0
|
|
|
|
|
0
|
$pms->{date_received} = (sort {$b <=> $a} |
|
0
|
|
|
|
|
0
|
|
880
|
|
|
|
|
|
|
(@dates_poss))[int($#dates_poss/2)]; |
881
|
|
|
|
|
|
|
dbg("eval: date chosen from message: " . |
882
|
0
|
|
|
|
|
0
|
scalar(localtime($pms->{date_received}))); |
883
|
|
|
|
|
|
|
} else { |
884
|
0
|
|
|
|
|
0
|
dbg("eval: no dates found in message"); |
885
|
|
|
|
|
|
|
} |
886
|
|
|
|
|
|
|
} |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
sub _check_date_diff { |
889
|
0
|
|
|
0
|
|
0
|
my ($self, $pms) = @_; |
890
|
|
|
|
|
|
|
|
891
|
0
|
|
|
|
|
0
|
$pms->{date_diff} = 0; |
892
|
|
|
|
|
|
|
|
893
|
0
|
0
|
|
|
|
0
|
if (!exists($pms->{date_header_time})) { |
894
|
0
|
|
|
|
|
0
|
$self->_get_date_header_time($pms); |
895
|
|
|
|
|
|
|
} |
896
|
|
|
|
|
|
|
|
897
|
0
|
0
|
|
|
|
0
|
if (!defined($pms->{date_header_time})) { |
898
|
0
|
|
|
|
|
0
|
return; # already have tests for this |
899
|
|
|
|
|
|
|
} |
900
|
|
|
|
|
|
|
|
901
|
0
|
0
|
|
|
|
0
|
if (!exists($pms->{received_header_times})) { |
902
|
0
|
|
|
|
|
0
|
$self->_get_received_header_times($pms); |
903
|
|
|
|
|
|
|
} |
904
|
0
|
|
|
|
|
0
|
my (@header_times) = @{ $pms->{received_header_times} }; |
|
0
|
|
|
|
|
0
|
|
905
|
|
|
|
|
|
|
|
906
|
0
|
0
|
|
|
|
0
|
if (!scalar(@header_times)) { |
907
|
0
|
|
|
|
|
0
|
return; # archived mail? |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
|
910
|
0
|
|
|
|
|
0
|
my (@diffs) = map {$pms->{date_header_time} - $_} (@header_times); |
|
0
|
|
|
|
|
0
|
|
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
# if the last Received: header has no difference, then we choose to |
913
|
|
|
|
|
|
|
# exclude it |
914
|
0
|
0
|
0
|
|
|
0
|
if ($#diffs > 0 && $diffs[$#diffs] == 0) { |
915
|
0
|
|
|
|
|
0
|
pop(@diffs); |
916
|
|
|
|
|
|
|
} |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
# use the date with the smallest absolute difference |
919
|
|
|
|
|
|
|
# (experimentally, this results in the fewest false positives) |
920
|
0
|
|
|
|
|
0
|
@diffs = sort { abs($a) <=> abs($b) } @diffs; |
|
0
|
|
|
|
|
0
|
|
921
|
0
|
|
|
|
|
0
|
$pms->{date_diff} = $diffs[0]; |
922
|
|
|
|
|
|
|
} |
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
sub subject_is_all_caps { |
926
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms) = @_; |
927
|
0
|
|
|
|
|
0
|
my $subject = $pms->get('Subject'); |
928
|
|
|
|
|
|
|
|
929
|
0
|
|
|
|
|
0
|
$subject =~ s/^\s+//; |
930
|
0
|
|
|
|
|
0
|
$subject =~ s/\s+$//; |
931
|
0
|
|
|
|
|
0
|
$subject =~ s/^(?:(?:Re|Fwd|Fw|Aw|Antwort|Sv):\s*)+//i; # Bug 6805 |
932
|
0
|
0
|
|
|
|
0
|
return 0 if $subject !~ /\s/; # don't match one word subjects |
933
|
0
|
0
|
|
|
|
0
|
return 0 if (length $subject < 10); # don't match short subjects |
934
|
0
|
|
|
|
|
0
|
$subject =~ s/[^a-zA-Z]//g; # only look at letters |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
# now, check to see if the subject is encoded using a non-ASCII charset. |
937
|
|
|
|
|
|
|
# If so, punt on this test to avoid FPs. We just list the known charsets |
938
|
|
|
|
|
|
|
# this test will FP on, here. |
939
|
0
|
|
|
|
|
0
|
my $subjraw = $pms->get('Subject:raw'); |
940
|
0
|
|
|
|
|
0
|
my $CLTFAC = Mail::SpamAssassin::Constants::CHARSETS_LIKELY_TO_FP_AS_CAPS; |
941
|
0
|
0
|
|
|
|
0
|
if ($subjraw =~ /=\?${CLTFAC}\?/i) { |
942
|
0
|
|
|
|
|
0
|
return 0; |
943
|
|
|
|
|
|
|
} |
944
|
|
|
|
|
|
|
|
945
|
0
|
|
0
|
|
|
0
|
return length($subject) && ($subject eq uc($subject)); |
946
|
|
|
|
|
|
|
} |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
sub check_for_to_in_subject { |
949
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pms, $test) = @_; |
950
|
|
|
|
|
|
|
|
951
|
0
|
|
|
|
|
0
|
my $full_to = $pms->get('To:addr'); |
952
|
0
|
0
|
|
|
|
0
|
return 0 unless $full_to ne ''; |
953
|
|
|
|
|
|
|
|
954
|
0
|
|
|
|
|
0
|
my $subject = $pms->get('Subject'); |
955
|
|
|
|
|
|
|
|
956
|
0
|
0
|
|
|
|
0
|
if ($test eq "address") { |
|
|
0
|
|
|
|
|
|
957
|
0
|
|
|
|
|
0
|
return $subject =~ /\b\Q$full_to\E\b/i; # "user@domain.com" |
958
|
|
|
|
|
|
|
} |
959
|
|
|
|
|
|
|
elsif ($test eq "user") { |
960
|
0
|
|
|
|
|
0
|
my $to = $full_to; |
961
|
0
|
|
|
|
|
0
|
$to =~ s/\@.*//; |
962
|
0
|
|
|
|
|
0
|
my $subj = $subject; |
963
|
0
|
|
|
|
|
0
|
$subj =~ s/^\s+//; |
964
|
0
|
|
|
|
|
0
|
$subj =~ s/\s+$//; |
965
|
|
|
|
|
|
|
|
966
|
0
|
|
|
|
|
0
|
return $subject =~ /^(?: |
967
|
|
|
|
|
|
|
(?:re|fw):\s*(?:\w+\s+)?\Q$to\E$ |
968
|
|
|
|
|
|
|
|(?-i:\Q$to\E)\s*[,:;!?-](?:$|\s) |
969
|
|
|
|
|
|
|
|\Q$to\E$ |
970
|
|
|
|
|
|
|
|,\s*\Q$to\E[,:;!?-]$ |
971
|
|
|
|
|
|
|
)/ix; |
972
|
|
|
|
|
|
|
} |
973
|
0
|
|
|
|
|
0
|
return 0; |
974
|
|
|
|
|
|
|
} |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
sub check_outlook_message_id { |
977
|
77
|
|
|
77
|
0
|
230
|
my ($self, $pms) = @_; |
978
|
77
|
|
|
|
|
233
|
local ($_); |
979
|
|
|
|
|
|
|
|
980
|
77
|
|
|
|
|
284
|
my $id = $pms->get('MESSAGEID'); |
981
|
77
|
100
|
|
|
|
1416
|
return 0 if $id !~ /^<[0-9a-f]{4}([0-9a-f]{8})\$[0-9a-f]{8}\$[0-9a-f]{8}\@/; |
982
|
|
|
|
|
|
|
|
983
|
1
|
|
|
|
|
4
|
my $timetoken = hex($1); |
984
|
1
|
|
|
|
|
2
|
my $x = 0.0023283064365387; |
985
|
1
|
|
|
|
|
1
|
my $y = 27111902.8329849; |
986
|
|
|
|
|
|
|
|
987
|
1
|
|
|
|
|
2
|
my $fudge = 250; |
988
|
|
|
|
|
|
|
|
989
|
1
|
|
|
|
|
2
|
$_ = $pms->get('Date'); |
990
|
1
|
|
50
|
|
|
10
|
$_ = parse_rfc822_date($_) || 0; |
991
|
1
|
|
|
|
|
9
|
my $expected = int (($_ * $x) + $y); |
992
|
1
|
|
|
|
|
2
|
my $diff = $timetoken - $expected; |
993
|
1
|
50
|
|
|
|
3
|
return 0 if (abs($diff) < $fudge); |
994
|
|
|
|
|
|
|
|
995
|
1
|
|
|
|
|
4
|
$_ = $pms->get('Received'); |
996
|
1
|
|
|
|
|
34
|
/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+).*?$/; |
997
|
1
|
|
50
|
|
|
4
|
$_ = parse_rfc822_date($_) || 0; |
998
|
1
|
|
|
|
|
3
|
$expected = int(($_ * $x) + $y); |
999
|
1
|
|
|
|
|
2
|
$diff = $timetoken - $expected; |
1000
|
|
|
|
|
|
|
|
1001
|
1
|
|
|
|
|
20
|
return (abs($diff) >= $fudge); |
1002
|
|
|
|
|
|
|
} |
1003
|
|
|
|
|
|
|
|
1004
|
|
|
|
|
|
|
sub check_messageid_not_usable { |
1005
|
0
|
|
|
0
|
0
|
|
my ($self, $pms) = @_; |
1006
|
0
|
|
|
|
|
|
local ($_); |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
# Lyris eats message-ids. also some ezmlm, I think :( |
1009
|
0
|
|
|
|
|
|
$_ = $pms->get("List-Unsubscribe"); |
1010
|
0
|
0
|
|
|
|
|
return 1 if (/<mailto:(?:leave-\S+|\S+-unsubscribe)\@\S+>$/i); |
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
# ezmlm again |
1013
|
0
|
0
|
|
|
|
|
if($self->gated_through_received_hdr_remover($pms)) { return 1; } |
|
0
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
# Allen notes this as 'Wacky sendmail version?' |
1016
|
0
|
|
|
|
|
|
$_ = $pms->get("Received"); |
1017
|
0
|
0
|
|
|
|
|
return 1 if /\/CWT\/DCE\)/; |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
# Apr 2 2003 jm: iPlanet rewrites lots of stuff, including Message-IDs |
1020
|
0
|
0
|
|
|
|
|
return 1 if /iPlanet Messaging Server/; |
1021
|
|
|
|
|
|
|
|
1022
|
0
|
|
|
|
|
|
return 0; |
1023
|
|
|
|
|
|
|
} |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
# Return true if the count of $hdr headers are within the given range |
1026
|
|
|
|
|
|
|
sub check_header_count_range { |
1027
|
0
|
|
|
0
|
0
|
|
my ($self, $pms, $hdr, $min, $max) = @_; |
1028
|
0
|
|
|
|
|
|
my %uniq; |
1029
|
0
|
|
|
|
|
|
my @hdrs = grep(!$uniq{$_}++, $pms->{msg}->get_header ($hdr)); |
1030
|
0
|
|
0
|
|
|
|
return (scalar @hdrs >= $min && scalar @hdrs <= $max); |
1031
|
|
|
|
|
|
|
} |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
sub check_unresolved_template { |
1034
|
0
|
|
|
0
|
0
|
|
my ($self, $pms) = @_; |
1035
|
|
|
|
|
|
|
|
1036
|
0
|
|
|
|
|
|
my $all = $pms->get('ALL'); # cached access |
1037
|
0
|
|
|
|
|
|
$all =~ s/\n[ \t]+/ /gs; # fix continuation lines |
1038
|
|
|
|
|
|
|
|
1039
|
0
|
|
|
|
|
|
for my $header (split(/\n/, $all)) { |
1040
|
|
|
|
|
|
|
# slightly faster to test in this order |
1041
|
0
|
0
|
0
|
|
|
|
if ($header =~ /%[A-Z][A-Z_-]/ && |
1042
|
|
|
|
|
|
|
$header !~ /^(?:X-VMS-To|X-UIDL|X-Face|To|Cc|From|Subject|References|In-Reply-To|(?:X-|Resent-|X-Original-)?Message-Id):/i) |
1043
|
|
|
|
|
|
|
{ |
1044
|
0
|
|
|
|
|
|
return 1; |
1045
|
|
|
|
|
|
|
} |
1046
|
|
|
|
|
|
|
} |
1047
|
0
|
|
|
|
|
|
return 0; |
1048
|
|
|
|
|
|
|
} |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
sub check_ratware_name_id { |
1051
|
0
|
|
|
0
|
0
|
|
my ($self, $pms) = @_; |
1052
|
|
|
|
|
|
|
|
1053
|
0
|
|
|
|
|
|
my $mid = $pms->get('MESSAGEID'); |
1054
|
0
|
|
|
|
|
|
my $from = $pms->get('From'); |
1055
|
0
|
0
|
|
|
|
|
if ($mid =~ m/<[A-Z]{28}\.([^>]+?)>/) { |
1056
|
0
|
0
|
|
|
|
|
if ($from =~ m/\"[^\"]+\"\s*<\Q$1\E>/) { |
1057
|
0
|
|
|
|
|
|
return 1; |
1058
|
|
|
|
|
|
|
} |
1059
|
|
|
|
|
|
|
} |
1060
|
0
|
|
|
|
|
|
return 0; |
1061
|
|
|
|
|
|
|
} |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
sub check_ratware_envelope_from { |
1064
|
0
|
|
|
0
|
0
|
|
my ($self, $pms) = @_; |
1065
|
|
|
|
|
|
|
|
1066
|
0
|
|
|
|
|
|
my $to = $pms->get('To:addr'); |
1067
|
0
|
|
|
|
|
|
my $from = $pms->get('EnvelopeFrom:addr'); |
1068
|
|
|
|
|
|
|
|
1069
|
0
|
0
|
0
|
|
|
|
return 0 if $from eq '' || $to eq ''; |
1070
|
0
|
0
|
|
|
|
|
return 0 if $from =~ /^SRS\d=/; |
1071
|
|
|
|
|
|
|
|
1072
|
0
|
0
|
|
|
|
|
if ($to =~ /^([^@]+)@(.+)$/) { |
1073
|
0
|
|
|
|
|
|
my($user,$dom) = ($1,$2); |
1074
|
0
|
|
|
|
|
|
$dom = $self->{main}->{registryboundaries}->trim_domain($dom); |
1075
|
|
|
|
|
|
|
return unless |
1076
|
0
|
0
|
|
|
|
|
($self->{main}->{registryboundaries}->is_domain_valid($dom)); |
1077
|
|
|
|
|
|
|
|
1078
|
0
|
0
|
|
|
|
|
return 1 if ($from =~ /\b\Q$dom\E.\Q$user\E@/i); |
1079
|
|
|
|
|
|
|
} |
1080
|
|
|
|
|
|
|
|
1081
|
0
|
|
|
|
|
|
return 0; |
1082
|
|
|
|
|
|
|
} |
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
# ADDED FROM BUG 6487 |
1085
|
|
|
|
|
|
|
sub check_equal_from_domains { |
1086
|
0
|
|
|
0
|
0
|
|
my ($self, $pms) = @_; |
1087
|
|
|
|
|
|
|
|
1088
|
0
|
|
|
|
|
|
my $from = $pms->get('From:addr'); |
1089
|
0
|
|
|
|
|
|
my $envfrom = $pms->get('EnvelopeFrom:addr'); |
1090
|
|
|
|
|
|
|
|
1091
|
0
|
|
|
|
|
|
local $1; |
1092
|
0
|
|
|
|
|
|
my $fromdomain = ''; |
1093
|
|
|
|
|
|
|
#Revised regexp from 6487 comment 3 |
1094
|
0
|
0
|
|
|
|
|
$fromdomain = $1 if $from =~ /\@([^@]*)\z/; |
1095
|
0
|
|
|
|
|
|
$fromdomain =~ s/^.+\.([^\.]+\.[^\.]+)$/$1/; |
1096
|
0
|
0
|
|
|
|
|
return 0 if $fromdomain eq ''; |
1097
|
|
|
|
|
|
|
|
1098
|
0
|
|
|
|
|
|
my $envfromdomain = ''; |
1099
|
0
|
0
|
|
|
|
|
$envfromdomain = $1 if $envfrom =~ /\@([^@]*)\z/; |
1100
|
0
|
|
|
|
|
|
$envfromdomain =~ s/^.+\.([^\.]+\.[^\.]+)$/$1/; |
1101
|
0
|
0
|
|
|
|
|
return 0 if $envfromdomain eq ''; |
1102
|
|
|
|
|
|
|
|
1103
|
0
|
|
|
|
|
|
dbg("eval: From 2nd level domain: $fromdomain, EnvelopeFrom 2nd level domain: $envfromdomain"); |
1104
|
|
|
|
|
|
|
|
1105
|
0
|
0
|
|
|
|
|
return 1 if lc($fromdomain) ne lc($envfromdomain); |
1106
|
|
|
|
|
|
|
|
1107
|
0
|
|
|
|
|
|
return 0; |
1108
|
|
|
|
|
|
|
} |
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
########################################################################### |
1112
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
# support eval-test verbose debugs using "-Deval" |
1114
|
|
|
|
|
|
|
sub dbg2 { |
1115
|
0
|
0
|
|
0
|
0
|
|
if (would_log('dbg', 'eval') == 2) { |
1116
|
0
|
|
|
|
|
|
dbg(@_); |
1117
|
|
|
|
|
|
|
} |
1118
|
|
|
|
|
|
|
} |
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
1; |