line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# <@LICENSE> |
2
|
|
|
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more |
3
|
|
|
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with |
4
|
|
|
|
|
|
|
# this work for additional information regarding copyright ownership. |
5
|
|
|
|
|
|
|
# The ASF licenses this file to you under the Apache License, Version 2.0 |
6
|
|
|
|
|
|
|
# (the "License"); you may not use this file except in compliance with |
7
|
|
|
|
|
|
|
# the License. You may obtain a copy of the License at: |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software |
12
|
|
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
13
|
|
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14
|
|
|
|
|
|
|
# See the License for the specific language governing permissions and |
15
|
|
|
|
|
|
|
# limitations under the License. |
16
|
|
|
|
|
|
|
# </@LICENSE> |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 NAME |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
MIMEEval - perform various tests against MIME structure and body |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 SYNOPSIS |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
loadplugin Mail::SpamAssassin::Plugin::MIMEEval |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
body NAME_OF_RULE eval:check_for_mime |
27
|
|
|
|
|
|
|
body NAME_OF_RULE eval:check_for_mime_html |
28
|
|
|
|
|
|
|
body NAME_OF_RULE eval:check_for_mime_html_only |
29
|
|
|
|
|
|
|
body NAME_OF_RULE eval:check_mime_multipart_ratio |
30
|
|
|
|
|
|
|
body NAME_OF_RULE eval:check_msg_parse_flags |
31
|
|
|
|
|
|
|
body NAME_OF_RULE eval:check_for_ascii_text_illegal |
32
|
|
|
|
|
|
|
body NAME_OF_RULE eval:check_abundant_unicode_ratio |
33
|
|
|
|
|
|
|
body NAME_OF_RULE eval:check_for_faraway_charset |
34
|
|
|
|
|
|
|
body NAME_OF_RULE eval:check_for_uppercase |
35
|
|
|
|
|
|
|
body NAME_OF_RULE eval:check_ma_non_text |
36
|
|
|
|
|
|
|
body NAME_OF_RULE eval:check_base64_length |
37
|
|
|
|
|
|
|
body NAME_OF_RULE eval:check_qp_ratio |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 DESCRIPTION |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
Perform various tests against MIME structure and body. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=cut |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
package Mail::SpamAssassin::Plugin::MIMEEval; |
46
|
|
|
|
|
|
|
|
47
|
21
|
|
|
21
|
|
159
|
use strict; |
|
21
|
|
|
|
|
54
|
|
|
21
|
|
|
|
|
622
|
|
48
|
21
|
|
|
21
|
|
129
|
use warnings; |
|
21
|
|
|
|
|
49
|
|
|
21
|
|
|
|
|
668
|
|
49
|
|
|
|
|
|
|
# use bytes; |
50
|
21
|
|
|
21
|
|
121
|
use re 'taint'; |
|
21
|
|
|
|
|
52
|
|
|
21
|
|
|
|
|
686
|
|
51
|
|
|
|
|
|
|
|
52
|
21
|
|
|
21
|
|
146
|
use Mail::SpamAssassin::Plugin; |
|
21
|
|
|
|
|
56
|
|
|
21
|
|
|
|
|
506
|
|
53
|
21
|
|
|
21
|
|
138
|
use Mail::SpamAssassin::Locales; |
|
21
|
|
|
|
|
52
|
|
|
21
|
|
|
|
|
582
|
|
54
|
21
|
|
|
21
|
|
128
|
use Mail::SpamAssassin::Constants qw(:sa CHARSETS_LIKELY_TO_FP_AS_CAPS); |
|
21
|
|
|
|
|
55
|
|
|
21
|
|
|
|
|
3503
|
|
55
|
21
|
|
|
21
|
|
162
|
use Mail::SpamAssassin::Util qw(untaint_var); |
|
21
|
|
|
|
|
60
|
|
|
21
|
|
|
|
|
1146
|
|
56
|
21
|
|
|
21
|
|
156
|
use Mail::SpamAssassin::Logger; |
|
21
|
|
|
|
|
68
|
|
|
21
|
|
|
|
|
75249
|
|
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
our @ISA = qw(Mail::SpamAssassin::Plugin); |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# constructor: register the eval rule |
61
|
|
|
|
|
|
|
sub new {
  my ($class, $mailsaobject) = @_;

  # Allow the constructor to be invoked on either a class name or an
  # already-constructed object.
  $class = ref($class) || $class;

  # Let the parent plugin class build the object, then re-bless it into
  # this package.
  my $self = $class->SUPER::new($mailsaobject);
  bless($self, $class);

  # Make every eval function provided by this plugin available to rules,
  # in the same order as before.
  my @eval_rules = qw(
    check_for_mime
    check_for_mime_html
    check_for_mime_html_only
    check_mime_multipart_ratio
    check_msg_parse_flags
    check_for_ascii_text_illegal
    check_abundant_unicode_ratio
    check_for_faraway_charset
    check_for_uppercase
    check_ma_non_text
    check_base64_length
    check_qp_ratio
  );
  foreach my $rule_name (@eval_rules) {
    $self->register_eval_rule($rule_name);
  }

  return $self;
}
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
########################################################################### |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# Returns true when $str holds more than three characters in the range
# 0x80-0xFF and those characters are at least as numerous as all the other
# characters in the string; returns false otherwise.
sub are_more_high_bits_set {
  my ($self, $str) = @_;

  # tr/// with an empty replacement list only counts the listed characters
  # and leaves $str untouched; unlike a //g match in list context it does
  # not build a temporary list of every match.
  my $numhis = ($str =~ tr/\200-\377//);
  my $numlos = length($str) - $numhis;

  return ($numlos <= $numhis && $numhis > 3);
}
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=over 4 |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=item has_check_for_ascii_text_illegal |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
Adds capability check for "if can()" for check_for_ascii_text_illegal |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=cut |
106
|
|
|
|
|
|
|
|
107
|
0
|
|
|
0
|
1
|
0
|
sub has_check_for_ascii_text_illegal {
  # capability marker for "if can()": always true
  return 1;
}
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item check_for_ascii_text_illegal |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
If a MIME part claims to be text/plain or text/plain;charset=us-ascii and the Content-Transfer-Encoding is 7bit (either explicitly or by default), then we should enforce the actual text being only TAB, NL, SPACE through TILDE, i.e. all 7bit characters excluding NO-WS-CTL (per RFC-2822). |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
All mainstream MTAs get this right. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=cut |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
sub check_for_ascii_text_illegal {
  my ($self, $pms) = @_;

  # The MIME scan runs at most once per message; its results are cached
  # in $pms.
  if (!exists $pms->{mime_checked_attachments}) {
    $self->_check_attachments($pms);
  }

  # True when the scan counted at least one offending line.
  if (exists $pms->{mime_ascii_text_illegal}) {
    return ($pms->{mime_ascii_text_illegal} > 0);
  }
  return 0;
}
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=item has_check_abundant_unicode_ratio |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
Adds capability check for "if can()" for check_abundant_unicode_ratio |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=cut |
130
|
|
|
|
|
|
|
|
131
|
0
|
|
|
0
|
1
|
0
|
sub has_check_abundant_unicode_ratio {
  # capability marker for "if can()": always true
  return 1;
}
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=item check_abundant_unicode_ratio |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
A MIME part claiming to be text/plain and containing Unicode characters must be encoded as quoted-printable or base64, or use UTF data coding (typically with 8bit encoding). Any message in 7bit or 8bit encoding containing (HTML) Unicode entities will not render them as Unicode, but literally. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Thus a few such sequences might occur on a mailing list of developers discussing such characters, but a message with a high density of such characters is likely spam. |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=cut |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub check_abundant_unicode_ratio {
  my ($self, $pms, undef, $ratio) = @_;

  # The threshold must be a decimal fraction such as "0.25" or ".5".
  # A rule written without an argument passes undef here; reject it up
  # front instead of feeding undef to the pattern match, which would only
  # produce an "uninitialized value" warning before returning 0 anyway.
  return 0 unless defined $ratio && $ratio =~ /^\d{0,3}\.\d{1,3}$/;

  # The MIME scan runs at most once per message; its results are cached
  # in $pms.
  $self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
  return 0 unless exists $pms->{mime_text_unicode_ratio};

  # True when the measured ratio reaches the requested threshold.
  return ($pms->{mime_text_unicode_ratio} >= $ratio);
}
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# Eval rule: returns 1 when the charset named in the message's Content-Type
# header is not acceptable for the configured ok_locales and the body text
# passes are_more_high_bits_set(); returns 0 otherwise.
sub check_for_faraway_charset {
  my ($self, $pms, $body) = @_;

  my $ct_line = $pms->get('Content-Type',undef);

  my @locales = Mail::SpamAssassin::Util::get_my_locales($self->{main}->{conf}->{ok_locales});

  # The "all" locale accepts every charset, so there is nothing to check.
  foreach my $locale (@locales) {
    return 0 if $locale eq "all";
  }

  # Without a Content-Type header, or without a charset in it, the rule
  # cannot fire.
  return 0 unless defined $ct_line;
  my $charset = get_charset_from_ct_line($ct_line);
  return 0 unless defined $charset;

  return 0 if Mail::SpamAssassin::Locales::is_charset_ok_for_locales($charset, @locales);

  # sanity check.  Some charsets (e.g. koi8-r) include the ASCII
  # 7-bit charset as well, so make sure we actually have a high
  # number of 8-bit chars in the body text first.
  my $text = join("\n", @$body);
  return 1 if $self->are_more_high_bits_set($text);

  return 0;
}
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Eval rule: returns the cached MIME scan result stored in $pms under the
# key named by $test, or 0 when no such key exists.
sub check_for_mime {
  my ($self, $pms, undef, $test) = @_;

  # The MIME scan runs at most once per message; its results are cached
  # in $pms.
  if (!exists $pms->{mime_checked_attachments}) {
    $self->_check_attachments($pms);
  }

  if (exists $pms->{$test}) {
    return $pms->{$test};
  }
  return 0;
}
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# any text/html MIME part |
189
|
|
|
|
|
|
|
sub check_for_mime_html {
  my ($self, $pms) = @_;

  # A message whose own Content-Type is text/html counts immediately,
  # without scanning the parts.
  my $ctype = $pms->get('Content-Type');
  if ($ctype =~ m{^text/html}i) {
    return 1;
  }

  # Otherwise rely on the (cached) per-part scan.
  if (!exists $pms->{mime_checked_attachments}) {
    $self->_check_attachments($pms);
  }

  if (exists $pms->{mime_body_html_count}) {
    return ($pms->{mime_body_html_count} > 0);
  }
  return 0;
}
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# HTML without some other type of MIME text part |
201
|
|
|
|
|
|
|
sub check_for_mime_html_only {
  my ($self, $pms) = @_;

  # A message whose own Content-Type is text/html is HTML-only by
  # definition.
  my $ctype = $pms->get('Content-Type');
  if ($ctype =~ m{^text/html}i) {
    return 1;
  }

  # Otherwise rely on the (cached) per-part scan.
  if (!exists $pms->{mime_checked_attachments}) {
    $self->_check_attachments($pms);
  }

  # Both counters must be present to decide.
  foreach my $counter (qw(mime_body_html_count mime_body_text_count)) {
    return 0 unless exists $pms->{$counter};
  }

  # HTML-only: at least one HTML part and no other text part.
  my $html_parts = $pms->{mime_body_html_count};
  my $text_parts = $pms->{mime_body_text_count};
  return ($html_parts > 0 && $text_parts == 0);
}
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# Eval rule: true when the cached text/html size ratio of a
# multipart/alternative message lies in the half-open range [$min, $max).
sub check_mime_multipart_ratio {
  my ($self, $pms, undef, $min, $max) = @_;

  # The MIME scan runs at most once per message; its results are cached
  # in $pms.
  if (!exists $pms->{mime_checked_attachments}) {
    $self->_check_attachments($pms);
  }

  if (!exists $pms->{mime_multipart_ratio}) {
    return 0;
  }

  my $ratio = $pms->{mime_multipart_ratio};
  return ($ratio >= $min && $ratio < $max);
}
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# Updates the per-message MIME counters and flags kept in $pms from the
# headers of one MIME part: content type ($ctype), transfer encoding ($cte),
# disposition ($cd) and charset ($charset).  $name is accepted but not used.
sub _check_mime_header {
  my ($self, $pms, $ctype, $cte, $cd, $charset, $name) = @_;

  $charset = '' unless $charset;

  my $is_base64 = ($cte =~ /base64/);

  # count HTML parts and other textual parts separately
  if ($ctype eq 'text/html') {
    $pms->{mime_body_html_count}++;
  }
  elsif ($ctype =~ m@^text@i) {
    $pms->{mime_body_text_count}++;
  }

  # count parts by transfer encoding
  if ($is_base64) {
    $pms->{mime_base64_count}++;
  }
  elsif ($cte =~ /quoted-printable/) {
    $pms->{mime_qp_count}++;
  }

  $pms->{mime_attachment}++ if $cd && $cd =~ /attachment/;

  # base64-encoded text with no charset or an ASCII charset, and without an
  # attachment/inline disposition
  if ($is_base64 && $ctype =~ /^text/) {
    my $ascii_or_unset = (!$charset || $charset =~ /(?:us-ascii|ansi_x3\.4-1968|iso-ir-6|ansi_x3\.4-1986|iso_646\.irv:1991|ascii|iso646-us|us|ibm367|cp367|csascii)/);
    my $has_disposition = ($cd && $cd =~ /^(?:attachment|inline)/);
    if ($ascii_or_unset && !$has_disposition) {
      $pms->{mime_base64_encoded_text} = 1;
    }
  }

  # an "iso-X-Y" charset other than iso-8859-N, iso-2022-jp or iso-2022-kr
  if ($charset =~ /iso-\S+-\S+\b/i) {
    if ($charset !~ /iso-(?:8859-\d{1,2}|2022-(?:jp|kr))\b/) {
      $pms->{mime_bad_iso_charset} = 1;
    }
  }

  # Retired checks, each "now a zero-hitter" and no longer computed:
  #   MIME_BASE64_LATIN    ($pms->{mime_base64_latin}):
  #     !$name && $cte =~ /base64/ &&
  #     $charset =~ /\b(?:us-ascii|iso-8859-(?:[12349]|1[0345])|windows-(?:125[0247]))\b/
  #   MIME_QP_NO_CHARSET   ($pms->{mime_qp_inline_no_charset}):
  #     $cte =~ /quoted-printable/ && $cd =~ /inline/ && !$charset
  #   MIME_HTML_NO_CHARSET ($pms->{mime_html_no_charset}):
  #     $ctype eq 'text/html' && !(defined($charset) && $charset) &&
  #     !($cd && $cd =~ /^(?:attachment|inline)/)

  if ($charset =~ /[a-z]/i) {
    # remember every charset seen, as a space-separated list
    $pms->{mime_html_charsets} = defined $pms->{mime_html_charsets}
      ? $pms->{mime_html_charsets} . " " . $charset
      : $charset;

    # once a faraway charset has been flagged there is no need to look again
    unless ($pms->{mime_faraway_charset}) {
      my @locales = Mail::SpamAssassin::Util::get_my_locales($self->{main}->{conf}->{ok_locales});

      my $accepts_all = grep { $_ eq "all" } @locales;
      if (!$accepts_all &&
          !Mail::SpamAssassin::Locales::is_charset_ok_for_locales($charset, @locales))
      {
        $pms->{mime_faraway_charset} = 1;
      }
    }
  }
}
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# Walks every MIME part of the message once and stores the results in $pms
# under the mime_* keys initialised below, for use by the eval rules of this
# plugin.  Sets $pms->{mime_checked_attachments} so callers can skip the scan
# on later invocations.
sub _check_attachments {
  my ($self, $pms) = @_;

  # MIME status
  my $qp_bytes = 0;       # total bytes in QP regions
  my $qp_count = 0;       # QP-encoded bytes in QP regions
  my @part_bytes;         # MIME part total bytes
  my @part_type;          # MIME part types

  my $normal_chars = 0;   # MIME text bytes that aren't encoded
  my $unicode_chars = 0;  # MIME text bytes that are unicode entities

  # MIME header information
  my $part = -1;          # MIME part index

  # indicate the scan has taken place
  $pms->{mime_checked_attachments} = 1;

  # results
  # Retired results, no longer computed: mime_base64_blanks (expensive to
  # determine, never used), mime_base64_illegal, mime_base64_latin,
  # mime_html_no_charset, mime_qp_illegal, mime_qp_inline_no_charset.
  $pms->{mime_base64_count} = 0;
  $pms->{mime_base64_encoded_text} = 0;
  $pms->{mime_body_html_count} = 0;
  $pms->{mime_body_text_count} = 0;
  $pms->{mime_faraway_charset} = 0;
  $pms->{mime_missing_boundary} = 0;
  $pms->{mime_multipart_alternative} = 0;
  $pms->{mime_multipart_ratio} = 1.0;
  $pms->{mime_qp_count} = 0;
  $pms->{mime_qp_long_line} = 0;
  $pms->{mime_qp_ratio} = 0;
  $pms->{mime_ascii_text_illegal} = 0;
  $pms->{mime_text_unicode_ratio} = 0;

  # Get all parts ...
  foreach my $p ($pms->{msg}->find_parts(qr/./)) {
    # message headers (the boundary element is not needed here)
    my ($ctype, undef, $charset, $name) = Mail::SpamAssassin::Util::parse_content_type($p->get_header("content-type"));

    if ($ctype eq 'multipart/alternative') {
      $pms->{mime_multipart_alternative} = 1;
    }

    # "|| ''" already guarantees a defined value, so lower-case directly
    my $cte = lc($p->get_header('Content-Transfer-Encoding') || '');
    chomp($cte);

    my $cd = lc($p->get_header('Content-Disposition') || '');
    chomp($cd);

    $charset = lc $charset if ($charset);
    $name = lc $name if ($name);

    $self->_check_mime_header($pms, $ctype, $cte, $cd, $charset, $name);

    # If we're not in a leaf node in the tree, there will be no raw
    # section, so skip it.
    next if !$p->is_leaf();

    $part++;
    $part_type[$part] = $ctype;
    $part_bytes[$part] = 0 if $cd !~ /attachment/;

    # These conditions depend only on the part's headers, so evaluate them
    # once per part rather than once per line.
    my $count_part_bytes = ($pms->{mime_multipart_alternative} && $cd !~ /attachment/ &&
                            ($ctype eq 'text/plain' || $ctype eq 'text/html'));
    my $is_qp = ($cte eq "quoted-printable");
    my $claims_ascii = ($ctype eq 'text/plain' && (!defined $charset || $charset eq 'us-ascii'));
    my $unencoded_plain = ($ctype eq 'text/plain' && ($cte eq '' || $cte eq '7bit' || $cte eq '8bit'));

    foreach my $line (@{$p->raw()}) {
      my $line_len = length($line);

      # size of the text/plain and text/html alternatives, for the ratio
      $part_bytes[$part] += $line_len if $count_part_bytes;

      if ($is_qp && $line !~ /^SPAM: /) {
        # RFC 5322: Each line SHOULD be no more than 78 characters,
        #           excluding the CRLF.
        # RFC 2045: The Quoted-Printable encoding REQUIRES that
        #           encoded lines be no more than 76 characters long.
        # Bug 5491: 6% of email classified as HAM by SA triggered the
        #           MIME_QP_LONG_LINE rule.  Apple Mail can generate a QP-line
        #           that is 2 chars too long. Same goes for Outlook Web Access.
        # lines include one trailing \n character
        # (a limit of 76+1 would conform to both RFC 5322 and RFC 2045)
        if ($line_len > 78+1) {   # conforms to RFC 5322 only, not RFC 2045
          $pms->{mime_qp_long_line} = 1;
        }
        $qp_bytes += $line_len;

        # MIME_QP_DEFICIENT (illegal substrings per RFC 2045, recorded as
        # mime_qp_illegal) is a zero-hitter now and no longer checked.

        # count excessive QP bytes
        if (index($line, '=') != -1) {
          # number of =XX escapes that encode a TAB or a printable ASCII
          # character (0x20-0x7E) other than "=" (=3D)
          my $qp = () = ($line =~ m/=(?:09|3[0-9ABCEF]|[2456][0-9A-F]|7[0-9A-E])/g);
          if ($qp) {
            $qp_count += $qp;
            # tabs and spaces at end of encoded line are okay.  Also, multiple
            # whitespace at the end of a line are OK, like ">=20=20=20=20=20=20".
            my ($trailing) = ($line =~ m/((?:=09|=20)+)\s*$/g);
            if ($trailing) {
              $qp_count -= (length($trailing) / 3);
            }
          }
        }
      }

      # if our charset is ASCII, this should only contain 7-bit characters
      # except NUL or a free-standing CR. anything else is a violation of
      # the definition of charset="us-ascii".
      if ($claims_ascii && $line =~ m/[\x00\x0d\x80-\xff]+/) {
        if (would_log('dbg', 'eval')) {
          # show the offending bytes as <hex> in the debug output
          my $str = $line;
          $str =~ s/([\x00\x0d\x80-\xff]+)/'<' . unpack('H*', $1) . '>'/eg;
          dbg("check: ascii_text_illegal: matches " . $str . "\n");
        }
        $pms->{mime_ascii_text_illegal}++;
      }

      # if we're text/plain, we should never see unicode escapes in this
      # format, especially not for 7bit or 8bit.
      if ($unencoded_plain) {
        # strip the entities from a copy; the substitution count is the
        # number of entities on this line
        my $text = $line;
        my $subs = ($text =~ s/&#x[0-9A-F]{4};//g);
        $normal_chars += length($text);
        $unicode_chars += $subs;

        if ($subs && would_log('dbg', 'eval')) {
          my $str = $line;
          $str = substr($str, 0, 512) . '...' if (length($str) > 512);
          dbg("check: abundant_unicode: " . $str . " (" . $subs . ")\n");
        }
      }
    }
  }

  if ($qp_bytes) {
    $pms->{mime_qp_ratio} = $qp_count / $qp_bytes;
    $pms->{mime_qp_count} = $qp_count;
    $pms->{mime_qp_bytes} = $qp_bytes;
  }

  if ($normal_chars) {
    $pms->{mime_text_unicode_ratio} = $unicode_chars / $normal_chars;
  }

  if ($pms->{mime_multipart_alternative}) {
    my $text;
    my $html;
    # bug 4207: we want the size of the last parts
    foreach my $i (reverse 0 .. $part) {
      next if !defined $part_bytes[$i];
      if (!defined($html) && $part_type[$i] eq 'text/html') {
        $html = $part_bytes[$i];
      }
      elsif (!defined($text) && $part_type[$i] eq 'text/plain') {
        $text = $part_bytes[$i];
      }
      last if (defined($html) && defined($text));
    }
    if (defined($text) && defined($html) && $html > 0) {
      $pms->{mime_multipart_ratio} = ($text / $html);
    }
  }

  # Look to see if any multipart boundaries are not "balanced"
  foreach my $val (values %{$pms->{msg}->{mime_boundary_state}}) {
    if ($val != 0) {
      $pms->{mime_missing_boundary} = 1;
      last;
    }
  }
}
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=item has_check_qp_ratio |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
Adds capability check for "if can()" for check_qp_ratio |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
=cut |
510
|
|
|
|
|
|
|
|
511
|
0
|
|
|
0
|
1
|
0
|
sub has_check_qp_ratio {
  # capability marker for "if can()": always true
  return 1;
}
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=item check_qp_ratio |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
Takes a min ratio to use in eval to see if there is any spamminess to the ratio of |
516
|
|
|
|
|
|
|
quoted printable to total bytes in an email. |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=back |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=cut |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
sub check_qp_ratio {
  my ($self, $pms, undef, $min) = @_;

  # The MIME scan runs at most once per message; its results are cached
  # in $pms.
  if (!exists $pms->{mime_checked_attachments}) {
    $self->_check_attachments($pms);
  }

  if (!exists $pms->{mime_qp_ratio}) {
    return 0;
  }

  my $qp_ratio = $pms->{mime_qp_ratio};

  dbg("eval: qp_ratio - %s - check for min of %s", $qp_ratio, $min);

  # 1 when a ratio was measured and it reaches the requested minimum
  if (defined $qp_ratio && $qp_ratio >= $min) {
    return 1;
  }
  return 0;
}
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
# Eval rule: true when the message object carries a defined value under the
# given flag name.  If the third argument is a reference, the flag name is
# taken from the fourth argument instead.
sub check_msg_parse_flags {
  my ($self, $pms, $type, $type2) = @_;

  my $flag = ref($type) ? $type2 : $type;

  return defined $pms->{msg}->{$flag};
}
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
# Eval rule: true when the percentage of upper-case letters in the body,
# relative to all ASCII letters and digits counted, is greater than $min and
# at most $max.  The percentage is computed once and cached in
# $pms->{uppercase}.
sub check_for_uppercase {
  my ($self, $pms, $body, $min, $max) = @_;

  if (exists $pms->{uppercase}) {
    return ($pms->{uppercase} > $min && $pms->{uppercase} <= $max);
  }

  if ($self->body_charset_is_likely_to_fp($pms)) {
    $pms->{uppercase} = 0;
    return 0;
  }

  # Dec 20 2002 jm: trade off some speed for low memory footprint, by
  # iterating over the array computing sums, instead of joining the
  # array into a giant string and working from that.

  my $len = 0;
  my $lower = 0;
  my $upper = 0;
  foreach my $body_line (@{$body}) {
    # examine lines in the body that have an intermediate space
    next unless $body_line =~ /\S\s+\S/;
    # strip out lingering base64 (currently possible for forwarded messages)
    next if $body_line =~ /^(?:[A-Za-z0-9+\/=]{60,76} ){2}/;

    my $line = $body_line;  # copy so we don't muck up the original

    # Remove each ISO-2022-JP run (ESC $ B ... ESC ( B).  The match is
    # non-greedy so that ordinary text lying between two such runs on the
    # same line is kept and counted; a greedy match would delete everything
    # from the first ESC $ B to the last ESC ( B.
    $line =~ s/\x1b\$B.*?\x1b\(B//gs;

    $len += length($line);

    # count numerals as lower case, otherwise 'date|mail' is spam
    # (tr/// with an empty replacement list only counts)
    $lower += ($line =~ tr/a-z0-9//);
    $upper += ($line =~ tr/A-Z//);
  }

  # report only on mails above a minimum size; otherwise one
  # or two acronyms can throw it off
  if ($len < 200) {
    $pms->{uppercase} = 0;
    return 0;
  }

  my $counted = $upper + $lower;
  $pms->{uppercase} = $counted == 0 ? 0 : ($upper / $counted) * 100;

  return ($pms->{uppercase} > $min && $pms->{uppercase} <= $max);
}
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
# Returns 1 when the charset of the message's Content-Type header, or any
# charset recorded in $pms->{mime_html_charsets}, matches
# CHARSETS_LIKELY_TO_FP_AS_CAPS; returns 0 otherwise.
sub body_charset_is_likely_to_fp {
  my ($self, $pms) = @_;

  # check for charsets where this test will FP -- iso-2022-jp, gb2312,
  # koi8-r etc.
  #
  if (!exists $pms->{mime_checked_attachments}) {
    $self->_check_attachments($pms);
  }

  # Candidates: the top-level charset first, then the space-separated list
  # collected during the MIME scan.
  my @candidates;
  my $ct_line = $pms->get('Content-Type',undef);
  if (defined $ct_line) {
    my $top_charset = get_charset_from_ct_line($ct_line);
    push(@candidates, $top_charset) if defined $top_charset;
  }
  if (defined $pms->{mime_html_charsets}) {
    push(@candidates, split(' ', $pms->{mime_html_charsets}));
  }

  my $CHARSETS_LIKELY_TO_FP_AS_CAPS = CHARSETS_LIKELY_TO_FP_AS_CAPS;
  my $hits = grep { $_ =~ /^${CHARSETS_LIKELY_TO_FP_AS_CAPS}$/ } @candidates;

  return $hits ? 1 : 0;
}
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
# Extracts the charset parameter from a Content-Type header value.
# Returns the charset exactly as written (case is not changed), or an empty
# list / undef when $type is undefined or carries no charset parameter.
sub get_charset_from_ct_line {
  my $type = shift;
  return unless defined $type;

  # Try the double-quoted form, then the single-quoted form, then a bare
  # token.  The bare token ends at whitespace or at ";", so a following
  # parameter ("charset=utf-8; format=flowed") does not leave a trailing
  # ";" glued to the charset name.
  my @patterns = (
    qr/charset="([^"]+)"/i,
    qr/charset='([^']+)'/i,
    qr/charset=([^\s;]+)/i,
  );
  foreach my $pattern (@patterns) {
    return $1 if $type =~ $pattern;
  }

  return;
}
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
# came up on the users@ list, look for multipart/alternative parts which |
628
|
|
|
|
|
|
|
# include non-text parts -- skip certain types which occur normally in ham |
629
|
|
|
|
|
|
|
sub check_ma_non_text {
  my ($self, $pms) = @_;

  my @alternatives = $pms->{msg}->find_parts(qr@^multipart/alternative$@i);

  foreach my $alternative (@alternatives) {
    foreach my $child ($alternative->find_parts(qr/./, 1, 0)) {
      my $type = $child->{'type'};

      # types that are tolerated inside multipart/alternative
      my $tolerated = (lc($type) eq 'multipart/related'
                       || lc($type) eq 'application/rtf'
                       || $type =~ m@^text/@i);

      # anything else is a non-text alternative
      return 1 unless $tolerated;
    }
  }

  return 0;
}
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
# Eval rule: true when the longest raw line found in the message's base64
# parts is at least $min and, if $max is given, no longer than $max.
sub check_base64_length {
  # the third argument is the body array, which is not needed
  my ($self, $pms, undef, $min, $max) = @_;

  # measure once per message and cache the result
  unless (defined $pms->{base64_length}) {
    $pms->{base64_length} = $self->_check_base64_length($pms->{msg});
  }

  if (defined $max && $pms->{base64_length} > $max) {
    return 0;
  }
  return $pms->{base64_length} >= $min;
}
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
# Returns the length of the longest raw line found in any part of $msg whose
# Content-Transfer-Encoding is base64, or 0 when there is none.  Parts of
# type application/ics are skipped.
sub _check_base64_length {
  my ($self, $msg) = @_;

  my $longest = 0;

  foreach my $p ($msg->find_parts(qr@.@, 1)) {
    my $ctype=
      Mail::SpamAssassin::Util::parse_content_type($p->get_header('content-type'));

    # FPs from Google Calendar invites, etc.
    # perhaps just limit to test, and image?
    if ($ctype eq 'application/ics') {
      next;
    }

    my $cte = lc($p->get_header('content-transfer-encoding') || '');
    if ($cte !~ /^base64$/) {
      next;
    }

    foreach my $raw_line ( @{$p->raw()} ) {
      my $line_len = length $raw_line;
      $longest = $line_len if $line_len > $longest;
    }
  }

  return $longest;
}
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
1; |