File Coverage

blib/lib/Mail/SpamAssassin/Conf.pm
Criterion Covered Total %
statement 554 969 57.1
branch 115 384 29.9
condition 37 146 25.3
subroutine 60 110 54.5
pod 0 44 0.0
total 766 1653 46.3


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin::Conf - SpamAssassin configuration file
21              
22             =head1 SYNOPSIS
23              
24             # a comment
25              
26             rewrite_header Subject *****SPAM*****
27              
28             full PARA_A_2_C_OF_1618 /Paragraph .a.{0,10}2.{0,10}C. of S. 1618/i
29             describe PARA_A_2_C_OF_1618 Claims compliance with senate bill 1618
30              
31             header FROM_HAS_MIXED_NUMS From =~ /\d+[a-z]+\d+\S*@/i
32             describe FROM_HAS_MIXED_NUMS From: contains numbers mixed in with letters
33              
34             score A_HREF_TO_REMOVE 2.0
35              
36             lang es describe FROM_FORGED_HOTMAIL Forzado From: simula ser de hotmail.com
37              
38             lang pt_BR report O programa detetor de Spam ZOE [...]
39              
40             =head1 DESCRIPTION
41              
42             SpamAssassin is configured using traditional UNIX-style configuration files,
43             loaded from the C</usr/share/spamassassin> and C</etc/mail/spamassassin>
44             directories.
45              
46             The following web page lists the most important configuration settings
47             used to configure SpamAssassin; novices are encouraged to read it first:
48              
49             http://wiki.apache.org/spamassassin/ImportantInitialConfigItems
50              
51             =head1 FILE FORMAT
52              
53             The C<#> character starts a comment, which continues until end of line.
54             B<NOTE:> if the C<#> character is to be used as part of a rule or
55             configuration option, it must be escaped with a backslash. i.e.: C<\#>
56              
57             Whitespace in the files is not significant, but please note that starting a
58             line with whitespace is deprecated, as we reserve its use for multi-line rule
59             definitions, at some point in the future.
60              
61             Currently, each rule or configuration setting must fit on one line; multi-line
62             settings are not supported yet.
63              
64             File and directory paths can use C<~> to refer to the user's home
65             directory, but no other shell-style path extensions such as globbing or
66             C<~user/> are supported.
67              
68             Where appropriate below, default values are listed in parentheses.
69              
70             =head1 USER PREFERENCES
71              
72             The following options can be used in both site-wide (C<local.cf>) and
73             user-specific (C<user_prefs>) configuration files to customize how
74             SpamAssassin handles incoming email messages.
75              
76             =cut
77              
78             package Mail::SpamAssassin::Conf;
79              
80 40     40   245 use strict;
  40         74  
  40         1132  
81 40     40   177 use warnings;
  40         73  
  40         1283  
82             # use bytes;
83 40     40   322 use re 'taint';
  40         90  
  40         1110  
84              
85 40     40   11248 use Mail::SpamAssassin::Util;
  40         136  
  40         2527  
86 40     40   13761 use Mail::SpamAssassin::NetSet;
  40         108  
  40         1526  
87 40     40   253 use Mail::SpamAssassin::Constants qw(:sa :ip);
  40         92  
  40         6668  
88 40     40   13890 use Mail::SpamAssassin::Conf::Parser;
  40         127  
  40         1649  
89 40     40   709 use Mail::SpamAssassin::Logger;
  40         94  
  40         2332  
90 40     40   13752 use Mail::SpamAssassin::Util::TieOneStringHash;
  40         108  
  40         1354  
91 40     40   264 use Mail::SpamAssassin::Util qw(untaint_var);
  40         81  
  40         1736  
92 40     40   231 use File::Spec;
  40         74  
  40         638287  
93              
94             our @ISA = qw();
95              
96             our $COLLECT_REGRESSION_TESTS; # Used only for unit tests.
97              
98             # odd => eval test. Not constants so they can be shared with Parser
99             # TODO: move to Constants.pm?
100             our $TYPE_HEAD_TESTS = 0x0008;
101             our $TYPE_HEAD_EVALS = 0x0009;
102             our $TYPE_BODY_TESTS = 0x000a;
103             our $TYPE_BODY_EVALS = 0x000b;
104             our $TYPE_FULL_TESTS = 0x000c;
105             our $TYPE_FULL_EVALS = 0x000d;
106             our $TYPE_RAWBODY_TESTS = 0x000e;
107             our $TYPE_RAWBODY_EVALS = 0x000f;
108             our $TYPE_URI_TESTS = 0x0010;
109             our $TYPE_URI_EVALS = 0x0011;
110             our $TYPE_META_TESTS = 0x0012;
111             our $TYPE_RBL_EVALS = 0x0013;
112             our $TYPE_EMPTY_TESTS = 0x0014;
113              
114             my @rule_types = ("body_tests", "uri_tests", "uri_evals",
115             "head_tests", "head_evals", "body_evals", "full_tests",
116             "full_evals", "rawbody_tests", "rawbody_evals",
117             "rbl_evals", "meta_tests");
118              
119             #Removed $VERSION per BUG 6422
120             #$VERSION = 'bogus'; # avoid CPAN.pm picking up version strings later
121              
122             # these are variables instead of constants so that other classes can
123             # access them; if they're constants, they'd have to go in Constants.pm
124             # TODO: move to Constants.pm?
125             our $CONF_TYPE_STRING = 1;
126             our $CONF_TYPE_BOOL = 2;
127             our $CONF_TYPE_NUMERIC = 3;
128             our $CONF_TYPE_HASH_KEY_VALUE = 4;
129             our $CONF_TYPE_ADDRLIST = 5;
130             our $CONF_TYPE_TEMPLATE = 6;
131             our $CONF_TYPE_NOARGS = 7;
132             our $CONF_TYPE_STRINGLIST = 8;
133             our $CONF_TYPE_IPADDRLIST = 9;
134             our $CONF_TYPE_DURATION = 10;
135             our $MISSING_REQUIRED_VALUE = '-99999999999999'; # string expected by parser
136             our $INVALID_VALUE = '-99999999999998';
137             our $INVALID_HEADER_FIELD_NAME = '-99999999999997';
138              
139             # set to "1" by the test suite code, to record regression tests
140             # $Mail::SpamAssassin::Conf::COLLECT_REGRESSION_TESTS = 1;
141              
142             # search for "sub new {" to find the start of the code
143             ###########################################################################
144              
145             sub set_default_commands {
146 81     81 0 303 my($self) = @_;
147              
148             # see "perldoc Mail::SpamAssassin::Conf::Parser" for details on this fmt.
149             # push each config item like this, to avoid a POD bug; it can't just accept
150             # ( { ... }, { ... }, { ...} ) otherwise POD parsing dies.
151 81         239 my @cmds;
152              
153             =head2 SCORING OPTIONS
154              
155             =over 4
156              
157             =item required_score n.nn (default: 5)
158              
159             Set the score required before a mail is considered spam. C<n.nn> can
160             be an integer or a real number. 5.0 is the default setting, and is
161             quite aggressive; it would be suitable for a single-user setup, but if
162             you're an ISP installing SpamAssassin, you should probably set the
163             default to be more conservative, like 8.0 or 10.0. It is not
164             recommended to automatically delete or discard messages marked as
165             spam, as your users B<will> complain, but if you choose to do so, only
166             delete messages with an exceptionally high score such as 15.0 or
167             higher. This option was previously known as C<required_hits> and that
168             name is still accepted, but is deprecated.
169              
170             =cut
171              
172 81         1593 push (@cmds, {
173             setting => 'required_score',
174             aliases => ['required_hits'], # backward compatible
175             default => 5,
176             type => $CONF_TYPE_NUMERIC,
177             });
178              
179             =item score SYMBOLIC_TEST_NAME n.nn [ n.nn n.nn n.nn ]
180              
181             Assign scores (the number of points for a hit) to a given test.
182             Scores can be positive or negative real numbers or integers.
183             C<SYMBOLIC_TEST_NAME> is the symbolic name used by SpamAssassin for
184             that test; for example, 'FROM_ENDS_IN_NUMS'.
185              
186             If only one valid score is listed, then that score is always used
187             for a test.
188              
189             If four valid scores are listed, then the score that is used depends
190             on how SpamAssassin is being used. The first score is used when
191             both Bayes and network tests are disabled (score set 0). The second
192             score is used when Bayes is disabled, but network tests are enabled
193             (score set 1). The third score is used when Bayes is enabled and
194             network tests are disabled (score set 2). The fourth score is used
195             when Bayes is enabled and network tests are enabled (score set 3).
196              
197             Setting a rule's score to 0 will disable that rule from running.
198              
199             If any of the score values are surrounded by parentheses '()', then
200             all of the scores in the line are considered to be relative to the
201             already set score. i.e.: '(3)' means increase the score for this
202             rule by 3 points in all score sets. '(3) (0) (3) (0)' means increase
203             the score for this rule by 3 in score sets 0 and 2 only.
204              
205             If no score is given for a test by the end of the configuration,
206             a default score is assigned: a score of 1.0 is used for all tests,
207             except those whose names begin with 'T_' (this is used to indicate a
208             rule in testing) which receive 0.01.
209              
210             Note that test names which begin with '__' are indirect rules used
211             to compose meta-match rules and can also act as prerequisites to
212             other rules. They are not scored or listed in the 'tests hit'
213             reports, but assigning a score of 0 to an indirect rule will disable
214             it from running.
215              
216             =cut
217              
218             push (@cmds, {
219             setting => 'score',
220             is_frequent => 1,
221             code => sub {
222 1265     1265   2472 my ($self, $key, $value, $line) = @_;
223 1265         4774 my($rule, @scores) = split(/\s+/, $value);
224 1265 50 33     7551 unless (defined $value && $value !~ /^$/ &&
      66        
      66        
225             (scalar @scores == 1 || scalar @scores == 4)) {
226 0         0 info("config: score: requires a symbolic rule name and 1 or 4 scores");
227 0         0 return $MISSING_REQUIRED_VALUE;
228             }
229              
230             # Figure out if we're doing relative scores, remove the parens if we are
231 1265         2093 my $relative = 0;
232 1265         2201 foreach (@scores) {
233 1841         3649 local ($1);
234 1841 100       3464 if (s/^\((-?\d+(?:\.\d+)?)\)$/$1/) {
235 9         11 $relative = 1;
236             }
237 1841 50       7768 unless (/^-?\d+(?:\.\d+)?$/) {
238 0         0 info("config: score: the non-numeric score ($_) is not valid, " .
239             "a numeric score is required");
240 0         0 return $INVALID_VALUE;
241             }
242             }
243              
244 1265 50 66     3062 if ($relative && !exists $self->{scoreset}->[0]->{$rule}) {
245 0         0 info("config: score: relative score without previous setting in " .
246             "configuration");
247 0         0 return $INVALID_VALUE;
248             }
249              
250             # If we're only passed 1 score, copy it to the other scoresets
251 1265 50       2900 if (@scores) {
252 1265 100       2362 if (@scores != 4) {
253 1073         2420 @scores = ( $scores[0], $scores[0], $scores[0], $scores[0] );
254             }
255              
256             # Set the actual scoreset values appropriately
257 1265         2297 for my $index (0..3) {
258             my $score = $relative ?
259 5060 100       7971 $self->{scoreset}->[$index]->{$rule} + $scores[$index] :
260             $scores[$index];
261              
262 5060         15757 $self->{scoreset}->[$index]->{$rule} = $score + 0.0;
263             }
264             }
265             }
266 81         2219 });
267              
268             =back
269              
270             =head2 WHITELIST AND BLACKLIST OPTIONS
271              
272             =over 4
273              
274             =item whitelist_from user@example.com
275              
276             Used to whitelist sender addresses which send mail that is often tagged
277             (incorrectly) as spam.
278              
279             Use of this setting is not recommended, since it blindly trusts the message,
280             which is routinely and easily forged by spammers and phish senders. The
281             recommended solution is to instead use C<whitelist_auth> or other authenticated
282             whitelisting methods, or C<whitelist_from_rcvd>.
283              
284             Whitelist and blacklist addresses are now file-glob-style patterns, so
285             C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
286             Specifically, C<*> and C<?> are allowed, but all other metacharacters
287             are not. Regular expressions are not used for security reasons.
288             Matching is case-insensitive.
289              
290             Multiple addresses per line, separated by spaces, is OK. Multiple
291             C<whitelist_from> lines are also OK.
292              
293             The headers checked for whitelist addresses are as follows: if C<Resent-From>
294             is set, use that; otherwise check all addresses taken from the following
295             set of headers:
296              
297             Envelope-Sender
298             Resent-Sender
299             X-Envelope-From
300             From
301              
302             In addition, the "envelope sender" data, taken from the SMTP envelope data
303             where this is available, is looked up. See C<envelope_sender_header>.
304              
305             e.g.
306              
307             whitelist_from joe@example.com fred@example.com
308             whitelist_from *@example.com
309              
310             =cut
311              
312 81         739 push (@cmds, {
313             setting => 'whitelist_from',
314             type => $CONF_TYPE_ADDRLIST,
315             });
316              
317             =item unwhitelist_from user@example.com
318              
319             Used to override a default whitelist_from entry, so for example a distribution
320             whitelist_from can be overridden in a local.cf file, or an individual user can
321             override a whitelist_from entry in their own C<user_prefs> file.
322             The specified email address has to match exactly (although case-insensitively)
323             the address previously used in a whitelist_from line, which implies that a
324             wildcard only matches literally the same wildcard (not 'any' address).
325              
326             e.g.
327              
328             unwhitelist_from joe@example.com fred@example.com
329             unwhitelist_from *@example.com
330              
331             =cut
332              
333 81         1181 push (@cmds, {
334             command => 'unwhitelist_from',
335             setting => 'whitelist_from',
336             type => $CONF_TYPE_ADDRLIST,
337             code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
338             });
339              
340             =item whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net
341              
342             Works similarly to whitelist_from, except that in addition to matching
343             a sender address, a relay's rDNS name or its IP address must match too
344             for the whitelisting rule to fire. The first parameter is a sender's e-mail
345             address to whitelist, and the second is a string to match the relay's rDNS,
346             or its IP address. Matching is case-insensitive.
347              
348             This second parameter is matched against a TCP-info information field as
349             provided in a FROM clause of a trace information (i.e. in a Received header
350             field, see RFC 5321). Only the Received header fields inserted by trusted
351             hosts are considered. This parameter can either be a full hostname, or a
352             domain component of that hostname, or an IP address (optionally followed
353             by a slash and a prefix length) in square brackets. The address prefix
354             (mask) length with a slash may stand within brackets along with an address,
355             or may follow the bracketed address. Reverse DNS lookup is done by an MTA,
356             not by SpamAssassin.
357              
358             For backward compatibility as an alternative to a CIDR notation, an IPv4
359             address in brackets may be truncated on classful boundaries to cover whole
360             subnets, e.g. C<[10.1.2.3]>, C<[10.1.2]>, C<[10.1]>, C<[10]>.
361              
362             In other words, if the host that connected to your MX had an IP address
363             192.0.2.123 that mapped to 'sendinghost.example.org', you should specify
364             C<sendinghost.example.org>, or C<example.org>, or C<[192.0.2.123]>, or
365             C<[192.0.2.0/24]>, or C<[192.0.2]> here.
366              
367             Note that this requires that C<internal_networks> be correct. For simple
368             cases, it will be, but for a complex network you may get better results
369             by setting that parameter.
370              
371             It also requires that your mail exchangers be configured to perform DNS
372             reverse lookups on the connecting host's IP address, and to record the
373             result in the generated Received header field according to RFC 5321.
374              
375             e.g.
376              
377             whitelist_from_rcvd joe@example.com example.com
378             whitelist_from_rcvd *@* mail.example.org
379             whitelist_from_rcvd *@axkit.org [192.0.2.123]
380             whitelist_from_rcvd *@axkit.org [192.0.2.0/24]
381             whitelist_from_rcvd *@axkit.org [192.0.2.0]/24
382             whitelist_from_rcvd *@axkit.org [2001:db8:1234::/48]
383             whitelist_from_rcvd *@axkit.org [2001:db8:1234::]/48
384              
385             =item def_whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net
386              
387             Same as C<whitelist_from_rcvd>, but used for the default whitelist entries
388             in the SpamAssassin distribution. The whitelist score is lower, because
389             these are often targets for spammer spoofing.
390              
391             =cut
392              
393             push (@cmds, {
394             setting => 'whitelist_from_rcvd',
395             type => $CONF_TYPE_ADDRLIST,
396             code => sub {
397 0     0   0 my ($self, $key, $value, $line) = @_;
398 0 0 0     0 unless (defined $value && $value !~ /^$/) {
399 0         0 return $MISSING_REQUIRED_VALUE;
400             }
401 0 0       0 unless ($value =~ /^\S+\s+\S+$/) {
402 0         0 return $INVALID_VALUE;
403             }
404 0         0 $self->{parser}->add_to_addrlist_rcvd ('whitelist_from_rcvd',
405             split(/\s+/, $value));
406             }
407 81         1181 });
408              
409             push (@cmds, {
410             setting => 'def_whitelist_from_rcvd',
411             type => $CONF_TYPE_ADDRLIST,
412             code => sub {
413 0     0   0 my ($self, $key, $value, $line) = @_;
414 0 0 0     0 unless (defined $value && $value !~ /^$/) {
415 0         0 return $MISSING_REQUIRED_VALUE;
416             }
417 0 0       0 unless ($value =~ /^\S+\s+\S+$/) {
418 0         0 return $INVALID_VALUE;
419             }
420 0         0 $self->{parser}->add_to_addrlist_rcvd ('def_whitelist_from_rcvd',
421             split(/\s+/, $value));
422             }
423 81         1135 });
424              
425             =item whitelist_allows_relays user@example.com
426              
427             Specify addresses which are in C<whitelist_from_rcvd> that sometimes
428             send through a mail relay other than the listed ones. By default mail
429             with a From address that is in C<whitelist_from_rcvd> that does not match
430             the relay will trigger a forgery rule. Including the address in
431             C<whitelist_allows_relays> prevents that.
432              
433             Whitelist and blacklist addresses are now file-glob-style patterns, so
434             C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
435             Specifically, C<*> and C<?> are allowed, but all other metacharacters
436             are not. Regular expressions are not used for security reasons.
437             Matching is case-insensitive.
438              
439             Multiple addresses per line, separated by spaces, is OK. Multiple
440             C<whitelist_allows_relays> lines are also OK.
441              
442             The specified email address does not have to match exactly the address
443             previously used in a whitelist_from_rcvd line as it is compared to the
444             address in the header.
445              
446             e.g.
447              
448             whitelist_allows_relays joe@example.com fred@example.com
449             whitelist_allows_relays *@example.com
450              
451             =cut
452              
453 81         505 push (@cmds, {
454             setting => 'whitelist_allows_relays',
455             type => $CONF_TYPE_ADDRLIST,
456             });
457              
458             =item unwhitelist_from_rcvd user@example.com
459              
460             Used to override a default whitelist_from_rcvd entry, so for example a
461             distribution whitelist_from_rcvd can be overridden in a local.cf file,
462             or an individual user can override a whitelist_from_rcvd entry in
463             their own C<user_prefs> file.
464              
465             The specified email address has to match exactly the address previously
466             used in a whitelist_from_rcvd line.
467              
468             e.g.
469              
470             unwhitelist_from_rcvd joe@example.com fred@example.com
471             unwhitelist_from_rcvd *@axkit.org
472              
473             =cut
474              
475             push (@cmds, {
476             setting => 'unwhitelist_from_rcvd',
477             type => $CONF_TYPE_ADDRLIST,
478             code => sub {
479 0     0   0 my ($self, $key, $value, $line) = @_;
480 0 0 0     0 unless (defined $value && $value !~ /^$/) {
481 0         0 return $MISSING_REQUIRED_VALUE;
482             }
483 0 0       0 unless ($value =~ /^(?:\S+(?:\s+\S+)*)$/) {
484 0         0 return $INVALID_VALUE;
485             }
486 0         0 $self->{parser}->remove_from_addrlist_rcvd('whitelist_from_rcvd',
487             split (/\s+/, $value));
488 0         0 $self->{parser}->remove_from_addrlist_rcvd('def_whitelist_from_rcvd',
489             split (/\s+/, $value));
490             }
491 81         1164 });
492              
493             =item blacklist_from user@example.com
494              
495             Used to specify addresses which send mail that is often tagged (incorrectly) as
496             non-spam, but which the user doesn't want. Same format as C<whitelist_from>.
497              
498             =cut
499              
500 81         654 push (@cmds, {
501             setting => 'blacklist_from',
502             type => $CONF_TYPE_ADDRLIST,
503             });
504              
505             =item unblacklist_from user@example.com
506              
507             Used to override a default blacklist_from entry, so for example a
508             distribution blacklist_from can be overridden in a local.cf file, or
509             an individual user can override a blacklist_from entry in their own
510             C<user_prefs> file. The specified email address has to match exactly
511             the address previously used in a blacklist_from line.
512              
513              
514             e.g.
515              
516             unblacklist_from joe@example.com fred@example.com
517             unblacklist_from *@spammer.com
518              
519             =cut
520              
521              
522 81         571 push (@cmds, {
523             command => 'unblacklist_from',
524             setting => 'blacklist_from',
525             type => $CONF_TYPE_ADDRLIST,
526             code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
527             });
528              
529              
530             =item whitelist_to user@example.com
531              
532             If the given address appears as a recipient in the message headers
533             (Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
534             be whitelisted. Useful if you're deploying SpamAssassin system-wide,
535             and don't want some users to have their mail filtered. Same format
536             as C<whitelist_from>.
537              
538             There are three levels of To-whitelisting, C<whitelist_to>, C<more_spam_to>
539             and C<all_spam_to>. Users in the first level may still get some spammish
540             mails blocked, but users in C<all_spam_to> should never get mail blocked.
541              
542             The headers checked for whitelist addresses are as follows: if C<Resent-To> or
543             C<Resent-Cc> are set, use those; otherwise check all addresses taken from the
544             following set of headers:
545              
546             To
547             Cc
548             Apparently-To
549             Delivered-To
550             Envelope-Recipients
551             Apparently-Resent-To
552             X-Envelope-To
553             Envelope-To
554             X-Delivered-To
555             X-Original-To
556             X-Rcpt-To
557             X-Real-To
558              
559             =item more_spam_to user@example.com
560              
561             See above.
562              
563             =item all_spam_to user@example.com
564              
565             See above.
566              
567             =cut
568              
569 81         442 push (@cmds, {
570             setting => 'whitelist_to',
571             type => $CONF_TYPE_ADDRLIST,
572             });
573 81         494 push (@cmds, {
574             setting => 'more_spam_to',
575             type => $CONF_TYPE_ADDRLIST,
576             });
577 81         392 push (@cmds, {
578             setting => 'all_spam_to',
579             type => $CONF_TYPE_ADDRLIST,
580             });
581              
582             =item blacklist_to user@example.com
583              
584             If the given address appears as a recipient in the message headers
585             (Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
586             be blacklisted. Same format as C<blacklist_from>.
587              
588             =cut
589              
590 81         7902 push (@cmds, {
591             setting => 'blacklist_to',
592             type => $CONF_TYPE_ADDRLIST,
593             });
594              
595             =item whitelist_auth user@example.com
596              
597             Used to specify addresses which send mail that is often tagged (incorrectly) as
598             spam. This is different from C<whitelist_from> and C<whitelist_from_rcvd> in
599             that it first verifies that the message was sent by an authorized sender for
600             the address, before whitelisting.
601              
602             Authorization is performed using one of the installed sender-authorization
603             schemes: SPF (using C<Mail::SpamAssassin::Plugin::SPF>), or DKIM (using
604             C<Mail::SpamAssassin::Plugin::DKIM>). Note that those plugins must be active,
605             and working, for this to operate.
606              
607             Using C<whitelist_auth> is roughly equivalent to specifying duplicate
608             C<whitelist_from_spf>, C<whitelist_from_dk>, and C<whitelist_from_dkim> lines
609             for each of the addresses specified.
610              
611             e.g.
612              
613             whitelist_auth joe@example.com fred@example.com
614             whitelist_auth *@example.com
615              
616             =item def_whitelist_auth user@example.com
617              
618             Same as C<whitelist_auth>, but used for the default whitelist entries
619             in the SpamAssassin distribution. The whitelist score is lower, because
620             these are often targets for spammer spoofing.
621              
622             =cut
623              
624 81         4455 push (@cmds, {
625             setting => 'whitelist_auth',
626             type => $CONF_TYPE_ADDRLIST,
627             });
628              
629 81         439 push (@cmds, {
630             setting => 'def_whitelist_auth',
631             type => $CONF_TYPE_ADDRLIST,
632             });
633              
634             =item unwhitelist_auth user@example.com
635              
636             Used to override a C<whitelist_auth> entry. The specified email address has to
637             match exactly the address previously used in a C<whitelist_auth> line.
638              
639             e.g.
640              
641             unwhitelist_auth joe@example.com fred@example.com
642             unwhitelist_auth *@example.com
643              
644             =cut
645              
646 81         481 push (@cmds, {
647             command => 'unwhitelist_auth',
648             setting => 'whitelist_auth',
649             type => $CONF_TYPE_ADDRLIST,
650             code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
651             });
652              
653              
654             =item enlist_uri_host (listname) host ...
655              
656             Adds one or more host names or domain names to a named list of URI domains.
657             The named list can then be consulted through a check_uri_host_listed()
658             eval rule implemented by the WLBLEval plugin, which takes the list name as
659             an argument. Parentheses around a list name are literal - a required syntax.
660              
661             Host names may optionally be prefixed by an exclamation mark '!', which
662             produces false as a result if this entry matches. This makes it easier
663             to exclude some subdomains when their superdomain is listed, for example:
664              
665             enlist_uri_host (MYLIST) !sub1.example.com !sub2.example.com example.com
666              
667             No wildcards are supported, but subdomains do match implicitly. Lists
668             are independent. Search for each named list starts by looking up the
669             full hostname first, then leading fields are progressively stripped off
670             (e.g.: sub.example.com, example.com, com) until a match is found or we run
671             out of fields. The first matching entry (the most specific) determines if a
672             lookup yielded a true (no '!' prefix) or a false (with a '!' prefix) result.
673              
674             If a URL found in a message contains an IP address in place of a host name,
675             the given list must specify the exact same IP address (instead of a host name)
676             in order to match.
677              
678             Use the delist_uri_host directive to neutralize previous enlist_uri_host
679             settings.
680              
681             The lists named 'BLACK' and 'WHITE' have their own shorthand directives
682             blacklist_uri_host and whitelist_uri_host and corresponding default rules,
683             but the names 'BLACK' and 'WHITE' are otherwise not special or reserved.
684              
685             =cut
686              
687             push (@cmds, {
688             command => 'enlist_uri_host',
689             setting => 'uri_host_lists',
690             type => $CONF_TYPE_ADDRLIST,
691             code => sub {
692 0     0   0 my($conf, $key, $value, $line) = @_;
693 0         0 local($1,$2);
694 0 0       0 if ($value !~ /^ \( (.*?) \) \s+ (.*) \z/sx) {
695 0         0 return $MISSING_REQUIRED_VALUE;
696             }
697 0         0 my $listname = $1; # corresponds to arg in check_uri_host_in_wblist()
698             # note: must not factor out dereferencing, as otherwise
699             # subhashes would spring up in a copy and be lost
700 0         0 foreach my $host ( split(' ', lc $2) ) {
701 0 0       0 my $v = $host =~ s/^!// ? 0 : 1;
702 0         0 $conf->{uri_host_lists}{$listname}{$host} = $v;
703             }
704             }
705 81         914 });
706              
707             =item delist_uri_host [ (listname) ] host ...
708              
709             Removes one or more specified host names from a named list of URI domains.
710             Removing an unlisted name is ignored (is not an error). Listname is optional,
711             if specified then just the named list is affected, otherwise hosts are
712             removed from all URI host lists created so far. Parentheses around a list
713             name are a required syntax.
714              
715             Note that directives in configuration files are processed in sequence,
716             the delist_uri_host only applies to previously listed entries and has
717             no effect on enlisted entries in yet-to-be-processed directives.
718              
719             For convenience (similarity to the enlist_uri_host directive) hostnames
720             may be prefixed by an exclamation mark, which is stripped off from each
721             name and has no meaning here.
722              
723             =cut
724              
725             push (@cmds, {
726             command => 'delist_uri_host',
727             setting => 'uri_host_lists',
728             type => $CONF_TYPE_ADDRLIST,
729             code => sub {
730 0     0   0 my($conf, $key, $value, $line) = @_;
731 0         0 local($1,$2);
732 0 0       0 if ($value !~ /^ (?: \( (.*?) \) \s+ )? (.*) \z/sx) {
733 0         0 return $MISSING_REQUIRED_VALUE;
734             }
735 0 0       0 my @listnames = defined $1 ? $1 : keys %{$conf->{uri_host_lists}};
  0         0  
736 0         0 my @args = split(' ', lc $2);
737 0         0 foreach my $listname (@listnames) {
738 0         0 foreach my $host (@args) {
739 0 0       0 my $v = $host =~ s/^!// ? 0 : 1;
740 0         0 delete $conf->{uri_host_lists}{$listname}{$host};
741             }
742             }
743             }
744 81         3377 });
745              
746             =item enlist_addrlist (listname) user@example.com
747              
748             Adds one or more addresses to a named list of addresses.
749             The named list can then be consulted through a check_from_in_list() or a
750             check_to_in_list() eval rule implemented by the WLBLEval plugin, which takes
751             the list name as an argument. Parentheses around a list name are literal - a
752             required syntax.
753              
754             Listed addresses are file-glob-style patterns, so C<friend@somewhere.com>,
755             C<*@isp.com>, or C<*.domain.net> will all work.
756             Specifically, C<*> and C<?> are allowed, but all other metacharacters
757             are not. Regular expressions are not used for security reasons.
758             Matching is case-insensitive.
759              
760             Multiple addresses per line, separated by spaces, is OK. Multiple
761             C<enlist_addrlist> lines are also OK.
762              
763             Enlisting an address to the list named blacklist_to is synonymous to using the
764             directive blacklist_to
765              
766             Enlisting an address to the list named blacklist_from is synonymous to using the
767             directive blacklist_from
768              
769             Enlisting an address to the list named whitelist_to is synonymous to using the
770             directive whitelist_to
771              
772             Enlisting an address to the list named whitelist_from is synonymous to using the
773             directive whitelist_from
774              
775             e.g.
776              
777             enlist_addrlist (PAYPAL_ADDRESS) service@paypal.com
778             enlist_addrlist (PAYPAL_ADDRESS) *@paypal.co.uk
779              
780             =cut
781              
782             push (@cmds, {
783             setting => 'enlist_addrlist',
784             type => $CONF_TYPE_ADDRLIST,
785             code => sub {
786 0     0   0 my($conf, $key, $value, $line) = @_;
787 0         0 local($1,$2);
788 0 0       0 if ($value !~ /^ \( (.*?) \) \s+ (.*) \z/sx) {
789 0         0 return $MISSING_REQUIRED_VALUE;
790             }
791 0         0 my $listname = $1; # corresponds to arg in check_uri_host_in_wblist()
792             # note: must not factor out dereferencing, as otherwise
793             # subhashes would spring up in a copy and be lost
794 0         0 $conf->{parser}->add_to_addrlist ($listname, split(/\s+/, $value));
795             }
796 81         9161 });
797              
798             =item blacklist_uri_host host-or-domain ...
799              
800             Is a shorthand for a directive: enlist_uri_host (BLACK) host ...
801              
802             Please see directives enlist_uri_host and delist_uri_host for details.
803              
804             =cut
805              
806             push (@cmds, {
807             command => 'blacklist_uri_host',
808             setting => 'uri_host_lists',
809             type => $CONF_TYPE_ADDRLIST,
810             code => sub {
811 0     0   0 my($conf, $key, $value, $line) = @_;
812 0         0 foreach my $host ( split(' ', lc $value) ) {
813 0 0       0 my $v = $host =~ s/^!// ? 0 : 1;
814 0         0 $conf->{uri_host_lists}{'BLACK'}{$host} = $v;
815             }
816             }
817 81         1433 });
818              
819             =item whitelist_uri_host host-or-domain ...
820              
821             Is a shorthand for a directive: enlist_uri_host (WHITE) host ...
822              
823             Please see directives enlist_uri_host and delist_uri_host for details.
824              
825             =cut
826              
827             push (@cmds, {
828             command => 'whitelist_uri_host',
829             setting => 'uri_host_lists',
830             type => $CONF_TYPE_ADDRLIST,
831             code => sub {
832 0     0   0 my($conf, $key, $value, $line) = @_;
833 0         0 foreach my $host ( split(' ', lc $value) ) {
834 0 0       0 my $v = $host =~ s/^!// ? 0 : 1;
835 0         0 $conf->{uri_host_lists}{'WHITE'}{$host} = $v;
836             }
837             }
838 81         1163 });
839              
840             =back
841              
842             =head2 BASIC MESSAGE TAGGING OPTIONS
843              
844             =over 4
845              
846             =item rewrite_header { subject | from | to } STRING
847              
848             By default, suspected spam messages will not have the C<Subject>,
849             C<From> or C<To> lines tagged to indicate spam. By setting this option,
850             the header will be tagged with C<STRING> to indicate that a message is
851             spam. For the From or To headers, this will take the form of an RFC 2822
852             comment following the address in parentheses. For the Subject header,
853             this will be prepended to the original subject. Note that you should
854             only use the _REQD_ and _SCORE_ tags when rewriting the Subject header
855             if C<report_safe> is 0. Otherwise, you may not be able to remove
856             the SpamAssassin markup via the normal methods. More information
857             about tags is explained below in the B<TEMPLATE TAGS> section.
858              
859             Parentheses are not permitted in STRING if rewriting the From or To headers.
860             (They will be converted to square brackets.)
861              
862             If C<rewrite_header subject> is used, but the message being rewritten
863             does not already contain a C<Subject> header, one will be created.
864              
865             A null value for C<STRING> will remove any existing rewrite for the specified
866             header.
867              
868             =cut
869              
870             push (@cmds, {
871             setting => 'rewrite_header',
872             type => $CONF_TYPE_HASH_KEY_VALUE,
873             code => sub {
874 0     0   0 my ($self, $key, $value, $line) = @_;
875 0         0 my($hdr, $string) = split(/\s+/, $value, 2);
876 0         0 $hdr = ucfirst(lc($hdr));
877              
878 0 0       0 if ($hdr =~ /^$/) {
    0          
879 0         0 return $MISSING_REQUIRED_VALUE;
880             }
881             # We only deal with From, Subject, and To ...
882             elsif ($hdr =~ /^(?:From|Subject|To)$/) {
883 0 0 0     0 unless (defined $string && $string =~ /\S/) {
884 0         0 delete $self->{rewrite_header}->{$hdr};
885 0         0 return;
886             }
887              
888 0 0       0 if ($hdr ne 'Subject') {
889 0         0 $string =~ tr/()/[]/;
890             }
891 0         0 $self->{rewrite_header}->{$hdr} = $string;
892 0         0 return;
893             }
894             else {
895             # if we get here, note the issue, then we'll fail through for an error.
896 0         0 info("config: rewrite_header: ignoring $hdr, not From, Subject, or To");
897 0         0 return $INVALID_VALUE;
898             }
899             }
900 81         1155 });
901              
902             =item add_header { spam | ham | all } header_name string
903              
904             Customized headers can be added to the specified type of messages (spam,
905             ham, or "all" to add to either). All headers begin with C<X-Spam->
906             (so a C<header_name> Foo will generate a header called X-Spam-Foo).
907             header_name is restricted to the character set [A-Za-z0-9_-].
908              
909             The order of C<add_header> configuration options is preserved, inserted
910             headers will follow this order of declarations. When combining C<add_header>
911             with C<clear_headers> and C<remove_header>, keep in mind that C<add_header>
912             appends a new header to the current list, after first removing any existing
913             header fields of the same name. Note also that C<add_header>, C<clear_headers>
914             and C<remove_header> may appear in multiple .cf files, which are interpreted
915             in alphabetic order.
916              
917             C<string> can contain tags as explained below in the B<TEMPLATE TAGS> section.
918             You can also use C<\n> and C<\t> in the header to add newlines and tabulators
919             as desired. A backslash has to be written as \\, any other escaped chars will
920             be silently removed.
921              
922             All headers will be folded if fold_headers is set to C<1>. Note: Manually
923             adding newlines via C<\n> disables any further automatic wrapping (ie:
924             long header lines are possible). The lines will still be properly folded
925             (marked as continuing) though.
926              
927             You can customize existing headers with B<add_header> (only the specified
928             subset of messages will be changed).
929              
930             See also C<clear_headers> and C<remove_header> for removing headers.
931              
932             Here are some examples (these are the defaults, note that Checker-Version can
933             not be changed or removed):
934              
935             add_header spam Flag _YESNOCAPS_
936             add_header all Status _YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ autolearn=_AUTOLEARN_ version=_VERSION_
937             add_header all Level _STARS(*)_
938             add_header all Checker-Version SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_
939              
940             =cut
941              
942             push (@cmds, {
943             setting => 'add_header',
944             code => sub {
945 316     316   801 my ($self, $key, $value, $line) = @_;
946 316         1032 local ($1,$2,$3);
947 316 50       2895 if ($value !~ /^(ham|spam|all)\s+([A-Za-z0-9_-]+)\s+(.*?)\s*$/) {
948 0         0 return $INVALID_VALUE;
949             }
950              
951 316         1125 my ($type, $name, $hline) = ($1, $2, $3);
952 316 100       1326 if ($hline =~ /^"(.*)"$/) {
953 63         268 $hline = $1;
954             }
955 316         1195 my @line = split(
956             /\\\\/, # split at double backslashes,
957             $hline."\n" # newline needed to make trailing backslashes work
958             );
959 316         794 foreach (@line) {
960 316         661 s/\\t/\t/g; # expand tabs
961 316         490 s/\\n/\n/g; # expand newlines
962 316         581 s/\\.//g; # purge all other escapes
963             };
964 316         885 $hline = join("\\", @line);
965 316         685 chop($hline); # remove dummy newline again
966 316 100 66     1695 if (($type eq "ham") || ($type eq "all")) {
967             $self->{headers_ham} =
968 253         472 [ grep { lc($_->[0]) ne lc($name) } @{$self->{headers_ham}} ];
  476         1626  
  253         654  
969 253         510 push(@{$self->{headers_ham}}, [$name, $hline]);
  253         872  
970             }
971 316 50 66     1620 if (($type eq "spam") || ($type eq "all")) {
972             $self->{headers_spam} =
973 316         478 [ grep { lc($_->[0]) ne lc($name) } @{$self->{headers_spam}} ];
  729         1841  
  316         725  
974 316         643 push(@{$self->{headers_spam}}, [$name, $hline]);
  316         2044  
975             }
976             }
977 81         1323 });
978              
979             =item remove_header { spam | ham | all } header_name
980              
981             Headers can be removed from the specified type of messages (spam, ham,
982             or "all" to remove from either). All headers begin with C<X-Spam->
983             (so C<header_name> will be appended to C<X-Spam->).
984              
985             See also C<clear_headers> for removing all the headers at once.
986              
987             Note that B<X-Spam-Checker-Version> is not removable because the version
988             information is needed by mail administrators and developers to debug
989             problems. Without at least one header, it might not even be possible to
990             determine that SpamAssassin is running.
991              
992             =cut
993              
994             push (@cmds, {
995             setting => 'remove_header',
996             code => sub {
997 0     0   0 my ($self, $key, $value, $line) = @_;
998 0         0 local ($1,$2);
999 0 0       0 if ($value !~ /^(ham|spam|all)\s+([A-Za-z0-9_-]+)\s*$/) {
1000 0         0 return $INVALID_VALUE;
1001             }
1002              
1003 0         0 my ($type, $name) = ($1, $2);
1004 0 0       0 return if ( $name eq "Checker-Version" );
1005              
1006 0         0 $name = lc($name);
1007 0 0 0     0 if (($type eq "ham") || ($type eq "all")) {
1008             $self->{headers_ham} =
1009 0         0 [ grep { lc($_->[0]) ne $name } @{$self->{headers_ham}} ];
  0         0  
  0         0  
1010             }
1011 0 0 0     0 if (($type eq "spam") || ($type eq "all")) {
1012             $self->{headers_spam} =
1013 0         0 [ grep { lc($_->[0]) ne $name } @{$self->{headers_spam}} ];
  0         0  
  0         0  
1014             }
1015             }
1016 81         7837 });
1017              
1018             =item clear_headers
1019              
1020             Clear the list of headers to be added to messages. You may use this
1021             before any B<add_header> options to prevent the default headers from being
1022             added to the message.
1023              
1024             C<add_header>, C<clear_headers> and C<remove_header> may appear in multiple
1025             .cf files, which are interpreted in alphabetic order, so C<clear_headers>
1026             in a later file will remove all added headers from previously interpreted
1027             configuration files, which may or may not be desired.
1028              
1029             Note that B<X-Spam-Checker-Version> is not removable because the version
1030             information is needed by mail administrators and developers to debug
1031             problems. Without at least one header, it might not even be possible to
1032             determine that SpamAssassin is running.
1033              
1034             =cut
1035              
1036             push (@cmds, {
1037             setting => 'clear_headers',
1038             type => $CONF_TYPE_NOARGS,
1039             code => sub {
1040 63     63   333 my ($self, $key, $value, $line) = @_;
1041 63 50 33     514 unless (!defined $value || $value eq '') {
1042 0         0 return $INVALID_VALUE;
1043             }
1044 63         503 my @h = grep { lc($_->[0]) eq "checker-version" }
1045 63         135 @{$self->{headers_ham}};
  63         304  
1046 63 50       399 $self->{headers_ham} = !@h ? [] : [ $h[0] ];
1047 63 50       446 $self->{headers_spam} = !@h ? [] : [ $h[0] ];
1048             }
1049 81         1188 });
1050              
1051             =item report_safe ( 0 | 1 | 2 ) (default: 1)
1052              
1053             If this option is set to 1 and an incoming message is tagged as spam,
1054             instead of modifying the original message, SpamAssassin will create a
1055             new report message and attach the original message as a message/rfc822
1056             MIME part (ensuring the original message is completely preserved, not
1057             easily opened, and easier to recover).
1058              
1059             If this option is set to 2, then original messages will be attached with
1060             a content type of text/plain instead of message/rfc822. This setting
1061             may be required for safety reasons on certain broken mail clients that
1062             automatically load attachments without any action by the user. This
1063             setting may also make it somewhat more difficult to extract or view the
1064             original message.
1065              
1066             If this option is set to 0, incoming spam is only modified by adding
1067             some C<X-Spam-> headers and no changes will be made to the body. In
1068             addition, a header named B<X-Spam-Report> will be added to spam. You
1069             can use the B<remove_header> option to remove that header after setting
1070             B<report_safe> to 0.
1071              
1072             See B<report_safe_copy_headers> if you want to copy headers from
1073             the original mail into tagged messages.
1074              
1075             =cut
1076              
1077             push (@cmds, {
1078             setting => 'report_safe',
1079             default => 1,
1080             type => $CONF_TYPE_NUMERIC,
1081             code => sub {
1082 63     63   282 my ($self, $key, $value, $line) = @_;
1083 63 50       421 if ($value eq '') {
    50          
1084 0         0 return $MISSING_REQUIRED_VALUE;
1085             }
1086             elsif ($value !~ /^[012]$/) {
1087 0         0 return $INVALID_VALUE;
1088             }
1089              
1090 63         245 $self->{report_safe} = $value+0;
1091 63 50 33     410 if (! $self->{report_safe} &&
1092             ! (grep { lc($_->[0]) eq "report" } @{$self->{headers_spam}}) ) {
1093 0         0 push(@{$self->{headers_spam}}, ["Report", "_REPORT_"]);
  0         0  
1094             }
1095             }
1096 81         1231 });
1097              
1098             =item report_wrap_width (default: 70)
1099              
1100             This option sets the wrap width for description lines in the X-Spam-Report
1101             header, not accounting for tab width.
1102              
1103             =cut
1104              
1105 81         553 push (@cmds, {
1106             setting => 'report_wrap_width',
1107             default => '70',
1108             type => $CONF_TYPE_NUMERIC,
1109             });
1110              
1111             =back
1112              
1113             =head2 LANGUAGE OPTIONS
1114              
1115             =over 4
1116              
1117             =item ok_locales xx [ yy zz ... ] (default: all)
1118              
1119             This option is used to specify which locales are considered OK for
1120             incoming mail. Mail using the B<character sets> that are allowed by
1121             this option will not be marked as possibly being spam in a foreign
1122             language.
1123              
1124             If you receive lots of spam in foreign languages, and never get any non-spam in
1125             these languages, this may help. Note that all ISO-8859-* character sets, and
1126             Windows code page character sets, are always permitted by default.
1127              
1128             Set this to C<all> to allow all character sets. This is the default.
1129              
1130             The rules C<CHARSET_FARAWAY>, C<CHARSET_FARAWAY_BODY>, and
1131             C<CHARSET_FARAWAY_HEADERS> are triggered based on how this is set.
1132              
1133             Examples:
1134              
1135             ok_locales all (allow all locales)
1136             ok_locales en (only allow English)
1137             ok_locales en ja zh (allow English, Japanese, and Chinese)
1138              
1139             Note: if there are multiple ok_locales lines, only the last one is used.
1140              
1141             Select the locales to allow from the list below:
1142              
1143             =over 4
1144              
1145             =item en - Western character sets in general
1146              
1147             =item ja - Japanese character sets
1148              
1149             =item ko - Korean character sets
1150              
1151             =item ru - Cyrillic character sets
1152              
1153             =item th - Thai character sets
1154              
1155             =item zh - Chinese (both simplified and traditional) character sets
1156              
1157             =back
1158              
1159             =cut
1160              
1161 81         686 push (@cmds, {
1162             setting => 'ok_locales',
1163             default => 'all',
1164             type => $CONF_TYPE_STRING,
1165             });
1166              
1167             =item normalize_charset ( 0 | 1) (default: 0)
1168              
1169             Whether to decode non-UTF-8 and non-ASCII textual parts and recode them
1170             to UTF-8 before the text is given over to rules processing. The character
1171             set used for attempted decoding is primarily based on a declared character
1172             set in a Content-Type header, but if the decoding attempt fails a module
1173             Encode::Detect::Detector is consulted (if available) to provide a guess
1174             based on the actual text, and decoding is re-attempted. Even if the option
1175             is enabled no unnecessary decoding and re-encoding work is done when
1176             possible (like with an all-ASCII text with a US-ASCII or extended ASCII
1177             character set declaration, e.g. UTF-8 or ISO-8859-nn or Windows-nnnn).
1178              
1179             Unicode support in old versions of perl or in a core module Encode is likely
1180             to be buggy in places, so if the normalize_charset function is enabled
1181             it is advised to stick to more recent versions of perl (preferably 5.12
1182             or later). The module Encode::Detect::Detector is optional, when necessary
1183             it will be used if it is available.
1184              
1185             =cut
1186              
1187             push (@cmds, {
1188             setting => 'normalize_charset',
1189             default => 0,
1190             type => $CONF_TYPE_BOOL,
1191             code => sub {
1192 0     0   0 my ($self, $key, $value, $line) = @_;
1193 0 0 0     0 unless (defined $value && $value !~ /^$/) {
1194 0         0 return $MISSING_REQUIRED_VALUE;
1195             }
1196 0 0 0     0 if (lc $value eq 'yes' || $value eq '1') { $value = 1 }
  0 0 0     0  
1197 0         0 elsif (lc $value eq 'no' || $value eq '0') { $value = 0 }
1198 0         0 else { return $INVALID_VALUE }
1199              
1200 0         0 $self->{normalize_charset} = $value;
1201              
1202 0 0       0 unless ($] > 5.008004) {
1203 0         0 $self->{parser}->lint_warn("config: normalize_charset requires Perl 5.8.5 or later");
1204 0         0 $self->{normalize_charset} = 0;
1205 0         0 return $INVALID_VALUE;
1206             }
1207 0         0 require HTML::Parser;
1208             #changed to eval to use VERSION so that this version was not incorrectly parsed for CPAN
1209 0 0       0 unless ( eval { HTML::Parser->VERSION(3.46) } ) {
  0         0  
1210 0         0 $self->{parser}->lint_warn("config: normalize_charset requires HTML::Parser 3.46 or later");
1211 0         0 $self->{normalize_charset} = 0;
1212 0         0 return $INVALID_VALUE;
1213             }
1214 0 0       0 unless (eval 'require Encode') {
1215 0         0 $self->{parser}->lint_warn("config: normalize_charset requires Encode");
1216 0         0 $self->{normalize_charset} = 0;
1217 0         0 return $INVALID_VALUE;
1218             }
1219             }
1220 81         1116 });
1221              
1222              
1223             =back
1224              
1225             =head2 NETWORK TEST OPTIONS
1226              
1227             =over 4
1228              
1229             =item trusted_networks IPaddress[/masklen] ... (default: none)
1230              
1231             What networks or hosts are 'trusted' in your setup. B<Trusted> in this case
1232             means that relay hosts on these networks are considered to not be potentially
1233             operated by spammers, open relays, or open proxies. A trusted host could
1234             conceivably relay spam, but will not originate it, and will not forge header
1235             data. DNS blacklist checks will never query for hosts on these networks.
1236              
1237             See C<http://wiki.apache.org/spamassassin/TrustPath> for more information.
1238              
1239             MXes for your domain(s) and internal relays should B<also> be specified using
1240             the C<internal_networks> setting. When there are 'trusted' hosts that
1241             are not MXes or internal relays for your domain(s) they should B<only> be
1242             specified in C<trusted_networks>.
1243              
1244             The C<IPaddress> can be an IPv4 address (in a dot-quad form), or an IPv6
1245             address optionally enclosed in square brackets. Scoped link-local IPv6
1246             addresses are syntactically recognized but the interface scope is currently
1247             ignored (e.g. [fe80::1234%eth0] ) and should be avoided.
1248              
1249             If a C</masklen> is specified, it is considered a CIDR-style 'netmask' length,
1250             specified in bits. If it is not specified, but less than 4 octets of an IPv4
1251             address are specified with a trailing dot, an implied netmask length covers
1252             all addresses in remaining octets (i.e. implied masklen is /8 or /16 or /24).
1253             If masklen is not specified, and there is no trailing dot, then just a single
1254             IP address specified is used, as if the masklen were C</32> with an IPv4
1255             address, or C</128> in case of an IPv6 address.
1256              
1257             If a network or host address is prefaced by a C<!> the matching network or
1258             host will be excluded from the list even if a less specific (shorter netmask
1259             length) subnet is later specified in the list. This allows a subset of
1260             a wider network to be exempt. In case of specifying overlapping subnets,
1261             specify more specific subnets first (tighter matching, i.e. with a longer
1262             netmask length), followed by less specific (shorter netmask length) subnets
1263             to get predictable results regardless of the search algorithm used - when
1264             Net::Patricia module is installed the search finds the tightest matching
1265             entry in the list, while a sequential search as used in absence of the
1266             module Net::Patricia will find the first matching entry in the list.
1267              
1268             Note: 127.0.0.0/8 and ::1 are always included in trusted_networks, regardless
1269             of your config.
1270              
1271             Examples:
1272              
1273             trusted_networks 192.168.0.0/16 # all in 192.168.*.*
1274             trusted_networks 192.168. # all in 192.168.*.*
1275             trusted_networks 212.17.35.15 # just that host
1276             trusted_networks !10.0.1.5 10.0.1/24 # all in 10.0.1.* but not 10.0.1.5
1277             trusted_networks 2001:db8:1::1 !2001:db8:1::/64 2001:db8::/32
1278             # 2001:db8::/32 and 2001:db8:1::1/128, except the rest of 2001:db8:1::/64
1279              
1280             This operates additively, so a C<trusted_networks> line after another one
1281             will append new entries to the list of trusted networks. To clear out the
1282             existing entries, use C<clear_trusted_networks>.
1283              
1284             If C<trusted_networks> is not set and C<internal_networks> is, the value
1285             of C<internal_networks> will be used for this parameter.
1286              
1287             If neither C<trusted_networks> nor C<internal_networks> is set, a basic
1288             inference algorithm is applied. This works as follows:
1289              
1290             =over 4
1291              
1292             =item *
1293              
1294             If the 'from' host has an IP address in a private (RFC 1918) network range,
1295             then it's trusted
1296              
1297             =item *
1298              
1299             If there are authentication tokens in the received header, and
1300             the previous host was trusted, then this host is also trusted
1301              
1302             =item *
1303              
1304             Otherwise this host, and all further hosts, are considered untrusted.
1305              
1306             =back
1307              
1308             =cut
1309              
1310 81         507 push (@cmds, {
1311             setting => 'trusted_networks',
1312             type => $CONF_TYPE_IPADDRLIST,
1313             });
1314              
1315             =item clear_trusted_networks
1316              
1317             Empty the list of trusted networks.
1318              
1319             =cut
1320              
1321             push (@cmds, {
1322             setting => 'clear_trusted_networks',
1323             type => $CONF_TYPE_NOARGS,
1324             code => sub {
1325 32     32   151 my ($self, $key, $value, $line) = @_;
1326 32 50 33     258 unless (!defined $value || $value eq '') {
1327 0         0 return $INVALID_VALUE;
1328             }
1329 32         240 $self->{trusted_networks} = $self->new_netset('trusted_networks',1);
1330 32         156 $self->{trusted_networks_configured} = 0;
1331             }
1332 81         1094 });
1333              
1334             =item internal_networks IPaddress[/masklen] ... (default: none)
1335              
1336             What networks or hosts are 'internal' in your setup. B<Internal> means
1337             that relay hosts on these networks are considered to be MXes for your
1338             domain(s), or internal relays. This uses the same syntax as
1339             C<trusted_networks>, above - see there for details.
1340              
1341             This value is used when checking 'dial-up' or dynamic IP address
1342             blocklists, in order to detect direct-to-MX spamming.
1343              
1344             Trusted relays that accept mail directly from dial-up connections
1345             (i.e. are also performing a role of mail submission agents - MSA)
1346             should not be listed in C<internal_networks>. List them only in
1347             C<trusted_networks>.
1348              
1349             If C<trusted_networks> is set and C<internal_networks> is not, the value
1350             of C<trusted_networks> will be used for this parameter.
1351              
1352             If neither C<trusted_networks> nor C<internal_networks> is set, no addresses
1353             will be considered local; in other words, any relays past the machine where
1354             SpamAssassin is running will be considered external.
1355              
1356             Every entry in C<internal_networks> must appear in C<trusted_networks>; in
1357             other words, C<internal_networks> is always a subset of the trusted set.
1358              
1359             Note: 127/8 and ::1 are always included in internal_networks, regardless of
1360             your config.
1361              
1362             =cut
1363              
1364 81         495 push (@cmds, {
1365             setting => 'internal_networks',
1366             type => $CONF_TYPE_IPADDRLIST,
1367             });
1368              
1369             =item clear_internal_networks
1370              
1371             Empty the list of internal networks.
1372              
1373             =cut
1374              
1375             push (@cmds, {
1376             setting => 'clear_internal_networks',
1377             type => $CONF_TYPE_NOARGS,
1378             code => sub {
1379 32     32   157 my ($self, $key, $value, $line) = @_;
1380 32 50 33     262 unless (!defined $value || $value eq '') {
1381 0         0 return $INVALID_VALUE;
1382             }
1383 32         128 $self->{internal_networks} = $self->new_netset('internal_networks',1);
1384 32         194 $self->{internal_networks_configured} = 0;
1385             }
1386 81         1069 });
1387              
1388             =item msa_networks IPaddress[/masklen] ... (default: none)
1389              
1390             The networks or hosts which are acting as MSAs in your setup (but not also
1391             as MX relays). This uses the same syntax as C<trusted_networks>, above - see
1392             there for details.
1393              
1394             B<MSA> means that the relay hosts on these networks accept mail from your
1395             own users and authenticate them appropriately. These relays will never
1396             accept mail from hosts that aren't authenticated in some way. Examples of
1397             authentication include IP lists, SMTP AUTH, POP-before-SMTP, etc.
1398              
1399             All relays found in the message headers after the MSA relay will take
1400             on the same trusted and internal classifications as the MSA relay itself,
1401             as defined by your I<trusted_networks> and I<internal_networks> configuration.
1402              
1403             For example, if the MSA relay is trusted and internal so will all of the
1404             relays that precede it.
1405              
1406             When using msa_networks to identify an MSA it is recommended that you treat
1407             that MSA as both trusted and internal. When an MSA is not included in
1408             msa_networks you should treat the MSA as trusted but not internal, however
1409             if the MSA is also acting as an MX or intermediate relay you must always
1410             treat it as both trusted and internal and ensure that the MSA includes
1411             visible auth tokens in its Received header to identify submission clients.
1412              
1413             B<Warning:> Never include an MSA that also acts as an MX (or is also an
1414             intermediate relay for an MX) or otherwise accepts mail from
1415             non-authenticated users in msa_networks. Doing so will result in unknown
1416             external relays being trusted.
1417              
1418             =cut
1419              
1420 81         404 push (@cmds, {
1421             setting => 'msa_networks',
1422             type => $CONF_TYPE_IPADDRLIST,
1423             });
1424              
1425             =item clear_msa_networks
1426              
1427             Empty the list of msa networks.
1428              
1429             =cut
1430              
1431             push (@cmds, {
1432             setting => 'clear_msa_networks',
1433             type => $CONF_TYPE_NOARGS,
1434             code => sub {
1435 32     32   148 my ($self, $key, $value, $line) = @_;
1436 32 50 33     246 unless (!defined $value || $value eq '') {
1437 0         0 return $INVALID_VALUE;
1438             }
1439             $self->{msa_networks} =
1440 32         136 $self->new_netset('msa_networks',0); # no loopback IP
1441 32         143 $self->{msa_networks_configured} = 0;
1442             }
1443 81         835 });
1444              
1445             =item originating_ip_headers header ... (default: X-Yahoo-Post-IP X-Originating-IP X-Apparently-From X-SenderIP)
1446              
1447             A list of header field names from which an originating IP address can
1448             be obtained. For example, webmail servers may record a client IP address
1449             in X-Originating-IP.
1450              
1451             These IP addresses are virtually appended into the Received: chain, so they
1452             are used in RBL checks where appropriate.
1453              
1454             Currently the IP addresses are not added into X-Spam-Relays-* header fields,
1455             but they may be in the future.
1456              
1457             =cut
1458              
1459             push (@cmds, {
1460             setting => 'originating_ip_headers',
1461             default => [],
1462             type => $CONF_TYPE_STRINGLIST,
1463             code => sub {
1464 126     126   403 my ($self, $key, $value, $line) = @_;
1465 126 50 33     836 unless (defined $value && $value !~ /^$/) {
1466 0         0 return $MISSING_REQUIRED_VALUE;
1467             }
1468 126         553 foreach my $hfname (split(/\s+/, $value)) {
1469             # avoid duplicates, consider header field names case-insensitive
1470 252         705 push(@{$self->{originating_ip_headers}}, $hfname)
1471 252 50       350 if !grep(lc($_) eq lc($hfname), @{$self->{originating_ip_headers}});
  252         949  
1472             }
1473             }
1474 81         1115 });
1475              
1476             =item clear_originating_ip_headers
1477              
1478             Empty the list of 'originating IP address' header field names.
1479              
1480             =cut
1481              
1482             push (@cmds, {
1483             setting => 'clear_originating_ip_headers',
1484             type => $CONF_TYPE_NOARGS,
1485             code => sub {
1486 63     63   274 my ($self, $key, $value, $line) = @_;
1487 63 50 33     509 unless (!defined $value || $value eq '') {
1488 0         0 return $INVALID_VALUE;
1489             }
1490 63         302 $self->{originating_ip_headers} = [];
1491             }
1492 81         1030 });
1493              
1494             =item always_trust_envelope_sender ( 0 | 1 ) (default: 0)
1495              
1496             Trust the envelope sender even if the message has been passed through one or
1497             more trusted relays. See also C<envelope_sender_header>.
1498              
1499             =cut
1500              
1501 81         544 push (@cmds, {
1502             setting => 'always_trust_envelope_sender',
1503             default => 0,
1504             type => $CONF_TYPE_BOOL,
1505             });
1506              
1507             =item skip_rbl_checks ( 0 | 1 ) (default: 0)
1508              
1509             Turning on the skip_rbl_checks setting will disable the DNSEval plugin,
1510             which implements Real-time Block List (or: Blackhole List) (RBL) lookups.
1511              
1512             By default, SpamAssassin will run RBL checks. Individual blocklists may
1513             be disabled selectively by setting a score of a corresponding rule to 0.
1514              
1515             See also a related configuration parameter skip_uribl_checks,
1516             which controls the URIDNSBL plugin (documented in the URIDNSBL man page).
1517              
1518             =cut
1519              
1520 81         442 push (@cmds, {
1521             setting => 'skip_rbl_checks',
1522             default => 0,
1523             type => $CONF_TYPE_BOOL,
1524             });
1525              
1526             =item dns_available { yes | no | test[: domain1 domain2...] } (default: yes)
1527              
1528             Tells SpamAssassin whether DNS resolving is available or not. A value I<yes>
1529             indicates DNS resolving is available, a value I<no> indicates DNS resolving
1530             is not available - both of these values apply unconditionally and skip initial
1531             DNS tests, which can be slow or unreliable.
1532              
1533             When the option value is a I<test> (with or without arguments), SpamAssassin
1534             will query some domain names on the internet during initialization, attempting
1535             to determine if DNS resolving is working or not. A space-separated list
1536             of domain names may be specified explicitly, or left to a built-in default
1537             of a dozen or so domain names. From an explicit or a default list a subset
1538             of three domain names is picked randomly for checking. The test queries for
1539             NS records of these domains: if at least one query returns a success then
1540             SpamAssassin considers DNS resolving as available, otherwise not.
1541              
1542             The problem is that the test can introduce some startup delay if a network
1543             connection is down, and in some cases it can wrongly guess that DNS is
1544             unavailable because a test connection failed, which disables several
1545             DNS-dependent tests.
1546              
1547             Please note, the DNS test queries for NS records, so specify domain names,
1548             not host names.
1549              
1550             Since version 3.4.0 of SpamAssassin a default setting for option
1551             I<dns_available> is I<yes>. A default in older versions was I<test>.
1552              
1553             =cut
1554              
1555             push (@cmds, {
1556             setting => 'dns_available',
1557             default => 'yes',
1558             type => $CONF_TYPE_STRING,
1559             code => sub {
1560 1     1   35 my ($self, $key, $value, $line) = @_;
1561 1 50       29 if ($value =~ /^test(?::\s*\S.*)?$/) {
    50          
    0          
1562 0         0 $self->{dns_available} = $value;
1563             }
1564             elsif ($value =~ /^(?:yes|1)$/) {
1565 1         24 $self->{dns_available} = 'yes';
1566             }
1567             elsif ($value =~ /^(?:no|0)$/) {
1568 0         0 $self->{dns_available} = 'no';
1569             }
1570             else {
1571 0         0 return $INVALID_VALUE;
1572             }
1573             }
1574 81         1071 });
1575              
1576             =item dns_server ip-addr-port (default: entries provided by Net::DNS)
1577              
1578             Specifies an IP address of a DNS server, and optionally its port number.
1579             The I<dns_server> directive may be specified multiple times, each entry
1580             adding to a list of available resolving name servers. The I<ip-addr-port>
1581             argument can either be an IPv4 or IPv6 address, optionally enclosed in
1582             brackets, and optionally followed by a colon and a port number. In absence
1583             of a port number a standard port number 53 is assumed. When an IPv6 address
1584             is specified along with a port number, the address B<must> be enclosed in
1585             brackets to avoid parsing ambiguity regarding a colon separator. A scoped
1586             link-local IP address is allowed (assuming underlying modules allow it).
1587              
1588             Examples :
1589             dns_server 127.0.0.1
1590             dns_server 127.0.0.1:53
1591             dns_server [127.0.0.1]:53
1592             dns_server [::1]:53
1593             dns_server fe80::1%lo0
1594             dns_server [fe80::1%lo0]:53
1595              
1596             In absence of I<dns_server> directives, the list of name servers is provided
1597             by Net::DNS module, which typically obtains the list from /etc/resolv.conf,
1598             but this may be platform dependent. Please consult the Net::DNS::Resolver
1599             documentation for details.
1600              
1601             =cut
1602              
1603             push (@cmds, {
1604             setting => 'dns_server',
1605             type => $CONF_TYPE_STRING,
1606             code => sub {
1607 1     1   4 my ($self, $key, $value, $line) = @_;
1608 1         4 my($address,$port); local($1,$2,$3);
  1         6  
1609 1 50       8 if ($value =~ /^(?: \[ ([^\]]*) \] | ([^:]*) ) : (\d+) \z/sx) {
    0          
1610 1 50       6 $address = defined $1 ? $1 : $2; $port = $3;
  1         2  
1611             } elsif ($value =~ /^(?: \[ ([^\]]*) \] |
1612             ([0-9A-F.:]+ (?: %[A-Z0-9._~-]* )? ) ) \z/six) {
1613 0 0       0 $address = defined $1 ? $1 : $2; $port = '53';
  0         0  
1614             } else {
1615 0         0 return $INVALID_VALUE;
1616             }
1617 1         3 my $scope = ''; # scoped IP address?
1618 1 50       4 $scope = $1 if $address =~ s/ ( % [A-Z0-9._~-]* ) \z//xsi;
1619 1         3 my $IP_ADDRESS = IP_ADDRESS; # IP_ADDRESS regexp does not handle scope
1620 1 50 33     45 if ($address =~ /$IP_ADDRESS/ && $port >= 1 && $port <= 65535) {
      33        
1621 1 50       6 $self->{dns_servers} = [] if !$self->{dns_servers};
1622             # checked, untainted, stored in a normalized form
1623 1         3 push(@{$self->{dns_servers}}, untaint_var("[$address$scope]:$port"));
  1         9  
1624             } else {
1625 0         0 return $INVALID_VALUE;
1626             }
1627             }
1628 81         1104 });
1629              
1630             =item clear_dns_servers
1631              
1632             Empty the list of explicitly configured DNS servers through a I<dns_server>
1633             directive, falling back to Net::DNS -supplied defaults.
1634              
1635             =cut
1636              
1637             push (@cmds, {
1638             setting => 'clear_dns_servers',
1639             type => $CONF_TYPE_NOARGS,
1640             code => sub {
1641 1     1   5 my ($self, $key, $value, $line) = @_;
1642 1 50 33     9 unless (!defined $value || $value eq '') {
1643 0         0 return $INVALID_VALUE;
1644             }
1645 1         16 undef $self->{dns_servers};
1646             }
1647 81         1193 });
1648              
1649             =item dns_local_ports_permit ranges...
1650              
1651             Add the specified ports or port ranges to the set of allowed port numbers
1652             that can be used as local port numbers when sending DNS queries to a resolver.
1653              
1654             The argument is a whitespace-separated or a comma-separated list of
1655             single port numbers n, or port number pairs (i.e. m-n) delimited by a '-',
1656             representing a range. Allowed port numbers are between 1 and 65535.
1657              
1658             Directives I<dns_local_ports_permit> and I<dns_local_ports_avoid> are processed
1659             in order in which they appear in configuration files. Each directive adds
1660             (or subtracts) its subsets of ports to a current set of available ports.
1661             Whatever is left in the set by the end of configuration processing
1662             is made available to a DNS resolving client code.
1663              
1664             If the resulting set of port numbers is empty (see also the directive
1665             I<dns_local_ports_none>), then SpamAssassin does not apply its ports
1666             randomization logic, but instead leaves the operating system to choose
1667             a suitable free local port number.
1668              
1669             The initial set consists of all port numbers in the range 1024-65535.
1670             Note that system config files already modify the set and remove all the
1671             IANA registered port numbers and some other ranges, so there is rarely
1672             a need to adjust the ranges by site-specific directives.
1673              
1674             See also directives I<dns_local_ports_avoid> and I<dns_local_ports_none>.
1675              
1676             =cut
1677              
1678             push (@cmds, {
1679             setting => 'dns_local_ports_permit',
1680             type => $CONF_TYPE_STRING,
1681             is_admin => 1,
1682             code => sub {
1683 0     0   0 my($self, $key, $value, $line) = @_;
1684 0         0 my(@port_ranges); local($1,$2);
  0         0  
1685 0         0 foreach my $range (split(/[ \t,]+/, $value)) {
1686 0 0       0 if ($range =~ /^(\d{1,5})\z/) {
    0          
1687             # don't allow adding a port number 0
1688 0 0 0     0 if ($1 < 1 || $1 > 65535) { return $INVALID_VALUE }
  0         0  
1689 0         0 push(@port_ranges, [$1,$1]);
1690             } elsif ($range =~ /^(\d{1,5})-(\d{1,5})\z/) {
1691 0 0 0     0 if ($1 < 1 || $1 > 65535) { return $INVALID_VALUE }
  0         0  
1692 0 0 0     0 if ($2 < 1 || $2 > 65535) { return $INVALID_VALUE }
  0         0  
1693 0         0 push(@port_ranges, [$1,$2]);
1694             } else {
1695 0         0 return $INVALID_VALUE;
1696             }
1697             }
1698 0         0 foreach my $p (@port_ranges) {
1699 0         0 undef $self->{dns_available_portscount}; # invalidate derived data
1700             set_ports_range(\$self->{dns_available_ports_bitset},
1701 0         0 $p->[0], $p->[1], 1);
1702             }
1703             }
1704 81         1290 });
1705              
1706             =item dns_local_ports_avoid ranges...
1707              
1708             Remove specified ports or port ranges from the set of allowed port numbers
1709             that can be used as local port numbers when sending DNS queries to a resolver.
1710              
1711             Please see directive I<dns_local_ports_permit> for details.
1712              
1713             =cut
1714              
1715             push (@cmds, {
1716             setting => 'dns_local_ports_avoid',
1717             type => $CONF_TYPE_STRING,
1718             is_admin => 1,
1719             code => sub {
1720 0     0   0 my($self, $key, $value, $line) = @_;
1721 0         0 my(@port_ranges); local($1,$2);
  0         0  
1722 0         0 foreach my $range (split(/[ \t,]+/, $value)) {
1723 0 0       0 if ($range =~ /^(\d{1,5})\z/) {
    0          
1724 0 0       0 if ($1 > 65535) { return $INVALID_VALUE }
  0         0  
1725             # don't mind clearing also the port number 0
1726 0         0 push(@port_ranges, [$1,$1]);
1727             } elsif ($range =~ /^(\d{1,5})-(\d{1,5})\z/) {
1728 0 0 0     0 if ($1 > 65535 || $2 > 65535) { return $INVALID_VALUE }
  0         0  
1729 0         0 push(@port_ranges, [$1,$2]);
1730             } else {
1731 0         0 return $INVALID_VALUE;
1732             }
1733             }
1734 0         0 foreach my $p (@port_ranges) {
1735 0         0 undef $self->{dns_available_portscount}; # invalidate derived data
1736             set_ports_range(\$self->{dns_available_ports_bitset},
1737 0         0 $p->[0], $p->[1], 0);
1738             }
1739             }
1740 81         1181 });
1741              
1742             =item dns_local_ports_none
1743              
1744             Is a fast shorthand for:
1745              
1746             dns_local_ports_avoid 1-65535
1747              
1748             leaving the set of available DNS query local port numbers empty. In all
1749             respects (apart from speed) it is equivalent to the shown directive, and can
1750             be freely mixed with I<dns_local_ports_permit> and I<dns_local_ports_avoid>.
1751              
1752             If the resulting set of port numbers is empty, then SpamAssassin does not
1753             apply its ports randomization logic, but instead leaves the operating system
1754             to choose a suitable free local port number.
1755              
1756             See also directives I<dns_local_ports_permit> and I<dns_local_ports_avoid>.
1757              
1758             =cut
1759              
1760             push (@cmds, {
1761             setting => 'dns_local_ports_none',
1762             type => $CONF_TYPE_NOARGS,
1763             is_admin => 1,
1764             code => sub {
1765 0     0   0 my ($self, $key, $value, $line) = @_;
1766 0 0 0     0 unless (!defined $value || $value eq '') {
1767 0         0 return $INVALID_VALUE;
1768             }
1769 0         0 undef $self->{dns_available_portscount}; # invalidate derived data
1770 0         0 wipe_ports_range(\$self->{dns_available_ports_bitset}, 0);
1771             }
1772 81         1126 });
1773              
1774             =item dns_test_interval n (default: 600 seconds)
1775              
1776             If dns_available is set to I<test>, the dns_test_interval time in number
1777             of seconds will tell SpamAssassin how often to retest for working DNS.
1778             A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
1779             indicating seconds (default), minutes, hours, days, weeks).
1780              
1781             =cut
1782              
1783 81         604 push (@cmds, {
1784             setting => 'dns_test_interval',
1785             default => 600,
1786             type => $CONF_TYPE_DURATION,
1787             });
1788              
1789             =item dns_options opts (default: norotate, nodns0x20, edns=4096)
1790              
1791             Provides a (whitespace or comma -separated) list of options applying
1792             to DNS resolving. Available options are: I<rotate>, I<dns0x20> and
1793             I<edns> (or I<edns0>). Option name may be negated by prepending a I<no>
1794             (e.g. I<norotate>, I<NoEDNS>) to counteract a previously enabled option.
1795             Option names are not case-sensitive. The I<dns_options> directive may
1796             appear in configuration files multiple times, the last setting prevails.
1797              
1798             Option I<edns> (or I<edns0>) may take a value which specifies a requestor's
1799             acceptable UDP payload size according to EDNS0 specifications (RFC 6891,
1800             ex RFC 2671) e.g. I<edns=4096>. When EDNS0 is off (I<noedns> or I<edns=512>)
1801             a traditional implied UDP payload size is 512 bytes, which is also a minimum
1802             allowed value for this option. When the option is specified but a value
1803             is not provided, a conservative default of 1220 bytes is implied. It is
1804             recommended to keep I<edns> enabled when using a local recursive DNS server
1805             which supports EDNS0 (like most modern DNS servers do), a suitable setting
1806             in this case is I<edns=4096>, which is also a default. Allowing UDP payload
1807             size larger than 512 bytes can avoid truncation of resource records in large
1808             DNS responses (like in TXT records of some SPF and DKIM responses, or when
1809             an unreasonable number of A records is published by some domain). The option
1810             should be disabled when a recursive DNS server is only reachable through
1811             non-RFC 6891 compliant middleboxes (such as some old-fashioned firewalls)
1812             which ban DNS UDP payload sizes larger than 512 bytes. A suitable value
1813             when a non-local recursive DNS server is used and a middlebox B<does> allow
1814             EDNS0 but blocks fragmented IP packets is perhaps 1220 bytes, allowing a
1815             DNS UDP packet to fit within a single IP packet in most cases (a slightly
1816             less conservative range would be 1280-1410 bytes).
1817              
1818             Option I<rotate> causes SpamAssassin to choose a DNS server at random
1819             from all servers listed in C</etc/resolv.conf> every I<dns_test_interval>
1820             seconds, effectively spreading the load over all currently available DNS
1821             servers when there are many spamd workers.
1822              
1823             Option I<dns0x20> enables randomization of letters in a DNS query label
1824             according to draft-vixie-dnsext-dns0x20, decreasing a chance of collisions
1825             of responses (by chance or by a malicious intent) by increasing spread
1826             as provided by a 16-bit query ID and up to 16 bits of a port number,
1827             with additional bits as encoded by flipping case (upper/lower) of letters
1828             in a query. The number of additional random bits corresponds to the number
1829             of letters in a query label. Should work reliably with all mainstream
1830             DNS servers - do not turn on if you see frequent info messages
1831             "dns: no callback for id:" in the log, or if RBL or URIDNS lookups
1832             do not work for no apparent reason.
1833              
1834             =cut
1835              
1836             push (@cmds, {
1837             setting => 'dns_options',
1838             type => $CONF_TYPE_HASH_KEY_VALUE,
1839             code => sub {
1840 0     0   0 my ($self, $key, $value, $line) = @_;
1841 0         0 foreach my $option (split (/[\s,]+/, lc $value)) {
1842 0         0 local($1,$2);
1843 0 0       0 if ($option =~ /^no(rotate|dns0x20)\z/) {
    0          
    0          
    0          
1844 0         0 $self->{dns_options}->{$1} = 0;
1845             } elsif ($option =~ /^no(edns)0?\z/) {
1846 0         0 $self->{dns_options}->{$1} = 0;
1847             } elsif ($option =~ /^(rotate|dns0x20)\z/) {
1848 0         0 $self->{dns_options}->{$1} = 1;
1849             } elsif ($option =~ /^(edns)0? (?: = (\d+) )? \z/x) {
1850             # RFC 6891 (ex RFC 2671) - EDNS0, value is a requestor's UDP payload
1851             # size, defaults to some UDP packet size likely to fit into a single
1852             # IP packet which is more likely to pass firewalls which choke on IP
1853             # fragments. RFC 2460: min MTU is 1280 for IPv6, minus 40 bytes for
1854             # basic header, yielding 1240. RFC 3226 prescribes a min of 1220 for
1855             # RFC 2535 compliant servers. RFC 6891: choosing between 1280 and
1856             # 1410 bytes for IP (v4 or v6) over Ethernet would be reasonable.
1857             #
1858 0   0     0 $self->{dns_options}->{$1} = $2 || 1220;
1859 0 0       0 return $INVALID_VALUE if $self->{dns_options}->{$1} < 512;
1860             } else {
1861 0         0 return $INVALID_VALUE;
1862             }
1863             }
1864             }
1865 81         915 });
1866              
1867             =item dns_query_restriction (allow|deny) domain1 domain2 ...
1868              
1869             Option allows disabling of rules which would result in a DNS query to one of
1870             the listed domains. The first argument must be a literal C<allow> or C<deny>,
1871             remaining arguments are domain names.
1872              
1873             Most DNS queries (with some exceptions) are subject to dns_query_restriction.
1874             A domain to be queried is successively stripped-off of its leading labels
1875             (thus yielding a series of its parent domains), and on each iteration a
1876             check is made against an associative array generated by dns_query_restriction
1877             options. Search stops at the first match (i.e. the tightest match), and the
1878             matching entry with its C<allow> or C<deny> value then controls whether a
1879             DNS query is allowed to be launched.
1880              
1881             If no match is found an implicit default is to allow a query. The purpose of
1882             an explicit C<allow> entry is to be able to override a previously configured
1883             C<deny> on the same domain or to override an entry (possibly yet to be
1884             configured in subsequent config directives) on one of its parent domains.
1885             Thus an 'allow zen.spamhaus.org' with a 'deny spamhaus.org' would permit
1886             DNS queries on a specific DNS BL zone but deny queries to other zones under
1887             the same parent domain.
1888              
1889             Domains are matched case-insensitively, no wildcards are recognized,
1890             there should be no leading or trailing dot.
1891              
1892             Specifying a block on querying a domain name has a similar effect as setting
1893             a score of corresponding DNSBL and URIBL rules to zero, and can be a handy
1894             alternative to hunting for such rules when a site policy does not allow
1895             certain DNS block lists to be queried.
1896              
1897             Example:
1898             dns_query_restriction deny dnswl.org surbl.org
1899             dns_query_restriction allow zen.spamhaus.org
1900             dns_query_restriction deny spamhaus.org mailspike.net spamcop.net
1901              
1902             =cut
1903              
1904             push (@cmds, {
1905             setting => 'dns_query_restriction',
1906             type => $CONF_TYPE_STRING,
1907             code => sub {
1908 0     0   0 my ($self, $key, $value, $line) = @_;
1909 0 0 0     0 defined $value && $value =~ s/^(allow|deny)\s+//i
1910             or return $INVALID_VALUE;
1911 0 0       0 my $blocked = lc($1) eq 'deny' ? 1 : 0;
1912 0         0 foreach my $domain (split(' ', $value)) {
1913 0         0 $domain =~ s/^\.//; $domain =~ s/\.\z//; # strip dots
  0         0  
1914 0         0 $self->{dns_query_blocked}{lc $domain} = $blocked;
1915             }
1916             }
1917 81         1206 });
1918              
1919             =item clear_dns_query_restriction
1920              
1921             The option removes any entries entered by previous 'dns_query_restriction'
1922             options, leaving the list empty, i.e. allowing DNS queries for any domain
1923             (including any DNS BL zone).
1924              
1925             =cut
1926              
1927             push (@cmds, {
1928             setting => 'clear_dns_query_restriction',
1929             aliases => ['clear_dns_query_restrictions'],
1930             type => $CONF_TYPE_NOARGS,
1931             code => sub {
1932 0     0   0 my ($self, $key, $value, $line) = @_;
1933 0 0 0     0 return $INVALID_VALUE if defined $value && $value ne '';
1934 0         0 delete $self->{dns_query_blocked};
1935             }
1936 81         1140 });
1937              
1938             =back
1939              
1940             =head2 LEARNING OPTIONS
1941              
1942             =over 4
1943              
1944             =item use_learner ( 0 | 1 ) (default: 1)
1945              
1946             Whether to use any machine-learning classifiers with SpamAssassin, such as the
1947             default 'BAYES_*' rules. Setting this to 0 will disable use of any and all
1948             human-trained classifiers.
1949              
1950             =cut
1951              
1952 81         462 push (@cmds, {
1953             setting => 'use_learner',
1954             default => 1,
1955             type => $CONF_TYPE_BOOL,
1956             });
1957              
1958             =item use_bayes ( 0 | 1 ) (default: 1)
1959              
1960             Whether to use the naive-Bayesian-style classifier built into
1961             SpamAssassin. This is a master on/off switch for all Bayes-related
1962             operations.
1963              
1964             =cut
1965              
1966 81         347 push (@cmds, {
1967             setting => 'use_bayes',
1968             default => 1,
1969             type => $CONF_TYPE_BOOL,
1970             });
1971              
1972             =item use_bayes_rules ( 0 | 1 ) (default: 1)
1973              
1974             Whether to use rules using the naive-Bayesian-style classifier built
1975             into SpamAssassin. This allows you to disable the rules while leaving
1976             auto and manual learning enabled.
1977              
1978             =cut
1979              
1980 81         394 push (@cmds, {
1981             setting => 'use_bayes_rules',
1982             default => 1,
1983             type => $CONF_TYPE_BOOL,
1984             });
1985              
1986             =item bayes_auto_learn ( 0 | 1 ) (default: 1)
1987              
1988             Whether SpamAssassin should automatically feed high-scoring mails (or
1989             low-scoring mails, for non-spam) into its learning systems. The only
1990             learning system supported currently is a naive-Bayesian-style classifier.
1991              
1992             See the documentation for the
1993             C<Mail::SpamAssassin::Plugin::AutoLearnThreshold> plugin module
1994             for details on how Bayes auto-learning is implemented by default.
1995              
1996             =cut
1997              
1998 81         384 push (@cmds, {
1999             setting => 'bayes_auto_learn',
2000             default => 1,
2001             type => $CONF_TYPE_BOOL,
2002             });
2003              
2004             =item bayes_token_sources (default: header visible invisible uri)
2005              
2006             Controls which sources in a mail message can contribute tokens (e.g. words,
2007             phrases, etc.) to a Bayes classifier. The argument is a space-separated list
2008             of keywords: I<header>, I<visible>, I<invisible>, I<uri>, I<mimepart>, each
2009             of which may be prefixed by a I<no> to indicate its exclusion. Additionally
2010             two reserved keywords are allowed: I<all> and I<none> (or: I<noall>). The list
2011             of keywords is processed sequentially: a keyword I<all> adds all available
2012             keywords to a set being built, a I<none> or I<noall> clears the set, other
2013             non-negated keywords are added to the set, and negated keywords are removed
2014             from the set. Keywords are case-insensitive.
2015              
2016             The default set is: I<header> I<visible> I<invisible> I<uri>, which is
2017             equivalent for example to: I<All> I<NoMIMEpart>. The reason why I<mimepart>
2018             is not currently in a default set is that it is a newer source (introduced
2019             with SpamAssassin version 3.4.1) and not much experience has yet been gathered
2020             regarding its usefulness.
2021              
2022             See also option C<bayes_ignore_header> for a fine-grained control on individual
2023             header fields under the umbrella of a more general keyword I<header> here.
2024              
2025             Keywords imply the following data sources:
2026              
2027             =over 4
2028              
2029             =item I<header> - tokens collected from a message header section
2030              
2031             =item I<visible> - words from visible text (plain or HTML) in a message body
2032              
2033             =item I<invisible> - hidden/invisible text in HTML parts of a message body
2034              
2035             =item I<uri> - URIs collected from a message body
2036              
2037             =item I<mimepart> - digests (hashes) of all MIME parts (textual or non-textual) of a message, computed after Base64 and quoted-printable decoding, suffixed by their Content-Type
2038              
2039             =item I<all> - adds all the above keywords to the set being assembled
2040              
2041             =item I<none> or I<noall> - removes all keywords from the set
2042              
2043             =back
2044              
2045             The C<bayes_token_sources> directive may appear multiple times, its keywords
2046             are interpreted sequentially, adding or removing items from the final set
2047             as they appear in their order in C<bayes_token_sources> directive(s).
2048              
2049             =cut
2050              
2051             push (@cmds, {
2052             setting => 'bayes_token_sources',
2053             default => { map(($_,1), qw(header visible invisible uri)) }, # mimepart
2054             type => $CONF_TYPE_HASH_KEY_VALUE,
2055             code => sub {
2056 0     0   0 my ($self, $key, $value, $line) = @_;
2057 0 0       0 return $MISSING_REQUIRED_VALUE if $value eq '';
2058 0   0     0 my $h = ($self->{bayes_token_sources} ||= {});
2059 0         0 my %all_kw = map(($_,1), qw(header visible invisible uri mimepart));
2060 0         0 foreach (split(' ', lc $value)) {
2061 0 0 0     0 if (/^(none|noall)\z/) {
    0          
    0          
2062 0         0 %$h = ();
2063             } elsif ($_ eq 'all') {
2064 0         0 %$h = %all_kw;
2065             } elsif (/^(no)?(.+)\z/s && exists $all_kw{$2}) {
2066 0 0       0 $h->{$2} = defined $1 ? 0 : 1;
2067             } else {
2068 0         0 return $INVALID_VALUE;
2069             }
2070             }
2071             }
2072 81         2071 });
2073              
2074             =item bayes_ignore_header header_name
2075              
2076             If you receive mail filtered by upstream mail systems, like
2077             a spam-filtering ISP or mailing list, and that service adds
2078             new headers (as most of them do), these headers may provide
2079             inappropriate cues to the Bayesian classifier, allowing it
2080             to take a "short cut". To avoid this, list the headers using this
2081             setting. Example:
2082              
2083             bayes_ignore_header X-Upstream-Spamfilter
2084             bayes_ignore_header X-Upstream-SomethingElse
2085              
2086             =cut
2087              
2088             push (@cmds, {
2089             setting => 'bayes_ignore_header',
2090             default => [],
2091             type => $CONF_TYPE_STRINGLIST,
2092             code => sub {
2093 0     0   0 my ($self, $key, $value, $line) = @_;
2094 0 0       0 if ($value eq '') {
2095 0         0 return $MISSING_REQUIRED_VALUE;
2096             }
2097 0         0 push (@{$self->{bayes_ignore_headers}}, split(/\s+/, $value));
  0         0  
2098             }
2099 81         1281 });
2100              
2101             =item bayes_ignore_from user@example.com
2102              
2103             Bayesian classification and autolearning will not be performed on mail
2104             from the listed addresses. Program C<sa-learn> will also ignore the
2105             listed addresses if it is invoked using the C<--use-ignores> option.
2106             One or more addresses can be listed, see C<whitelist_from>.
2107              
2108             Spam messages from certain senders may contain many words that
2109             frequently occur in ham. For example, one might read messages from a
2110             preferred bookstore but also get unwanted spam messages from other
2111             bookstores. If the unwanted messages are learned as spam then any
2112             messages discussing books, including the preferred bookstore and
2113             antiquarian messages would be in danger of being marked as spam. The
2114             addresses of the annoying bookstores would be listed. (Assuming they
2115             were halfway legitimate and didn't send you mail through myriad
2116             affiliates.)
2117              
2118             Those who have pieces of spam in legitimate messages or otherwise
2119             receive ham messages containing potentially spammy words might fear
2120             that some spam messages might be in danger of being marked as ham.
2121             The addresses of the spam mailing lists, correspondents, etc. would
2122             be listed.
2123              
2124             =cut
2125              
2126 81         549 push (@cmds, {
2127             setting => 'bayes_ignore_from',
2128             type => $CONF_TYPE_ADDRLIST,
2129             });
2130              
2131             =item bayes_ignore_to user@example.com
2132              
2133             Bayesian classification and autolearning will not be performed on mail
2134             to the listed addresses. See C<bayes_ignore_from> for details.
2135              
2136             =cut
2137              
2138 81         534 push (@cmds, {
2139             setting => 'bayes_ignore_to',
2140             type => $CONF_TYPE_ADDRLIST,
2141             });
2142              
2143             =item bayes_min_ham_num (Default: 200)
2144              
2145             =item bayes_min_spam_num (Default: 200)
2146              
2147             To be accurate, the Bayes system does not activate until a certain number of
2148             ham (non-spam) and spam have been learned. The default is 200 each of ham and
2149             spam, but you can tune these up or down with these two settings.
2150              
2151             =cut
2152              
2153 81         456 push (@cmds, {
2154             setting => 'bayes_min_ham_num',
2155             default => 200,
2156             type => $CONF_TYPE_NUMERIC,
2157             });
2158 81         424 push (@cmds, {
2159             setting => 'bayes_min_spam_num',
2160             default => 200,
2161             type => $CONF_TYPE_NUMERIC,
2162             });
2163              
2164             =item bayes_learn_during_report (Default: 1)
2165              
2166             The Bayes system will, by default, learn any reported messages
2167             (C<spamassassin -r>) as spam. If you do not want this to happen, set
2168             this option to 0.
2169              
2170             =cut
2171              
2172 81         392 push (@cmds, {
2173             setting => 'bayes_learn_during_report',
2174             default => 1,
2175             type => $CONF_TYPE_BOOL,
2176             });
2177              
2178             =item bayes_sql_override_username
2179              
2180             Used by BayesStore::SQL storage implementation.
2181              
2182             If this option is set the BayesStore::SQL module will override the set
2183             username with the value given. This could be useful for implementing global or
2184             group bayes databases.
2185              
2186             =cut
2187              
2188 81         388 push (@cmds, {
2189             setting => 'bayes_sql_override_username',
2190             default => '',
2191             type => $CONF_TYPE_STRING,
2192             });
2193              
2194             =item bayes_use_hapaxes (default: 1)
2195              
2196             Should the Bayesian classifier use hapaxes (words/tokens that occur only
2197             once) when classifying? This produces significantly better hit-rates.
2198              
2199             =cut
2200              
2201 81         339 push (@cmds, {
2202             setting => 'bayes_use_hapaxes',
2203             default => 1,
2204             type => $CONF_TYPE_BOOL,
2205             });
2206              
2207             =item bayes_journal_max_size (default: 102400)
2208              
2209             SpamAssassin will opportunistically sync the journal and the database.
2210             It will do so once a day, but will sync more often if the journal file
2211             size goes above this setting, in bytes. If set to 0, opportunistic
2212             syncing will not occur.
2213              
2214             =cut
2215              
2216 81         341 push (@cmds, {
2217             setting => 'bayes_journal_max_size',
2218             default => 102400,
2219             type => $CONF_TYPE_NUMERIC,
2220             });
2221              
2222             =item bayes_expiry_max_db_size (default: 150000)
2223              
2224             What should be the maximum size of the Bayes tokens database? When expiry
2225             occurs, the Bayes system will keep either 75% of the maximum value, or
2226             100,000 tokens, whichever has a larger value. 150,000 tokens is roughly
2227             equivalent to an 8MB database file.
2228              
2229             =cut
2230              
2231 81         313 push (@cmds, {
2232             setting => 'bayes_expiry_max_db_size',
2233             default => 150000,
2234             type => $CONF_TYPE_NUMERIC,
2235             });
2236              
2237             =item bayes_auto_expire (default: 1)
2238              
2239             If enabled, the Bayes system will try to automatically expire old tokens
2240             from the database. Auto-expiry occurs when the number of tokens in the
2241             database surpasses the bayes_expiry_max_db_size value. If a bayes datastore
2242             backend does not implement individual key/value expirations, the setting
2243             is silently ignored.
2244              
2245             =cut
2246              
2247 81         393 push (@cmds, {
2248             setting => 'bayes_auto_expire',
2249             default => 1,
2250             type => $CONF_TYPE_BOOL,
2251             });
2252              
2253             =item bayes_token_ttl (default: 3w, i.e. 3 weeks)
2254              
2255             Time-to-live / expiration time in seconds for tokens kept in a Bayes database.
2256             A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
2257             indicating seconds (default), minutes, hours, days, weeks).
2258              
2259             If bayes_auto_expire is true and a Bayes datastore backend supports it
2260             (currently only Redis), this setting controls deletion of expired tokens
2261             from a bayes database. The value is observed on a best-effort basis, exact
2262             timing promises are not necessarily kept. If a bayes datastore backend
2263             does not implement individual key/value expirations, the setting is silently
2264             ignored.
2265              
2266             =cut
2267              
2268 81         433 push (@cmds, {
2269             setting => 'bayes_token_ttl',
2270             default => 3*7*24*60*60, # seconds (3 weeks)
2271             type => $CONF_TYPE_DURATION,
2272             });
2273              
2274             =item bayes_seen_ttl (default: 8d, i.e. 8 days)
2275              
2276             Time-to-live / expiration time in seconds for 'seen' entries
2277             (i.e. mail message digests with their status) kept in a Bayes database.
2278             A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
2279             indicating seconds (default), minutes, hours, days, weeks).
2280              
2281             If bayes_auto_expire is true and a Bayes datastore backend supports it
2282             (currently only Redis), this setting controls deletion of expired 'seen'
2283             entries from a bayes database. The value is observed on a best-effort basis,
2284             exact timing promises are not necessarily kept. If a bayes datastore backend
2285             does not implement individual key/value expirations, the setting is silently
2286             ignored.
2287              
2288             =cut
2289              
2290 81         319 push (@cmds, {
2291             setting => 'bayes_seen_ttl',
2292             default => 8*24*60*60, # seconds (8 days)
2293             type => $CONF_TYPE_DURATION,
2294             });
2295              
2296             =item bayes_learn_to_journal (default: 0)
2297              
2298             If this option is set, whenever SpamAssassin does Bayes learning, it
2299             will put the information into the journal instead of directly into the
2300             database. This lowers contention for locking the database to execute
2301             an update, but will also cause more access to the journal and cause a
2302             delay before the updates are actually committed to the Bayes database.
2303              
2304             =cut
2305              
2306 81         402 push (@cmds, {
2307             setting => 'bayes_learn_to_journal',
2308             default => 0,
2309             type => $CONF_TYPE_BOOL,
2310             });
2311              
2312             =back
2313              
2314             =head2 MISCELLANEOUS OPTIONS
2315              
2316             =over 4
2317              
2318             =item time_limit n (default: 300)
2319              
2320             Specifies a limit on elapsed time in seconds that SpamAssassin is allowed
2321             to spend before providing a result. The value may be fractional and must
2322             not be negative, zero is interpreted as unlimited. The default is 300
2323             seconds for consistency with the spamd default setting of --timeout-child .
2324              
2325             This is a best-effort advisory setting, processing will not be abruptly
2326             aborted at an arbitrary point in processing when the time limit is exceeded,
2327             but only on reaching one of the locations in the program flow equipped with a
2328             time test. Currently equipped with the test are the main checking loop,
2329             asynchronous DNS lookups, plugins which are calling external programs.
2330             Rule evaluation is guarded by starting a timer (alarm) on each set of
2331             compiled rules.
2332              
2333             When a message is passed to Mail::SpamAssassin::parse, a deadline time
2334             is established as a sum of current time and the C<time_limit> setting.
2335              
2336             This deadline may also be specified by a caller through an option
2337             'master_deadline' in $suppl_attrib on a call to parse(), possibly providing
2338             a more accurate deadline taking into account past and expected future
2339             processing of a message in a mail filtering setup. If both the config
2340             option as well as a 'master_deadline' option in a call are provided,
2341             the shorter time limit of the two is used (since version 3.3.2).
2342             Note that spamd (and possibly third-party callers of SpamAssassin) will
2343             supply the 'master_deadline' option in a call based on its --timeout-child
2344             option (or equivalent), unlike the command line C<spamassassin>, which has
2345             no such command line option.
2346              
2347             When a time limit is exceeded, most of the remaining tests will be skipped,
2348             as well as auto-learning. Whatever tests fired so far will determine the
2349             final score. The behaviour is similar to short-circuiting with attribute 'on',
2350             as implemented by a Shortcircuit plugin. A synthetic hit on a rule named
2351             TIME_LIMIT_EXCEEDED with a near-zero default score is generated, so that
2352             the report will reflect the event. A score for TIME_LIMIT_EXCEEDED may
2353             be provided explicitly in a configuration file, for example to achieve
2354             whitelisting or blacklisting effect for messages with long processing times.
2355              
2356             The C<time_limit> option is a useful protection against excessive processing
2357             time on certain degenerate or unusually long or complex mail messages, as well
2358             as against some DoS attacks. It is also needed in time-critical pre-queue
2359             filtering setups (e.g. milter, proxy, integration with MTA), where message
2360             processing must finish before a SMTP client times out. RFC 5321 prescribes
2361             in section 4.5.3.2.6 the 'DATA Termination' time limit of 10 minutes,
2362             although it is not unusual to see some SMTP clients abort sooner on waiting
2363             for a response. A sensible C<time_limit> for a pre-queue filtering setup is
2364             maybe 50 seconds, assuming that clients are willing to wait at least a minute.
2365              
2366             =cut
2367              
2368 81         275 push (@cmds, {
2369             setting => 'time_limit',
2370             default => 300,
2371             type => $CONF_TYPE_DURATION,
2372             });
2373              
2374             =item lock_method type
2375              
2376             Select the file-locking method used to protect database files on-disk. By
2377             default, SpamAssassin uses an NFS-safe locking method on UNIX; however, if you
2378             are sure that the database files you'll be using for Bayes and AWL storage will
2379             never be accessed over NFS, a non-NFS-safe locking system can be selected.
2380              
2381             This will be quite a bit faster, but may risk file corruption if the files are
2382             ever accessed by multiple clients at once, and one or more of them is accessing
2383             them through an NFS filesystem.
2384              
2385             Note that different platforms require different locking systems.
2386              
2387             The supported locking systems for C<type> are as follows:
2388              
2389             =over 4
2390              
2391             =item I<nfssafe> - an NFS-safe locking system
2392              
2393             =item I<flock> - simple UNIX C<flock()> locking
2394              
2395             =item I<win32> - Win32 locking using C<sysopen (..., O_CREAT|O_EXCL)>.
2396              
2397             =back
2398              
2399             nfssafe and flock are only available on UNIX, and win32 is only available
2400             on Windows. By default, SpamAssassin will choose either nfssafe or
2401             win32 depending on the platform in use.
2402              
2403             =cut
2404              
2405             push (@cmds, {
2406             setting => 'lock_method',
2407             default => '',
2408             type => $CONF_TYPE_STRING,
2409             code => sub {
2410 0     0   0 my ($self, $key, $value, $line) = @_;
2411 0 0       0 if ($value !~ /^(nfssafe|flock|win32)$/) {
2412 0         0 return $INVALID_VALUE;
2413             }
2414            
2415 0         0 $self->{lock_method} = $value;
2416             # recreate the locker
2417 0         0 $self->{main}->create_locker();
2418             }
2419 81         1117 });
2420              
2421             =item fold_headers ( 0 | 1 ) (default: 1)
2422              
2423             By default, headers added by SpamAssassin will be whitespace folded.
2424             In other words, they will be broken up into multiple lines instead of
2425             one very long one and each continuation line will have a tabulator
2426             prepended to mark it as a continuation of the preceding one.
2427              
2428             The automatic wrapping can be disabled here. Note that this can generate very
2429             long lines. RFC 2822 required that header lines do not exceed 998 characters
2430             (not counting the final CRLF).
2431              
2432             =cut
2433              
2434 81         604 push (@cmds, {
2435             setting => 'fold_headers',
2436             default => 1,
2437             type => $CONF_TYPE_BOOL,
2438             });
2439              
2440             =item report_safe_copy_headers header_name ...
2441              
2442             If using C<report_safe>, a few of the headers from the original message
2443             are copied into the wrapper header (From, To, Cc, Subject, Date, etc.)
2444             If you want to have other headers copied as well, you can add them
2445             using this option. You can specify multiple headers on the same line,
2446             separated by spaces, or you can just use multiple lines.
2447              
2448             =cut
2449              
2450             push (@cmds, {
2451             setting => 'report_safe_copy_headers',
2452             default => [],
2453             type => $CONF_TYPE_STRINGLIST,
2454             code => sub {
2455 0     0   0 my ($self, $key, $value, $line) = @_;
2456 0 0       0 if ($value eq '') {
2457 0         0 return $MISSING_REQUIRED_VALUE;
2458             }
2459 0         0 push(@{$self->{report_safe_copy_headers}}, split(/\s+/, $value));
  0         0  
2460             }
2461 81         885 });
2462              
2463             =item envelope_sender_header Name-Of-Header
2464              
2465             SpamAssassin will attempt to discover the address used in the 'MAIL FROM:'
2466             phase of the SMTP transaction that delivered this message, if this data has
2467             been made available by the SMTP server. This is used in the C<EnvelopeFrom>
2468             pseudo-header, and for various rules such as SPF checking.
2469              
2470             By default, various MTAs will use different headers, such as the following:
2471              
2472             X-Envelope-From
2473             Envelope-Sender
2474             X-Sender
2475             Return-Path
2476              
2477             SpamAssassin will attempt to use these, if some heuristics (such as the header
2478             placement in the message, or the absence of fetchmail signatures) appear to
2479             indicate that they are safe to use. However, it may choose the wrong headers
2480             in some mailserver configurations. (More discussion of this can be found
2481             in bug 2142 and bug 4747 in the SpamAssassin BugZilla.)
2482              
2483             To avoid this heuristic failure, the C<envelope_sender_header> setting may be
2484             helpful. Name the header that your MTA or MDA adds to messages containing the
2485             address used at the MAIL FROM step of the SMTP transaction.
2486              
2487             If the header in question contains C<E<lt>> or C<E<gt>> characters at the start
2488             and end of the email address in the right-hand side, as in the SMTP
2489             transaction, these will be stripped.
2490              
2491             If the header is not found in a message, or if its value does not contain an
2492             C<@> sign, SpamAssassin will issue a warning in the logs and fall back to its
2493             default heuristics.
2494              
2495             (Note for MTA developers: we would prefer if the use of a single header be
2496             avoided in future, since that precludes 'downstream' spam scanning.
2497             C<http://wiki.apache.org/spamassassin/EnvelopeSenderInReceived> details a
2498             better proposal, storing the envelope sender at each hop in the C<Received>
2499             header.)
2500              
2501             example:
2502              
2503             envelope_sender_header X-SA-Exim-Mail-From
2504              
2505             =cut
2506              
2507 81         504 push (@cmds, {
2508             setting => 'envelope_sender_header',
2509             default => undef,
2510             type => $CONF_TYPE_STRING,
2511             });
2512              
2513             =item describe SYMBOLIC_TEST_NAME description ...
2514              
2515             Used to describe a test. This text is shown to users in the detailed report.
2516              
2517             Note that test names which begin with '__' are reserved for meta-match
2518             sub-rules, and are not scored or listed in the 'tests hit' reports.
2519              
2520             Also note that by convention, rule descriptions should be limited in
2521             length to no more than 50 characters.
2522              
2523             =cut
2524              
2525 81         608 push (@cmds, {
2526             command => 'describe',
2527             setting => 'descriptions',
2528             is_frequent => 1,
2529             type => $CONF_TYPE_HASH_KEY_VALUE,
2530             });
2531              
2532             =item report_charset CHARSET (default: unset)
2533              
2534             Set the MIME Content-Type charset used for the text/plain report which
2535             is attached to spam mail messages.
2536              
2537             =cut
2538              
2539 81         482 push (@cmds, {
                 # Registers the 'report_charset' setting: a string value
                 # ($CONF_TYPE_STRING) that defaults to the empty string.
2540             setting => 'report_charset',
2541             default => '',
2542             type => $CONF_TYPE_STRING,
2543             });
2544              
2545             =item report ...some text for a report...
2546              
2547             Set the report template which is attached to spam mail messages. See the
2548             C<10_default_prefs.cf> configuration file in C</usr/share/spamassassin> for an
2549             example.
2550              
2551             If you change this, try to keep it under 78 columns. Each C<report>
2552             line appends to the existing template, so use C<clear_report_template>
2553             to restart.
2554              
2555             Tags can be included as explained above.
2556              
2557             =cut
2558              
2559 81         434 push (@cmds, {
                 # Registers the 'report' command. Its text is stored under the
                 # 'report_template' setting, which is a template-typed value
                 # ($CONF_TYPE_TEMPLATE) starting out as the empty string.
2560             command => 'report',
2561             setting => 'report_template',
2562             default => '',
2563             type => $CONF_TYPE_TEMPLATE,
2564             });
2565              
2566             =item clear_report_template
2567              
2568             Clear the report template.
2569              
2570             =cut
2571              
2572 81         536 push (@cmds, {
                 # Registers the argument-less 'clear_report_template' command
                 # ($CONF_TYPE_NOARGS). It targets the same 'report_template'
                 # setting as 'report' and delegates to the parser's
                 # set_template_clear routine.
                 # NOTE(review): set_template_clear is defined outside this file
                 # section; presumably it empties the named template -- confirm.
2573             command => 'clear_report_template',
2574             setting => 'report_template',
2575             type => $CONF_TYPE_NOARGS,
2576             code => \&Mail::SpamAssassin::Conf::Parser::set_template_clear
2577             });
2578              
2579             =item report_contact ...text of contact address...
2580              
2581             Set what _CONTACTADDRESS_ is replaced with in the above report text.
2582             By default, this is 'the administrator of that system', since the hostname
2583             of the system the scanner is running on is also included.
2584              
2585             =cut
2586              
2587 81         381 push (@cmds, {
                 # Registers the 'report_contact' setting: a string value
                 # ($CONF_TYPE_STRING) defaulting to
                 # 'the administrator of that system'.
2588             setting => 'report_contact',
2589             default => 'the administrator of that system',
2590             type => $CONF_TYPE_STRING,
2591             });
2592              
2593             =item report_hostname ...hostname to use...
2594              
2595             Set what _HOSTNAME_ is replaced with in the above report text.
2596             By default, this is determined dynamically as whatever the host running
2597             SpamAssassin calls itself.
2598              
2599             =cut
2600              
2601 81         367 push (@cmds, {
                 # Registers the 'report_hostname' setting: a string value
                 # ($CONF_TYPE_STRING). The stored default is the empty string.
2602             setting => 'report_hostname',
2603             default => '',
2604             type => $CONF_TYPE_STRING,
2605             });
2606              
2607             =item unsafe_report ...some text for a report...
2608              
2609             Set the report template which is attached to spam mail messages which contain a
2610             non-text/plain part. See the C<10_default_prefs.cf> configuration file in
2611             C</usr/share/spamassassin> for an example.
2612              
2613             Each C<unsafe_report> line appends to the existing template, so use
2614             C<clear_unsafe_report_template> to restart.
2615              
2616             Tags can be used in this template (see above for details).
2617              
2618             =cut
2619              
2620 81         372 push (@cmds, {
                 # Registers the 'unsafe_report' command. Its text is stored under
                 # the 'unsafe_report_template' setting, a template-typed value
                 # ($CONF_TYPE_TEMPLATE) starting out as the empty string.
2621             command => 'unsafe_report',
2622             setting => 'unsafe_report_template',
2623             default => '',
2624             type => $CONF_TYPE_TEMPLATE,
2625             });
2626              
2627             =item clear_unsafe_report_template
2628              
2629             Clear the unsafe_report template.
2630              
2631             =cut
2632              
2633 81         436 push (@cmds, {
                 # Registers the argument-less 'clear_unsafe_report_template'
                 # command ($CONF_TYPE_NOARGS). It targets the
                 # 'unsafe_report_template' setting and delegates to the parser's
                 # set_template_clear routine.
                 # NOTE(review): set_template_clear is defined outside this file
                 # section; presumably it empties the named template -- confirm.
2634             command => 'clear_unsafe_report_template',
2635             setting => 'unsafe_report_template',
2636             type => $CONF_TYPE_NOARGS,
2637             code => \&Mail::SpamAssassin::Conf::Parser::set_template_clear
2638             });
2639              
2640             =item mbox_format_from_regex
2641              
2642             Set a specific regular expression to be used for mbox file From separators.
2643              
2644             For example, this setting will allow sa-learn to process emails stored in
2645             a kmail 2 mbox:
2646              
2647             mbox_format_from_regex /^From \S+ ?[[:upper:]][[:lower:]]{2}(?:, \d\d [[:upper:]][[:lower:]]{2} \d{4} [0-2]\d:\d\d:\d\d [+-]\d{4}| [[:upper:]][[:lower:]]{2} [ 1-3]\d [ 0-2]\d:\d\d:\d\d \d{4})/
2648              
2649              
2650             =cut
2651              
2652 81         346 push (@cmds, {
                 # Registers the 'mbox_format_from_regex' setting. The regular
                 # expression is kept as a plain string ($CONF_TYPE_STRING); no
                 # default is declared here and no validation is attached to
                 # this entry.
2653             setting => 'mbox_format_from_regex',
2654             type => $CONF_TYPE_STRING
2655             });
2656              
2657              
2658             =item parse_dkim_uris ( 0 | 1 ) (default: 1)
2659              
2660             If this option is set to 1 and the message contains DKIM headers, those headers will be parsed for URIs, which are then processed alongside URIs found in the body by some rules and modules (e.g. URIDNSBL).
2661              
2662             =cut
2663              
2664 81         327 push (@cmds, {
                 # Registers the 'parse_dkim_uris' setting: a boolean
                 # ($CONF_TYPE_BOOL) that is enabled (1) by default.
2665             setting => 'parse_dkim_uris',
2666             default => 1,
2667             type => $CONF_TYPE_BOOL,
2668             });
2669              
2670             =back
2671              
2672             =head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
2673              
2674             These settings differ from the ones above, in that they are considered
2675             'privileged'. Only users running C<spamassassin> from their procmailrc's or
2676             forward files, or sysadmins editing a file in C</etc/mail/spamassassin>, can
2677             use them. C<spamd> users cannot use them in their C<user_prefs> files, for
2678             security and efficiency reasons, unless C<allow_user_rules> is enabled (and
2679             then, they may only add rules from below).
2680              
2681             =over 4
2682              
2683             =item allow_user_rules ( 0 | 1 ) (default: 0)
2684              
2685             This setting allows users to create rules (and only rules) in their
2686             C<user_prefs> files for use with C<spamd>. It defaults to off, because
2687             this could be a severe security hole. It may be possible for users to
2688             gain root level access if C<spamd> is run as root. It is NOT a good
2689             idea, unless you have some other way of ensuring that users' tests are
2690             safe. Don't use this unless you are certain you know what you are
2691             doing. Furthermore, this option causes spamassassin to recompile all
2692             the tests each time it processes a message for a user with a rule in
2693             his/her C<user_prefs> file, which could have a significant effect on
2694             server load. It is not recommended.
2695              
2696             Note that it is not currently possible to use C<allow_user_rules> to modify an
2697             existing system rule from a C<user_prefs> file with C<spamd>.
2698              
2699             =cut
2700              
2701             push (@cmds, {
                 # Registers the privileged (is_priv) boolean setting
                 # 'allow_user_rules', default 0, with an explicit handler.
                 # Handler arguments: ($self, $key, $value, $line); only $self and
                 # $value are used. Returns $MISSING_REQUIRED_VALUE or
                 # $INVALID_VALUE when the value is rejected.
2702             setting => 'allow_user_rules',
2703             is_priv => 1,
2704             default => 0,
2705             type => $CONF_TYPE_BOOL,
2706             code => sub {
2707 0     0   0 my ($self, $key, $value, $line) = @_;
                 # An empty value counts as missing; anything other than a single
                 # '0' or '1' is rejected as invalid.
2708 0 0       0 if ($value eq '') {
    0          
2709 0         0 return $MISSING_REQUIRED_VALUE;
2710             }
2711             elsif ($value !~ /^[01]$/) {
2712 0         0 return $INVALID_VALUE;
2713             }
2714              
                 # "+0" stores the flag as a number rather than as the string that
                 # was read from the config line; the new state is then logged.
2715 0         0 $self->{allow_user_rules} = $value+0;
2716 0 0       0 dbg("config: " . ($self->{allow_user_rules} ? "allowing":"not allowing") . " user rules!");
2717             }
2718 81         8932 });
2719              
2720             =item redirector_pattern /pattern/modifiers
2721              
2722             A regex pattern that matches both the redirector site portion, and
2723             the target site portion of a URI.
2724              
2725             Note: The target URI portion must be surrounded in parentheses and
2726             no other part of the pattern may create a backreference.
2727              
2728             Example: http://chkpt.zdnet.com/chkpt/whatever/spammer.domain/yo/dude
2729              
2730             redirector_pattern /^https?:\/\/(?:opt\.)?chkpt\.zdnet\.com\/chkpt\/\w+\/(.*)$/i
2731              
2732             =cut
2733              
2734             push (@cmds, {
                 # Registers the privileged (is_priv) 'redirector_pattern' setting.
                 # The handler validates a delimited regexp, compiles it and
                 # appends it to the configuration's redirector_patterns list.
                 # Handler arguments: ($self, $key, $value, $line); only $self and
                 # $value are used. Returns $MISSING_REQUIRED_VALUE for an empty
                 # value and $INVALID_VALUE when the parser's
                 # is_delimited_regexp_valid() check fails.
2735             setting => 'redirector_pattern',
2736             is_priv => 1,
2737             code => sub {
2738 441     441   985 my ($self, $key, $value, $line) = @_;
2739 441 50       1563 if ($value eq '') {
    50          
2740 0         0 return $MISSING_REQUIRED_VALUE;
2741             }
2742             elsif (!$self->{parser}->is_delimited_regexp_valid("redirector_pattern", $value)) {
2743 0         0 return $INVALID_VALUE;
2744             }
2745              
2746             # convert to qr// while including modifiers
                 # Split the delimited form: $1 is the opening delimiter (after an
                 # optional leading "m"), $2 the pattern text, $3 whatever follows
                 # the closing delimiter (the modifiers). The closing delimiter may
                 # be the opener itself or one of > } ) ].
                 # NOTE(review): the match result is not checked; this appears to
                 # rely on is_delimited_regexp_valid() above having accepted the
                 # value -- confirm that it guarantees this match succeeds.
2747 441         1237 local ($1,$2,$3);
2748 441         1951 $value =~ /^m?(\W)(.*)(?:\1|>|}|\)|\])(.*?)$/;
2749 441         1025 my $pattern = $2;
                 # Modifiers, when present, are folded into the pattern text as a
                 # leading inline "(?modifiers)" group before compiling.
2750 441 50       1531 $pattern = "(?".$3.")".$pattern if $3;
2751 441         6382 $pattern = qr/$pattern/;
2752              
                 # The compiled regex is appended via $self->{main}->{conf}, not
                 # directly on $self.
2753 441         878 push @{$self->{main}->{conf}->{redirector_patterns}}, $pattern;
  441         2619  
2754             # dbg("config: adding redirector regex: " . $value);
2755             }
2756 81         1149 });
2757              
2758             =item header SYMBOLIC_TEST_NAME header op /pattern/modifiers [if-unset: STRING]
2759              
2760             Define a test. C<SYMBOLIC_TEST_NAME> is a symbolic test name, such as
2761             'FROM_ENDS_IN_NUMS'. C<header> is the name of a mail header field,
2762             such as 'Subject', 'To', 'From', etc. Header field names are matched
2763             case-insensitively (conforming to RFC 5322 section 1.2.2), except for
2764             all-capitals metaheader fields such as ALL, MESSAGEID, ALL-TRUSTED.
2765              
2766             Appending a modifier C<:raw> to a header field name will inhibit decoding of
2767             quoted-printable or base-64 encoded strings, and will preserve all whitespace
2768             inside the header string. The C<:raw> may also be applied to pseudo-headers
2769             e.g. C<ALL:raw> will return a pristine (unmodified) header section.
2770              
2771             Appending a modifier C<:addr> to a header field name will cause everything
2772             except the first email address to be removed from the header field. It is
2773             mainly applicable to header fields 'From', 'Sender', 'To', 'Cc' along with
2774             their 'Resent-*' counterparts, and the 'Return-Path'.
2775              
2776             Appending a modifier C<:name> to a header field name will cause everything
2777             except the first display name to be removed from the header field. It is
2778             mainly applicable to header fields containing a single mail address: 'From',
2779             'Sender', along with their 'Resent-From' and 'Resent-Sender' counterparts.
2780              
2781             It is syntactically permitted to append more than one modifier to a header
2782             field name, although currently most combinations achieve no additional effect,
2783             for example C<From:addr:raw> or C<From:raw:addr> is currently the same as
2784             C<From:addr> .
2785              
2786             For example, appending C<:addr> to a header name will result in example@foo
2787             in all of the following cases:
2788              
2789             =over 4
2790              
2791             =item example@foo
2792              
2793             =item example@foo (Foo Blah)
2794              
2795             =item example@foo, example@bar
2796              
2797             =item display: example@foo (Foo Blah), example@bar ;
2798              
2799             =item Foo Blah <example@foo>
2800              
2801             =item "Foo Blah" <example@foo>
2802              
2803             =item "'Foo Blah'" <example@foo>
2804              
2805             =back
2806              
2807             For example, appending C<:name> to a header name will result in "Foo Blah"
2808             (without quotes) in all of the following cases:
2809              
2810             =over 4
2811              
2812             =item example@foo (Foo Blah)
2813              
2814             =item example@foo (Foo Blah), example@bar
2815              
2816             =item display: example@foo (Foo Blah), example@bar ;
2817              
2818             =item Foo Blah <example@foo>
2819              
2820             =item "Foo Blah" <example@foo>
2821              
2822             =item "'Foo Blah'" <example@foo>
2823              
2824             =back
2825              
2826             There are several special pseudo-headers that can be specified:
2827              
2828             =over 4
2829              
2830             =item C<ALL> can be used to mean the text of all the message's headers.
2831             Note that all whitespace inside the headers, at line folds, is currently
2832             compressed into a single space (' ') character. To obtain a pristine
2833             (unmodified) header section, use C<ALL:raw> - the :raw modifier is documented
2834             above.
2835              
2836             =item C<ToCc> can be used to mean the contents of both the 'To' and 'Cc'
2837             headers.
2838              
2839             =item C<EnvelopeFrom> is the address used in the 'MAIL FROM:' phase of the SMTP
2840             transaction that delivered this message, if this data has been made available
2841             by the SMTP server. See C<envelope_sender_header> for more information
2842             on how to set this.
2843              
2844             =item C<MESSAGEID> is a symbol meaning all Message-Id's found in the message;
2845             some mailing list software moves the real 'Message-Id' to 'Resent-Message-Id'
2846             or to 'X-Message-Id', then uses its own one in the 'Message-Id' header.
2847             The value returned for this symbol is the text from all 3 headers, separated
2848             by newlines.
2849              
2850             =item C<X-Spam-Relays-Untrusted>, C<X-Spam-Relays-Trusted>,
2851             C<X-Spam-Relays-Internal> and C<X-Spam-Relays-External> represent a portable,
2852             pre-parsed representation of the message's network path, as recorded in the
2853             Received headers, divided into 'trusted' vs 'untrusted' and 'internal' vs
2854             'external' sets. See C<http://wiki.apache.org/spamassassin/TrustedRelays> for
2855             more details.
2856              
2857             =back
2858              
2859             C<op> is either C<=~> (contains regular expression) or C<!~> (does not contain
2860             regular expression), and C<pattern> is a valid Perl regular expression, with
2861             C<modifiers> as regexp modifiers in the usual style. Note that multi-line
2862             rules are not supported, even if you use C<x> as a modifier. Also note that
2863             the C<#> character must be escaped (C<\#>) or else it will be considered to be
2864             the start of a comment and not part of the regexp.
2865              
2866             If the C<[if-unset: STRING]> tag is present, then C<STRING> will
2867             be used if the header is not found in the mail message.
2868              
2869             Test names must not start with a number, and must contain only
2870             alphanumerics and underscores. It is suggested that lower-case characters
2871             not be used, and names have a length of no more than 22 characters,
2872             as an informal convention. Dashes are not allowed.
2873              
2874             Note that test names which begin with '__' are reserved for meta-match
2875             sub-rules, and are not scored or listed in the 'tests hit' reports.
2876             Test names which begin with 'T_' are reserved for tests which are
2877             undergoing QA, and these are given a very low score.
2878              
2879             If you add or modify a test, please be sure to run a sanity check afterwards
2880             by running C<spamassassin --lint>. This will avoid confusing error
2881             messages, or other tests being skipped as a side-effect.
2882              
2883             =item header SYMBOLIC_TEST_NAME exists:header_field_name
2884              
2885             Define a header field existence test. C<header_field_name> is the name
2886             of a header field to test for existence. Not to be confused with a
2887             test for a nonempty header field body, which can be implemented by a
2888             C<header SYMBOLIC_TEST_NAME header =~ /\S/> rule as described above.
2889              
2890             =item header SYMBOLIC_TEST_NAME eval:name_of_eval_method([arguments])
2891              
2892             Define a header eval test. C<name_of_eval_method> is the name of
2893             a method registered by a C<Mail::SpamAssassin::Plugin> object.
2894             C<arguments> are optional arguments to the function call.
2895              
2896             =item header SYMBOLIC_TEST_NAME eval:check_rbl('set', 'zone' [, 'sub-test'])
2897              
2898             Check a DNSBL (a DNS blacklist or whitelist). This will retrieve Received:
2899             headers from the message, extract the IP addresses, select which ones are
2900             'untrusted' based on the C<trusted_networks> logic, and query that DNSBL
2901             zone. There are a few things to note:
2902              
2903             =over 4
2904              
2905             =item duplicated or private IPs
2906              
2907             Duplicated IPs are only queried once and reserved IPs are not queried.
2908             Private IPs are those listed in
2909             <http://www.iana.org/assignments/ipv4-address-space>,
2910             <http://duxcw.com/faq/network/privip.htm>,
2911             <http://duxcw.com/faq/network/autoip.htm>, or
2912             <http://tools.ietf.org/html/rfc5735> as private.
2913              
2914             =item the 'set' argument
2915              
2916             This is used as a 'zone ID'. If you want to look up a multiple-meaning zone
2917             like SORBS, you can then query the results from that zone using it;
2918             but all check_rbl_sub() calls must use that zone ID.
2919              
2920             Also, if more than one IP address gets a DNSBL hit for a particular rule, it
2921             does not affect the score because rules only trigger once per message.
2922              
2923             =item the 'zone' argument
2924              
2925             This is the root zone of the DNSBL.
2926              
2927             The domain name is considered to be a fully qualified domain name
2928             (i.e. not subject to DNS resolver's search or default domain options).
2929             No trailing period is needed, and will be removed if specified.
2930              
2931             =item the 'sub-test' argument
2932              
2933             This optional argument behaves the same as the sub-test argument in
2934             C<check_rbl_sub()> below.
2935              
2936             =item selecting all IPs except for the originating one
2937              
2938             This is accomplished by placing '-notfirsthop' at the end of the set name.
2939             This is useful for querying against DNS lists which list dialup IP
2940             addresses; the first hop may be a dialup, but as long as there is at least
2941             one more hop, via their outgoing SMTP server, that's legitimate, and so
2942             should not gain points. If there is only one hop, that will be queried
2943             anyway, as it should be relaying via its outgoing SMTP server instead of
2944             sending directly to your MX (mail exchange).
2945              
2946             =item selecting IPs by whether they are trusted
2947              
2948             When checking a 'nice' DNSBL (a DNS whitelist), you cannot trust the IP
2949             addresses in Received headers that were not added by trusted relays. To
2950             test the first IP address that can be trusted, place '-firsttrusted' at the
2951             end of the set name. That should test the IP address of the relay that
2952             connected to the most remote trusted relay.
2953              
2954             Note that this requires that SpamAssassin know which relays are trusted. For
2955             simple cases, SpamAssassin can make a good estimate. For complex cases, you
2956             may get better results by setting C<trusted_networks> manually.
2957              
2958             In addition, you can test all untrusted IP addresses by placing '-untrusted'
2959             at the end of the set name. Important note -- this does NOT include the
2960             IP address from the most recent 'untrusted line', as used in '-firsttrusted'
2961             above. That's because we're talking about the trustworthiness of the
2962             IP address data, not the source header line, here; and in the case of
2963             the most recent header (the 'firsttrusted'), that data can be trusted.
2964             See the Wiki page at C<http://wiki.apache.org/spamassassin/TrustedRelays>
2965             for more information on this.
2966              
2967             =item Selecting just the last external IP
2968              
2969             By using '-lastexternal' at the end of the set name, you can select only
2970             the external host that connected to your internal network, or at least
2971             the last external host with a public IP.
2972              
2973             =back
2974              
2975             =item header SYMBOLIC_TEST_NAME eval:check_rbl_txt('set', 'zone')
2976              
2977             Same as check_rbl(), except querying using IN TXT instead of IN A records.
2978             If the zone supports it, it will result in a line of text describing
2979             why the IP is listed, typically a hyperlink to a database entry.
2980              
2981             =item header SYMBOLIC_TEST_NAME eval:check_rbl_sub('set', 'sub-test')
2982              
2983             Create a sub-test for 'set'. If you want to look up a multi-meaning zone
2984             like relays.osirusoft.com, you can then query the results from that zone
2985             using the zone ID from the original query. The sub-test may either be an
2986             IPv4 dotted address for RBLs that return multiple A records or a
2987             non-negative decimal number to specify a bitmask for RBLs that return a
2988             single A record containing a bitmask of results, a SenderBase test
2989             beginning with "sb:", or (if none of the preceding options seem to fit) a
2990             regular expression.
2991              
2992             Note: the set name must be exactly the same as for the main query rule,
2993             including selections like '-notfirsthop' appearing at the end of the set
2994             name.
2995              
2996             =cut
2997              
2998             push (@cmds, {
                 # Registers the privileged (is_priv) 'header' rule command.
                 # The handler recognises three forms of $value:
                 #   NAME eval:func(...)   (also "rbleval:")  -> eval test
                 #   NAME exists:HEADER                       -> existence test
                 #   NAME <anything else>                     -> plain header test
                 # Handler arguments: ($self, $key, $value, $line); only $self and
                 # $value are used. Returns $INVALID_VALUE,
                 # $MISSING_REQUIRED_VALUE or $INVALID_HEADER_FIELD_NAME when the
                 # value is rejected.
2999             setting => 'header',
3000             is_frequent => 1,
3001             is_priv => 1,
3002             code => sub {
3003 2867     2867   5303 my ($self, $key, $value, $line) = @_;
3004 2867         7243 local ($1,$2);
3005 2867 100       12141 if ($value =~ /^(\S+)\s+(?:rbl)?eval:(.*)$/) {
    100          
                 # Copy the captures right away so later matches cannot clobber
                 # them.
3006 2300         5888 my ($rulename, $fn) = ($1, $2);
3007 2300         8125 dbg("config: header eval rule name is $rulename function is $fn");
                 # The function text must be a bare word, optionally followed by a
                 # parenthesised argument list.
3008 2300 50       8638 if ($fn !~ /^\w+(\(.*\))?$/) {
3009 0         0 return $INVALID_VALUE;
3010             }
                 # Functions whose names start with check_rbl or check_dns are
                 # registered as RBL evals; every other function is a header eval.
3011 2300 100       5563 if ($fn =~ /^check_(?:rbl|dns)/) {
3012 1         12 $self->{parser}->add_test ($rulename, $fn, $TYPE_RBL_EVALS);
3013             }
3014             else {
3015 2299         6410 $self->{parser}->add_test ($rulename, $fn, $TYPE_HEAD_EVALS);
3016             }
3017             }
3018             elsif ($value =~ /^(\S+)\s+exists:(.*)$/) {
3019 63         327 my ($rulename, $header_name) = ($1, $2);
3020             # RFC 5322 section 3.6.8, ftext printable US-ASCII ch not including ":"
                 # A name with no non-whitespace character counts as missing; the
                 # active check below only forbids ':', space and tab, which is
                 # looser than the strict RFC character class left commented out.
3021 63 50       631 if ($header_name !~ /\S/) {
    50          
3022 0         0 return $MISSING_REQUIRED_VALUE;
3023             # } elsif ($header_name !~ /^([!-9;-\176]+)$/) {
3024             } elsif ($header_name !~ /^([^: \t]+)$/) { # be generous
3025 0         0 return $INVALID_HEADER_FIELD_NAME;
3026             }
                 # The existence test is registered as an ordinary header test
                 # whose text is "defined(NAME)".
3027 63         484 $self->{parser}->add_test ($rulename, "defined($header_name)",
3028             $TYPE_HEAD_TESTS);
                 # NOTE(review): this assignment is unconditional, so it replaces
                 # any description already stored for the rule at this point.
3029 63         470 $self->{descriptions}->{$rulename} = "Found a $header_name header";
3030             }
3031             else {
                 # Plain form: split at the first run of whitespace into the rule
                 # name and the remaining test text; both parts are required.
3032 504         1953 my @values = split(/\s+/, $value, 2);
3033 504 50       1211 if (@values != 2) {
3034 0         0 return $MISSING_REQUIRED_VALUE;
3035             }
3036 504         1907 $self->{parser}->add_test (@values, $TYPE_HEAD_TESTS);
3037             }
3038             }
3039 81         1337 });
3040              
3041             =item body SYMBOLIC_TEST_NAME /pattern/modifiers
3042              
3043             Define a body pattern test. C<pattern> is a Perl regular expression. Note:
3044             as per the header tests, C<#> must be escaped (C<\#>) or else it is considered
3045             the beginning of a comment.
3046              
3047             The 'body' in this case is the textual parts of the message body;
3048             any non-text MIME parts are stripped, and the message decoded from
3049             Quoted-Printable or Base-64-encoded format if necessary. The message
3050             Subject header is considered part of the body and becomes the first
3051             paragraph when running the rules. All HTML tags and line breaks will
3052             be removed before matching.
3053              
3054             =item body SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])
3055              
3056             Define a body eval test. See above.
3057              
3058             =cut
3059              
3060             push (@cmds, {
                 # Registers the privileged (is_priv) 'body' rule command.
                 # The handler accepts "NAME eval:func(...)" (registered as a body
                 # eval) or "NAME <test text>" (registered as a body test).
                 # Handler arguments: ($self, $key, $value, $line); only $self and
                 # $value are used. Returns $INVALID_VALUE or
                 # $MISSING_REQUIRED_VALUE when the value is rejected.
3061             setting => 'body',
3062             is_frequent => 1,
3063             is_priv => 1,
3064             code => sub {
3065 319     319   769 my ($self, $key, $value, $line) = @_;
3066 319         875 local ($1,$2);
3067 319 100       1410 if ($value =~ /^(\S+)\s+eval:(.*)$/) {
                 # Copy the captures right away so later matches cannot clobber
                 # them.
3068 85         364 my ($rulename, $fn) = ($1, $2);
3069 85         573 dbg("config: body eval rule name is $rulename function is $fn");
3070              
                 # The function text must be a bare word, optionally followed by a
                 # parenthesised argument list.
3071 85 50       606 if ($fn !~ /^\w+(\(.*\))?$/) {
3072 0         0 return $INVALID_VALUE;
3073             }
3074 85         470 $self->{parser}->add_test ($rulename, $fn, $TYPE_BODY_EVALS);
3075             }
3076             else {
                 # Split at the first run of whitespace into the rule name and the
                 # remaining test text; both parts are required.
3077 234         1376 my @values = split(/\s+/, $value, 2);
3078 234 50       847 if (@values != 2) {
3079 0         0 return $MISSING_REQUIRED_VALUE;
3080             }
3081 234         1005 $self->{parser}->add_test (@values, $TYPE_BODY_TESTS);
3082             }
3083             }
3084 81         1147 });
3085              
3086             =item uri SYMBOLIC_TEST_NAME /pattern/modifiers
3087              
3088             Define a uri pattern test. C<pattern> is a Perl regular expression. Note: as
3089             per the header tests, C<#> must be escaped (C<\#>) or else it is considered
3090             the beginning of a comment.
3091              
3092             The 'uri' in this case is a list of all the URIs in the body of the email,
3093             and the test will be run on each and every one of those URIs, adjusting the
3094             score if a match is found. Use this test instead of one of the body tests
3095             when you need to match a URI, as it is more accurately bound to the start/end
3096             points of the URI, and will also be faster.
3097              
3098             =cut
3099              
3100             # we don't do URI evals yet - maybe later
3101             # if (/^uri\s+(\S+)\s+eval:(.*)$/) {
3102             # $self->{parser}->add_test ($1, $2, $TYPE_URI_EVALS);
3103             # next;
3104             # }
3105             push (@cmds, {
                 # Registers the privileged (is_priv) 'uri' rule command.
                 # Only the "NAME <test text>" form is handled; there is no eval
                 # branch in this handler.
                 # Handler arguments: ($self, $key, $value, $line); only $self and
                 # $value are used. Returns $MISSING_REQUIRED_VALUE when the value
                 # does not split into two parts.
3106             setting => 'uri',
3107             is_priv => 1,
3108             code => sub {
3109 63     63   284 my ($self, $key, $value, $line) = @_;
                 # Split at the first run of whitespace into the rule name and the
                 # remaining test text.
3110 63         419 my @values = split(/\s+/, $value, 2);
3111 63 50       714 if (@values != 2) {
3112 0         0 return $MISSING_REQUIRED_VALUE;
3113             }
3114 63         359 $self->{parser}->add_test (@values, $TYPE_URI_TESTS);
3115             }
3116 81         1225 });
3117              
3118             =item rawbody SYMBOLIC_TEST_NAME /pattern/modifiers
3119              
3120             Define a raw-body pattern test. C<pattern> is a Perl regular expression.
3121             Note: as per the header tests, C<#> must be escaped (C<\#>) or else it is
3122             considered the beginning of a comment.
3123              
3124             The 'raw body' of a message is the raw data inside all textual parts. The
3125             text will be decoded from base64 or quoted-printable encoding, but HTML
3126             tags and line breaks will still be present. Multiline expressions will
3127             need to be used to match strings that are broken by line breaks.
3128              
3129             =item rawbody SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])
3130              
3131             Define a raw-body eval test. See above.
3132              
3133             =cut
3134              
3135             push (@cmds, {
                 # Registers the privileged (is_priv) 'rawbody' rule command.
                 # The handler accepts "NAME eval:func(...)" (registered as a
                 # rawbody eval) or "NAME <test text>" (registered as a rawbody
                 # test).
                 # Handler arguments: ($self, $key, $value, $line); only $self and
                 # $value are used. Returns $MISSING_REQUIRED_VALUE when the
                 # non-eval form does not split into two parts.
3136             setting => 'rawbody',
3137             is_frequent => 1,
3138             is_priv => 1,
3139             code => sub {
3140 0     0   0 my ($self, $key, $value, $line) = @_;
3141 0         0 local ($1,$2);
3142 0 0       0 if ($value =~ /^(\S+)\s+eval:(.*)$/) {
                 # NOTE(review): unlike the 'header' and 'body' handlers, the
                 # captures are passed to add_test directly (not copied into
                 # lexicals first) and the function text is not checked against
                 # /^\w+(\(.*\))?$/ -- confirm whether add_test validates it.
3143 0         0 $self->{parser}->add_test ($1, $2, $TYPE_RAWBODY_EVALS);
3144             } else {
                 # Split at the first run of whitespace into the rule name and the
                 # remaining test text; both parts are required.
3145 0         0 my @values = split(/\s+/, $value, 2);
3146 0 0       0 if (@values != 2) {
3147 0         0 return $MISSING_REQUIRED_VALUE;
3148             }
3149 0         0 $self->{parser}->add_test (@values, $TYPE_RAWBODY_TESTS);
3150             }
3151             }
3152 81         1183 });
3153              
3154             =item full SYMBOLIC_TEST_NAME /pattern/modifiers
3155              
3156             Define a full message pattern test. C<pattern> is a Perl regular expression.
3157             Note: as per the header tests, C<#> must be escaped (C<\#>) or else it is
3158             considered the beginning of a comment.
3159              
3160             The full message is the pristine message headers plus the pristine message
3161             body, including all MIME data such as images, other attachments, MIME
3162             boundaries, etc.
3163              
3164             =item full SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])
3165              
3166             Define a full message eval test. See above.
3167              
3168             =cut
3169              
3170             push (@cmds, {
3171             setting => 'full',
3172             is_priv => 1,
3173             code => sub {
3174 248     248   622 my ($self, $key, $value, $line) = @_;
3175 248         664 local ($1,$2);
3176 248 50       1165 if ($value =~ /^(\S+)\s+eval:(.*)$/) {
3177 248         807 $self->{parser}->add_test ($1, $2, $TYPE_FULL_EVALS);
3178             } else {
3179 0         0 my @values = split(/\s+/, $value, 2);
3180 0 0       0 if (@values != 2) {
3181 0         0 return $MISSING_REQUIRED_VALUE;
3182             }
3183 0         0 $self->{parser}->add_test (@values, $TYPE_FULL_TESTS);
3184             }
3185             }
3186 81         942 });
3187              
3188             =item meta SYMBOLIC_TEST_NAME boolean expression
3189              
3190             Define a boolean expression test in terms of other tests that have
3191             been hit or not hit. For example:
3192              
3193             meta META1 TEST1 && !(TEST2 || TEST3)
3194              
3195             Note that English language operators ("and", "or") will be treated as
3196             rule names, and that there is no C<XOR> operator.
3197              
3198             =item meta SYMBOLIC_TEST_NAME boolean arithmetic expression
3199              
3200             Can also define an arithmetic expression in terms of other tests,
3201             with an unhit test having the value "0" and a hit test having a
3202             nonzero value. The value of a hit meta test is that of its arithmetic
3203             expression. The value of a hit eval test is that returned by its
3204             method. The value of a hit header, body, rawbody, uri, or full test
3205             which has the "multiple" tflag is the number of times the test hit.
3206             The value of any other type of hit test is "1".
3207              
3208             For example:
3209              
3210             meta META2 (3 * TEST1 - 2 * TEST2) > 0
3211              
3212             Note that Perl builtins and functions, like C<abs()>, B<can't> be
3213             used, and will be treated as rule names.
3214              
3215             If you want to define a meta-rule, but do not want its individual sub-rules to
3216             count towards the final score unless the entire meta-rule matches, give the
3217             sub-rules names that start with '__' (two underscores). SpamAssassin will
3218             ignore these for scoring.
3219              
3220             =cut
3221              
3222             push (@cmds, {
3223             setting => 'meta',
3224             is_frequent => 1,
3225             is_priv => 1,
3226             code => sub {
3227 72     72   321 my ($self, $key, $value, $line) = @_;
3228 72         423 my @values = split(/\s+/, $value, 2);
3229 72 50       320 if (@values != 2) {
3230 0         0 return $MISSING_REQUIRED_VALUE;
3231             }
3232 72 50       351 if ($values[1] =~ /\*\s*\*/) {
3233 0         0 info("config: found invalid '**' or '* *' operator in meta command");
3234 0         0 return $INVALID_VALUE;
3235             }
3236 72         360 $self->{parser}->add_test (@values, $TYPE_META_TESTS);
3237             }
3238 81         1247 });
3239              
3240             =item reuse SYMBOLIC_TEST_NAME [ OLD_SYMBOLIC_TEST_NAME_1 ... ]
3241              
3242             Defines the name of a test that should be "reused" during the scoring
3243             process. If a message has an X-Spam-Status header that shows a hit for
3244             this rule or any of the old rule names given, a hit will be added for
3245             this rule when B<mass-check --reuse> is used. Examples:
3246              
3247             C<reuse SPF_PASS>
3248              
3249             C<reuse MY_NET_RULE_V2 MY_NET_RULE_V1>
3250              
3251             The actual logic for reuse tests is done by
3252             B<Mail::SpamAssassin::Plugin::Reuse>.
3253              
3254             =cut
3255              
3256             push (@cmds, {
3257             setting => 'reuse',
3258             is_priv => 1,
3259             code => sub {
3260 0     0   0 my ($self, $key, $value, $line) = @_;
3261 0 0       0 if ($value !~ /\s*(\w+)(?:\s+(?:\w+(?:\s+\w+)*))?\s*$/) {
3262 0         0 return $INVALID_VALUE;
3263             }
3264 0         0 my $rule_name = $1;
3265             # don't overwrite tests, just define them so scores, priorities work
3266 0 0       0 if (!exists $self->{tests}->{$rule_name}) {
3267 0         0 $self->{parser}->add_test($rule_name, undef, $TYPE_EMPTY_TESTS);
3268             }
3269             }
3270 81         1011 });
3271              
3272             =item tflags SYMBOLIC_TEST_NAME flags
3273              
3274             Used to set flags on a test. Parameter is a space-separated list of flag
3275             names or flag name = value pairs.
3276             These flags are used in the score-determination back end system for details
3277             of the test's behaviour. Please see C<bayes_auto_learn> for more information
3278             about tflag interaction with those systems. The following flags can be set:
3279              
3280             =over 4
3281              
3282             =item net
3283              
3284             The test is a network test, and will not be run in the mass checking system
3285             or if B<-L> is used, therefore its score should not be modified.
3286              
3287             =item nice
3288              
3289             The test is intended to compensate for common false positives, and should be
3290             assigned a negative score.
3291              
3292             =item userconf
3293              
3294             The test requires user configuration before it can be used (like
3295             language-specific tests).
3296              
3297             =item learn
3298              
3299             The test requires training before it can be used.
3300              
3301             =item noautolearn
3302              
3303             The test will explicitly be ignored when calculating the score for
3304             learning systems.
3305              
3306             =item autolearn_force
3307              
3308             The test will be subject to less stringent autolearn thresholds.
3309              
3310             Normally, SpamAssassin will require 3 points from the header and 3
3311             points from the body to be auto-learned as spam. This option keeps
3312             the threshold at 6 points total but changes it to have no regard to the
3313             source of the points.
3314              
3315             =item noawl
3316              
3317             This flag is specific to the AWL plugin.
3318              
3319             Normally, AWL plugin normalizes scores via auto-whitelist. In some scenarios
3320             it works against the system administrator when trying to add some rules to
3321             correct misclassified email. When AWL plugin searches the email and finds
3322             the noawl flag it will exit without normalizing the score nor storing the
3323             value in db.
3324              
3325             =item multiple
3326              
3327             The test will be evaluated multiple times, for use with meta rules.
3328             Only affects header, body, rawbody, uri, and full tests.
3329              
3330             =item maxhits=N
3331              
3332             If B<multiple> is specified, limit the number of hits found to N.
3333             If the rule is used in a meta that counts the hits (e.g. __RULENAME > 5),
3334             this is a way to avoid wasted extra work (use "tflags multiple maxhits=6").
3335              
3336             For example:
3337              
3338             uri __KAM_COUNT_URIS /^./
3339             tflags __KAM_COUNT_URIS multiple maxhits=16
3340             describe __KAM_COUNT_URIS A multiple match used to count URIs in a message
3341              
3342             meta __KAM_HAS_0_URIS (__KAM_COUNT_URIS == 0)
3343             meta __KAM_HAS_1_URIS (__KAM_COUNT_URIS >= 1)
3344             meta __KAM_HAS_2_URIS (__KAM_COUNT_URIS >= 2)
3345             meta __KAM_HAS_3_URIS (__KAM_COUNT_URIS >= 3)
3346             meta __KAM_HAS_4_URIS (__KAM_COUNT_URIS >= 4)
3347             meta __KAM_HAS_5_URIS (__KAM_COUNT_URIS >= 5)
3348             meta __KAM_HAS_10_URIS (__KAM_COUNT_URIS >= 10)
3349             meta __KAM_HAS_15_URIS (__KAM_COUNT_URIS >= 15)
3350              
3351             =item ips_only
3352              
3353             This flag is specific to rules invoking an URIDNSBL plugin,
3354             it is documented there.
3355              
3356             =item domains_only
3357              
3358             This flag is specific to rules invoking an URIDNSBL plugin,
3359             it is documented there.
3360              
3361             =item ns
3362              
3363             This flag is specific to rules invoking an URIDNSBL plugin,
3364             it is documented there.
3365              
3366             =item a
3367              
3368             This flag is specific to rules invoking an URIDNSBL plugin,
3369             it is documented there.
3370              
3371             =back
3372              
3373             =cut
3374              
3375 81         571 push (@cmds, {
3376             setting => 'tflags',
3377             is_frequent => 1,
3378             is_priv => 1,
3379             type => $CONF_TYPE_HASH_KEY_VALUE,
3380             });
3381              
3382             =item priority SYMBOLIC_TEST_NAME n
3383              
3384             Assign a specific priority to a test. All tests, except for DNS and Meta
3385             tests, are run in increasing priority value order (negative priority values
3386             are run before positive priority values). The default test priority is 0
3387             (zero).
3388              
3389             The values <-99999999999999> and <-99999999999998> have a special meaning
3390             internally, and should not be used.
3391              
3392             =cut
3393              
3394 81         459 push (@cmds, {
3395             setting => 'priority',
3396             is_priv => 1,
3397             type => $CONF_TYPE_HASH_KEY_VALUE,
3398             });
3399              
3400             =back
3401              
3402             =head1 ADMINISTRATOR SETTINGS
3403              
3404             These settings differ from the ones above, in that they are considered 'more
3405             privileged' -- even more than the ones in the B<PRIVILEGED SETTINGS> section.
3406             No matter what C<allow_user_rules> is set to, these can never be set from a
3407             user's C<user_prefs> file when spamc/spamd is being used. However, all
3408             settings can be used by local programs run directly by the user.
3409              
3410             =over 4
3411              
3412             =item version_tag string
3413              
3414             This tag is appended to the SA version in the X-Spam-Status header. You should
3415             include it when you modify your ruleset, especially if you plan to distribute it.
3416             A good choice for I<string> is your last name or your initials followed by a
3417             number which you increase with each change.
3418              
3419             The version_tag will be lowercased, and any non-alphanumeric or period
3420             character will be replaced by an underscore.
3421              
3422             e.g.
3423              
3424             version_tag myrules1 # version=2.41-myrules1
3425              
3426             =cut
3427              
3428             push (@cmds, {
3429             setting => 'version_tag',
3430             is_admin => 1,
3431             code => sub {
3432 0     0   0 my ($self, $key, $value, $line) = @_;
3433 0 0       0 if ($value eq '') {
3434 0         0 return $MISSING_REQUIRED_VALUE;
3435             }
3436 0         0 my $tag = lc($value);
3437 0         0 $tag =~ tr/a-z0-9./_/c;
3438 0         0 foreach (@Mail::SpamAssassin::EXTRA_VERSION) {
3439 0 0       0 if($_ eq $tag) { $tag = undef; last; }
  0         0  
  0         0  
3440             }
3441 0 0       0 push(@Mail::SpamAssassin::EXTRA_VERSION, $tag) if($tag);
3442             }
3443 81         897 });
3444              
3445             =item test SYMBOLIC_TEST_NAME (ok|fail) Some string to test against
3446              
3447             Define a regression testing string. You can have more than one regression test
3448             string per symbolic test name. Simply specify a string that you wish the test
3449             to match.
3450              
3451             These tests are only run as part of the test suite - they should not affect the
3452             general running of SpamAssassin.
3453              
3454             =cut
3455              
3456             push (@cmds, {
3457             setting => 'test',
3458             is_admin => 1,
3459             code => sub {
3460 1827 50   1827   3537 return unless defined $COLLECT_REGRESSION_TESTS;
3461 0         0 my ($self, $key, $value, $line) = @_;
3462 0         0 local ($1,$2,$3);
3463 0 0       0 if ($value !~ /^(\S+)\s+(ok|fail)\s+(.*)$/) { return $INVALID_VALUE; }
  0         0  
3464 0         0 $self->{parser}->add_regression_test($1, $2, $3);
3465             }
3466 81         975 });
3467              
3468             =item rbl_timeout t [t_min] [zone] (default: 15 3)
3469              
3470             All DNS queries are made at the beginning of a check and we try to read
3471             the results at the end. This value specifies the maximum period of time
3472             (in seconds) to wait for a DNS query. If most of the DNS queries have
3473             succeeded for a particular message, then SpamAssassin will not wait for
3474             the full period to avoid wasting time on unresponsive server(s), but will
3475             shrink the timeout according to a percentage of queries already completed.
3476             As the number of queries remaining approaches 0, the timeout value will
3477             gradually approach a t_min value, which is an optional second parameter
3478             and defaults to 0.2 * t. If t is smaller than t_min, the initial timeout
3479             is set to t_min. Here is a chart of queries remaining versus the timeout
3480             in seconds, for the default 15 second / 3 second timeout setting:
3481              
3482             queries left 100% 90% 80% 70% 60% 50% 40% 30% 20% 10% 0%
3483             timeout 15 14.9 14.5 13.9 13.1 12.0 10.7 9.1 7.3 5.3 3
3484              
3485             For example, if 20 queries are made at the beginning of a message check
3486             and 16 queries have returned (leaving 20%), the remaining 4 queries should
3487             finish within 7.3 seconds since their query started or they will be timed out.
3488             Note that timed out queries are only aborted when there is nothing else left
3489             for SpamAssassin to do - long evaluation of other rules may grant queries
3490             additional time.
3491              
3492             If a parameter 'zone' is specified (it must end with a letter, which
3493             distinguishes it from other numeric parameters), then the setting only
3494             applies to DNS queries against the specified DNS domain (host, domain or
3495             RBL (sub)zone). Matching is case-insensitive, the actual domain may be a
3496             subdomain of the specified zone.
3497              
3498             =cut
3499              
3500             push (@cmds, {
3501             setting => 'rbl_timeout',
3502             is_admin => 1,
3503             default => 15,
3504             code => sub {
3505 1     1   5 my ($self, $key, $value, $line) = @_;
3506 1 50 33     23 unless (defined $value && $value !~ /^$/) {
3507 0         0 return $MISSING_REQUIRED_VALUE;
3508             }
3509 1         7 local ($1,$2,$3);
3510 1 50       8 unless ($value =~ /^ ( \+? \d+ (?: \. \d*)? [smhdw]? )
3511             (?: \s+ ( \+? \d+ (?: \. \d*)? [smhdw]? ) )?
3512             (?: \s+ (\S* [a-zA-Z]) )? $/xsi) {
3513 0         0 return $INVALID_VALUE;
3514             }
3515 1         6 my($timeout, $timeout_min, $zone) = ($1, $2, $3);
3516 1         4 foreach ($timeout, $timeout_min) {
3517 2 50 66     14 if (defined $_ && s/\s*([smhdw])\z//i) {
3518             $_ *= { s => 1, m => 60, h => 3600,
3519 0         0 d => 24*3600, w => 7*24*3600 }->{lc $1};
3520             }
3521             }
3522 1 50       5 if (!defined $zone) { # a global setting
3523 1         4 $self->{rbl_timeout} = 0 + $timeout;
3524 1 50       7 $self->{rbl_timeout_min} = 0 + $timeout_min if defined $timeout_min;
3525             }
3526             else { # per-zone settings
3527 0         0 $zone =~ s/^\.//; $zone =~ s/\.\z//; # strip leading and trailing dot
  0         0  
3528 0         0 $zone = lc $zone;
3529 0         0 $self->{by_zone}{$zone}{rbl_timeout} = 0 + $timeout;
3530             $self->{by_zone}{$zone}{rbl_timeout_min} =
3531 0 0       0 0 + $timeout_min if defined $timeout_min;
3532             }
3533             },
3534 81         1188 type => $CONF_TYPE_DURATION,
3535             });
3536              
3537             =item util_rb_tld tld1 tld2 ...
3538              
3539             This option maintains the list of valid TLDs in the RegistryBoundaries code.
3540             TLDs include things like com, net, org, etc.
3541              
3542             =cut
3543              
3544             # DO NOT UPDATE THIS HARDCODED LIST!! It is only a fallback for a
3545             # transitional period and is to be removed later. TLDs are now maintained in
3546             # sa-update 20_aux_tlds.cf.
3547 81         2568 foreach (qw/
3548             ac academy accountants active actor ad ae aero af ag agency ai airforce al am an
3549             ao aq ar archi army arpa as asia associates at attorney au auction audio autos
3550             aw ax axa az ba bar bargains bayern bb bd be beer berlin best bf bg bh bi bid
3551             bike bio biz bj black blackfriday blue bm bmw bn bnpparibas bo boo boutique br
3552             brussels bs bt build builders business buzz bv bw by bz bzh ca cab camera camp
3553             cancerresearch capetown capital caravan cards care career careers cash cat
3554             catering cc cd center ceo cern cf cg ch cheap christmas church ci citic city ck
3555             cl claims cleaning click clinic clothing club cm cn co codes coffee college
3556             cologne com community company computer condos construction consulting
3557             contractors cooking cool coop country cr credit creditcard cruises cu cuisinella
3558             cv cw cx cy cymru cz dad dance dating day de deals degree democrat dental
3559             dentist desi diamonds diet digital direct directory discount dj dk dm dnp do
3560             domains durban dz eat ec edu education ee eg email engineer engineering
3561             enterprises equipment er es esq estate et eu eus events exchange expert exposed
3562             fail farm feedback fi finance financial fish fishing fitness fj fk flights
3563             florist fm fo foo foundation fr frl frogans fund furniture futbol ga gal gallery
3564             gb gbiz gd ge gent gf gg gh gi gift gifts gives gl glass global globo gm gmail
3565             gmo gn gop gov gp gq gr graphics gratis green gripe gs gt gu guide guitars guru
3566             gw gy hamburg haus healthcare help here hiphop hiv hk hm hn holdings holiday
3567             homes horse host hosting house how hr ht hu id ie il im immo immobilien in
3568             industries info ing ink institute insure int international investments io iq ir
3569             is it je jetzt jm jo jobs joburg jp juegos kaufen ke kg kh ki kim kitchen kiwi
3570             km kn koeln kp kr krd kred kw ky kz la lacaixa land lawyer lb lc lease lgbt li
3571             life lighting limited limo link lk loans london lotto lr ls lt ltda lu luxe
3572             luxury lv ly ma maison management mango market marketing mc md me media meet
3573             melbourne meme menu mg mh miami mil mini mk ml mm mn mo mobi moda moe monash
3574             mortgage moscow motorcycles mov mp mq mr ms mt mu museum mv mw mx my mz na
3575             nagoya name navy nc ne net network neustar new nf ng ngo nhk ni ninja nl no np
3576             nr nra nrw nu nyc nz okinawa om ong onl ooo org organic otsuka ovh pa paris
3577             partners parts pe pf pg ph photo photography photos physio pics pictures pink
3578             pizza pk pl place plumbing pm pn post pr praxi press pro prod productions
3579             properties property ps pt pub pw py qa qpon quebec re realtor recipes red rehab
3580             reise reisen ren rentals repair report republican rest restaurant reviews rich
3581             rio ro rocks rodeo rs rsvp ru ruhr rw ryukyu sa saarland sarl sb sc sca scb
3582             schmidt schule scot sd se services sexy sg sh shiksha shoes si singles sj sk sl
3583             sm sn so social software sohu solar solutions soy space spiegel sr st su
3584             supplies supply support surf surgery suzuki sv sx sy systems sz tatar tattoo tax
3585             tc td technology tel tf tg th tienda tips tirol tj tk tl tm tn to today tokyo
3586             tools top town toys tr trade training travel tt tv tw tz ua ug uk university
3587             uno uol us uy uz va vacations vc ve vegas ventures versicherung vet vg vi viajes
3588             villas vision vlaanderen vn vodka vote voting voto voyage vu wales wang watch
3589             webcam website wed wf whoswho wien wiki williamhill works ws wtc wtf xn--1qqw23a
3590             xn--3bst00m xn--3ds443g xn--3e0b707e xn--45brj9c xn--4gbrim xn--55qw42g
3591             xn--55qx5d xn--6frz82g xn--6qq986b3xl xn--80adxhks xn--80ao21a xn--80asehdb
3592             xn--80aswg xn--90a3ac xn--c1avg xn--cg4bki xn--clchc0ea0b2g2a9gcd xn--czr694b
3593             xn--czru2d xn--d1acj3b xn--fiq228c5hs xn--fiq64b xn--fiqs8s xn--fiqz9s
3594             xn--fpcrj9c3d xn--fzc2c9e2c xn--gecrj9c xn--h2brj9c xn--i1b6b1a6a2e xn--io0a7i
3595             xn--j1amh xn--j6w193g xn--kprw13d xn--kpry57d xn--kput3i xn--l1acc
3596             xn--lgbbat1ad8j xn--mgb9awbf xn--mgba3a4f16a xn--mgbaam7a8h xn--mgbab2bd
3597             xn--mgbayh7gpa xn--mgbbh1a71e xn--mgbc0a9azcg xn--mgberp4a5d4ar xn--mgbx4cd0ab
3598             xn--ngbc5azd xn--nqv7f xn--nqv7fs00ema xn--o3cw4h xn--ogbpf8fl xn--p1ai
3599             xn--pgbs0dh xn--q9jyb4c xn--rhqv96g xn--s9brj9c xn--ses554g xn--unup4y xn--vhquv
3600             xn--wgbh1c xn--wgbl6a xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h
3601             xn--yfro4i67o xn--ygbi2ammx xn--zfr164b xxx xyz yachts yandex ye yokohama
3602             youtube yt za zm zone zw
3603 56619         130119 /) { $self->{valid_tlds}{lc $_} = 1; }
3604              
3605             push (@cmds, {
3606             setting => 'util_rb_tld',
3607             is_admin => 1,
3608             code => sub {
3609 8568     8568   13828 my ($self, $key, $value, $line) = @_;
3610 8568 50 33     29584 unless (defined $value && $value !~ /^$/) {
3611 0         0 return $MISSING_REQUIRED_VALUE;
3612             }
3613 8568 50       54576 unless ($value =~ /^[^\s.]+(?:\s+[^\s.]+)*$/) {
3614 0         0 return $INVALID_VALUE;
3615             }
3616 8568         56868 foreach (split(/\s+/, $value)) {
3617 97146         188542 $self->{valid_tlds}{lc $_} = 1;
3618             }
3619 8568         29242 dbg("config: added tld list - $value");
3620             }
3621 81         1243 });
3622              
3623             =item util_rb_2tld 2tld-1.tld 2tld-2.tld ...
3624              
3625             This option maintains the list of valid 2nd-level TLDs in the RegistryBoundaries
3626             code. 2TLDs include things like co.uk, fed.us, etc.
3627              
3628             =cut
3629              
3630             # DO NOT UPDATE THIS HARDCODED LIST!! It is only a fallback for a
3631             # transitional period and is to be removed later. TLDs are now maintained in
3632             # sa-update 20_aux_tlds.cf.
3633 81         4191 foreach (qw/
3634             com.ac edu.ac gov.ac mil.ac net.ac org.ac nom.ad ac.ae co.ae com.ae gov.ae
3635             mil.ae name.ae net.ae org.ae pro.ae sch.ae com.af edu.af gov.af net.af
3636             co.ag com.ag net.ag nom.ag org.ag com.ai edu.ai gov.ai net.ai off.ai
3637             org.ai com.al edu.al gov.al net.al org.al com.an edu.an net.an org.an
3638             co.ao ed.ao gv.ao it.ao og.ao pb.ao com.ar edu.ar gov.ar int.ar mil.ar
3639             net.ar org.ar e164.arpa in-addr.arpa ip6.arpa iris.arpa uri.arpa urn.arpa
3640             ac.at co.at gv.at or.at priv.at act.au asn.au com.au conf.au csiro.au
3641             edu.au gov.au id.au info.au net.au nsw.au nt.au org.au otc.au oz.au qld.au
3642             sa.au tas.au telememo.au vic.au wa.au com.aw biz.az com.az edu.az gov.az
3643             info.az int.az mil.az name.az net.az org.az pp.az co.ba com.ba edu.ba
3644             gov.ba mil.ba net.ba org.ba rs.ba unbi.ba unsa.ba com.bb edu.bb gov.bb
3645             net.bb org.bb ac.bd com.bd edu.bd gov.bd mil.bd net.bd org.bd ac.be
3646             belgie.be dns.be fgov.be gov.bf biz.bh cc.bh com.bh edu.bh gov.bh info.bh
3647             net.bh org.bh com.bm edu.bm gov.bm net.bm org.bm com.bn edu.bn net.bn
3648             org.bn com.bo edu.bo gob.bo gov.bo int.bo mil.bo net.bo org.bo tv.bo
3649             adm.br adv.br agr.br am.br arq.br art.br ato.br bio.br bmd.br cim.br
3650             cng.br cnt.br com.br coop.br dpn.br eco.br ecn.br edu.br eng.br esp.br
3651             etc.br eti.br far.br fm.br fnd.br fot.br fst.br g12.br ggf.br gov.br
3652             imb.br ind.br inf.br jor.br lel.br mat.br med.br mil.br mus.br net.br
3653             nom.br not.br ntr.br odo.br org.br ppg.br pro.br psc.br psi.br qsl.br
3654             rec.br slg.br srv.br tmp.br trd.br tur.br tv.br vet.br zlg.br com.bs
3655             net.bs org.bs com.bt edu.bt gov.bt net.bt org.bt co.bw org.bw gov.by
3656             mil.by com.bz net.bz org.bz ab.ca bc.ca gc.ca mb.ca nb.ca nf.ca nl.ca
3657             ns.ca nt.ca nu.ca on.ca pe.ca qc.ca sk.ca yk.ca co.ck edu.ck gov.ck net.ck
3658             org.ck ac.cn ah.cn bj.cn com.cn cq.cn edu.cn fj.cn gd.cn gov.cn gs.cn
3659             gx.cn gz.cn ha.cn hb.cn he.cn hi.cn hk.cn hl.cn hn.cn jl.cn js.cn jx.cn
3660             ln.cn mo.cn net.cn nm.cn nx.cn org.cn qh.cn sc.cn sd.cn sh.cn sn.cn sx.cn
3661             tj.cn tw.cn xj.cn xz.cn yn.cn zj.cn arts.co com.co edu.co firm.co gov.co
3662             info.co int.co mil.co net.co nom.co org.co rec.co web.co lkd.co.im
3663             ltd.co.im plc.co.im co.cm com.cm net.cm au.com br.com cn.com de.com eu.com
3664             gb.com hu.com no.com qc.com ru.com sa.com se.com uk.com us.com uy.com
3665             za.com ac.cr co.cr ed.cr fi.cr go.cr or.cr sa.cr com.cu edu.cu gov.cu
3666             inf.cu net.cu org.cu gov.cx ac.cy biz.cy com.cy ekloges.cy gov.cy ltd.cy
3667             name.cy net.cy org.cy parliament.cy press.cy pro.cy tm.cy co.dk com.dm
3668             edu.dm gov.dm net.dm org.dm art.do com.do edu.do gob.do gov.do mil.do
3669             net.do org.do sld.do web.do art.dz asso.dz com.dz edu.dz gov.dz net.dz
3670             org.dz pol.dz com.ec edu.ec fin.ec gov.ec info.ec k12.ec med.ec mil.ec
3671             net.ec org.ec pro.ec gob.ec co.ee com.ee edu.ee fie.ee med.ee org.ee
3672             pri.ee com.eg edu.eg eun.eg gov.eg mil.eg net.eg org.eg sci.eg com.er
3673             edu.er gov.er ind.er mil.er net.er org.er com.es edu.es gob.es nom.es
3674             org.es biz.et com.et edu.et gov.et info.et name.et net.et org.et aland.fi
3675             ac.fj biz.fj com.fj gov.fj id.fj info.fj mil.fj name.fj net.fj org.fj
3676             pro.fj school.fj ac.fk co.fk com.fk gov.fk net.fk nom.fk org.fk tm.fr
3677             asso.fr nom.fr prd.fr presse.fr com.fr gouv.fr com.ge edu.ge gov.ge mil.ge
3678             net.ge org.ge pvt.ge ac.gg alderney.gg co.gg gov.gg guernsey.gg ind.gg
3679             ltd.gg net.gg org.gg sark.gg sch.gg com.gh edu.gh gov.gh mil.gh org.gh
3680             com.gi edu.gi gov.gi ltd.gi mod.gi org.gi ac.gn com.gn gov.gn net.gn
3681             org.gn asso.gp com.gp edu.gp net.gp org.gp com.gr edu.gr gov.gr net.gr
3682             org.gr com.gt edu.gt gob.gt ind.gt mil.gt net.gt org.gt com.gu edu.gu
3683             gov.gu mil.gu net.gu org.gu com.hk edu.hk gov.hk idv.hk net.hk org.hk
3684             com.hn edu.hn gob.hn mil.hn net.hn org.hn com.hr from.hr iz.hr name.hr
3685             adult.ht art.ht asso.ht com.ht coop.ht edu.ht firm.ht gouv.ht info.ht
3686             med.ht net.ht org.ht perso.ht pol.ht pro.ht rel.ht shop.ht 2000.hu
3687             agrar.hu bolt.hu casino.hu city.hu co.hu erotica.hu erotika.hu film.hu
3688             forum.hu games.hu hotel.hu info.hu ingatlan.hu jogasz.hu konyvelo.hu
3689             lakas.hu media.hu news.hu org.hu priv.hu reklam.hu sex.hu shop.hu sport.hu
3690             suli.hu szex.hu tm.hu tozsde.hu utazas.hu video.hu ac.id co.id go.id
3691             mil.id net.id or.id sch.id web.id gov.ie ac.il co.il gov.il idf.il k12.il
3692             muni.il net.il org.il ac.im co.im gov.im net.im nic.im org.im ac.in co.in
3693             edu.in ernet.in firm.in gen.in gov.in ind.in mil.in net.in nic.in org.in
3694             res.in com.io gov.io mil.io net.io org.io ac.ir co.ir gov.ir id.ir net.ir
3695             org.ir sch.ir edu.it gov.it ac.je co.je gov.je ind.je jersey.je ltd.je
3696             net.je org.je sch.je com.jm edu.jm gov.jm net.jm org.jm com.jo edu.jo
3697             gov.jo mil.jo net.jo org.jo ac.jp ad.jp aichi.jp akita.jp aomori.jp
3698             chiba.jp co.jp ed.jp ehime.jp fukui.jp fukuoka.jp fukushima.jp gifu.jp
3699             go.jp gov.jp gr.jp gunma.jp hiroshima.jp hokkaido.jp hyogo.jp ibaraki.jp
3700             ishikawa.jp iwate.jp kagawa.jp kagoshima.jp kanagawa.jp kanazawa.jp
3701             kawasaki.jp kitakyushu.jp kobe.jp kochi.jp kumamoto.jp kyoto.jp lg.jp
3702             matsuyama.jp mie.jp miyagi.jp miyazaki.jp nagano.jp nagasaki.jp nagoya.jp
3703             nara.jp ne.jp net.jp niigata.jp oita.jp okayama.jp okinawa.jp or.jp org.jp
3704             osaka.jp saga.jp saitama.jp sapporo.jp sendai.jp shiga.jp shimane.jp
3705             shizuoka.jp takamatsu.jp tochigi.jp tokushima.jp tokyo.jp tottori.jp
3706             toyama.jp utsunomiya.jp wakayama.jp yamagata.jp yamaguchi.jp yamanashi.jp
3707             yokohama.jp ac.ke co.ke go.ke ne.ke new.ke or.ke sc.ke com.kg edu.kg
3708             gov.kg mil.kg net.kg org.kg com.kh edu.kh gov.kh mil.kh net.kh org.kh
3709             per.kh ac.kr busan.kr chungbuk.kr chungnam.kr co.kr daegu.kr daejeon.kr
3710             es.kr gangwon.kr go.kr gwangju.kr gyeongbuk.kr gyeonggi.kr gyeongnam.kr
3711             hs.kr incheon.kr jeju.kr jeonbuk.kr jeonnam.kr kg.kr kyonggi.kr mil.kr
3712             ms.kr ne.kr or.kr pe.kr re.kr sc.kr seoul.kr ulsan.kr com.kw edu.kw gov.kw
3713             mil.kw net.kw org.kw com.ky edu.ky gov.ky net.ky org.ky com.kz edu.kz
3714             gov.kz mil.kz net.kz org.kz com.la net.la org.la com.lb edu.lb gov.lb
3715             mil.lb net.lb org.lb com.lc edu.lc gov.lc net.lc org.lc assn.lk com.lk
3716             edu.lk gov.lk grp.lk hotel.lk int.lk ltd.lk net.lk ngo.lk org.lk sch.lk
3717             soc.lk web.lk com.lr edu.lr gov.lr net.lr org.lr co.ls org.ls gov.lt
3718             mil.lt asn.lv com.lv conf.lv edu.lv gov.lv id.lv mil.lv net.lv org.lv
3719             biz.ly com.ly edu.ly gov.ly id.ly med.ly net.ly org.ly plc.ly sch.ly ac.ma
3720             co.ma gov.ma net.ma org.ma press.ma asso.mc tm.mc ac.me co.me edu.me
3721             gov.me its.me net.me org.me priv.me com.mg edu.mg gov.mg mil.mg nom.mg
3722             org.mg prd.mg tm.mg army.mil navy.mil com.mk org.mk com.mm edu.mm gov.mm
3723             net.mm org.mm edu.mn gov.mn org.mn com.mo edu.mo gov.mo net.mo org.mo
3724             music.mobi weather.mobi co.mp edu.mp gov.mp net.mp org.mp com.mt edu.mt
3725             gov.mt net.mt org.mt tm.mt uu.mt co.mu com.mu aero.mv biz.mv com.mv
3726             coop.mv edu.mv gov.mv info.mv int.mv mil.mv museum.mv name.mv net.mv
3727             org.mv pro.mv ac.mw co.mw com.mw coop.mw edu.mw gov.mw int.mw museum.mw
3728             net.mw org.mw com.mx edu.mx gob.mx net.mx org.mx com.my edu.my gov.my
3729             mil.my name.my net.my org.my alt.na com.na cul.na edu.na net.na org.na
3730             telecom.na unam.na com.nc net.nc org.nc de.net gb.net uk.net ac.ng com.ng
3731             edu.ng gov.ng net.ng org.ng sch.ng ac.ni biz.ni com.ni edu.ni gob.ni in.ni
3732             info.ni int.ni mil.ni net.ni nom.ni org.ni web.ni fhs.no folkebibl.no
3733             fylkesbibl.no herad.no idrett.no kommune.no mil.no museum.no priv.no
3734             stat.no tel.no vgs.no com.np edu.np gov.np mil.np net.np org.np biz.nr
3735             co.nr com.nr edu.nr fax.nr gov.nr info.nr mob.nr mobil.nr mobile.nr net.nr
3736             org.nr tel.nr tlf.nr ac.nz co.nz cri.nz geek.nz gen.nz govt.nz iwi.nz
3737             maori.nz mil.nz net.nz org.nz school.nz ac.om biz.om co.om com.om edu.om
3738             gov.om med.om mil.om mod.om museum.om net.om org.om pro.om sch.om dk.org
3739             eu.org abo.pa ac.pa com.pa edu.pa gob.pa ing.pa med.pa net.pa nom.pa
3740             org.pa sld.pa com.pe edu.pe gob.pe mil.pe net.pe nom.pe org.pe com.pf
3741             edu.pf org.pf ac.pg com.pg net.pg com.ph edu.ph gov.ph mil.ph net.ph
3742             ngo.ph org.ph biz.pk com.pk edu.pk fam.pk gob.pk gok.pk gon.pk gop.pk
3743             gos.pk gov.pk net.pk org.pk web.pk art.pl biz.pl com.pl edu.pl gov.pl
3744             info.pl mil.pl net.pl ngo.pl org.pl biz.pr com.pr edu.pr gov.pr info.pr
3745             isla.pr name.pr net.pr org.pr pro.pr cpa.pro law.pro med.pro com.ps edu.ps
3746             gov.ps net.ps org.ps plo.ps sec.ps com.pt edu.pt gov.pt int.pt net.pt
3747             nome.pt org.pt publ.pt com.py edu.py gov.py net.py org.py com.qa edu.qa
3748             gov.qa net.qa org.qa asso.re com.re nom.re arts.ro com.ro firm.ro info.ro
3749             nom.ro nt.ro org.ro rec.ro store.ro tm.ro www.ro ac.rs co.rs edu.rs gov.rs
3750             in.rs org.rs ac.ru com.ru edu.ru gov.ru int.ru mil.ru net.ru org.ru pp.ru
3751             ac.rw co.rw com.rw edu.rw gouv.rw gov.rw int.rw mil.rw net.rw com.sa
3752             edu.sa gov.sa med.sa net.sa org.sa pub.sa sch.sa com.sb edu.sb gov.sb
3753             net.sb org.sb com.sc edu.sc gov.sc net.sc org.sc com.sd edu.sd gov.sd
3754             info.sd med.sd net.sd org.sd sch.sd tv.sd ab.se ac.se bd.se brand.se c.se
3755             d.se e.se f.se fh.se fhsk.se fhv.se g.se h.se i.se k.se komforb.se
3756             kommunalforbund.se komvux.se lanarb.se lanbib.se m.se mil.se n.se
3757             naturbruksgymn.se o.se org.se parti.se pp.se press.se s.se sshn.se t.se
3758             tm.se u.se w.se x.se y.se z.se com.sg edu.sg gov.sg idn.sg net.sg org.sg
3759             per.sg com.sh edu.sh gov.sh mil.sh net.sh org.sh edu.sk gov.sk mil.sk
3760             co.st com.st consulado.st edu.st embaixada.st gov.st mil.st net.st org.st
3761             principe.st saotome.st store.st com.sv edu.sv gob.sv org.sv red.sv com.sy
3762             gov.sy net.sy org.sy at.tf bg.tf ca.tf ch.tf cz.tf de.tf edu.tf eu.tf
3763             int.tf net.tf pl.tf ru.tf sg.tf us.tf ac.th co.th go.th in.th mi.th net.th
3764             or.th ac.tj biz.tj co.tj com.tj edu.tj go.tj gov.tj int.tj mil.tj name.tj
3765             net.tj org.tj web.tj com.tn edunet.tn ens.tn fin.tn gov.tn ind.tn info.tn
3766             intl.tn nat.tn net.tn org.tn rnrt.tn rns.tn rnu.tn tourism.tn gov.to
3767             av.tr bbs.tr bel.tr biz.tr com.tr dr.tr edu.tr gen.tr gov.tr
3768             info.tr k12.tr mil.tr name.tr net.tr org.tr pol.tr tel.tr web.tr aero.tt
3769             at.tt au.tt be.tt biz.tt ca.tt co.tt com.tt coop.tt de.tt dk.tt edu.tt
3770             es.tt eu.tt fr.tt gov.tt info.tt int.tt it.tt jobs.tt mobi.tt museum.tt
3771             name.tt net.tt nic.tt org.tt pro.tt se.tt travel.tt uk.tt us.tt co.tv
3772             gov.tv club.tw com.tw ebiz.tw edu.tw game.tw gov.tw idv.tw mil.tw net.tw
3773             org.tw ac.tz co.tz go.tz ne.tz or.tz cherkassy.ua chernigov.ua
3774             chernovtsy.ua ck.ua cn.ua co.ua com.ua crimea.ua cv.ua dn.ua
3775             dnepropetrovsk.ua donetsk.ua dp.ua edu.ua gov.ua if.ua in.ua
3776             ivano-frankivsk.ua kh.ua kharkov.ua kherson.ua khmelnitskiy.ua kiev.ua
3777             kirovograd.ua km.ua kr.ua ks.ua kv.ua lg.ua lugansk.ua lutsk.ua lviv.ua
3778             mk.ua net.ua nikolaev.ua od.ua odessa.ua org.ua pl.ua poltava.ua rovno.ua
3779             rv.ua sebastopol.ua sumy.ua te.ua ternopil.ua uzhgorod.ua vinnica.ua vn.ua
3780             zaporizhzhe.ua zhitomir.ua zp.ua zt.ua ac.ug co.ug go.ug ne.ug or.ug sc.ug
3781             ac.uk bl.uk british-library.uk co.uk edu.uk gov.uk icnet.uk jet.uk ltd.uk
3782             me.uk mod.uk national-library-scotland.uk net.uk nhs.uk nic.uk nls.uk
3783             org.uk parliament.uk plc.uk police.uk sch.uk ak.us al.us ar.us az.us ca.us
3784             co.us ct.us dc.us de.us dni.us fed.us fl.us ga.us hi.us ia.us id.us il.us
3785             in.us isa.us kids.us ks.us ky.us la.us ma.us md.us me.us mi.us mn.us mo.us
3786             ms.us mt.us nc.us nd.us ne.us nh.us nj.us nm.us nsn.us nv.us ny.us oh.us
3787             ok.us or.us pa.us ri.us sc.us sd.us tn.us tx.us ut.us va.us vt.us wa.us
3788             wi.us wv.us wy.us com.uy edu.uy gub.uy mil.uy net.uy org.uy vatican.va
3789             arts.ve bib.ve co.ve com.ve edu.ve firm.ve gov.ve info.ve int.ve mil.ve
3790             net.ve nom.ve org.ve rec.ve store.ve tec.ve web.ve co.vi com.vi edu.vi
3791             gov.vi net.vi org.vi ac.vn biz.vn com.vn edu.vn gov.vn health.vn info.vn
3792             int.vn name.vn net.vn org.vn pro.vn ch.vu com.vu de.vu edu.vu fr.vu net.vu
3793             org.vu com.ws edu.ws gov.ws net.ws org.ws com.ye edu.ye gov.ye mil.ye
3794             net.ye org.ye ac.za alt.za bourse.za city.za co.za edu.za gov.za law.za
3795             mil.za net.za ngo.za nom.za org.za school.za tm.za web.za ac.zm co.zm
3796             com.zm edu.zm gov.zm org.zm sch.zm ac.zw co.zw gov.zw org.zw
3797 128547         238673 /) { $self->{two_level_domains}{lc $_} = 1; }
3798              
3799             push (@cmds, {  # Register 'util_rb_2tld': its handler adds names to $self->{two_level_domains}.
3800             setting => 'util_rb_2tld',
3801             is_admin => 1,
3802             code => sub {
3803 28918     28918   44412 my ($self, $key, $value, $line) = @_;
3804 28918 50 33     94271 unless (defined $value && $value !~ /^$/) {  # an undef or empty value is reported as missing
3805 0         0 return $MISSING_REQUIRED_VALUE;
3806             }
3807 28918 50       126879 unless ($value =~ /^[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+)*$/) {  # whole value must be whitespace-separated names of exactly two dot-separated labels
3808 0         0 return $INVALID_VALUE;
3809             }
3810 28918         102181 foreach (split(/\s+/, $value)) {
3811 117685         267903 $self->{two_level_domains}{lc $_} = 1;  # keys are stored lower-cased; the value 1 only marks membership
3812             }
3813             }
3814 81         1282 });
3815              
3816             =item util_rb_3tld 3tld1.some.tld 3tld2.other.tld ...
3817              
3818             This option maintains the list of valid 3rd-level TLDs in the RegistryBoundaries
3819             code. 3TLDs include things like demon.co.uk, plc.co.im, etc.
3820              
3821             =cut
3822              
3823             # DO NOT UPDATE THIS HARDCODED LIST!! It is kept only as a fallback for
3824             # the transitional period and will be removed later. TLDs are now maintained
3825             # in sa-update's 20_aux_tlds.cf.
3826 81         550 foreach (qw/
3827             demon.co.uk esc.edu.ar lkd.co.im plc.co.im
3828 324         1105 /) { $self->{three_level_domains}{lc $_} = 1; }  # seed the built-in fallback entries, keyed by lower-cased name
3829              
3830             push (@cmds, {  # Register 'util_rb_3tld': its handler adds names to $self->{three_level_domains}.
3831             setting => 'util_rb_3tld',
3832             is_admin => 1,
3833             code => sub {
3834 3025     3025   4765 my ($self, $key, $value, $line) = @_;
3835 3025 50 33     10227 unless (defined $value && $value !~ /^$/) {  # an undef or empty value is reported as missing
3836 0         0 return $MISSING_REQUIRED_VALUE;
3837             }
3838 3025 50       10203 unless ($value =~ /^[^\s.]+\.[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+\.[^\s.]+)*$/) {  # whole value must be whitespace-separated names of exactly three dot-separated labels
3839 0         0 return $INVALID_VALUE;
3840             }
3841 3025         7090 foreach (split(/\s+/, $value)) {
3842 3025         11647 $self->{three_level_domains}{lc $_} = 1;  # keys are stored lower-cased; the value 1 only marks membership
3843             }
3844             }
3845 81         951 });
3846              
3847             =item clear_util_rb
3848              
3849             Empty internal list of valid TLDs (including 2nd and 3rd level) which
3850             RegistryBoundaries code uses. Only useful if you want to override the
3851             standard lists supplied by sa-update.
3852              
3853             =cut
3854              
3855             push (@cmds, {  # Register 'clear_util_rb': its handler resets the three TLD lookup slots on $self.
3856             setting => 'clear_util_rb',  # NOTE(review): no is_admin key here, unlike util_rb_2tld/util_rb_3tld -- confirm that is intended
3857             type => $CONF_TYPE_NOARGS,
3858             code => sub {
3859 63     63   261 my ($self, $key, $value, $line) = @_;
3860 63 50 33     551 unless (!defined $value || $value eq '') {  # any non-empty argument is rejected
3861 0         0 return $INVALID_VALUE;
3862             }
3863 63         17352 $self->{valid_tlds} = ();  # NOTE(review): assigning () to a scalar slot stores undef, not an empty hash; later writes would autovivify -- confirm readers tolerate undef
3864 63         30873 $self->{two_level_domains} = ();  # same as above: slot becomes undef
3865 63         394 $self->{three_level_domains} = ();  # same as above: slot becomes undef
3866 63         344 dbg("config: cleared tld lists");
3867             }
3868 81         1042 });
3869              
3870             =item bayes_path /path/filename (default: ~/.spamassassin/bayes)
3871              
3872             This is the directory and filename for Bayes databases. Several databases
3873             will be created, with this as the base directory and filename, with C<_toks>,
3874             C<_seen>, etc. appended to the base. The default setting results in files
3875             called C<~/.spamassassin/bayes_seen>, C<~/.spamassassin/bayes_toks>, etc.
3876              
3877             By default, each user has their own in their C<~/.spamassassin> directory with
3878             mode 0700/0600. For system-wide SpamAssassin use, you may want to reduce disk
3879             space usage by sharing this across all users. However, Bayes appears to be
3880             more effective with individual user databases.
3881              
3882             =cut
3883              
3884             push (@cmds, {  # Register 'bayes_path': its handler validates the value and stores it in $self->{bayes_path}.
3885             setting => 'bayes_path',
3886             is_admin => 1,
3887             default => '__userstate__/bayes',
3888             type => $CONF_TYPE_STRING,
3889             code => sub {
3890 63     63   275 my ($self, $key, $value, $line) = @_;
3891 63 50 33     523 unless (defined $value && $value !~ /^$/) {  # an undef or empty value is reported as missing
3892 0         0 return $MISSING_REQUIRED_VALUE;
3893             }
3894 63 50       2738 if (-d $value) {  # a value naming an existing directory is rejected
3895 0         0 return $INVALID_VALUE;
3896             }
3897 63         480 $self->{bayes_path} = $value;  # stored as given; no expansion or untainting is done in this handler
3898             }
3899 81         1166 });
3900              
3901             =item bayes_file_mode (default: 0700)
3902              
3903             The file mode bits used for the Bayesian filtering database files.
3904              
3905             Make sure you specify this using the 'x' mode bits set, as it may also be used
3906             to create directories. However, if a file is created, the resulting file will
3907             not have any execute bits set (the umask is set to 111). The argument is a
3908             string of octal digits, it is converted to a numeric value internally.
3909              
3910             =cut
3911              
3912             push (@cmds, {  # Register 'bayes_file_mode': its handler validates an octal-digit string and stores it in $self->{bayes_file_mode}.
3913             setting => 'bayes_file_mode',
3914             is_admin => 1,
3915             default => '0700',
3916             type => $CONF_TYPE_NUMERIC,
3917             code => sub {
3918 0     0   0 my ($self, $key, $value, $line) = @_;
3919 0 0       0 if ($value !~ /^0?[0-7]{3}$/) { return $INVALID_VALUE }  # accept exactly three octal digits with an optional leading 0
  0         0  
3920 0         0 $self->{bayes_file_mode} = untaint_var($value);  # stores the result of untaint_var(); no oct() conversion happens in this handler
3921             }
3922 81         1056 });
3923              
3924             =item bayes_store_module Name::Of::BayesStore::Module
3925              
3926             If this option is set, the module given will be used as an alternate
3927             to the default bayes storage mechanism. It must conform to the
3928             published storage specification (see
3929             Mail::SpamAssassin::BayesStore). For example, set this to
3930             Mail::SpamAssassin::BayesStore::SQL to use the generic SQL storage
3931             module.
3932              
3933             =cut
3934              
3935             push (@cmds, {  # Register 'bayes_store_module': its handler validates a module name and stores it in $self->{bayes_store_module}.
3936             setting => 'bayes_store_module',
3937             is_admin => 1,
3938             default => '',
3939             type => $CONF_TYPE_STRING,
3940             code => sub {
3941 6     6   28 my ($self, $key, $value, $line) = @_;
3942 6         19 local ($1);  # localise the capture variable before the match below sets it
3943 6 50       34 if ($value !~ /^([_A-Za-z0-9:]+)$/) { return $INVALID_VALUE; }  # only letters, digits, '_' and ':' are allowed
  0         0  
3944 6         30 $self->{bayes_store_module} = $1;  # store the captured copy rather than $value itself
3945             }
3946 81         993 });
3947              
3948             =item bayes_sql_dsn DBI:databasetype:databasename:hostname:port
3949              
3950             Used for BayesStore::SQL storage implementation.
3951              
3952             This option gives the connect string used to connect to the SQL-based Bayes storage.
3953              
3954             =cut
3955              
3956 81         431 push (@cmds, {  # Declarative registration of 'bayes_sql_dsn': no code handler is given in this entry.
3957             setting => 'bayes_sql_dsn',
3958             is_admin => 1,
3959             default => '',  # empty string when not configured
3960             type => $CONF_TYPE_STRING,
3961             });
3962              
3963             =item bayes_sql_username
3964              
3965             Used by BayesStore::SQL storage implementation.
3966              
3967             This option gives the username used by the above DSN.
3968              
3969             =cut
3970              
3971 81         512 push (@cmds, {  # Declarative registration of 'bayes_sql_username': no code handler is given in this entry.
3972             setting => 'bayes_sql_username',
3973             is_admin => 1,
3974             default => '',  # empty string when not configured
3975             type => $CONF_TYPE_STRING,
3976             });
3977              
3978             =item bayes_sql_password
3979              
3980             Used by BayesStore::SQL storage implementation.
3981              
3982             This option gives the password used by the above DSN.
3983              
3984             =cut
3985              
3986 81         459 push (@cmds, {  # Declarative registration of 'bayes_sql_password': no code handler is given in this entry.
3987             setting => 'bayes_sql_password',
3988             is_admin => 1,
3989             default => '',  # empty string when not configured
3990             type => $CONF_TYPE_STRING,
3991             });
3992              
3993             =item bayes_sql_username_authorized ( 0 | 1 ) (default: 0)
3994              
3995             Whether to call the services_authorized_for_username plugin hook in BayesSQL.
3996             If the hook does not determine that the user is allowed to use bayes or is
3997             invalid then the database will not be initialized.
3998              
3999             NOTE: By default the user is considered invalid until a plugin returns
4000             a true value. If you enable this, but do not have a proper plugin
4001             loaded, all users will turn up as invalid.
4002              
4003             The username passed into the plugin can be affected by the
4004             bayes_sql_override_username config option.
4005              
4006             =cut
4007              
4008 81         511 push (@cmds, {  # Declarative registration of the boolean 'bayes_sql_username_authorized': no code handler is given in this entry.
4009             setting => 'bayes_sql_username_authorized',
4010             is_admin => 1,
4011             default => 0,  # off unless configured
4012             type => $CONF_TYPE_BOOL,
4013             });
4014              
4015             =item user_scores_dsn DBI:databasetype:databasename:hostname:port
4016              
4017             If you load user scores from an SQL database, this will set the DSN
4018             used to connect. Example: C<DBI:mysql:spamassassin:localhost>
4019              
4020             If you load user scores from an LDAP directory, this will set the DSN used to
4021             connect. You have to write the DSN as an LDAP URL, the components being the
4022             host and port to connect to, the base DN for the search, the scope of the
4023             search (base, one or sub), the single attribute being the multivalued attribute
4024             used to hold the configuration data (space separated pairs of key and value,
4025             just as in a file) and finally the filter being the expression used to filter
4026             out the wanted username. Note that the filter expression is being used in a
4027             sprintf statement with the username as the only parameter, thus it can hold a
4028             single __USERNAME__ expression. This will be replaced with the username.
4029              
4030             Example: C<ldap://localhost:389/dc=koehntopp,dc=de?saconfig?uid=__USERNAME__>
4031              
4032             =cut
4033              
4034 81         380 push (@cmds, {  # Declarative registration of 'user_scores_dsn': no code handler is given in this entry.
4035             setting => 'user_scores_dsn',
4036             is_admin => 1,
4037             default => '',  # empty string when not configured
4038             type => $CONF_TYPE_STRING,
4039             });
4040              
4041             =item user_scores_sql_username username
4042              
4043             The authorized username to connect to the above DSN.
4044              
4045             =cut
4046              
4047 81         533 push (@cmds, {  # Declarative registration of 'user_scores_sql_username': no code handler is given in this entry.
4048             setting => 'user_scores_sql_username',
4049             is_admin => 1,
4050             default => '',  # empty string when not configured
4051             type => $CONF_TYPE_STRING,
4052             });
4053              
4054             =item user_scores_sql_password password
4055              
4056             The password for the database username, for the above DSN.
4057              
4058             =cut
4059              
4060 81         354 push (@cmds, {  # Declarative registration of 'user_scores_sql_password': no code handler is given in this entry.
4061             setting => 'user_scores_sql_password',
4062             is_admin => 1,
4063             default => '',  # empty string when not configured
4064             type => $CONF_TYPE_STRING,
4065             });
4066              
4067             =item user_scores_sql_custom_query query
4068              
4069             This option gives you the ability to create a custom SQL query to
4070             retrieve user scores and preferences. In order to work correctly your
4071             query should return two values, the preference name and value, in that
4072             order. In addition, there are several "variables" that you can use
4073             as part of your query, these variables will be substituted for the
4074             current values right before the query is run. The current allowed
4075             variables are:
4076              
4077             =over 4
4078              
4079             =item _TABLE_
4080              
4081             The name of the table where user scores and preferences are stored. Currently
4082             hardcoded to userpref, to change this value you need to create a new custom
4083             query with the new table name.
4084              
4085             =item _USERNAME_
4086              
4087             The current user's username.
4088              
4089             =item _MAILBOX_
4090              
4091             The portion before the @ as derived from the current user's username.
4092              
4093             =item _DOMAIN_
4094              
4095             The portion after the @ as derived from the current user's username, this
4096             value may be null.
4097              
4098             =back
4099              
4100             The query must be one continuous line in order to parse correctly.
4101              
4102             Here are several example queries. Please note that these are broken up
4103             for easy reading; in your config each query should be one continuous line.
4104              
4105             =over 4
4106              
4107             =item Current default query:
4108              
4109             C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' ORDER BY username ASC>
4110              
4111             =item Use global and then domain level defaults:
4112              
4113             C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' OR username = '@~'||_DOMAIN_ ORDER BY username ASC>
4114              
4115             =item Maybe global prefs should override user prefs:
4116              
4117             C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' ORDER BY username DESC>
4118              
4119             =back
4120              
4121             =cut
4122              
4123 81         348 push (@cmds, {  # Declarative registration of 'user_scores_sql_custom_query': no code handler is given in this entry.
4124             setting => 'user_scores_sql_custom_query',
4125             is_admin => 1,
4126             default => undef,  # unlike the sibling string settings, the default here is undef rather than ''
4127             type => $CONF_TYPE_STRING,
4128             });
4129              
4130             =item user_scores_ldap_username
4131              
4132             This is the Bind DN used to connect to the LDAP server. It defaults
4133             to the empty string (""), allowing anonymous binding to work.
4134              
4135             Example: C<cn=master,dc=koehntopp,dc=de>
4136              
4137             =cut
4138              
4139 81         344 push (@cmds, {  # Declarative registration of 'user_scores_ldap_username': no code handler is given in this entry.
4140             setting => 'user_scores_ldap_username',
4141             is_admin => 1,
4142             default => '',  # empty string when not configured
4143             type => $CONF_TYPE_STRING,
4144             });
4145              
4146             =item user_scores_ldap_password
4147              
4148             This is the password used to connect to the LDAP server. It defaults
4149             to the empty string ("").
4150              
4151             =cut
4152              
4153 81         740 push (@cmds, {  # Declarative registration of 'user_scores_ldap_password': no code handler is given in this entry.
4154             setting => 'user_scores_ldap_password',
4155             is_admin => 1,
4156             default => '',  # empty string when not configured
4157             type => $CONF_TYPE_STRING,
4158             });
4159              
4160             =item user_scores_fallback_to_global (default: 1)
4161              
4162             Fall back to global scores and settings if userprefs can't be loaded
4163             from SQL or LDAP, instead of passing the message through unprocessed.
4164              
4165             =cut
4166              
4167 81         325 push (@cmds, {  # Declarative registration of the boolean 'user_scores_fallback_to_global': no code handler is given in this entry.
4168             setting => 'user_scores_fallback_to_global',
4169             is_admin => 1,
4170             default => 1,  # on unless configured otherwise
4171             type => $CONF_TYPE_BOOL,
4172             });
4173              
4174             =item loadplugin PluginModuleName [/path/module.pm]
4175              
4176             Load a SpamAssassin plugin module. The C<PluginModuleName> is the perl module
4177             name, used to create the plugin object itself.
4178              
4179             C</path/to/module.pm> is the file to load, containing the module's perl code;
4180             if it's specified as a relative path, it's considered to be relative to the
4181             current configuration file. If it is omitted, the module will be loaded
4182             using perl's search path (the C<@INC> array).
4183              
4184             See C<Mail::SpamAssassin::Plugin> for more details on writing plugins.
4185              
4186             =cut
4187              
4188             push (@cmds, {  # Register 'loadplugin': its handler parses "Package [path]" and calls $self->load_plugin.
4189             setting => 'loadplugin',
4190             is_admin => 1,
4191             code => sub {
4192 4680     4680   8493 my ($self, $key, $value, $line) = @_;
4193 4680 50       8513 if ($value eq '') {  # an empty value is reported as missing
4194 0         0 return $MISSING_REQUIRED_VALUE;
4195             }
4196 4680         6011 my ($package, $path);
4197 4680         11107 local ($1,$2);  # localise the capture variables before the matches below set them
4198 4680 50       28483 if ($value =~ /^(\S+)\s+(\S+)$/) {  # two whitespace-separated tokens: package name, then path
    50          
4199 0         0 ($package, $path) = ($1, $2);
4200             } elsif ($value =~ /^\S+$/) {  # a single token: package name only, path stays undef
4201 4680         8059 ($package, $path) = ($value, undef);
4202             } else {  # anything else (e.g. three or more tokens) is invalid
4203 0         0 return $INVALID_VALUE;
4204             }
4205             # is blindly untainting safe? it is no worse than before
4206 4680         13103 $_ = untaint_var($_) for ($package,$path);  # replaces both variables in place via the loop alias
4207 4680         9936 $self->load_plugin ($package, $path);
4208             }
4209 81         872 });
4210              
4211             =item tryplugin PluginModuleName [/path/module.pm]
4212              
4213             Same as C<loadplugin>, but silently ignored if the .pm file cannot be found in
4214             the filesystem.
4215              
4216             =cut
4217              
4218             push (@cmds, {  # Register 'tryplugin': same parsing as 'loadplugin', but load_plugin gets a third argument of 1.
4219             setting => 'tryplugin',
4220             is_admin => 1,
4221             code => sub {
4222 0     0   0 my ($self, $key, $value, $line) = @_;
4223 0 0       0 if ($value eq '') {  # an empty value is reported as missing
4224 0         0 return $MISSING_REQUIRED_VALUE;
4225             }
4226 0         0 my ($package, $path);
4227 0         0 local ($1,$2);  # localise the capture variables before the matches below set them
4228 0 0       0 if ($value =~ /^(\S+)\s+(\S+)$/) {  # two whitespace-separated tokens: package name, then path
    0          
4229 0         0 ($package, $path) = ($1, $2);
4230             } elsif ($value =~ /^\S+$/) {  # a single token: package name only, path stays undef
4231 0         0 ($package, $path) = ($value, undef);
4232             } else {  # anything else (e.g. three or more tokens) is invalid
4233 0         0 return $INVALID_VALUE;
4234             }
4235             # is blindly untainting safe? it is no worse than before
4236 0         0 $_ = untaint_var($_) for ($package,$path);  # replaces both variables in place via the loop alias
4237 0         0 $self->load_plugin ($package, $path, 1);  # presumably the trailing 1 selects the "silently ignore if missing" mode described in the POD -- confirm in load_plugin
4238             }
4239 81         936 });
4240              
4241             =item ignore_always_matching_regexps (Default: 0)
4242              
4243             Ignore any rule which contains a regexp which always matches.
4244             Currently only catches regexps which contain '||', or which begin or
4245             end with a '|'. Also ignore rules with C<some> combinatorial explosions.
4246              
4247             =cut
4248              
4249 81         566 push (@cmds, {  # Declarative registration of the boolean 'ignore_always_matching_regexps': no code handler is given in this entry.
4250             setting => 'ignore_always_matching_regexps',
4251             is_admin => 1,
4252             default => 0,  # off unless configured
4253             type => $CONF_TYPE_BOOL,
4254             });
4255              
4256             =back
4257              
4258             =head1 PREPROCESSING OPTIONS
4259              
4260             =over 4
4261              
4262             =item include filename
4263              
4264             Include configuration lines from C<filename>. Relative paths are considered
4265             relative to the current configuration file or user preferences file.
4266              
4267             =item if (boolean perl expression)
4268              
4269             Used to support conditional interpretation of the configuration
4270             file. Lines between this and a corresponding C<else> or C<endif> line
4271             will be ignored unless the expression evaluates as true
4272             (in the perl sense; that is, defined and non-0 and non-empty string).
4273              
4274             The conditional accepts a limited subset of perl for security -- just enough to
4275             perform basic arithmetic comparisons. The following input is accepted:
4276              
4277             =over 4
4278              
4279             =item numbers, whitespace, arithmetic operations and grouping
4280              
4281             Namely these characters and ranges:
4282              
4283             ( ) - + * / _ . , < = > ! ~ 0-9 whitespace
4284              
4285             =item version
4286              
4287             This will be replaced with the version number of the currently-running
4288             SpamAssassin engine. Note: The version used is in the internal SpamAssassin
4289             version format which is C<x.yyyzzz>, where x is major version, y is minor
4290             version, and z is maintenance version. So 3.0.0 is C<3.000000>, and 3.4.80
4291             is C<3.004080>.
4292              
4293             =item perl_version
4294              
4295             (Introduced in 3.4.1) This will be replaced with the version number of the
4296             currently-running perl engine. Note: The version used is in the $] version
4297             format which is C<x.yyyzzz>, where x is major version, y is minor version,
4298             and z is maintenance version. So 5.8.8 is C<5.008008>, and 5.10.0 is
4299             C<5.010000>. Use to protect rules that incorporate RE syntax elements
4300             introduced in later versions of perl, such as the C<++> non-backtracking
4301             match introduced in perl 5.10. For example:
4302              
4303             # Avoid lint error on older perl installs
4304             # Check SA version first to avoid warnings on checking perl_version on older SA
4305             if version > 3.004001 && perl_version >= 5.018000
4306             body INVALID_RE_SYNTAX_IN_PERL_BEFORE_5_18 /(?[ \p{Thai} & \p{Digit} ])/
4307             endif
4308              
4309             Note that the above will still generate a warning on perl older than 5.10.0;
4310             to avoid that warning do this instead:
4311              
4312             # Avoid lint error on older perl installs
4313             if can(Mail::SpamAssassin::Conf::perl_min_version_5010000)
4314             body INVALID_RE_SYNTAX_IN_PERL_5_8 /\w++/
4315             endif
4316              
4317             Warning: a can() test is only defined for perl 5.10.0!
4318              
4319              
4320             =item plugin(Name::Of::Plugin)
4321              
4322             This is a function call that returns C<1> if the plugin named
4323             C<Name::Of::Plugin> is loaded, or C<undef> otherwise.
4324              
4325             =item has(Name::Of::Package::function_name)
4326              
4327             This is a function call that returns C<1> if the perl package named
4328             C<Name::Of::Package> includes a function called C<function_name>, or C<undef>
4329             otherwise. Note that packages can be SpamAssassin plugins or built-in classes,
4330             there's no difference in this respect. Internally this invokes UNIVERSAL::can.
4331              
4332             =item can(Name::Of::Package::function_name)
4333              
4334             This is a function call that returns C<1> if the perl package named
4335             C<Name::Of::Package> includes a function called C<function_name>
4336             B<and> that function returns a true value when called with no arguments,
4337             otherwise C<undef> is returned.
4338              
4339             It is similar to C<has>, except that it also calls the named function,
4340             testing its return value (unlike the perl function UNIVERSAL::can).
4341             This makes it possible for a 'feature' function to determine its result
4342             value at run time.
4343              
4344             =back
4345              
4346             If the end of a configuration file is reached while still inside an
4347             C<if> scope, a warning will be issued, but parsing will restart on
4348             the next file.
4349              
4350             For example:
4351              
4352             if (version > 3.000000)
4353             header MY_FOO ...
4354             endif
4355              
4356             loadplugin MyPlugin plugintest.pm
4357              
4358             if plugin (MyPlugin)
4359             header MY_PLUGIN_FOO eval:check_for_foo()
4360             score MY_PLUGIN_FOO 0.1
4361             endif
4362              
4363             =item ifplugin PluginModuleName
4364              
4365             An alias for C<if plugin(PluginModuleName)>.
4366              
4367             =item else
4368              
4369             Used to support conditional interpretation of the configuration
4370             file. Lines between this and a corresponding C<endif> line
4371             will be ignored unless the conditional expression evaluates as false
4372             (in the perl sense; that is, undefined, 0, or an empty string).
4373              
4374             =item require_version n.nnnnnn
4375              
4376             Indicates that the entire file, from this line on, requires a certain
4377             version of SpamAssassin to run. If a different (older or newer) version
4378             of SpamAssassin tries to read the configuration from this file, it will
4379             output a warning instead, and ignore it.
4380              
4381             Note: The version used is in the internal SpamAssassin version format which is
4382             C<x.yyyzzz>, where x is major version, y is minor version, and z is maintenance
4383             version. So 3.0.0 is C<3.000000>, and 3.4.80 is C<3.004080>.
4384              
4385             =cut
4386              
4387             push (@cmds, {
4388             setting => 'require_version',
4389             type => $CONF_TYPE_STRING,
4390       0     code => sub {
4391             }
4392 81         670 });
4393              
4394             =back
4395              
4396             =head1 TEMPLATE TAGS
4397              
4398             The following C<tags> can be used as placeholders in certain options.
4399             They will be replaced by the corresponding value when they are used.
4400              
4401             Some tags can take an argument (in parentheses). The argument is
4402             optional, and the default is shown below.
4403              
4404             _YESNO_ "Yes" for spam, "No" for nonspam (=ham)
4405             _YESNO(spam_str,ham_str)_ returns the first argument ("Yes" if missing)
4406             for spam, and the second argument ("No" if missing) for ham
4407             _YESNOCAPS_ "YES" for spam, "NO" for nonspam (=ham)
4408             _YESNOCAPS(spam_str,ham_str)_ same as _YESNO(...)_, but uppercased
4409             _SCORE(PAD)_ message score; if PAD is included and is either spaces or
4410             zeroes, then pad scores with that many spaces or zeroes
4411             (default, none), i.e. _SCORE(0)_ makes 2.4 become 02.4 and
4412             _SCORE(00)_ makes it 002.4; 12.3 would become 12.3 and 012.3
4413             respectively.
4414             _REQD_ message threshold
4415             _VERSION_ version (eg. 3.0.0 or 3.1.0-r26142-foo1)
4416             _SUBVERSION_ sub-version/code revision date (eg. 2004-01-10)
4417             _RULESVERSION_ comma-separated list of rules versions, retrieved from
4418             an '# UPDATE version' comment in rules files; if there is
4419             more than one set of rules (update channels) the order
4420             is unspecified (currently sorted by names of files)
4421             _HOSTNAME_ hostname of the machine the mail was processed on
4422             _REMOTEHOSTNAME_ hostname of the machine the mail was sent from, only
4423             available with spamd
4424             _REMOTEHOSTADDR_ ip address of the machine the mail was sent from, only
4425             available with spamd
4426             _BAYES_ bayes score
4427             _TOKENSUMMARY_ number of new, neutral, spammy, and hammy tokens found
4428             _BAYESTC_ number of new tokens found
4429             _BAYESTCLEARNED_ number of seen tokens found
4430             _BAYESTCSPAMMY_ number of spammy tokens found
4431             _BAYESTCHAMMY_ number of hammy tokens found
4432             _HAMMYTOKENS(N)_ the N most significant hammy tokens (default, 5)
4433             _SPAMMYTOKENS(N)_ the N most significant spammy tokens (default, 5)
4434             _DATE_ rfc-2822 date of scan
4435             _STARS(*)_ one "*" (use any character) for each full score point
4436             (note: limited to 50 'stars')
4437             _SENDERDOMAIN_ a domain name of the envelope sender address, lowercased
4438             _AUTHORDOMAIN_ a domain name of the author address (the From header
4439             field), lowercased; note that RFC 5322 allows a mail
4440             message to have multiple authors - currently only the
4441             domain name of the first email address is returned
4442             _RELAYSTRUSTED_ relays used and deemed to be trusted (see the
4443             'X-Spam-Relays-Trusted' pseudo-header)
4444             _RELAYSUNTRUSTED_ relays used that can not be trusted (see the
4445             'X-Spam-Relays-Untrusted' pseudo-header)
4446             _RELAYSINTERNAL_ relays used and deemed to be internal (see the
4447             'X-Spam-Relays-Internal' pseudo-header)
4448             _RELAYSEXTERNAL_ relays used and deemed to be external (see the
4449             'X-Spam-Relays-External' pseudo-header)
4450             _LASTEXTERNALIP_ IP address of client in the external-to-internal
4451             SMTP handover
4452             _LASTEXTERNALRDNS_ reverse-DNS of client in the external-to-internal
4453             SMTP handover
4454             _LASTEXTERNALHELO_ HELO string used by client in the external-to-internal
4455             SMTP handover
4456             _AUTOLEARN_ autolearn status ("ham", "no", "spam", "disabled",
4457             "failed", "unavailable")
4458             _AUTOLEARNSCORE_ portion of message score used by autolearn
4459             _TESTS(,)_ tests hit separated by "," (or other separator)
4460             _TESTSSCORES(,)_ as above, except with scores appended (eg. AWL=-3.0,...)
4461             _SUBTESTS(,)_ subtests (start with "__") hit separated by ","
4462             (or other separator)
4463             _DCCB_ DCC's "Brand"
4464             _DCCR_ DCC's results
4465             _PYZOR_ Pyzor results
4466             _RBL_ full results for positive RBL queries in DNS URI format
4467             _LANGUAGES_ possible languages of mail
4468             _PREVIEW_ content preview
4469             _REPORT_ terse report of tests hit (for header reports)
4470             _SUMMARY_ summary of tests hit for standard report (for body reports)
4471             _CONTACTADDRESS_ contents of the 'report_contact' setting
4472             _HEADER(NAME)_ includes the value of a message header. value is the same
4473             as is found for header rules (see elsewhere in this doc)
4474             _TIMING_ timing breakdown report
4475             _ADDEDHEADERHAM_ resulting header fields as requested by add_header for ham
4476             _ADDEDHEADERSPAM_ resulting header fields as requested by add_header for spam
4477             _ADDEDHEADER_ same as ADDEDHEADERHAM for ham or ADDEDHEADERSPAM for spam
4478              
4479             If a tag reference uses the name of a tag which is not in this list or defined
4480             by a loaded plugin, the reference will be left intact and not replaced by any
4481             value.
4482              
4483             Additional, plugin specific, template tags can be found in the documentation for
4484             the following plugins:
4485              
4486             L<Mail::SpamAssassin::Plugin::ASN>
4487             L<Mail::SpamAssassin::Plugin::AWL>
4488             L<Mail::SpamAssassin::Plugin::TxRep>
4489              
4490             The C<HAMMYTOKENS> and C<SPAMMYTOKENS> tags have an optional second argument
4491             which specifies a format. See the B<HAMMYTOKENS/SPAMMYTOKENS TAG FORMAT>
4492             section, below, for details.
4493              
4494             =head2 HAMMYTOKENS/SPAMMYTOKENS TAG FORMAT
4495              
4496             The C<HAMMYTOKENS> and C<SPAMMYTOKENS> tags have an optional second argument
4497             which specifies a format: C<_SPAMMYTOKENS(N,FMT)_>, C<_HAMMYTOKENS(N,FMT)_>.
4498             The following formats are available:
4499              
4500             =over 4
4501              
4502             =item short
4503              
4504             Only the tokens themselves are listed.
4505             I<For example, preference file entry:>
4506              
4507             C<add_header all Spammy _SPAMMYTOKENS(2,short)_>
4508              
4509             I<Results in message header:>
4510              
4511             C<X-Spam-Spammy: remove.php, UD:jpg>
4512              
4513             Indicating that the top two spammy tokens found are C<remove.php>
4514             and C<UD:jpg>. (The token itself follows the last colon, the
4515             text before the colon indicates something about the token.
4516             C<UD> means the token looks like it might be part of a domain name.)
4517              
4518             =item compact
4519              
4520             The token probability, an abbreviated declassification distance (see
4521             example), and the token are listed.
4522             I<For example, preference file entry:>
4523              
4524             C<add_header all Spammy _SPAMMYTOKENS(2,compact)_>
4525              
4526             I<Results in message header:>
4527              
4528             C<X-Spam-Spammy: 0.989-6--remove.php, 0.988-+--UD:jpg>
4529              
4530             Indicating that the probabilities of the top two tokens are 0.989 and
4531             0.988, respectively. The first token has a declassification distance
4532             of 6, meaning that if the token had appeared in at least 6 more ham
4533             messages it would not be considered spammy. The C<+> for the second
4534             token indicates a declassification distance greater than 9.
4535              
4536             =item long
4537              
4538             Probability, declassification distance, number of times seen in a ham
4539             message, number of times seen in a spam message, age and the token are
4540             listed.
4541              
4542             I<For example, preference file entry:>
4543              
4544             C<add_header all Spammy _SPAMMYTOKENS(2,long)_>
4545              
4546             I<Results in message header:>
4547              
4548             C<X-Spam-Spammy: 0.989-6--0h-4s--4d--remove.php, 0.988-33--2h-25s--1d--UD:jpg>
4549              
4550             In addition to the information provided by the compact option,
4551             the long option shows that the first token appeared in zero
4552             ham messages and four spam messages, and that it was last
4553             seen four days ago. The second token appeared in two ham messages,
4554             25 spam messages and was last seen one day ago.
4555             (Unlike the C<compact> option, the long option shows declassification
4556             distances that are greater than 9.)
4557              
4558             =back
4559              
4560             =cut
4561              
4562 81         767 return \@cmds;
4563             }
4564              
4565             ###########################################################################
4566              
4567             # settings that were once part of core, but are now in (possibly-optional)
4568             # bundled plugins. These will be warned about, but do not generate a fatal
4569             # error when "spamassassin --lint" is run like a normal syntax error would.
4570              
4571             our @MIGRATED_SETTINGS = qw{
4572             ok_languages
4573             };
4574              
4575             ###########################################################################
4576              
4577             sub new {
4578 81     81 0 716 my $class = shift;
4579 81   33     731 $class = ref($class) || $class;
4580 81         679 my $self = {
4581             main => shift,
4582             registered_commands => [],
4583 81         351 }; bless ($self, $class);
4584              
4585 81         3585 $self->{parser} = Mail::SpamAssassin::Conf::Parser->new($self);
4586 81         980 $self->{parser}->register_commands($self->set_default_commands());
4587              
4588 81         513 $self->{errors} = 0;
4589 81         290 $self->{plugins_loaded} = { };
4590              
4591 81         722 $self->{tests} = { };
4592 81         321 $self->{test_types} = { };
4593 81         430 $self->{scoreset} = [ {}, {}, {}, {} ];
4594 81         383 $self->{scoreset_current} = 0;
4595 81         605 $self->set_score_set (0);
4596 81         292 $self->{tflags} = { };
4597 81         391 $self->{source_file} = { };
4598              
4599             # keep descriptions in a slow but space-efficient single-string
4600             # data structure
4601 81 50       242 tie %{$self->{descriptions}}, 'Mail::SpamAssassin::Util::TieOneStringHash'
  81         1848  
4602             or warn "tie failed";
4603              
4604             # after parsing, tests are refiled into these hashes for each test type.
4605             # this allows e.g. a full-text test to be rewritten as a body test in
4606             # the user's user_prefs file.
4607 81         345 $self->{body_tests} = { };
4608 81         251 $self->{uri_tests} = { };
4609 81         218 $self->{uri_evals} = { }; # not used/implemented yet
4610 81         219 $self->{head_tests} = { };
4611 81         205 $self->{head_evals} = { };
4612 81         211 $self->{body_evals} = { };
4613 81         226 $self->{full_tests} = { };
4614 81         194 $self->{full_evals} = { };
4615 81         183 $self->{rawbody_tests} = { };
4616 81         215 $self->{rawbody_evals} = { };
4617 81         190 $self->{meta_tests} = { };
4618 81         192 $self->{eval_plugins} = { };
4619 81         378 $self->{duplicate_rules} = { };
4620              
4621             # testing stuff
4622 81         355 $self->{regression_tests} = { };
4623              
4624 81         249 $self->{rewrite_header} = { };
4625 81         238 $self->{want_rebuild_for_type} = { };
4626 81         218 $self->{user_defined_rules} = { };
4627 81         245 $self->{headers_spam} = [ ];
4628 81         233 $self->{headers_ham} = [ ];
4629              
4630 81         239 $self->{bayes_ignore_headers} = [ ];
4631 81         188 $self->{bayes_ignore_from} = { };
4632 81         180 $self->{bayes_ignore_to} = { };
4633              
4634 81         176 $self->{whitelist_auth} = { };
4635 81         176 $self->{def_whitelist_auth} = { };
4636 81         225 $self->{whitelist_from} = { };
4637 81         221 $self->{whitelist_allows_relays} = { };
4638 81         215 $self->{blacklist_from} = { };
4639 81         170 $self->{whitelist_from_rcvd} = { };
4640 81         181 $self->{def_whitelist_from_rcvd} = { };
4641              
4642 81         189 $self->{blacklist_to} = { };
4643 81         220 $self->{whitelist_to} = { };
4644 81         200 $self->{more_spam_to} = { };
4645 81         220 $self->{all_spam_to} = { };
4646              
4647 81         821 $self->{trusted_networks} = $self->new_netset('trusted_networks',1);
4648 81         326 $self->{internal_networks} = $self->new_netset('internal_networks',1);
4649 81         337 $self->{msa_networks} = $self->new_netset('msa_networks',0); # no loopback IP
4650 81         383 $self->{trusted_networks_configured} = 0;
4651 81         268 $self->{internal_networks_configured} = 0;
4652              
4653             # Make sure we add in X-Spam-Checker-Version
4654 81         163 { my $r = [ "Checker-Version",
  81         244  
4655             "SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_" ];
4656 81         155 push(@{$self->{headers_spam}}, $r);
  81         256  
4657 81         187 push(@{$self->{headers_ham}}, $r);
  81         255  
4658             }
4659              
4660             # RFC 6891: A good compromise may be the use of an EDNS maximum payload size
4661             # of 4096 octets as a starting point.
4662 81         316 $self->{dns_options}->{edns} = 4096;
4663              
4664             # these should potentially be settable by end-users
4665             # perhaps via plugin?
4666 81         207 $self->{num_check_received} = 9;
4667 81         238 $self->{bayes_expiry_pct} = 0.75;
4668 81         287 $self->{bayes_expiry_period} = 43200;
4669 81         290 $self->{bayes_expiry_max_exponent} = 9;
4670              
4671 81         277 $self->{encapsulated_content_description} = 'original message before SpamAssassin';
4672              
4673 81         561 $self;
4674             }
4675              
4676             sub mtime {
4677 0     0 0 0 my $self = shift;
4678 0 0       0 if (@_) {
4679 0         0 $self->{mtime} = shift;
4680             }
4681 0         0 return $self->{mtime};
4682             }
4683              
4684             ###########################################################################
4685              
4686             sub parse_scores_only {
4687 0     0 0 0 my ($self) = @_;
4688 0         0 $self->{parser}->parse ($_[1], 1);
4689             }
4690              
4691             sub parse_rules {
4692 79     79 0 245 my ($self) = @_;
4693 79         757 $self->{parser}->parse ($_[1], 0);
4694             }
4695              
4696             ###########################################################################
4697              
4698             sub set_score_set {
4699 162     162 0 786 my ($self, $set) = @_;
4700 162         673 $self->{scores} = $self->{scoreset}->[$set];
4701 162         507 $self->{scoreset_current} = $set;
4702 162         1248 dbg("config: score set $set chosen.");
4703             }
4704              
4705             sub get_score_set {
4706 421     421 0 852 my($self) = @_;
4707 421         1222 return $self->{scoreset_current};
4708             }
4709              
4710             sub get_rule_types {
4711 0     0 0 0 my ($self) = @_;
4712 0         0 return @rule_types;
4713             }
4714              
4715             sub get_rule_keys {
4716 0     0 0 0 my ($self, $test_type, $priority) = @_;
4717              
4718             # special case rbl_evals since they do not have a priority
4719 0 0       0 if ($test_type eq 'rbl_evals') {
4720 0         0 return keys(%{$self->{$test_type}});
  0         0  
4721             }
4722              
4723 0 0       0 if (defined($priority)) {
4724 0         0 return keys(%{$self->{$test_type}->{$priority}});
  0         0  
4725             }
4726             else {
4727 0         0 my @rules;
4728 0         0 foreach my $pri (keys(%{$self->{priorities}})) {
  0         0  
4729 0         0 push(@rules, keys(%{$self->{$test_type}->{$pri}}));
  0         0  
4730             }
4731 0         0 return @rules;
4732             }
4733             }
4734              
4735             sub get_rule_value {
4736 0     0 0 0 my ($self, $test_type, $rulename, $priority) = @_;
4737              
4738             # special case rbl_evals since they do not have a priority
4739 0 0       0 if ($test_type eq 'rbl_evals') {
4740 0         0 return keys(%{$self->{$test_type}->{$rulename}});
  0         0  
4741             }
4742              
4743 0 0       0 if (defined($priority)) {
4744 0         0 return $self->{$test_type}->{$priority}->{$rulename};
4745             }
4746             else {
4747 0         0 foreach my $pri (keys(%{$self->{priorities}})) {
  0         0  
4748 0 0       0 if (exists($self->{$test_type}->{$pri}->{$rulename})) {
4749 0         0 return $self->{$test_type}->{$pri}->{$rulename};
4750             }
4751             }
4752 0         0 return; # if we get here we didn't find the rule
4753             }
4754             }
4755              
4756             sub delete_rule {
4757 0     0 0 0 my ($self, $test_type, $rulename, $priority) = @_;
4758              
4759             # special case rbl_evals since they do not have a priority
4760 0 0       0 if ($test_type eq 'rbl_evals') {
4761 0         0 return delete($self->{$test_type}->{$rulename});
4762             }
4763              
4764 0 0       0 if (defined($priority)) {
4765 0         0 return delete($self->{$test_type}->{$priority}->{$rulename});
4766             }
4767             else {
4768 0         0 foreach my $pri (keys(%{$self->{priorities}})) {
  0         0  
4769 0 0       0 if (exists($self->{$test_type}->{$pri}->{$rulename})) {
4770 0         0 return delete($self->{$test_type}->{$pri}->{$rulename});
4771             }
4772             }
4773 0         0 return; # if we get here we didn't find the rule
4774             }
4775             }
4776              
4777             # trim_rules ($regexp)
4778             #
4779             # Remove all rules that don't match the given regexp (or are sub-rules of
4780             # meta-tests that match the regexp).
4781              
4782             sub trim_rules {
4783 0     0 0 0 my ($self, $regexp) = @_;
4784              
4785 0         0 my @all_rules;
4786              
4787 0         0 foreach my $rule_type ($self->get_rule_types()) {
4788 0         0 push(@all_rules, $self->get_rule_keys($rule_type));
4789             }
4790              
4791 0         0 my @rules_to_keep = grep(/$regexp/, @all_rules);
4792              
4793 0 0       0 if (@rules_to_keep == 0) {
4794 0         0 die "config: trim_rules: all rules excluded, nothing to test\n";
4795             }
4796              
4797 0         0 my @meta_tests = grep(/$regexp/, $self->get_rule_keys('meta_tests'));
4798 0         0 foreach my $meta (@meta_tests) {
4799 0         0 push(@rules_to_keep, $self->add_meta_depends($meta))
4800             }
4801              
4802 0         0 my %rules_to_keep_hash;
4803              
4804 0         0 foreach my $rule (@rules_to_keep) {
4805 0         0 $rules_to_keep_hash{$rule} = 1;
4806             }
4807              
4808 0         0 foreach my $rule_type ($self->get_rule_types()) {
4809 0         0 foreach my $rulekey ($self->get_rule_keys($rule_type)) {
4810             $self->delete_rule($rule_type, $rulekey)
4811 0 0       0 if (!$rules_to_keep_hash{$rulekey});
4812             }
4813             }
4814             } # trim_rules()
4815              
4816             sub add_meta_depends {
4817 0     0 0 0 my ($self, $meta) = @_;
4818              
4819 0         0 my @rules;
4820 0         0 my @tokens = $self->get_rule_value('meta_tests', $meta) =~ m/(\w+)/g;
4821              
4822 0         0 @tokens = grep(!/^\d+$/, @tokens);
4823             # @tokens now only consists of sub-rules
4824              
4825 0         0 foreach my $token (@tokens) {
4826 0 0       0 die "config: meta test $meta depends on itself\n" if $token eq $meta;
4827 0         0 push(@rules, $token);
4828              
4829             # If the sub-rule is a meta-test, recurse
4830 0 0       0 if ($self->get_rule_value('meta_tests', $token)) {
4831 0         0 push(@rules, $self->add_meta_depends($token));
4832             }
4833             } # foreach my $token (@tokens)
4834              
4835 0         0 return @rules;
4836             } # add_meta_depends()
4837              
4838             sub is_rule_active {
4839 88     88 0 165 my ($self, $test_type, $rulename, $priority) = @_;
4840              
4841             # special case rbl_evals since they do not have a priority
4842 88 50       141 if ($test_type eq 'rbl_evals') {
4843 0 0       0 return 0 unless ($self->{$test_type}->{$rulename});
4844 0         0 return ($self->{scores}->{$rulename});
4845             }
4846              
4847             # first determine if the rule is defined
4848 88 50       120 if (defined($priority)) {
4849             # we have a specific priority
4850 0 0       0 return 0 unless ($self->{$test_type}->{$priority}->{$rulename});
4851             }
4852             else {
4853             # no specific priority so we must loop over all currently defined
4854             # priorities to see if the rule is defined
4855 88         97 my $found_p = 0;
4856 88         99 foreach my $pri (keys %{$self->{priorities}}) {
  88         237  
4857 88 50       184 if ($self->{$test_type}->{$pri}->{$rulename}) {
4858 88         96 $found_p = 1;
4859 88         119 last;
4860             }
4861             }
4862 88 50       164 return 0 unless ($found_p);
4863             }
4864              
4865 88         216 return ($self->{scores}->{$rulename});
4866             }
4867              
4868             ###########################################################################
4869              
4870             # treats a bitset argument as a bit vector of all possible port numbers (8 kB)
4871             # and sets bit values to $value (0 or 1) in the specified range of port numbers
4872             #
4873             sub set_ports_range {
4874 1     1 0 4 my($bitset_ref, $port_range_lo, $port_range_hi, $value) = @_;
4875 1 50       5 $port_range_lo = 0 if $port_range_lo < 0;
4876 1 50       4 $port_range_hi = 65535 if $port_range_hi > 65535;
4877 1 50       4 if (!defined $$bitset_ref) { # provide a sensible default
    0          
4878 1         13 wipe_ports_range($bitset_ref, 1); # turn on all bits 0..65535
4879 1         508 vec($$bitset_ref,$_,1) = 0 for 0..1023; # avoid 0 and privileged ports
4880             } elsif ($$bitset_ref eq '') { # repopulate the bitset (late configuration)
4881 0         0 wipe_ports_range($bitset_ref, 0); # turn off all bits 0..65535
4882             }
4883 1 50       4 $value = !$value ? 0 : 1;
4884 1         5 for (my $j = $port_range_lo; $j <= $port_range_hi; $j++) {
4885 1         7 vec($$bitset_ref,$j,1) = $value;
4886             }
4887             }
4888              
4889             sub wipe_ports_range {
4890 1     1 0 3 my($bitset_ref, $value) = @_;
4891 1 50       12 $value = !$value ? "\000" : "\377";
4892 1         16 $$bitset_ref = $value x 8192; # quickly turn all bits 0..65535 on or off
4893             }
4894              
4895             ###########################################################################
4896              
4897             sub add_to_addrlist {
4898 31     31 0 1587 my $self = shift; $self->{parser}->add_to_addrlist(@_);
  31         70  
4899             }
4900             sub add_to_addrlist_rcvd {
4901 0     0 0 0 my $self = shift; $self->{parser}->add_to_addrlist_rcvd(@_);
  0         0  
4902             }
4903             sub remove_from_addrlist {
4904 0     0 0 0 my $self = shift; $self->{parser}->remove_from_addrlist(@_);
  0         0  
4905             }
4906             sub remove_from_addrlist_rcvd {
4907 0     0 0 0 my $self = shift; $self->{parser}->remove_from_addrlist_rcvd(@_);
  0         0  
4908             }
4909              
4910             ###########################################################################
4911              
4912             sub regression_tests {
4913 0     0 0 0 my $self = shift;
4914 0 0       0 if (@_ == 1) {
4915             # we specified a symbolic name, return the strings
4916 0         0 my $name = shift;
4917 0         0 my $tests = $self->{regression_tests}->{$name};
4918 0         0 return @$tests;
4919             }
4920             else {
4921             # no name asked for, just return the symbolic names we have tests for
4922 0         0 return keys %{$self->{regression_tests}};
  0         0  
4923             }
4924             }
4925              
4926             ###########################################################################
4927              
4928             sub finish_parsing {
4929 79     79 0 274 my ($self, $user) = @_;
4930 79         535 $self->{parser}->finish_parsing($user);
4931             }
4932              
4933             ###########################################################################
4934              
4935             sub found_any_rules {
4936 80     80 0 278 my ($self) = @_;
4937 80 100       349 if (!defined $self->{found_any_rules}) {
4938 79         221 $self->{found_any_rules} = (scalar keys %{$self->{tests}} > 0);
  79         452  
4939             }
4940 80         255 return $self->{found_any_rules};
4941             }
4942              
4943             ###########################################################################
4944              
4945             sub get_description_for_rule {
4946 162     162 0 384 my ($self, $rule) = @_;
4947             # as silly as it looks, localized $1 here prevents an outer $1 from getting
4948             # tainted by the expression or assignment in the next line, bug 6148
4949 162         489 local($1);
4950 162         1137 my $rule_descr = $self->{descriptions}->{$rule};
4951 162         620 return $rule_descr;
4952             }
4953              
4954             ###########################################################################
4955              
4956             sub maybe_header_only {
4957 13     13 0 56 my($self,$rulename) = @_;
4958 13         44 my $type = $self->{test_types}->{$rulename};
4959              
4960 13 50       56 if ($rulename =~ /AUTOLEARNTEST/i) {
4961 0         0 dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.");
4962             }
4963            
4964 13 50       72 return 0 if (!defined ($type));
4965              
4966 13 100 100     113 if (($type == $TYPE_HEAD_TESTS) || ($type == $TYPE_HEAD_EVALS)) {
    50          
4967 9         36 return 1;
4968              
4969             } elsif ($type == $TYPE_META_TESTS) {
4970 0         0 my $tflags = $self->{tflags}->{$rulename};
4971 0   0     0 $tflags ||= '';
4972 0 0       0 if ($tflags =~ m/\bnet\b/i) {
4973 0         0 return 0;
4974             } else {
4975 0         0 return 1;
4976             }
4977             }
4978              
4979 4         43 return 0;
4980             }
4981              
4982             sub maybe_body_only {
4983 4     4 0 15 my($self,$rulename) = @_;
4984 4         12 my $type = $self->{test_types}->{$rulename};
4985              
4986 4 50       16 if ($rulename =~ /AUTOLEARNTEST/i) {
4987 0         0 dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.");
4988             }
4989              
4990 4 50       28 return 0 if (!defined ($type));
4991              
4992 4 50 33     68 if (($type == $TYPE_BODY_TESTS) || ($type == $TYPE_BODY_EVALS)
    0 33        
      33        
4993             || ($type == $TYPE_URI_TESTS) || ($type == $TYPE_URI_EVALS))
4994             {
4995             # some rawbody go off of headers...
4996 4         26 return 1;
4997              
4998             } elsif ($type == $TYPE_META_TESTS) {
4999 0   0     0 my $tflags = $self->{tflags}->{$rulename}; $tflags ||= '';
  0         0  
5000 0 0       0 if ($tflags =~ m/\bnet\b/i) {
5001 0         0 return 0;
5002             } else {
5003 0         0 return 1;
5004             }
5005             }
5006              
5007 0         0 return 0;
5008             }
5009              
5010             ###########################################################################
5011              
# Hand a plugin load request over to the plugin handler held by the main
# object.
#
# Arguments:
#   $package - plugin package name; untainted before being passed on
#   $path    - optional file path; when true it is first resolved relative
#              to the configuration file currently being parsed
#   $silent  - passed through to the handler unchanged
#
# Returns whatever the handler's load_plugin() returns.
sub load_plugin {
  my ($self, $package, $path, $silent) = @_;

  $path = $self->{parser}->fix_path_relative_to_current_file($path) if $path;

  # it wouldn't hurt to do some checking on validity of $package
  # and $path before untainting them
  my $clean_package = untaint_var($package);
  return $self->{main}->{plugins}->load_plugin($clean_package, $path, $silent);
}
5021              
# Record that a plugin package has been loaded.
#
# Arguments:
#   $plugin  - accepted but not used here
#   $package - package name to mark in $self->{plugins_loaded}
#   $path    - accepted but not used here
#
# Returns 1 (the value stored).
sub load_plugin_succeeded {
  my $self = shift;
  my (undef, $package) = @_;

  return $self->{plugins_loaded}->{$package} = 1;
}
5026              
# Associate an eval rule sub name with the plugin object that provides it.
#
# Arguments:
#   $pluginobj - plugin object to store
#   $nameofsub - name used as the key in $self->{eval_plugins}
#
# Returns the stored plugin object.
sub register_eval_rule {
  my $self = shift;
  my ($pluginobj, $nameofsub) = @_;

  return $self->{eval_plugins}->{$nameofsub} = $pluginobj;
}
5031              
5032             ###########################################################################
5033              
# Copy configuration state from one configuration object into another.
#
# Arguments:
#   $source - object to copy from; $self is used when undefined
#   $dest   - object to copy into; $self is used when undefined
#
# Always returns 1.
sub clone {
  my ($self, $source, $dest) = @_;

  $source = $self if !defined $source;
  $dest   = $self if !defined $dest;

  # Top-level keys already dealt with; only the existence of a key matters.
  my %handled;

  # Keys that must not be copied in ->clone(): anything that cannot be
  # changed by users, plus the ones given special treatment further down.
  # bug 4179: include want_rebuild_for_type, so that if a user rule
  # is defined, its method will be recompiled for future scans in
  # order to *remove* the generated method calls
  @handled{qw(
    main eval_plugins plugins_loaded registered_commands sed_path_cache parser
    scoreset scores want_rebuild_for_type
  )} = ();

  # keys whose values can be copied by calling their own ->clone() method
  for my $netset_key (qw(internal_networks trusted_networks msa_networks)) {
    $dest->{$netset_key} = $source->{$netset_key}->clone();
    $handled{$netset_key} = undef;
  }

  # two-level hashes
  for my $outer_key (qw(uri_host_lists askdns)) {
    my $src_outer  = $source->{$outer_key};
    my $dest_outer = $dest->{$outer_key} = {};   # must start from scratch!
    while (my ($inner_key, $src_inner) = each %{$src_outer}) {
      %{$dest_outer->{$inner_key}} = %{$src_inner};
    }
    $handled{$outer_key} = undef;
  }

  # bug 4179: be smarter about cloning the rule-type structures;
  # some are like this: $self->{type}->{priority}->{name} = 'value';
  # which is an extra level that the generic copy below won't deal with
  for my $rule_type (@rule_types) {
    for my $priority (keys %{$source->{$rule_type}}) {
      my $entry     = $source->{$rule_type}->{$priority};
      my $entry_ref = ref $entry;

      if ($entry_ref eq 'HASH') {
        %{$dest->{$rule_type}->{$priority}} = %{$entry};
      }
      elsif ($entry_ref eq 'ARRAY') {
        @{$dest->{$rule_type}->{$priority}} = @{$entry};
      }
      else {
        $dest->{$rule_type}->{$priority} = $entry;
      }
    }
    $handled{$rule_type} = undef;
  }

  # and now, copy over all the rest -- the less complex cases.
  while (my ($key, $value) = each %{$source}) {
    next if exists $handled{$key};   # we handled it above
    $handled{$key} = undef;

    my $value_ref = ref($value);

    if ($value_ref eq '') {
      # Not a reference: just copy the value over.
      $dest->{$key} = $value;
    }
    elsif ($value_ref eq 'SCALAR') {
      # Scalar reference: store the referenced value itself.
      $dest->{$key} = $$value;
    }
    elsif ($value_ref eq 'ARRAY') {
      @{$dest->{$key}} = @{$value};
    }
    elsif ($value_ref eq 'HASH') {
      %{$dest->{$key}} = %{$value};
    }
    else {
      # throw a warning for debugging -- should never happen in normal usage
      warn "config: dup unknown type $key, $value_ref\n";
    }
  }

  # Settings named by the registered commands that were not handled above
  # are assigned directly from the source.
  for my $command (@{$self->{registered_commands}}) {
    my $setting = $command->{setting};
    next if exists $handled{$setting};   # we handled it above
    $handled{$setting} = undef;
    $dest->{$setting} = $source->{$setting};
  }

  # scoresets
  delete $dest->{scoreset};
  for my $set_index (0 .. 3) {
    %{$dest->{scoreset}->[$set_index]} = %{$source->{scoreset}->[$set_index]};
  }

  # deal with $conf->{scores}, it needs to be a reference into the scoreset
  # hash array dealy.  Do it at the end since scoreset_current isn't set
  # otherwise.
  my $current_set = $dest->{scoreset_current};
  $dest->{scores} = $dest->{scoreset}->[$current_set];

  # ensure we don't copy the path cache from the master
  delete $dest->{sed_path_cache};

  return 1;
}
5149              
5150             ###########################################################################
5151              
# Drop parse-time state that is no longer needed.
#
# The if_stack entry is deleted only when the main object does not ask to
# keep config parsing metadata and user rules are not allowed.
sub free_uncompiled_rule_source {
  my $self = shift;

  my $keep_metadata = $self->{main}->{keep_config_parsing_metadata};

  if (!$keep_metadata && !$self->{allow_user_rules}) {
    delete $self->{if_stack};
    # the following are deliberately left in place:
    #delete $self->{source_file};
    #delete $self->{meta_dependencies};
  }
}
5163              
# Build a new Mail::SpamAssassin::NetSet object.
#
# Arguments:
#   $netset_name  - passed to the NetSet constructor
#   $add_loopback - when true, 127.0.0.0/8 and ::1 are added to the new set
#
# Returns the new set.
sub new_netset {
  my ($self, $netset_name, $add_loopback) = @_;

  my $netset = Mail::SpamAssassin::NetSet->new($netset_name);
  return $netset unless $add_loopback;

  for my $loopback_cidr ('127.0.0.0/8', '::1') {
    $netset->add_cidr($loopback_cidr);
  }
  return $netset;
}
5173              
5174             ###########################################################################
5175              
# Tear the object down: untie the descriptions hash, then remove every
# key from the object itself.
sub finish {
  my $self = shift;

  untie %{ $self->{descriptions} };
  %$self = ();
}
5181              
5182             ###########################################################################
5183              
# Forward all arguments unchanged to Mail::SpamAssassin::sa_die().
sub sa_die {
  return Mail::SpamAssassin::sa_die(@_);
}
5185              
5186             ###########################################################################
5187              
# Subroutines that configuration files can probe to conditionalize rules,
# for example:
#   if (can(Mail::SpamAssassin::Conf::feature_originating_ip_headers))
# Each feature_* sub does nothing but return 1.

sub feature_originating_ip_headers {
  return 1;
}

sub feature_dns_local_ports_permit_avoid {
  return 1;
}

sub feature_bayes_auto_learn_on_error {
  return 1;
}

sub feature_uri_host_listed {
  return 1;
}

sub feature_yesno_takes_args {
  return 1;
}

sub feature_bug6558_free {
  return 1;
}

# supports 'dns_options edns' config option
sub feature_edns {
  return 1;
}

# supported config option
sub feature_dns_query_restriction {
  return 1;
}

# replaces deprecated registrarboundaries
sub feature_registryboundaries {
  return 1;
}

# perl version check ("perl_version" not neatly backwards-compatible):
# true when the running perl is at least 5.10.0
sub perl_min_version_5010000 {
  return $] >= 5.010000;
}
5201              
5202             ###########################################################################
5203              
5204             1;
5205             __END__
5206              
5207             =head1 LOCALI[SZ]ATION
5208              
5209             A line starting with the text C<lang xx> will only be interpreted
5210             if the user is in that locale, allowing test descriptions and
5211             templates to be set for that language.
5212              
5213             The locale string should specify either both the language and country, e.g.
5214             C<lang pt_BR>, or just the language, e.g. C<lang de>.
5215              
5216             =head1 SEE ALSO
5217              
5218             C<Mail::SpamAssassin>
5219             C<spamassassin>
5220             C<spamd>
5221              
5222             =cut