File Coverage

blib/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
Criterion Covered Total %
statement 552 766 72.0
branch 298 416 71.6
condition 59 90 65.5
subroutine 11 11 100.0
pod 0 4 0.0
total 920 1287 71.4


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             # ---------------------------------------------------------------------------
19              
20             # So, what's the difference between a trusted and untrusted Received header?
21             # Basically, relays we *know* are trustworthy are 'trusted', all others after
22             # the last one of those are 'untrusted'.
23             #
24             # We determine trust by detecting if they are inside the network ranges
25             # specified in 'trusted_networks'. There is also an inference algorithm
26             # which determines other trusted relays without user configuration.
27             #
28             # There's another type of Received header: the semi-trusted one. This is the
29             # header added by *our* MX, at the boundary of trust; we can trust the IP
30             # address (and possibly rDNS) in this header, but that's about it; HELO name is
31             # untrustworthy. We just use this internally for now.
32             #
33             # Finally, there's also 'internal_networks'. These are the networks that you
34             # control; your MXes should be included. This way, if you specify a wide range
35             # of trusted hosts, a mail that is relayed from a dynamic IP address via a
36             # 'trusted' host will not hit RCVD_IN_DYNABLOCK.
37              
38             # ---------------------------------------------------------------------------
39              
40 41     41   261 use strict; # make Test::Perl::Critic happy
  41         80  
  41         1865  
41              
42             use strict;
43             use warnings;
44 41     41   245 # use bytes;
  41         88  
  41         1003  
45 41     41   186 use re 'taint';
  41         81  
  41         1445  
46              
47 41     41   230 use Mail::SpamAssassin::Dns;
  41         74  
  41         1367  
48             use Mail::SpamAssassin::PerMsgStatus;
49 41     41   10772 use Mail::SpamAssassin::Constants qw(:ip);
  41         283  
  41         3568  
50 41     41   402  
  41         175  
  41         1466  
51 41     41   323 # ---------------------------------------------------------------------------
  41         122  
  41         270298  
52              
53             my ($self, $pms, $msg) = @_;
54              
55             # a caller may assert that a message is coming from inside or from an
56 102     102 0 359 # authenticated roaming user; this info may not be available in mail
57             # header section, e.g. in case of nonstandard authentication mechanisms
58             my $originating; # boolean
59             if (exists $msg->{suppl_attrib}->{originating}) {
60             $originating = $msg->{suppl_attrib}->{originating} || 0;
61 102         186 dbg("metadata: set originating from suppl_attrib: %s", $originating);
62 102 50       353 }
63 0   0     0  
64 0         0 $self->{relays_trusted} = [ ];
65             $self->{num_relays_trusted} = 0;
66             $self->{relays_trusted_str} = '';
67 102         379  
68 102         366 $self->{relays_untrusted} = [ ];
69 102         384 $self->{num_relays_untrusted} = 0;
70             $self->{relays_untrusted_str} = '';
71 102         377  
72 102         266 $self->{relays_internal} = [ ];
73 102         486 $self->{num_relays_internal} = 0;
74             $self->{relays_internal_str} = '';
75 102         315  
76 102         281 $self->{relays_external} = [ ];
77 102         315 $self->{num_relays_external} = 0;
78             $self->{relays_external_str} = '';
79 102         279  
80 102         290 $self->{num_relays_unparseable} = 0;
81 102         407  
82             $self->{last_trusted_relay_index} = -1; # last counting from the top,
83 102         250 $self->{last_internal_relay_index} = -1; # first in time
84              
85 102         436 $self->{allow_mailfetch_markers} = 1; # This needs to be set for the
86 102         291 # first Received: header
87             # now figure out what relays are trusted...
88 102         230 my $trusted = $pms->{main}->{conf}->{trusted_networks};
89             my $internal = $pms->{main}->{conf}->{internal_networks};
90             my $msa = $pms->{main}->{conf}->{msa_networks};
91 102         245 my $did_user_specify_trust = $pms->{main}->{conf}->{trusted_networks_configured};
92 102         193 my $did_user_specify_internal = $pms->{main}->{conf}->{internal_networks_configured};
93 102         228 my $in_trusted = 1;
94 102         215 my $in_internal = 1;
95 102         211 my $found_msa = 0;
96 102         214  
97 102         243 unless ($did_user_specify_trust && $did_user_specify_internal) {
98 102         202 if (!$did_user_specify_trust && !$did_user_specify_internal) {
99             dbg('config: trusted_networks are not configured; it is recommended '.
100 102 100 100     436 'that you configure trusted_networks manually');
101 82 100 100     638 } elsif (!$did_user_specify_internal) {
    100          
102 44         220 # use 'trusted' for 'internal'; compatibility with SpamAssassin 2.60
103             $internal = $trusted;
104             dbg('config: internal_networks not configured, using trusted_networks '.
105             'configuration for internal_networks; if you really want '.
106 36         69 'internal_networks to only contain the required 127/8 add '.
107 36         153 "'internal_networks !0/0' to your configuration");
108             } else {
109             # use 'internal' for 'trusted'; I don't know why we let people define
110             # internal without trusted, but we do... and we rely on trusted being set
111             $trusted = $internal;
112             dbg('config: trusted_networks not configured, using internal_networks '.
113             'configuration for trusted_networks');
114 2         5 }
115 2         7 }
116              
117             my $IP_ADDRESS = IP_ADDRESS;
118             my $IP_PRIVATE = IP_PRIVATE;
119             my $LOCALHOST = LOCALHOST;
120 102         325  
121 102         272 my @hdrs = $msg->get_header('Received');
122 102         239  
123             # Now add the single line headers like X-Originating-IP. (bug 5680)
124 102         596 # we convert them into synthetic "Received" headers so we can share
125             # code below.
126             foreach my $header (@{$pms->{main}->{conf}->{originating_ip_headers}}) {
127             my $str = $msg->get_header($header);
128             next unless ($str && $str =~ m/($IP_ADDRESS)/);
129 102         210 push @hdrs, "from X-Originating-IP: $1\n";
  102         508  
130 348         660 }
131 348 100 66     1447  
132 4         39 foreach my $line ( @hdrs ) {
133              
134             # qmail-scanner support hack: we may have had one of these set from the
135 102         329 # previous (read: more recent) Received header. if so, add it on to this
136             # header's set, since that's the handover it was describing.
137              
138             my $qms_env_from;
139             if ($self->{qmail_scanner_env_from}) {
140             $qms_env_from = $self->{qmail_scanner_env_from};
141 71         143 delete $self->{qmail_scanner_env_from};
142 71 50       232 }
143 0         0  
144 0         0 $line =~ s/\n[ \t]+/ /gs;
145              
146             my $relay = $self->parse_received_line ($line);
147 71         184 if (!defined $relay) {
148             dbg("received-header: unparseable: $line");
149 71         236 $self->{num_relays_unparseable}++;
150 71 50       220 }
151 0         0  
152 0         0 # undefined or 0 means there's no result, so go to the next header
153             unless ($relay) {
154             $self->{last_trusted_relay_index}++ if $in_trusted;
155             $self->{last_internal_relay_index}++ if $in_internal;
156 71 100       207 next;
157 5 100       22 }
158 5 100       14  
159 5         13 # hack for qmail-scanner, as described above; add in the saved
160             # metadata
161             if ($qms_env_from) {
162             $relay->{envfrom} = $qms_env_from;
163             $self->make_relay_as_string($relay);
164 66 50       202 }
165 0         0  
166 0         0 # relay status only changes when we're still in the trusted portion of the
167             # relays and we haven't yet found an MSA
168             if ($in_trusted && !$found_msa) {
169             unless ($did_user_specify_trust || $did_user_specify_internal) {
170             # OK, infer the trusted/untrusted handover, we don't have real info
171 66 100 100     336 my $inferred_as_trusted = 0;
172 46 100 100     280  
173             # if the 'from' IP addr is in a reserved net range, it's not on
174 9         37 # the public internet.
175             if ($relay->{ip_private}) {
176             dbg("received-header: 'from' ".$relay->{ip}." has private IP");
177             $inferred_as_trusted = 1;
178 9 100       42 }
179 4         51  
180 4         15 # if we find authentication tokens in the received header we can extend
181             # the trust boundary to that host
182             if ($relay->{auth}) {
183             dbg("received-header: authentication method ".$relay->{auth});
184             $inferred_as_trusted = 1;
185 9 50       46 }
186 0         0  
187 0         0 # if the user didn't specify any trusted/internal config, everything
188             # we assume as trusted is also internal, just like we'd do if they
189             # specified trusted but not any internal networks or vice versa
190             if (!$inferred_as_trusted) {
191             dbg("received-header: do not trust any hosts from here on");
192             $in_trusted = 0;
193 9 100       70 $in_internal = 0;
194 5         32 }
195 5         11  
196 5         13 } else {
197             # trusted_networks matches?
198             if (!$relay->{auth} && !$trusted->contains_ip($relay->{ip})) {
199             if (!$originating) {
200             $in_trusted = 0; # break the trust chain
201 37 100 66     260 } else { # caller asserts a msg was submitted from inside or auth'd
202 7 50       27 $found_msa = 1; # let's assume the previous hop was actually
203 7         18 # an MSA, and propagate trust from here on
204             dbg('received-header: originating, '.
205 0         0 '%s and remaining relays will be considered trusted%s',
206             $relay->{ip}, !$in_internal ? '' : ', but no longer internal');
207             }
208             $in_internal = 0; # if it's not trusted it's not internal
209 0 0       0 } else {
210             # internal_networks matches?
211 7         16 if ($in_internal && !$relay->{auth} && !$internal->contains_ip($relay->{ip})) {
212             $in_internal = 0;
213             }
214 30 100 33     237 # msa_networks matches?
      66        
215 6         15 if ($msa->contains_ip($relay->{ip})) {
216             dbg('received-header: found MSA relay, remaining relays will be'.
217             ' considered trusted: '.($in_trusted ? 'yes' : 'no').
218 30 100       97 ' internal: '.($in_internal ? 'yes' : 'no'));
219 3 50       26 $found_msa = 1;
    100          
220             $relay->{msa} = 1;
221             }
222 3         6 }
223 3         10 }
224             }
225              
226             dbg("received-header: relay ".$relay->{ip}.
227             " trusted? ".($in_trusted ? "yes" : "no").
228             " internal? ".($in_internal ? "yes" : "no").
229             " msa? ".($relay->{msa} ? "yes" : "no"));
230              
231             $relay->{internal} = $in_internal;
232 66 100       636 $relay->{msa} ||= 0;
    100          
    100          
233              
234 66         181 # be sure to mark up the as_string version for users too
235 66   100     307 $relay->{as_string} =~ s/ intl=\d / intl=$relay->{internal} /;
236             $relay->{as_string} =~ s/ msa=\d / msa=$relay->{msa} /;
237              
238 66         566 if ($in_trusted) {
239 66         432 push (@{$self->{relays_trusted}}, $relay);
240             $self->{allow_mailfetch_markers} = 1;
241 66 100       177 $self->{last_trusted_relay_index}++;
242 37         61 } else {
  37         97  
243 37         74 push (@{$self->{relays_untrusted}}, $relay);
244 37         68 $self->{allow_mailfetch_markers} = 0;
245             }
246 29         45  
  29         71  
247 29         62 if ($in_internal) {
248             push (@{$self->{relays_internal}}, $relay);
249             $self->{last_internal_relay_index}++;
250 66 100       128 } else {
251 30         51 push (@{$self->{relays_external}}, $relay);
  30         77  
252 30         103 }
253             }
254 36         54  
  36         125  
255             $self->{relays_trusted_str} = join(' ', map { $_->{as_string} }
256             @{$self->{relays_trusted}});
257             $self->{relays_untrusted_str} = join(' ', map { $_->{as_string} }
258 36         177 @{$self->{relays_untrusted}});
259 102         197 $self->{relays_internal_str} = join(' ', map { $_->{as_string} }
  102         435  
260 29         99 @{$self->{relays_internal}});
261 102         252 $self->{relays_external_str} = join(' ', map { $_->{as_string} }
  102         379  
262 29         113 @{$self->{relays_external}});
263 102         262  
  102         278  
264 36         119 # OK, we've now split the relay list into trusted and untrusted.
265 102         238  
  102         312  
266             # add the stringified representation to the message object, so Bayes
267             # and rules can use it. Note that rule_tests.t does not impl put_metadata,
268             # so protect against that here. These will not appear in the final
269             # message; they're just used internally.
270              
271             if ($self->{msg}->can ("delete_header")) {
272             $self->{msg}->delete_header ("X-Spam-Relays-Trusted");
273             $self->{msg}->delete_header ("X-Spam-Relays-Untrusted");
274 102 50       713 $self->{msg}->delete_header ("X-Spam-Relays-Internal");
275 102         475 $self->{msg}->delete_header ("X-Spam-Relays-External");
276 102         453  
277 102         458 if ($self->{msg}->can ("put_metadata")) {
278 102         387 $self->{msg}->put_metadata ("X-Spam-Relays-Trusted",
279             $self->{relays_trusted_str});
280 102 50       622 $self->{msg}->put_metadata ("X-Spam-Relays-Untrusted",
281             $self->{relays_untrusted_str});
282 102         620 $self->{msg}->put_metadata ("X-Spam-Relays-Internal",
283             $self->{relays_internal_str});
284 102         399 $self->{msg}->put_metadata ("X-Spam-Relays-External",
285             $self->{relays_external_str});
286 102         431 }
287             }
288 102         373  
289             # be helpful; save some cumbersome typing
290             $self->{num_relays_trusted} = scalar (@{$self->{relays_trusted}});
291             $self->{num_relays_untrusted} = scalar (@{$self->{relays_untrusted}});
292             $self->{num_relays_internal} = scalar (@{$self->{relays_internal}});
293 102         235 $self->{num_relays_external} = scalar (@{$self->{relays_external}});
  102         272  
294 102         164  
  102         228  
295 102         165 dbg("metadata: X-Spam-Relays-Trusted: ".$self->{relays_trusted_str});
  102         200  
296 102         167 dbg("metadata: X-Spam-Relays-Untrusted: ".$self->{relays_untrusted_str});
  102         236  
297             dbg("metadata: X-Spam-Relays-Internal: ".$self->{relays_internal_str});
298 102         520 dbg("metadata: X-Spam-Relays-External: ".$self->{relays_external_str});
299 102         523 }
300 102         450  
301 102         356 # ---------------------------------------------------------------------------
302              
303             # returns undef if the header just couldn't be parsed
304             # returns 0 if the header was specifically skipped
305             # returns a hash of information if the header is parsed, including:
306             # ip => $ip,
307             # by => $by,
308             # helo => $helo,
309             # id => $id,
310             # ident => $ident,
311             # envfrom => $envfrom,
312             # lc_by => (lc $by),
313             # lc_helo => (lc $helo),
314             # auth => $auth
315             #
316             my ($self) = shift;
317             local ($_) = shift;
318             local ($1,$2,$3,$4,$5,$6);
319              
320 216     216 0 45115 s/\s+/ /g;
321 216         477 s/^ //;
322 216         1006 s/ $//;
323              
324 216         2322 # get rid of invalid semicolon at the end of the header
325 216         432 1 while s/\s?;$//;
326 216         471  
327             my $ip = '';
328             my $helo = '';
329 216         584 my $rdns = '';
330             my $by = '';
331 216         356 my $id = '';
332 216         324 my $ident = '';
333 216         272 my $envfrom = undef;
334 216         247 my $mta_looked_up_dns = 0;
335 216         260 my $IP_ADDRESS = IP_ADDRESS;
336 216         269 my $IP_PRIVATE = IP_PRIVATE;
337 216         279 my $LOCALHOST = LOCALHOST;
338 216         267 my $auth = '';
339 216         293  
340 216         249 # ---------------------------------------------------------------------------
341 216         254  
342 216         307 # We care about lines starting with from. all of the others are ignorable:
343             # Bug 4943: give /^(from/ a chance to be parsed
344             #
345             # (qmail 27981 invoked by uid 225); 14 Mar 2003 07:24:34 -0000
346             # (qmail 84907 invoked from network); 13 Feb 2003 20:59:28 -0000
347             # (ofmipd 208.31.42.38); 17 Mar 2003 04:09:01 -0000
348             # by faerber.muc.de (OpenXP/32 v3.9.4 (Win32) alpha @ 2003-03-07-1751d); 07 Mar 2003 22:10:29 +0000
349             # by x.x.org (bulk_mailer v1.13); Wed, 26 Mar 2003 20:44:41 -0600
350             # by SPIDERMAN with Internet Mail Service (5.5.2653.19) id <19AF8VY2>; Tue, 25 Mar 2003 11:58:27 -0500
351             # by oak.ein.cz (Postfix, from userid 1002) id DABBD1BED3; Thu, 13 Feb 2003 14:02:21 +0100 (CET)
352             # OTM-MIX(otm-mix00) id k5N1aDtp040896; Fri, 23 Jun 2006 10:36:14 +0900 (JST)
353             # at Infodrom Oldenburg (/\##/\ Smail-3.2.0.102 1998-Aug-2 #2) from infodrom.org by finlandia.Infodrom.North.DE via smail from stdin id <m1FglM8-000okjC@finlandia.Infodrom.North.DE> for debian-security-announce@lists.debian.org; Thu, 18 May 2006 18:28:08 +0200 (CEST)
354             # with ECARTIS (v1.0.0; list bind-announce); Fri, 18 Aug 2006 07:19:58 +0000 (UTC)
355             # Received: Message by Barricade wilhelm.eyp.ee with ESMTP id h1I7hGU06122 for <spamassassin-talk@lists.sourceforge.net>; Tue, 18 Feb 2003 09:43:16 +0200
356             return 0 if (!/^\(?from /i);
357              
358             # from www-data by wwwmail.documenta.de (Exim 4.50) with local for <example@vandinter.org> id 1GFbZc-0006QV-L8; Tue, 22 Aug 2006 21:06:04 +0200
359             # from server.yourhostingaccount.com with local for example@vandinter.org id 1GDtdl-0002GU-QE (8710); Thu, 17 Aug 2006 21:59:17 -0400
360 216 100       817 return 0 if /\bwith local for\b/;
361              
362             # Received: from virtual-access.org by bolero.conactive.com ; Thu, 20 Feb 2003 23:32:58 +0100
363             # Received: FROM ca-ex-bridge1.nai.com BY scwsout1.nai.com ; Fri Feb 07 10:18:12 2003 -0800
364 214 50       562 # but not: Received: from [86.122.158.69] by mta2.iomartmail.com; Thu, 2 Aug 2007 21:50:04 -0200
365             if (/^from (\S+) by [^\s;]+ ?;/i && $1 !~ /^\[[\d.]+\]$/) { return 0; }
366              
367             # ---------------------------------------------------------------------------
368              
369 214 100 100     1022 # Let's get rid of the date at the end
  1         8  
370             # ; Tue, 23 May 2006 13:06:35 -0400
371             s/[\s;]+(?:(?:Mon|T(?:ue|hu)|Wed|Fri|S(?:at|un)), )?\d+ (?:J(?:an|u[nl])|Feb|Ma[ry]|A(?:pr|ug)|Sep|Oct|Nov|Dec) \d+ \d+:\d+(?::\d+)? \S+$//;
372              
373             # from av0001.technodiva.com (localhost [127.0.0.1])by localhost.technodiva.com (Postfix) with ESMTP id 846CF2117for <proftp-user@lists.sourceforge.net>; Mon, 7 Aug 2006 17:48:07 +0200 (MEST)
374             s/\)by /) by /;
375 213         2223  
376             # ---------------------------------------------------------------------------
377              
378 213         448 # OK -- given knowledge of most Received header formats,
379             # break them down. We have to do something like this, because
380             # some MTAs will swap position of rdns and helo -- so we can't
381             # simply use simplistic regexps.
382              
383             # try to catch unique message identifier
384             if (/ id <?([^\s<>;]{3,})/) {
385             $id = $1;
386             }
387              
388 213 100       693 if (/\bhelo=([-A-Za-z0-9\.\^+_&:=?!@%*\$\\\/]+)(?:[^-A-Za-z0-9\.\^+_&:=?!@%*\$\\\/]|$)/) {
389 112         248 $helo = $1;
390             }
391             elsif (/\b(?:HELO|EHLO) ([-A-Za-z0-9\.\^+_&:=?!@%*\$\\\/]+)(?:[^-A-Za-z0-9\.\^+_&:=?!@%*\$\\\/]|$)/) {
392 213 100       1035 $helo = $1;
    100          
393 9         21 }
394             if (/ by (\S+)(?:[^-A-Za-z0-9\;\.]|$)/) { $by = $1; }
395              
396 17         39 # ---------------------------------------------------------------------------
397              
398 213 100       718 # try to catch authenticated message identifier
  207         528  
399             #
400             # with ESMTPA, ESMTPSA, LMTPA, LMTPSA should cover RFC 3848 compliant MTAs,
401             # UTF8SMTPA and UTF8LMTPA are covered by RFC 4954 and RFC 6531,
402             # with ASMTP (Authenticated SMTP) is used by Earthlink, Exim 4.34, and others
403             # with HTTP should only be authenticated webmail sessions
404             # with HTTPU is used by Communigate Pro with Pronto! webmail interface
405             # with HTTPS is used by Horde, which adjusts the Received header to say "HTTPS" when
406             # a connection is made over HTTPS
407             # IANA registry: https://www.iana.org/assignments/mail-parameters/mail-parameters.xhtml
408             if (/ by / && / with ((?:ES|L|UTF8S|UTF8L)MTPS?A|ASMTP|HTTP[SU]?)(?: |;|$)/i) {
409             $auth = $1;
410             }
411             # GMail should use ESMTPSA to indicate that it is in fact authenticated,
412 213 100 100     6480 # but doesn't.
    50 33        
    100 100        
    100 66        
    100 66        
    100 100        
    100          
    50          
    50          
    100          
413 14         31 elsif (/ by mx\.google\.com with ESMTPS id [a-z0-9]{1,4}sm[0-9]{2,9}[a-z]{3}\.[0-9]{1,3}\.[0-9]{4}\.(?:[0-6][0-9]\.){4}[0-6][0-9]/ && /\(version=([^ ]+) cipher=([^\)]+)\)/ ) {
414             $auth = 'GMail - transport=' . $1 . ' cipher=' . $2;
415             }
416             # Courier v0.47 and possibly others
417             elsif (/^from .*?(?:\]\)|\)\]) \(AUTH: (LOGIN|PLAIN|DIGEST-MD5|CRAM-MD5) \S+(?:, .*?)?\) by /) {
418 0         0 $auth = $1;
419             }
420             # Sendmail, MDaemon, some webmail servers, and others
421             elsif (/authenticated/ && /^from .*?(?:\](?: \([^)]*\))?\)|\)\]) .*?\(.*?authenticated.*?\).*? by/) {
422 4         10 $auth = 'Sendmail';
423             }
424             # workaround for GMX, which authenticates users but does not indicate it properly - # SMTP version
425             elsif (/from \S* \((?:HELO|EHLO) (\S*)\) \[(${IP_ADDRESS})\] by (mail\.gmx\.(?:net|com)) \([^\)]+\) with ((?:ESMTP|SMTP))/) {
426 6         13 $auth = "GMX ($4 / $3)";
427             }
428             # Critical Path Messaging Server
429             elsif (/ \(authenticated as /&&/\) by .+ \(\d{1,2}\.\d\.\d{3}(?:\.\d{1,3})?\) \(authenticated as .+\) id /) {
430 1         6 $auth = 'CriticalPath';
431             }
432             # Postfix 2.3 and later with "smtpd_sasl_authenticated_header yes"
433             # Normally $1 is "Postfix", but could be changed with mail_name (Bug 5646)
434 3         6 elsif (/\) \(Authenticated sender: \S+\) by \S+ \(([^\)]+)\) with /) {
435             $auth = $1 eq 'Postfix' ? $1 : "Postfix ($1)";
436             }
437             # Communigate Pro - Bug 6495 adds HTTP as possible transmission method
438             # Bug 7277: XIMSS used by Pronto and other custom apps, IMAP supports XMIT extension
439 1 50       5 elsif (/CommuniGate Pro (HTTP|SMTP|XIMSS|IMAP)/ && / \(account /) {
440             $auth = 'Communigate';
441             }
442             # Microsoft Exchange (complete with syntax error)
443             elsif (/ with Microsoft Exchange Server HTTP-DAV\b/) {
444 0         0 $auth = 'HTTP-DAV';
445             }
446             # froufrou mailers like United Internet use a '(via HTTP)' comment, Bug 7101
447             elsif (/ by / && / \(via (HTTP.?)\)(?: |;|$)/i) {
448 0         0 $auth = $1;
449             }
450              
451             # ---------------------------------------------------------------------------
452 1         3  
453             if (s/^from //) {
454             # try to catch envelope senders
455             if (/(?:return-path:? |envelope-(?:sender|from)[ =])(\S+)\b/i) {
456             $envfrom = $1;
457 213 100       977 }
    100          
    50          
458              
459 206 100       1520 # from 142.169.110.122 (SquirrelMail authenticated user synapse) by
460 6         15 # mail.nomis80.org with HTTP; Sat, 3 Apr 2004 10:33:43 -0500 (EST)
461             # Expanded to NaSMail Bug 6783
462             if (/ \((?:SquirrelMail|NaSMail) authenticated user /) {
463             #REVERTING bug 3236 and implementing re: bug 6549
464             if (/(${IP_ADDRESS})\b(?![.-]).{10,80}by (\S+) with HTTP/) {
465             $ip = $1; $by = $2; goto enough;
466 206 100       477 }
467             }
468 3 50       299  
469 3         8 # AOL WebMail headers
  3         8  
  3         170  
470             if (/aol\.com/ && /with HTTP \(WebMailUI\)/) {
471             # Received: from 82.135.198.129 by FWM-M18.sysops.aol.com (64.12.168.82) with HTTP (WebMailUI); Tue, 19 Jun 2007 11:16:54 -0400
472             if(/(${IP_ADDRESS}) by (\S+) \(${IP_ADDRESS}\) with HTTP \(WebMailUI\)/) {
473             $ip = $1; $by = $2; goto enough;
474 203 50 66     606 }
475             }
476 0 0       0  
477 0         0 # catch MS-ish headers here
  0         0  
  0         0  
478             if (/ SMTPSVC/) {
479             # MS servers using this fmt do not lookup the rDNS.
480             # Received: from inet-vrs-05.redmond.corp.microsoft.com ([157.54.6.157])
481             # by INET-IMC-05.redmond.corp.microsoft.com with Microsoft
482 203 100       2168 # SMTPSVC(5.0.2195.6624); Thu, 6 Mar 2003 12:02:35 -0800
    50          
    100          
    100          
    100          
    100          
    100          
    50          
483             # Received: from 0 ([61.31.135.91]) by bass.bass.com.eg with Microsoft
484             # SMTPSVC(5.0.2195.6713); Tue, 21 Sep 2004 08:59:06 +0300
485             # Received: from 0 ([61.31.138.57] RDNS failed) by nccdi.com with
486             # Microsoft SMTPSVC(6.0.3790.0); Thu, 23 Sep 2004 08:51:06 -0700
487             # Received: from tthompson ([217.35.105.172] unverified) by
488             # mail.neosinteractive.com with Microsoft SMTPSVC(5.0.2195.5329);
489             # Tue, 11 Mar 2003 13:23:01 +0000
490             # Received: from ([172.16.1.78]) by email2.codeworksonline.com with Microsoft SMTPSVC(5.0.2195.6713); Wed, 6 Sep 2006 21:14:29 -0400
491             if (/^(\S*) \(\[(${IP_ADDRESS})\][^\)]{0,40}\) by (\S+) with Microsoft SMTPSVC/) {
492             $helo = $1; $ip = $2; $by = $3; goto enough;
493             }
494              
495 7 100       514 # Received: from mail pickup service by mail1.insuranceiq.com with
496 5         15 # Microsoft SMTPSVC; Thu, 13 Feb 2003 19:05:39 -0500
  5         17  
  5         10  
  5         310  
497             if (/^mail pickup service by (\S+) with Microsoft SMTPSVC$/) {
498             return 0;
499             }
500             }
501 2 100       14  
502 1         7 # Microsoft SMTP Server
503             elsif (/ with (?:Microsoft SMTP Server|mapi id) (?:\([^\)]+\) )?\d+\.\d+\.\d+\.\d+(?:$| )/) {
504             # Received: from EXC-DAG-02.global.net (10.45.252.152) by EXC-DAG-02.global.net
505             # (10.45.252.152) with Microsoft SMTP Server (version=TLS1_2,
506             # cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.1261.35;
507             # Mon, 29 Oct 2018 11:17:19 +0100
508             # Received: from AM5PR0402MB2836.eurprd04.prod.outlook.com
509             # ([fe80::19bd:c588:dd17:5226]) by AM5PR0402MB2836.eurprd04.prod.outlook.com
510             # ([fe80::19bd:c588:dd17:5226%6]) with mapi id 15.20.1943.023;
511             # Wed, 5 Jun 2019 10:17:08 +0000
512             if (/^(\S+) \(\[?(${IP_ADDRESS})(?:%[A-Z0-9._~-]*)?\]?\) by (\S+) /) {
513             $helo = $1; $ip = $2; $by = $3; $id = ''; goto enough;
514             }
515             }
516 0 0       0  
517 0         0 elsif (/\[XMail /) { # bug 3791, bug 4053
  0         0  
  0         0  
  0         0  
  0         0  
518             # Received: from list.brainbuzz.com (63.146.189.86:23198) by mx1.yourtech.net with [XMail 1.20 ESMTP Server] id <S72E> for <jason@ellingson.org.spamassassin.org> from <bounce-cscommunity-11965901@list.cramsession.com.spamassassin.org>; Sat, 18 Sep 2004 23:17:54 -0500
519             # Received: from list.brainbuzz.com (63.146.189.86:23198) by mx1.yourtech.net (209.32.147.34:25) with [XMail 1.20 ESMTP Server] id <S72E> for <jason@ellingson.org.spamassassin.org> from <bounce-cscommunity-11965901@list.cramsession.com.spamassassin.org>; Sat, 18 Sep 2004 23:17:54 -0500
520             if (/^(\S+) \((\[?${IP_ADDRESS}\]?)(?::\d+)\) by (\S+)(?: \(\S+\))? with \[XMail/)
521             {
522             $helo = $1; $ip = $2; $by = $3;
523             / id <(\S+)>/ and $id = $1;
524 2 50       220 / from <(\S+)>/ and $envfrom = $1;
525             goto enough;
526 2         6 }
  2         4  
  2         3  
527 2 50       9 }
528 2 50       9  
529 2         116 # from ([10.225.209.19:33672]) by ecelerity-va-1 (ecelerity HEAD) with SMTP id EE/20-30863-33CE1054; Fri, 08 Sep 2006 18:18:27 -0400
530             # from ([127.0.0.1:32923]) by bm1-21.ed10.com (ecelerity 2.1.1ea r(11031M)) with ECSTREAM id 8B/57-16227-3764EB44 for <example@vandinter.org>; Wed, 19 Jul 2006 10:49:23 -0400
531             # from ([192.168.1.151:49601] helo=dev1.democracyinaction.org) by m12.prod.democracyinaction.com (ecelerity 2.1.1.3 r(11743)) with ESMTP id 52/92-02454-89FBA054 for <example@vandinter.org>; Fri, 15 Sep 2006 10:58:32 -0400
532             elsif (/\(ecelerity\b/) {
533             if (/^\(\[(${IP_ADDRESS}):\d+\] helo=(\S+)\) by (\S+) /) {
534             $ip = $1; $helo = $2; $by = $3;
535             goto enough;
536             }
537 3 100       243  
538 2         8 if (/^\S+ \(\[(${IP_ADDRESS}):\d+\]\) by (\S+) /) {
  2         4  
  2         4  
539 2         198 $ip = $1; $by = $2;
540             goto enough;
541             }
542 1 50       198 }
543 1         4  
  1         4  
544 1         59 elsif (/Exim/) {
545             # one of the HUGE number of Exim formats :(
546             # This must be scriptable. (update: it is. cf bug 3950, 3582)
547             # mss 2004-09-27: See <http://www.exim.org/exim-html-4.40/doc/html/spec_14.html#IX1315>
548              
549             # from root (helo=candygram.thunk.org) by thunker.thunk.org with local-esmtps (tls_cipher TLS-1.0:RSA_AES_256_CBC_SHA:32) (Exim 4.50 #1 (Debian)) id 1FwHqR-0008Bw-OG; Fri, 30 Jun 2006 08:11:35 -0400
550             # from root (helo=localhost) by broadcast.iac.iafrica.com with local-bsmtp (Exim 4.30; FreeBSD) id 1GN22d-0000xp-2K for example@vandinter.org; Tue, 12 Sep 2006 08:46:43 +0200
551             # from smarter (helo=localhost) by mx1-out.lists.smarterliving.com with local-bsmtp (Exim 4.24) id 1GIRA2-0007IZ-4n for example@vandinter.org; Wed, 30 Aug 2006 10:35:22 -0400
552             # Received: from andrew by trinity.supernews.net with local (Exim 4.12) id 18xeL6-000Dn1-00; Tue, 25 Mar 2003 02:39:00 +0000
553             if (/\bwith local(?:-\S+)? /) { return 0; }
554              
555             # Received: from [61.174.163.26] (helo=host) by sc8-sf-list1.sourceforge.net with smtp (Exim 3.31-VA-mm2 #1 (Debian)) id 18t2z0-0001NX-00 for <razor-users@lists.sourceforge.net>; Wed, 12 Mar 2003 01:57:10 -0800
556             # Received: from [218.19.142.229] (helo=hotmail.com ident=yiuhyotp) by yzordderrex with smtp (Exim 3.35 #1 (Debian)) id 194BE5-0005Zh-00; Sat, 12 Apr 2003 03:58:53 +0100
557 11 50       24 if (/^\[(${IP_ADDRESS})\] \((.*?)\) by (\S+) /) {
  0         0  
558             $ip = $1; my $sub = $2; $by = $3;
559             $sub =~ s/helo=(\S+)// and $helo = $1;
560             $sub =~ s/ident=(\S*)// and $ident = $1;
561 11 100       283 goto enough;
562 1         5 }
  1         2  
  1         2  
563 1 50       4  
564 1 50       6 # Received: from sc8-sf-list1-b.sourceforge.net ([10.3.1.13] helo=sc8-sf-list1.sourceforge.net) by sc8-sf-list2.sourceforge.net with esmtp (Exim 3.31-VA-mm2 #1 (Debian)) id 18t301-0007Bh-00; Wed, 12 Mar 2003 01:58:13 -0800
565 1         91 # Received: from dsl092-072-213.bos1.dsl.speakeasy.net ([66.92.72.213] helo=blazing.arsecandle.org) by sc8-sf-list1.sourceforge.net with esmtp (Cipher TLSv1:DES-CBC3-SHA:168) (Exim 3.31-VA-mm2 #1 (Debian)) id 18lyuU-0007TI-00 for <SpamAssassin-talk@lists.sourceforge.net>; Thu, 20 Feb 2003 14:11:18 -0800
566             # Received: from eclectic.kluge.net ([66.92.69.221] ident=[W9VcNxE2vKxgWHD05PJbLzIHSxcmZQ/O]) by sc8-sf-list1.sourceforge.net with esmtp (Cipher TLSv1:DES-CBC3-SHA:168) (Exim 3.31-VA-mm2 #1 (Debian)) id 18m0hT-00031I-00 for <spamassassin-talk@lists.sourceforge.net>; Thu, 20 Feb 2003 16:06:00 -0800
567             # Received: from mail.ssccbelen.edu.pe ([216.244.149.154]) by yzordderrex
568             # with esmtp (Exim 3.35 #1 (Debian)) id 18tqiz-000702-00 for
569             # <jm@example.com>; Fri, 14 Mar 2003 15:03:57 +0000
570             # Received: from server040.webpack.hosteurope.de ([80.237.130.48]:52313)
571             # by vps832469583.serverpool.info with esmtps
572             # (TLS-1.0:DHE_RSA_3DES_EDE_CBC_SHA:24) (Exim 4.50) id 1GzVLs-0002Oz-7b...
573             if (/^(\S+) \(\[(${IP_ADDRESS})\](.*?)\) by (\S+) /) {
574             $rdns=$1; $ip = $2; my $sub = $3; $by = $4;
575             $helo=$rdns; # default, apparently: bug 5112
576             $sub =~ s/helo=(\S+)// and $helo = $1;
577 10 100       263 $sub =~ s/ident=(\S*)// and $ident = $1;
578 7         16 goto enough;
  7         13  
  7         14  
  7         12  
579 7         9 }
580 7 100       26  
581 7 50       16 # Received: from boggle.ihug.co.nz [203.109.252.209] by grunt6.ihug.co.nz
582 7         401 # with esmtp (Exim 3.35 #1 (Debian)) id 18SWRe-0006X6-00; Sun, 29 Dec
583             # 2002 18:57:06 +1300
584             if (/^(\S+) \[(${IP_ADDRESS})\](:\d+)? by (\S+) /) {
585             $rdns= $1; $ip = $2; $helo = $1; $by = $4; goto enough;
586             }
587              
588 3 50       192 # attempt to deal with other odd Exim formats; just match little bits
589 0         0 # of the header.
  0         0  
  0         0  
  0         0  
  0         0  
590             # Received: from helene8.i.pinwand.net (helene.cats.ms) [10.0.8.6.13219]
591             # (mail) by lisbeth.i.pinwand.net with esmtp (Exim 3.35 #1 (Debian)) id
592             # 1CO5y7-0001vC-00; Sun, 31 Oct 2004 04:01:23 +0100
593             if (/^(\S+) /) {
594             $rdns= $1; # assume this is the rDNS, not HELO. is this appropriate?
595             }
596             if (/ \((\S+)\) /) {
597 3 50       17 $helo = $1;
598 3         8 }
599             if (/ \[(${IP_ADDRESS})(?:\.\d+)?\] /) {
600 3 100       14 $ip = $1;
601 2         4 }
602             if (/by (\S+) /) {
603 3 100       184 $by = $1;
604 1         3 # now, if we have a "by" and an IP, that's enough for most uses;
605             # we have to make do with that.
606 3 50       17 if ($ip) { goto enough; }
607 3         4 }
608              
609             # else it's probably forged. fall through
610 3 100       9 }
  1         60  
611              
612             elsif (/ \(Postfix\) with/) {
613             # Received: from localhost (unknown [127.0.0.1])
614             # by cabbage.jmason.org (Postfix) with ESMTP id A96E18BD97
615             # for <jm@localhost>; Thu, 13 Mar 2003 15:23:15 -0500 (EST)
616             if ( /^(\S+) \((\S+) \[(${IP_ADDRESS})\]\) by (\S+) / ) {
617             $mta_looked_up_dns = 1;
618             $helo = $1; $rdns = $2; $ip = $3; $by = $4;
619             if ($rdns eq 'unknown') { $rdns = ''; }
620 20 100       1092 goto enough;
621 17         42 }
622 17         46  
  17         42  
  17         35  
  17         35  
623 17 100       57 # Received: from 207.8.214.3 (unknown[211.94.164.65])
  4         7  
624 17         1748 # by puzzle.pobox.com (Postfix) with SMTP id 9029AFB732;
625             # Sat, 8 Nov 2003 17:57:46 -0500 (EST)
626             # (Pobox.com version: reported in bug 2745)
627             if ( /^(\S+) \((\S+)\[(${IP_ADDRESS})\]\) by (\S+) / ) {
628             $mta_looked_up_dns = 1;
629             $helo = $1; $rdns = $2; $ip = $3; $by = $4;
630             if ($rdns eq 'unknown') { $rdns = ''; }
631 3 50       206 goto enough;
632 0         0 }
633 0         0 }
  0         0  
  0         0  
  0         0  
634 0 0       0  
  0         0  
635 0         0 elsif (/\(Scalix SMTP Relay/) {
636             # from DPLAPTOP ( 72.242.176.162) by mail.puryear-it.com (Scalix SMTP Relay 10.0.1.3) via ESMTP; Fri, 23 Jun 2006 16:39:47 -0500 (CDT)
637             if (/^(\S+) \( ?(${IP_ADDRESS})\) by (\S+)/) {
638             $helo = $1; $ip = $2; $by = $3; goto enough;
639             }
640             }
641 1 50       202  
642 1         3 elsif (/ \(Lotus Domino /) {
  1         2  
  1         2  
  1         64  
643             # it seems Domino never records the rDNS: bug 5926
644             if (/^(\S+) \(\[(${IP_ADDRESS})\]\) by (\S+) \(Lotus/) {
645             $mta_looked_up_dns = 0;
646             $helo = $1; $ip = $2; $by = $3; goto enough;
647             }
648 0 0       0 }
649 0         0  
650 0         0 # Received: from 217.137.58.28 ([217.137.58.28])
  0         0  
  0         0  
  0         0  
651             # by webmail.ukonline.net (IMP) with HTTP
652             # for <anarchyintheuk@localhost>; Sun, 11 Apr 2004 00:31:07 +0100
653             if (/\bwith HTTP\b/ && # more efficient split up this way
654             /^(${IP_ADDRESS}) \(\[${IP_ADDRESS}\]\) by (\S+)/)
655             {
656             # some smarty-pants decided to fake a numeric HELO for HTTP
657 165 50 66     820 # no rDNS for this format?
658             $ip = $1; $by = $2; goto enough;
659             }
660              
661             # MiB: 2003/11/29 Some qmail-ldap headers may be misinterpreted as sendmail-headers
662 0         0 # resulting in a messed-up interpretation. We have to skip sendmail tests
  0         0  
  0         0  
663             # if we find evidence that this is a qmail-ldap header.
664             #
665             unless (/ by \S+ \(qmail-\S+\) with /) {
666             #
667             # sendmail:
668             # Received: from mail1.insuranceiq.com (host66.insuranceiq.com [65.217.159.66] (may be forged)) by dogma.slashnull.org (8.11.6/8.11.6) with ESMTP id h2F0c2x31856 for <jm@jmason.org>; Sat, 15 Mar 2003 00:38:03 GMT
669 165 100       389 # Received: from BAY0-HMR08.adinternal.hotmail.com (bay0-hmr08.bay0.hotmail.com [65.54.241.207]) by dogma.slashnull.org (8.11.6/8.11.6) with ESMTP id h2DBpvs24047 for <webmaster@efi.ie>; Thu, 13 Mar 2003 11:51:57 GMT
670             # Received: from ran-out.mx.develooper.com (IDENT:qmailr@one.develooper.com [64.81.84.115]) by dogma.slashnull.org (8.11.6/8.11.6) with SMTP id h381Vvf19860 for <jm-cpan@jmason.org>; Tue, 8 Apr 2003 02:31:57 +0100
671             # from rev.net (natpool62.rev.net [63.148.93.62] (may be forged)) (authenticated) by mail.rev.net (8.11.4/8.11.4) with ESMTP id h0KKa7d32306 for <spamassassin-talk@lists.sourceforge.net>
672             #
673             if (/^(\S+) \((\S+) \[(${IP_ADDRESS})\].*\) by (\S+) \(/) {
674             $mta_looked_up_dns = 1;
675             $helo = $1; $rdns = $2; $ip = $3; $by = $4;
676             $rdns =~ s/^IDENT:([^\@]*)\@// and $ident = $1; # remove IDENT lookups
677 164 100       2526 $rdns =~ s/^([^\@]*)\@// and $ident = $1; # remove IDENT lookups
678 27         111 goto enough;
679 27         77 }
  27         61  
  27         54  
  27         50  
680 27 50       70 }
681 27 50       62  
682 27         1721 # ---------------------------------------------------------------------------
683              
684             ## OK, AT THIS POINT FORMATS GET A BIT NON-STANDARD
685              
686             # Received: from ns.elcanto.co.kr (66.161.246.58 [66.161.246.58]) by
687             # mail.ssccbelen.edu.pe with SMTP (Microsoft Exchange Internet Mail Service
688             # Version 5.5.1960.3) id G69TW478; Thu, 13 Mar 2003 14:01:10 -0500
689             if (/^(\S+) \((\S+) \[(${IP_ADDRESS})\]\) by (\S+) with \S+ \(/) {
690             $mta_looked_up_dns = 1;
691             $rdns = $2; $ip = $3; $helo = $1; $by = $4; goto enough;
692             }
693 138 100       1823  
694 2         10 # from mail2.detr.gsi.gov.uk ([51.64.35.18] helo=ahvfw.dtlr.gsi.gov.uk) by mail4.gsi.gov.uk with smtp id 190K1R-0000me-00 for spamassassin-talk-admin@lists.sourceforge.net; Tue, 01 Apr 2003 12:33:46 +0100
695 2         6 if (/^(\S+) \(\[(${IP_ADDRESS})\] helo=(\S+)\) by (\S+) with /) {
  2         7  
  2         7  
  2         5  
  2         118  
696             $rdns = $1; $ip = $2; $helo = $3; $by = $4;
697             goto enough;
698             }
699 136 50       1332  
700 0         0 # from 12-211-5-69.client.attbi.com (<unknown.domain>[12.211.5.69]) by rwcrmhc53.attbi.com (rwcrmhc53) with SMTP id <2002112823351305300akl1ue>; Thu, 28 Nov 2002 23:35:13 +0000
  0         0  
  0         0  
  0         0  
701 0         0 if (/^(\S+) \(<unknown\S*>\[(${IP_ADDRESS})\]\) by (\S+) /) {
702             $helo = $1; $ip = $2; $by = $3;
703             goto enough;
704             }
705 136 50       1259  
706 0         0 # from attbi.com (h000502e08144.ne.client2.attbi.com[24.128.27.103]) by rwcrmhc53.attbi.com (rwcrmhc53) with SMTP id <20030222193438053008f7tee>; Sat, 22 Feb 2003 19:34:39 +0000
  0         0  
  0         0  
707 0         0 if (/^(\S+) \((\S+\.\S+)\[(${IP_ADDRESS})\]\) by (\S+) /) {
708             $mta_looked_up_dns = 1;
709             $helo = $1; $rdns = $2; $ip = $3; $by = $4;
710             goto enough;
711 136 50       1332 }
712 0         0  
713 0         0  
  0         0  
  0         0  
  0         0  
714 0         0 # Received: from 4wtgRl (kgbxn@[211.244.147.115]) by dogma.slashnull.org (8.11.6/8.11.6) with SMTP id h8BBsUJ18848; Thu, 11 Sep 2003 12:54:31 +0100
715             if (/^(\S+) \((\S*)\@\[(${IP_ADDRESS})\].*\) by (\S+) \(/) {
716             $mta_looked_up_dns = 1; # this one does. there just wasn't one
717             $helo = $1; $ip = $3; $by = $4;
718             $ident = $2;
719 136 50       1459 goto enough;
720 0         0 }
721 0         0  
  0         0  
  0         0  
722 0         0 # Received: from 213.123.174.21 by lw11fd.law11.hotmail.msn.com with HTTP;
723 0         0 # Wed, 24 Jul 2002 16:36:44 GMT
724             if (/by (\S+\.hotmail\.msn\.com) /) {
725             $by = $1;
726             /^(${IP_ADDRESS}) / and $ip = $1;
727             goto enough;
728 136 100       341 }
729 1         6  
730 1 50       205 # Received: from x71-x56-x24-5.webspeed.dk (HELO niels) (69.96.3.15) by la.mx.develooper.com (qpsmtpd/0.27-dev) with SMTP; Fri, 02 Jan 2004 19:26:52 -0800
731 1         68 # Received: from sc8-sf-sshgate.sourceforge.net (HELO sc8-sf-netmisc.sourceforge.net) (66.35.250.220) by la.mx.develooper.com (qpsmtpd/0.27-dev) with ESMTP; Fri, 02 Jan 2004 14:44:41 -0800
732             # Received: from mx10.topofferz.net (HELO ) (69.6.60.10) by blazing.arsecandle.org with SMTP; 3 Mar 2004 20:34:38 -0000
733             if (/^(\S+) \((?:HELO|EHLO) (\S*)\) \((${IP_ADDRESS})\) by (\S+) \(qpsmtpd\/\S+\) with (?:ESMTP|SMTP)/) {
734             $rdns = $1; $helo = $2; $ip = $3; $by = $4; goto enough;
735             }
736              
737 135 50       1366 # Received: from mail-backend.DDDD.com (LHLO mail-backend.DDDD.com) (10.2.2.20) by mail-backend.DDDD.com with LMTP; Thu, 18 Jun 2015 16:50:56 -0700 (PDT)
738 0         0 if (/^(\S+) \(LHLO (\S*)\) \((${IP_ADDRESS})\) by (\S+) with LMTP/) {
  0         0  
  0         0  
  0         0  
  0         0  
739             $rdns = $1; $helo = $2; $ip = $3; $by = $4; goto enough;
740             }
741              
742 135 100       1415 # from dslb-082-083-045-064.pools.arcor-ip.net (EHLO homepc) [82.83.45.64] by mail.gmx.net (mp010) with SMTP; 03 Feb 2007 13:13:47 +0100
743 1         3 if (/^(\S+) \((?:HELO|EHLO) (\S*)\) \[(${IP_ADDRESS})\] by (\S+) \([^\)]+\) with (?:ESMTP|SMTP)/) {
  1         2  
  1         4  
  1         2  
  1         58  
744             $rdns = $1; $helo = $2; $ip = $3; $by = $4; goto enough;
745             }
746              
747 134 100       2706 # MiB (Michel Bouissou, 2003/11/16)
748 1         4 # Moved some tests up because they might match on qmail tests, where this
  1         3  
  1         2  
  1         3  
  1         61  
749             # is not qmail
750             #
751             # Received: from imo-m01.mx.aol.com ([64.12.136.4]) by eagle.glenraven.com
752             # via smtpd (for [198.85.87.98]) with SMTP; Wed, 08 Oct 2003 16:25:37 -0400
753             if (/^(\S+) \(\[(${IP_ADDRESS})\]\) by (\S+) via smtpd \(for \S+\) with SMTP\(/) {
754             $helo = $1; $ip = $2; $by = $3; goto enough;
755             }
756              
757 133 50       1349 # Try to match most of various qmail possibilities
758 0         0 #
  0         0  
  0         0  
  0         0  
759             # General format:
760             # Received: from postfix3-2.free.fr (HELO machine.domain.com) (foobar@213.228.0.169) by totor.bouissou.net with SMTP; 14 Nov 2003 08:05:50 -0000
761             #
762             # "from (remote.rDNS|unknown)" is always there
763             # "(HELO machine.domain.com)" is there only if HELO differs from remote rDNS.
764             # HELO may be "" -- ie no string. "HELO" may also be "EHLO". HELO string
765             # may be an IP in fmt [1.2.3.4] -- do not strip [ and ], they are important.
766             # "foobar@" is remote IDENT info, specified only if ident given by remote
767             # Remote IP always appears between (parentheses), with or without IDENT@
768             # "by local.system.domain.com" always appears
769             #
770             # Protocol can be different from "SMTP", i.e. "RC4-SHA encrypted SMTP" or "QMQP"
771             # qmail's reported protocol shouldn't be "ESMTP", so by allowing only "with (.* )(SMTP|QMQP)"
772             # we should avoid matching on some sendmailish Received: lines that report remote IP
773             # between ([218.0.185.24]) like qmail-ldap does, but use "with ESMTP".
774             #
775             # Normally, qmail-smtpd remote IP isn't between square brackets [], but some versions of
776             # qmail-ldap seem to add square brackets around remote IP. These versions of qmail-ldap
777             # use a longer format that also states the (envelope-sender <sender@domain>) and the
778             # qmail-ldap version. Example:
779             # Received: from unknown (HELO terpsichore.farfalle.com) (jdavid@[216.254.40.70]) (envelope-sender <jdavid@farfalle.com>) by mail13.speakeasy.net (qmail-ldap-1.03) with SMTP for <jm@jmason.org>; 12 Feb 2003 18:23:19 -0000
780             #
781             # Some others of the numerous qmail patches out there can also add variants of their own
782             #
783             # Received: from 211.245.85.228 (EHLO ) (211.245.85.228) by mta232.mail.scd.yahoo.com with SMTP; Sun, 25 Jan 2004 00:24:37 -0800
784             #
785             # bug 4813: make sure that the line doesn't have " id " after the
786             # protocol since that's a sendmail line and not qmail ...
787             if (/^\S+( \((?:HELO|EHLO) \S*\))? \((\S+\@)?\[?${IP_ADDRESS}\]?\)( \(envelope-sender <\S+>\))? by \S+( \(.+\))* with (.* )?(SMTP|QMQP)(?! id )/ ) {
788             if (/^(\S+) \((?:HELO|EHLO) ([^ \(\)]*)\) \((\S*)\@\[?(${IP_ADDRESS})\]?\)( \(envelope-sender <\S+>\))? by (\S+)/) {
789             $rdns = $1; $helo = $2; $ident = $3; $ip = $4; $by = $6;
790             }
791 133 100       2315 elsif (/^(\S+) \((?:HELO|EHLO) ([^ \(\)]*)\) \(\[?(${IP_ADDRESS})\]?\)( \(envelope-sender <\S+>\))? by (\S+)/) {
792 54 100       3405 $rdns = $1; $helo = $2; $ip = $3; $by = $5;
    100          
    100          
    50          
793 4         12 }
  4         10  
  4         6  
  4         7  
  4         6  
794             elsif (/^(\S+) \((\S*)\@\[?(${IP_ADDRESS})\]?\)( \(envelope-sender <\S+>\))? by (\S+)/) {
795             # note: absence of HELO means that it matched rDNS in qmail-land
796 7         17 $helo = $rdns = $1; $ident = $2; $ip = $3; $by = $5;
  7         14  
  7         11  
  7         13  
797             }
798             elsif (/^(\S+) \(\[?(${IP_ADDRESS})\]?\)( \(envelope-sender <\S+>\))? by (\S+)/) {
799             $helo = $rdns = $1; $ip = $2; $by = $4;
800 4         11 }
  4         9  
  4         5  
  4         8  
801             # qmail doesn't perform rDNS requests by itself, but is usually called
802             # by tcpserver or a similar daemon that passes rDNS information to qmail-smtpd.
803 39         180 # If qmail puts something else than "unknown" in the rDNS field, it means that
  39         114  
  39         123  
804             # it received this information from the daemon that called it. If qmail-smtpd
805             # writes "Received: from unknown", it means that either the remote has no
806             # rDNS, or qmail was called by a daemon that didn't give the rDNS information.
807             if ($rdns ne "unknown") {
808             $mta_looked_up_dns = 1;
809             } else {
810             $rdns = '';
811 54 100       172 }
812 49         97 goto enough;
813              
814 5         8 }
815             # /MiB
816 54         5683
817             # Received: from [193.220.176.134] by web40310.mail.yahoo.com via HTTP;
818             # Wed, 12 Feb 2003 14:22:21 PST
819             if (/ via HTTP$/&&/^\[(${IP_ADDRESS})\] by (\S+) via HTTP$/) {
820             $ip = $1; $by = $2; goto enough;
821             }
822              
823 79 50 33     230 # Received: from 192.168.5.158 ( [192.168.5.158]) as user jason@localhost by mail.reusch.net with HTTP; Mon, 8 Jul 2002 23:24:56 -0400
824 0         0 if (/^(\S+) \( \[(${IP_ADDRESS})\]\).*? by (\S+) /) {
  0         0  
  0         0  
825             # TODO: is $1 helo?
826             $ip = $2; $by = $3; goto enough;
827             }
828 79 50       944  
829             # Received: from (64.52.135.194 [64.52.135.194]) by mail.unearthed.com with ESMTP id BQB0hUH2 Thu, 20 Feb 2003 16:13:20 -0700 (PST)
830 0         0 if (/^\((\S+) \[(${IP_ADDRESS})\]\) by (\S+) /) {
  0         0  
  0         0  
831             $helo = $1; $ip = $2; $by = $3; goto enough;
832             }
833              
834 79 50       983 # Received: from [65.167.180.251] by relent.cedata.com (MessageWall 1.1.0) with SMTP; 20 Feb 2003 23:57:15 -0000
835 0         0 if (/^\[(${IP_ADDRESS})\] by (\S+) /) {
  0         0  
  0         0  
  0         0  
836             $ip = $1; $by = $2; goto enough;
837             }
838              
839 79 100       982 # from ([172.16.1.78]) by email2.codeworksonline.com with Microsoft SMTPSVC(5.0.2195.6713); Wed, 6 Sep 2006 21:14:29 -0400
840 1         3 # from (130.215.36.186) by mcafee.wpi.edu via smtp id 021b_7e19a55a_ea7e_11da_83a9_00304811e63a; Tue, 23 May 2006 13:06:35 -0400
  1         3  
  1         59  
841             # from ([172.21.2.10]) by out-relay4.mtahq.org with ESMTP id 4420961.8281; Tue, 22 Aug 2006 17:53:08 -0400
842             if (/^\(\[?(${IP_ADDRESS})\]?\) by (\S+) /) {
843             $ip = $1; $by = $2; goto enough;
844             }
845              
846 78 100       984 # Received: from acecomms [202.83.84.95] by mailscan.acenet.net.au [202.83.84.27] with SMTP (MDaemon.PRO.v5.0.6.R) for <spamassassin-talk@lists.sourceforge.net>; Fri, 21 Feb 2003 09:32:27 +1000
847 4         19 if (/^(\S+) \[(${IP_ADDRESS})\] by (\S+) \[${IP_ADDRESS}\] with /) {
  4         18  
  4         419  
848             $mta_looked_up_dns = 1;
849             $helo = $1; $ip = $2; $by = $3;
850             goto enough;
851 74 50       1659 }
852 0         0  
853 0         0 # Received: smtp510.aspkunden.de [(134.97.4.21)] by mail.aspemail.de (134.97.4.24) (MDaemon PRO v19.0.2) with ESMTP id md50018233933.msg; Tue, 16 Jul 2019 11:39:22 +0200
  0         0  
  0         0  
854 0         0 if (/^(\S+) \[\((${IP_ADDRESS})\)\] by (\S+) \(${IP_ADDRESS}\) /) {
855             $helo = $1; $ip = $2; $by = $3;
856             goto enough;
857             }
858 74 50       1586  
859 0         0 # Received: from mail.sxptt.zj.cn ([218.0.185.24]) by dogma.slashnull.org
  0         0  
  0         0  
860 0         0 # (8.11.6/8.11.6) with ESMTP id h2FH0Zx11330 for <webmaster@efi.ie>;
861             # Sat, 15 Mar 2003 17:00:41 GMT
862             if (/^(\S+) \(\[(${IP_ADDRESS})\]\) by (\S+) \(/) { # sendmail
863             $mta_looked_up_dns = 1;
864             $helo = $1; $ip = $2; $by = $3; goto enough;
865             }
866 74 100       962  
867 4         8 # Received: from umr-mail7.umr.edu (umr-mail7.umr.edu [131.151.1.64]) via ESMTP by mrelay1.cc.umr.edu (8.12.1/) id h06GHYLZ022481; Mon, 6 Jan 2003 10:17:34 -0600
868 4         12 # Received: from Agni (localhost [::ffff:127.0.0.1]) (TLS: TLSv1/SSLv3, 168bits,DES-CBC3-SHA) by agni.forevermore.net with esmtp; Mon, 28 Oct 2002 14:48:52 -0800
  4         11  
  4         9  
  4         249  
869             # Received: from gandalf ([4.37.75.131]) (authenticated bits=0) by herald.cc.purdue.edu (8.12.5/8.12.5/herald) with ESMTP id g9JLefrm028228 for <spamassassin-talk@lists.sourceforge.net>; Sat, 19 Oct 2002 16:40:41 -0500 (EST)
870             # Received: from bushinternet.com (softdnserr [::ffff:61.99.99.67]) by mail.cs.helsinki.fi with esmtp; Fri, 22 Aug 2003 12:25:41 +0300
871             if (/^(\S+) \((\S+) \[(${IP_ADDRESS})\]\).*? by (\S+)\b/) { # sendmail
872             if ($2 eq 'softdnserr') {
873             $mta_looked_up_dns = 0; # bug 2326: couriertcpd
874             } else {
875 70 100       1010 $mta_looked_up_dns = 1; $rdns = $2;
876 4 100       15 }
877 1         2 $helo = $1; $ip = $3; $by = $4; goto enough;
878             }
879 3         6  
  3         6  
880             # from jsoliday.acs.internap.com ([63.251.66.24.63559]) by
881 4         8 # mailhost.acs.internap.com with esmtp (v3.35.1) id 1GNrLz-000295-00;
  4         5  
  4         7  
  4         239  
882             # Thu, 14 Sep 2006 09:34:07 -0400
883             if (/^(\S+) \(\[(${IP_ADDRESS})(?:[.:]\d+)?\]\).*? by (\S+) /) {
884             $mta_looked_up_dns = 1;
885             $helo = $1; $ip = $2; $by = $3; goto enough;
886             }
887 66 100       1038  
888 6         7 # Received: from roissy (p573.as1.exs.dublin.eircom.net [159.134.226.61])
889 6         16 # (authenticated bits=0) by slate.dublin.wbtsystems.com (8.12.6/8.12.6)
  6         10  
  6         9  
  6         364  
890             # with ESMTP id g9MFWcvb068860 for <jm@jmason.org>;
891             # Tue, 22 Oct 2002 16:32:39 +0100 (IST)
892             if (/^(\S+) \((\S+) \[(${IP_ADDRESS})\]\)(?: \(authenticated bits=\d+\))? by (\S+) \(/) { # sendmail
893             $mta_looked_up_dns = 1;
894             $helo = $1; $rdns = $2; $ip = $3; $by = $4; goto enough;
895             }
896 60 50       911  
897 0         0 # Identify fetch-from-server incidents:
898 0         0 # Fetchmail:
  0         0  
  0         0  
  0         0  
  0         0  
899             # Received: from cabbage.jmason.org [127.0.0.1]
900             # by localhost with IMAP (fetchmail-5.9.0)
901             # for jm@localhost (single-drop); Thu, 13 Mar 2003 20:39:56 -0800 (PST)
902             #
903             # Getmail:
904             # Received: from pop3.mail.dk (195.41.46.251) by loki.valhalla with POP3;
905             # 14 Apr 2010 11:14:29 -0000
906             #
907             if (/with (?:POP3|IMAP)/) {
908             $self->found_pop_fetcher_sig();
909             return 0; # skip mail fetcher handovers
910             }
911 60 100       183  
912 4         18 # Let's try to support a few qmailish formats in one;
913 4         41 # http://issues.apache.org/SpamAssassin/show_bug.cgi?id=2744#c14 :
914             # Received: from unknown (HELO feux01a-isp) (213.199.4.210) by totor.bouissou.net with SMTP; 1 Nov 2003 07:05:19 -0000
915             # Received: from adsl-207-213-27-129.dsl.lsan03.pacbell.net (HELO merlin.net.au) (Owner50@207.213.27.129) by totor.bouissou.net with SMTP; 10 Nov 2003 06:30:34 -0000
916             if (/^(\S+) \((?:HELO|EHLO) ([^\)]*)\) \((\S*@)?\[?(${IP_ADDRESS})\]?\).* by (\S+) /)
917             {
918             $mta_looked_up_dns = 1;
919             $rdns = $1;
920 56 100       732 $helo = $2;
921             $ident = (defined $3) ? $3 : '';
922 1         3 $ip = $4;
923 1         3 $by = $5;
924 1         3 if ($ident) {
925 1 50       4 $ident =~ s/\@$//;
926 1         2 }
927 1         3 goto enough;
928 1 50       3 }
929 1         5  
930             # Received: from x1-6-00-04-bd-d2-e0-a3.k317.webspeed.dk (benelli@80.167.158.170) by totor.bouissou.net with SMTP; 5 Nov 2003 23:18:42 -0000
931 1         60 if (/^(\S+) \((\S*@)?\[?(${IP_ADDRESS})\]?\).* by (\S+) /)
932             {
933             $mta_looked_up_dns = 1;
934             # bug 2744 notes that if HELO == rDNS, qmail drops it.
935 55 100       780 $rdns = $1; $helo = $rdns; $ident = (defined $2) ? $2 : '';
936             $ip = $3; $by = $4;
937 4         7 if ($ident) { $ident =~ s/\@$//; }
938             goto enough;
939 4 50       10 }
  4         6  
  4         11  
940 4         7  
  4         6  
941 4 50       9 # Received: from [129.24.215.125] by ws1-7.us4.outblaze.com with http for
  0         0  
942 4         238 # _bushisevil_@mail.com; Thu, 13 Feb 2003 15:59:28 -0500
943             if (/ with http for /&&/^\[(${IP_ADDRESS})\] by (\S+) with http for /) {
944             $ip = $1; $by = $2; goto enough;
945             }
946              
947 51 50 33     148 # Received: from snake.corp.yahoo.com(216.145.52.229) by x.x.org via smap (V1.3)
948 0         0 # id xma093673; Wed, 26 Mar 03 20:43:24 -0600
  0         0  
  0         0  
949             if (/ via smap /&&/^(\S+)\((${IP_ADDRESS})\) by (\S+) via smap /) {
950             $mta_looked_up_dns = 1;
951             $rdns = $1; $ip = $2; $by = $3; goto enough;
952             }
953 51 50 33     113  
954 0         0 # Received: from smtp.greyware.com(208.14.208.51, HELO smtp.sff.net) by x.x.org via smap (V1.3)
955 0         0 # id xma002908; Fri, 27 Feb 04 14:16:56 -0800
  0         0  
  0         0  
  0         0  
956             if (/^(\S+)\((${IP_ADDRESS}), (?:HELO|EHLO) (\S*)\) by (\S+) via smap /) {
957             $mta_looked_up_dns = 1;
958             $rdns = $1; $ip = $2; $helo = $3; $by = $4; goto enough;
959             }
960 51 50       635  
961 0         0 # Received: from [192.168.0.71] by web01-nyc.clicvu.com (Post.Office MTA
962 0         0 # v3.5.3 release 223 ID# 0-64039U1000L100S0V35) with SMTP id com for
  0         0  
  0         0  
  0         0  
  0         0  
963             # <x@x.org>; Tue, 25 Mar 2003 11:42:04 -0500
964             if (/ \(Post/&&/^\[(${IP_ADDRESS})\] by (\S+) \(Post/) {
965             $ip = $1; $by = $2; goto enough;
966             }
967              
968 51 50 33     154 # Received: from [127.0.0.1] by euphoria (ArGoSoft Mail Server
969 0         0 # Freeware, Version 1.8 (1.8.2.5)); Sat, 8 Feb 2003 09:45:32 +0200
  0         0  
  0         0  
970             if (/ \(ArGoSoft/&&/^\[(${IP_ADDRESS})\] by (\S+) \(ArGoSoft/) {
971             $ip = $1; $by = $2; goto enough;
972             }
973              
974 51 50 33     110 # Received: from 157.54.8.23 by inet-vrs-05.redmond.corp.microsoft.com
975 0         0 # (InterScan E-Mail VirusWall NT); Thu, 06 Mar 2003 12:02:35 -0800
  0         0  
  0         0  
976             # Received: from 10.165.130.62 by CNNIMAIL12.CNN.COM (SMTPL release 1.0d) with TCP; Fri, 1 Sep 2006 20:28:14 -0400
977             if (/^(${IP_ADDRESS}) by (\S+) \((?:SMTPL|InterScan)\b/) {
978             $ip = $1; $by = $2; goto enough;
979             }
980              
981 51 100       859 # Received: from faerber.muc.de by slarti.muc.de with BSMTP (rsmtp-qm-ot 0.4)
982 1         5 # for asrg@ietf.org; 7 Mar 2003 21:10:38 -0000
  1         2  
  1         67  
983             if (/ with BSMTP/&&/^\S+ by \S+ with BSMTP/) {
984             return 0; # BSMTP != a TCP/IP handover, ignore it
985             }
986              
987 50 50 33     128 # Received: from spike (spike.ig.co.uk [193.32.60.32]) by mail.ig.co.uk with
988 0         0 # SMTP id h27CrCD03362 for <asrg@ietf.org>; Fri, 7 Mar 2003 12:53:12 GMT
989             if (/^(\S+) \((\S+) \[(${IP_ADDRESS})\]\) by (\S+) with /) {
990             $mta_looked_up_dns = 1;
991             $helo = $1; $rdns = $2; $ip = $3; $by = $4; goto enough;
992             }
993 50 50       640  
994 0         0 # Received: from customer254-217.iplannetworks.net (HELO AGAMENON)
995 0         0 # (baldusi@200.69.254.217 with plain) by smtp.mail.vip.sc5.yahoo.com with
  0         0  
  0         0  
  0         0  
  0         0  
996             # SMTP; 11 Mar 2003 21:03:28 -0000
997             if (/^(\S+) \((?:HELO|EHLO) (\S*)\) \((?:(\S+)\@)?(${IP_ADDRESS}).*?\) by (\S+) with /) {
998             $mta_looked_up_dns = 1;
999             $rdns = $1; $helo = $2; $ip = $4; $by = $5;
1000             $ident = $3 if defined $3;
1001 50 50       751 goto enough;
1002 0         0 }
1003 0         0  
  0         0  
  0         0  
  0         0  
1004 0 0       0 # Received: from [192.168.1.104] (account nazgul HELO [192.168.1.104])
1005 0         0 # by somewhere.com (CommuniGate Pro SMTP 3.5.7) with ESMTP-TLS id 2088434;
1006             # Fri, 07 Mar 2003 13:05:06 -0500
1007             if (/^\[(${IP_ADDRESS})\] \((?:account \S+ )?(?:HELO|EHLO) (\S*)\) by (\S+) \(/) {
1008             $ip = $1; $helo = $2; $by = $3; goto enough;
1009             }
1010              
1011 50 100       708 # Received: from host.example.com ([192.0.2.1] verified)
1012 7         27 # by mail.example.net (CommuniGate Pro SMTP 5.1.13)
  7         12  
  7         12  
  7         492  
1013             # with ESMTP id 9786656 for user@example.net; Thu, 27 Mar 2008 15:08:17 +0600
1014             if (/ \(CommuniGate Pro/ && /^(\S+) \(\[(${IP_ADDRESS})\] verified\) by (\S+) \(/) {
1015             $mta_looked_up_dns = 1;
1016             $rdns = $1; $helo = $1; $ip = $2; $by = $3; goto enough;
1017             }
1018 43 100 100     342  
1019 4         8 # Received: from ([10.0.0.6]) by mail0.ciphertrust.com with ESMTP ; Thu,
1020 4         9 # 13 Mar 2003 06:26:21 -0500 (EST)
  4         6  
  4         6  
  4         7  
  4         247  
1021             if (/^\(\[(${IP_ADDRESS})\]\) by (\S+) with /) {
1022             $ip = $1; $by = $2; goto enough;
1023             }
1024              
1025 39 50       576 # Received: from ironport.com (10.1.1.5) by a50.ironport.com with ESMTP; 01 Apr 2003 12:00:51 -0800
1026 0         0 # Received: from dyn-81-166-39-132.ppp.tiscali.fr (81.166.39.132) by cpmail.dk.tiscali.com (6.7.018)
  0         0  
  0         0  
1027             if (/^([^\d]\S+) \((${IP_ADDRESS})\) by (\S+) /) {
1028             $helo = $1; $ip = $2; $by = $3; goto enough;
1029             }
1030              
1031 39 50       618 # Received: from scv3.apple.com (scv3.apple.com) by mailgate2.apple.com (Content Technologies SMTPRS 4.2.1) with ESMTP id <T61095998e1118164e13f8@mailgate2.apple.com>; Mon, 17 Mar 2003 17:04:54 -0800
1032 0         0 # bug 4704: Only let this match Content Technologies so it stops breaking things that come after it by matching first
  0         0  
  0         0  
  0         0  
1033             if (/^\S+ \(\S+\) by \S+ \(Content Technologies /) {
1034             return 0; # useless without the $ip anyway!
1035             }
1036              
1037 39 50       91 # Received: from 01al10015010057.ad.bls.com ([90.152.5.141] [90.152.5.141])
1038 0         0 # by aismtp3g.bls.com with ESMTP; Mon, 10 Mar 2003 11:10:41 -0500
1039             if (/^(\S+) \(\[(${IP_ADDRESS})\] \[(\S+)\]\) by (\S+) with /) {
1040             # not sure what $3 is ;)
1041             $helo = $1; $ip = $2; $by = $4;
1042             goto enough;
1043 39 50       602 }
1044              
1045 0         0 # Received: from 206.47.0.153 by dm3cn8.bell.ca with ESMTP (Tumbleweed MMS
  0         0  
  0         0  
1046 0         0 # SMTP Relay (MMS v5.0)); Mon, 24 Mar 2003 19:49:48 -0500
1047             if (/^(${IP_ADDRESS}) by (\S+) with /) {
1048             $ip = $1; $by = $2;
1049             goto enough;
1050             }
1051 39 100       731  
1052 1         5 # Received: from pobox.com (h005018086b3b.ne.client2.attbi.com[66.31.45.164])
  1         3  
1053 1         65 # by rwcrmhc53.attbi.com (rwcrmhc53) with SMTP id <2003031302165605300suph7e>;
1054             # Thu, 13 Mar 2003 02:16:56 +0000
1055             if (/^(\S+) \((\S+)\[(${IP_ADDRESS})\]\) by (\S+) /) {
1056             $mta_looked_up_dns = 1;
1057             $helo = $1; $rdns = $2; $ip = $3; $by = $4; goto enough;
1058             }
1059 38 50       596  
1060 0         0 # Received: from [10.128.128.81]:50999 (HELO dfintra.f-secure.com) by fsav4im2 ([10.128.128.74]:25) (F-Secure Anti-Virus for Internet Mail 6.0.34 Release) with SMTP; Tue, 5 Mar 2002 14:11:53 -0000
1061 0         0 if (/^\[(${IP_ADDRESS})\]\S+ \((?:HELO|EHLO) (\S*)\) by (\S+) /) {
  0         0  
  0         0  
  0         0  
  0         0  
1062             $ip = $1; $helo = $2; $by = $3; goto enough;
1063             }
1064              
1065 38 50       623 # Received: from 62.180.7.250 (HELO daisy) by smtp.altavista.de (209.228.22.152) with SMTP; 19 Sep 2002 17:03:17 +0000
1066 0         0 if (/^(${IP_ADDRESS}) \((?:HELO|EHLO) (\S*)\) by (\S+) /) {
  0         0  
  0         0  
  0         0  
1067             $ip = $1; $helo = $2; $by = $3; goto enough;
1068             }
1069              
1070 38 50       730 # Received: from oemcomputer [63.232.189.195] by highstream.net (SMTPD32-7.07) id A4CE7F2A0028; Sat, 01 Feb 2003 21:39:10 -0500
1071 0         0 if (/^(\S+) \[(${IP_ADDRESS})\] by (\S+) /) {
  0         0  
  0         0  
  0         0  
1072             $helo = $1; $ip = $2; $by = $3; goto enough;
1073             }
1074              
1075 38 50       594 # from nodnsquery(192.100.64.12) by herbivore.monmouth.edu via csmap (V4.1) id srcAAAyHaywy
1076 0         0 if (/^(\S+)\((${IP_ADDRESS})\) by (\S+) /) {
  0         0  
  0         0  
  0         0  
1077             $rdns = $1; $ip = $2; $by = $3; goto enough;
1078             }
1079              
1080 38 50       608 # Received: from [192.168.0.13] by <server> (MailGate 3.5.172) with SMTP;
1081 0         0 # Tue, 1 Apr 2003 15:04:55 +0100
  0         0  
  0         0  
  0         0  
1082             if (/^\[(${IP_ADDRESS})\] by (\S+) \(MailGate /) {
1083             $ip = $1; $by = $2; goto enough;
1084             }
1085              
1086 38 50       625 # Received: from jmason.org (unverified [195.218.107.131]) by ni-mail1.dna.utvinternet.net <B0014212518@ni-mail1.dna.utvinternet.net>; Tue, 11 Feb 2003 12:18:12 +0000
1087 0         0 if (/^(\S+) \(unverified \[(${IP_ADDRESS})\]\) by (\S+) /) {
  0         0  
  0         0  
1088             $helo = $1; $ip = $2; $by = $3; goto enough;
1089             }
1090              
1091 38 50       633 # # from 165.228.131.11 (proxying for 139.130.20.189) (SquirrelMail authenticated user jmmail) by jmason.org with HTTP
1092 0         0 # if (/^from (\S+) \(proxying for (${IP_ADDRESS})\) \([A-Za-z][^\)]+\) by (\S+) with /) {
  0         0  
  0         0  
  0         0  
1093             # $ip = $2; $by = $3; goto enough;
1094             # }
1095             if (/^(${IP_ADDRESS}) \([A-Za-z][^\)]+\) by (\S+) with /) {
1096             $ip = $1; $by = $2; goto enough;
1097             }
1098              
1099 38 50       741 # Received: from [212.87.144.30] (account seiz [212.87.144.30] verified) by x.imd.net (CommuniGate Pro SMTP 4.0.3) with ESMTP-TLS id 5026665 for spamassassin-talk@lists.sourceforge.net; Wed, 15 Jan 2003 16:27:05 +0100
1100 0         0 # bug 4704 This pattern was checked as just an Exim format, but it does exist elsewhere
  0         0  
  0         0  
1101             # Received: from [206.51.230.145] (helo=t-online.de)
1102             # by mxeu2.kundenserver.de with ESMTP (Nemesis),
1103             # id 0MKpdM-1CkRpr14PF-000608; Fri, 31 Dec 2004 19:49:15 +0100
1104             # Received: from [218.19.142.229] (helo=hotmail.com ident=yiuhyotp)
1105             # by yzordderrex with smtp (Exim 3.35 #1 (Debian)) id 194BE5-0005Zh-00; Sat, 12 Apr 2003 03:58:53 +0100
1106             if (/^\[(${IP_ADDRESS})\] \(([^\)]+)\) by (\S+) /) {
1107             $ip = $1; my $sub = $2; $by = $3;
1108             $sub =~ s/helo=(\S+)// and $helo = $1;
1109             $sub =~ s/ident=(\S*)// and $ident = $1;
1110 38 100       650 goto enough;
1111 3         10 }
  3         6  
  3         6  
1112 3 100       10  
1113 3 50       7 # Received: from mtsbp606.email-info.net (?dXqpg3b0hiH9faI2OxLT94P/YKDD3rQ1?@64.253.199.166) by kde.informatik.uni-kl.de with SMTP; 30 Apr 2003 15:06:29
1114 3         249 if (/^(\S+) \((?:\S+\@)?(${IP_ADDRESS})\) by (\S+) with /) {
1115             $rdns = $1; $ip = $2; $by = $3; goto enough;
1116             }
1117              
1118 35 50       690 # Obtuse smtpd: http://www.obtuse.com/
1119 0         0 # Received: from TCE-E-7-182-54.bta.net.cn(202.106.182.54) via SMTP
  0         0  
  0         0  
  0         0  
1120             # by st.tahina.priv.at, id smtpdEDUB8h; Sun Nov 13 14:50:12 2005
1121             # Received: from pl027.nas934.d-osaka.nttpc.ne.jp(61.197.82.27), claiming to be "foo.woas.net" via SMTP
1122             # by st.tahina.priv.at, id smtpd1PBsZT; Sun Nov 13 15:38:52 2005
1123             if (/^(\S+)\((${IP_ADDRESS})\)(?:, claiming to be "(\S+)")? via \S+ by (\S+),/) {
1124             $rdns = $1; $ip = $2; $helo = (defined $3) ? $3 : ''; $by = $4;
1125             if ($1 ne 'UNKNOWN') {
1126             $mta_looked_up_dns = 1;
1127 35 100       694 $rdns = $1;
1128 2 100       8 }
  2         6  
  2         7  
  2         6  
1129 2 50       7 goto enough;
1130 2         3 }
1131 2         4  
1132             # Yahoo Authenticated SMTP; Bug #6535
1133 2         126 # from itrqtnlnq (lucilleskinner@93.124.107.183 with login) by smtp111.mail.ne1.yahoo.com with SMTP; 17 Jan 2011 08:23:27 -0800 PST
1134             if (/^(\S+) \((\S+)@(${IP_ADDRESS}) with login\) by (\S+\.yahoo\.com) with SMTP/) {
1135             $helo = $1; $ip = $3; $by = $4; goto enough;
1136             }
1137              
1138 33 50       603 # a synthetic header, generated internally:
1139 0         0 # Received: X-Originating-IP: 1.2.3.4
  0         0  
  0         0  
  0         0  
1140             if (/^X-Originating-IP: (${IP_ADDRESS})$/) {
1141             $ip = $1; $by = ''; goto enough;
1142             }
1143              
1144 33 100       597 ## STUFF TO IGNORE ##
1145 4         18  
  4         8  
  4         605  
1146             # Received: from raptor.research.att.com (bala@localhost) by
1147             # raptor.research.att.com (SGI-8.9.3/8.8.7) with ESMTP id KAA14788
1148             # for <asrg@example.com>; Fri, 7 Mar 2003 10:37:56 -0500 (EST)
1149             # make this localhost-specific, so we know it's safe to ignore
1150             if (/^\S+ \([^\s\@]+\@${LOCALHOST}\) by \S+ \(/) { return 0; }
1151              
1152             # from paul (helo=felix) by felix.peema.org with local-esmtp (Exim 4.43)
1153             # id 1Ccq0j-0002k2-Lk; Fri, 10 Dec 2004 19:01:01 +0000
1154 29 100       268 # Exim doco says this is local submission, cf switch -oMr
  1         142  
1155             if (/^\S+ \S+ by \S+ with local-e?smtp /) { return 0; }
1156              
1157             # from 127.0.0.1 (AVG SMTP 7.0.299 [265.6.8]); Wed, 05 Jan 2005 15:06:48 -0800
1158             if (/^127\.0\.0\.1 \(AVG SMTP \S+ \[\S+\]\)/) { return 0; }
1159 28 50       64  
  0         0  
1160             # from qmail-scanner-general-admin@lists.sourceforge.net by alpha by uid 7791 with qmail-scanner-1.14 (spamassassin: 2.41. Clear:SA:0(-4.1/5.0):. Processed in 0.209512 secs)
1161             if (/^\S+\@\S+ by \S+ by uid \S+ /) { return 0; }
1162 28 50       53  
  0         0  
1163             # Received: from DSmith1204@aol.com by imo-m09.mx.aol.com (mail_out_v34.13.) id 7.53.208064a0 (4394); Sat, 11 Jan 2003 23:24:31 -0500 (EST)
1164             if (/^\S+\@\S+ by \S+ /) { return 0; }
1165 28 100       56  
  1         15  
1166             # Received: from Unknown/Local ([?.?.?.?]) by mailcity.com; Fri, 17 Jan 2003 15:23:29 -0000
1167             if (/^Unknown\/Local \(/) { return 0; }
1168 27 100       65  
  1         16  
1169             # Received: from localhost (mailnull@localhost) by x.org (8.12.6/8.9.3)
1170             # with SMTP id h2R2iivG093740; Wed, 26 Mar 2003 20:44:44 -0600
1171 26 50       42 # (CST) (envelope-from x@x.org)
  0         0  
1172             # Received: from localhost (localhost [127.0.0.1]) (uid 500) by mail with local; Tue, 07 Jan 2003 11:40:47 -0600
1173             if (/^${LOCALHOST} \((?:\S+\@)?${LOCALHOST}[\)\[]/) { return 0; }
1174              
1175             # Received: from olgisoft.com (127.0.0.1) by 127.0.0.1 (EzMTS MTSSmtp
1176             # 1.55d5) ; Thu, 20 Mar 03 10:06:43 +0100 for <asrg@ietf.org>
1177 26 50       423 if (/^\S+ \((?:\S+\@)?${LOCALHOST}\) /) { return 0; }
  0         0  
1178              
1179             # Received: from casper.ghostscript.com (raph@casper [127.0.0.1]) h148aux8016336verify=FAIL); Tue, 4 Feb 2003 00:36:56 -0800
1180             if (/^\S+ \(\S+\@\S+ \[${LOCALHOST}\]\) /) { return 0; }
1181 26 50       257  
  0         0  
1182             # Received: from (AUTH: e40a9cea) by vqx.net with esmtp (courier-0.40) for <asrg@ietf.org>; Mon, 03 Mar 2003 14:49:28 +0000
1183             if (/^\(AUTH: \S+\) by \S+ with /) { return 0; }
1184 26 50       242  
  0         0  
1185             # from localhost (localhost [[UNIX: localhost]]) by home.barryodonovan.com
1186             # (8.12.11/8.12.11/Submit) id iBADHRP6011034; Fri, 10 Dec 2004 13:17:27 GMT
1187 26 50       56 if (/^localhost \(localhost \[\[UNIX: localhost\]\]\) by /) { return 0; }
  0         0  
1188              
1189             # Internal Amazon traffic
1190             # Received: from dc-mail-3102.iad3.amazon.com by mail-store-2001.amazon.com with ESMTP (peer crosscheck: dc-mail-3102.iad3.amazon.com)
1191 26 50       41 if (/^\S+\.amazon\.com by \S+\.amazon\.com with ESMTP \(peer crosscheck: /) { return 0; }
  0         0  
1192              
1193             # Received: from GWGC6-MTA by gc6.jefferson.co.us with Novell_GroupWise; Tue, 30 Nov 2004 10:09:15 -0700
1194             if (/^[^\.]+ by \S+ with Novell_GroupWise/) { return 0; }
1195 26 50       47  
  0         0  
1196             # Received: from no.name.available by [165.224.43.143] via smtpd (for [165.224.216.89]) with ESMTP; Fri, 28 Jan 2005 13:06:39 -0500
1197             # Received: from no.name.available by [165.224.216.88] via smtpd (for lists.sourceforge.net [66.35.250.206]) with ESMTP; Fri, 28 Jan 2005 15:42:30 -0500
1198 26 50       47 # These are from an internal host protected by a Raptor firewall, to hosts
  0         0  
1199             # outside the firewall. We can only ignore the handover since we don't have
1200             # enough info in those headers; however, from googling, it appears that
1201             # all samples are cases where the handover is safely ignored.
1202             if (/^no\.name\.available by \S+ via smtpd \(for /) { return 0; }
1203              
1204             # from 156.56.111.196 by blazing.arsecandle.org (envelope-from <gentoo-announce-return-530-rod=arsecandle.org@lists.gentoo.org>, uid 502) with qmail-scanner-1.24 (clamdscan: 0.80/594. f-prot: 4.4.2/3.14.11. Clear:RC:0(156.56.111.196):. Processed in 0.288806 secs); 06 Feb 2005 21:11:38 -0000
1205             # these are safe to ignore. the previous handover line has the full
1206 26 50       42 # details of the handover described here, it's just qmail-scanner
  0         0  
1207             # logging a little more.
1208             if (/^\S+ by \S+ \(.{0,100}\) with qmail-scanner/) {
1209             if (defined $envfrom) {
1210             $envfrom =~ s/^\s*<*//gs;
1211             $envfrom =~ s/>*\s*$//gs;
1212 26 100       51 $envfrom =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
1213 1 50       4 }
1214 1         6 $self->{qmail_scanner_env_from} = $envfrom; # hack!
1215 1         12 return 0;
1216 1         4 }
1217              
1218 1         4 # Received: from mmail by argon.connect.org.uk with local (connectmail/exim)
1219 1         27 # id 18tOsg-0008FX-00; Thu, 13 Mar 2003 09:20:06 +0000
1220             if (/^\S+ by \S+ with local/) { return 0; }
1221              
1222             # Local unix socket handover from Cyrus, tested with v2.3.14
1223             # Received: from testintranator.net.vm ([unix socket])_ by testintranator.net.vm (Cyrus v2.3.14) with LMTPA;_ Tue, 21 Jul 2009 14:34:14 +0200
1224 25 50       48 # Attention: Actually the received header is parsed as "testintranator.net.vm ([unix socket]) by testintranator.net.vm (Cyrus v2.3.14) with LMTPA", "from" is omitted.
  0         0  
1225             if (/^\S+ \(\[unix socket\]\) by \S+ \(Cyrus v[0-9]*?\.[0-9]*?\.[0-9]*?\) with LMTPA/) { return 0; }
1226              
1227             # HANDOVERS WE KNOW WE CAN'T DEAL WITH: TCP transmission, but to MTAs that
1228             # just don't log enough info for us to use (ie. no IP address present).
1229 25 50       64 # Note: "return 0" is strongly recommended here, unless you're sure
  0         0  
1230             # the regexp won't match something in the field; otherwise ALL_TRUSTED may
1231             # fire even in the presence of an unparseable Received header.
1232              
1233             # Received: from CATHY.IJS.SI by CATHY.IJS.SI (PMDF V4.3-10 #8779) id <01KTSSR50NSW001MXN@CATHY.IJS.SI>; Fri, 21 Mar 2003 20:50:56 +0100
1234             # Received: from MATT_LINUX by hippo.star.co.uk via smtpd (for mail.webnote.net [193.120.211.219]) with SMTP; 3 Jul 2002 15:43:50 UT
1235             # Received: from cp-its-ieg01.mail.saic.com by cpmx.mail.saic.com for me@jmason.org; Tue, 23 Jul 2002 14:09:10 -0700
1236             if (/^\S+ by \S+ (?:with|via|for|\()/) { return 0; }
1237              
1238             # from senmail2.senate.gov with LMTP by senmail2 (3.0.2/sieved-3-0-build-942) for <example@vandinter.org>; Fri, 30 Jun 2006 10:58:41 -0400
1239             # from zimbramail.artsit.org.uk (unverified) by MAILSWEEP.birminghamartsit.org.uk (Clearswift SMTPRS 5.1.7) with ESMTP id <T78926b35f2c0a80003da8@MAILSWEEP.birminghamartsit.org.uk> for <discuss@lists.surbl.org>; Tue, 30 May 2006 15:56:15 +0100
1240 25 100       112 if (/^\S+ (?:(?:with|via|for) \S+|\(unverified\)) by\b/) { return 0; }
  20         930  
1241              
1242             # from MjA3NDc4Mg (unknown) by ismtpd0001p1lon1.sendgrid.net (SG) with HTTP id aqHKNX2kSp-HiqspAa-uvw for <email@e.example.com>; Thu, 02 Apr 2020 07:53:55.516 +0000 (UTC)
1243             if (/^\S+ \(unknown\) by \S+ \(SG\) with \b/) { return 0; }
1244 5 100       21  
  2         30  
1245             # from localhost (example.com [local]) by example.com (OpenSMTPD) with ESMTPA id 5db34e0d for <email@example.com>; Tue, 7 Apr 2020 01:38:29 -0600 (MDT)
1246             if (/^\S+ \(\S+ \[local\]\) by \S+ \(OpenSMTPD\) with \b/) { return 0; }
1247 3 50       9  
  0         0  
1248             # from DL1GSPMX02 (dl1gspmx02.gamestop.com) by email.ebgames.com (LSMTP for Windows NT v1.1b) with SMTP id <21.000575A0@email.ebgames.com>; Tue, 12 Sep 2006 21:06:43 -0500
1249             if (/\(LSMTP for/) { return 0; }
1250 3 50       6  
  0         0  
1251             # from ([127.0.0.1]) with MailEnable ESMTP; Wed, 10 Jul 2019 10:29:59 +0300
1252             if (/^\(\[${LOCALHOST}\]\) with MailEnable /) { return 0; }
1253 3 100       9  
  1         16  
1254             # from facebook.com (RrlQsUbrndsQ6/zbJaSzSPcmy3GwqE5h6IukkE5GGBIJgonAFnoQE3L+9tv2TU3e 2401:db00:1110:50e8:face:0000:002f:0000)
1255             # by facebook.com with Thrift id 423753524b5011e9a83e248a0796a3b2-169bd530; Wed, 20 Mar 2019 13:39:29 -0700
1256 2 50       102 if (/^facebook\.com \([^\)]+\) by facebook\.com with Thrift id \S+$/) { return 0; }
  0         0  
1257              
1258             # from 384836569573 named unknown by gmailapi.google.com with HTTPREST; Wed, 6 Mar 2019 03:39:24 -0500
1259             if (/^\S+ named \S+ by gmailapi\.google\.com with HTTPREST$/) { return 0; }
1260 2 50       7  
  0         0  
1261             # from mail.payex.com id <B5b8f11e30004>; Wed, 05 Sep 2018 01:14:43 +0200
1262             if (/^\S+ id \S+$/) { return 0; }
1263 2 50       6
  0         0  
1264             # from [<4124973-137966-3089@be2.maropost.com>] ([<4124973-137966-3089@be2.maropost.com>] helo=maropost.com) by 643852-mailer2 (envelope-from 4124973-137966-3089@be2.maropost.com)
1265             # (Jetsend MTA 0.0.1 with ESMTP; Fri Sep 14 14:36:56 EDT 2018
1266 2 50       7 if (/^\[<.*? \(Jetsend/) { return 0; }
  0         0  
1267              
1268             # if at this point we still haven't figured out the HELO string, see if we
1269             # can't just guess
1270 2 50       7 if (!$helo && /^(\S+)[^-A-Za-z0-9\.]/) { $helo = $1; }
  0         0  
1271             }
1272              
1273             # ---------------------------------------------------------------------------
1274 2 100 66     20  
  1         12  
1275             elsif (s/^FROM //) {
1276             # simta: http://rsug.itd.umich.edu/software/simta/
1277             # Note the ugly uppercase FROM/BY/ID
1278             # Received: FROM hackers.mr.itd.umich.edu (smtp.mail.umich.edu [141.211.14.81])
1279             # BY madman.mr.itd.umich.edu ID 434B508E.174A6.13932 ; 11 Oct 2005 01:41:34 -0400
1280             # Received: FROM [192.168.1.24] (s233-64-90-216.try.wideopenwest.com [64.233.216.90])
1281             # BY hackers.mr.itd.umich.edu ID 434B5051.8CDE5.15436 ; 11 Oct 2005 01:40:33 -0400
1282             # Received: FROM helo (1.2.3.4 [1.2.3.4]) BY xxx.com (Rockliffe SMTPRA 10.3.0)
1283             # WITH SMTP ID <B0065361981@xxx.com> FOR <foo@bar.net>; Tue, 6 Nov 2018 07:41:26 +0200
1284              
1285             if (/^(\S+) \((\S+) \[(${IP_ADDRESS})\]\) BY (\S+) (?:\([^\)]+\) WITH SMTP )?ID <?(\S+?)>?(?: FOR <[^>]+>)?$/ ) {
1286             $mta_looked_up_dns = 1;
1287             $helo = $1; $rdns = $2; $ip = $3; $by = $4; $id = $5;
1288             $rdns = '' if $rdns eq 'unverified';
1289 2 50       301 goto enough;
1290 2         8 }
1291 2         5 }
  2         6  
  2         4  
  2         4  
  2         4  
1292 2 50       7  
1293 2         218 # ---------------------------------------------------------------------------
1294              
1295             elsif (s/^\(from //) {
1296             # Norton AntiVirus Gateway
1297             # Received: (from localhost [24.180.47.240])
1298             # by host.name (NAVGW 2.5.2.12) with SMTP id M2006060503484615455
1299             # for <user@domain.co.uk>; Mon, 05 Jun 2006 03:48:47 +0100
1300             if (/^(\S*) \[(${IP_ADDRESS})\]\) by (\S+) \(NAVGW .*?\) with /) {
1301             $helo = $1; $ip = $2; $by = $3;
1302             goto enough;
1303             }
1304 5 100       666  
1305 1         4 # header produced by command line /usr/bin/sendmail -t -f username@example.com
  1         3  
  1         2  
1306 1         235 # Received: (from username@localhost) by home.example.com
1307             # (8.12.11/8.12.11/Submit) id iBADHRP6011034; Fri, 10 Dec 2004 13:17:27 GMT
1308             if (/^\S+\@localhost\) by \S+ /) { return 0; }
1309              
1310             # Received: (from vashugins@juno.com) by m06.lax.untd.com (jqueuemail) id LRVB3JAJ; Fri, 02 Jun 2006 08:15:21 PDT
1311             if (/^[^\s\@]+\@[^)]+\) by \S+\(jqueuemail\) id [^\s;]+/) { return 0; }
1312 4 50       31 }
  4         38  
1313              
1314             # ---------------------------------------------------------------------------
1315 0 0       0  
  0         0  
1316             # FALL-THROUGH: OK, at this point let's try some general patterns for things
1317             # we may not have already parsed out.
1318             if (!$ip && /\[(${IP_ADDRESS})\]/) { $ip = $1; }
1319              
1320             # ---------------------------------------------------------------------------
1321              
1322 2 50 33     197 # We need to have a minimal amount of information to have a useful parse.
  2         6  
1323             # If we have the IP and the "by" name, move forward. If we don't, we'll
1324             # drop into the unparseable area.
1325             if ($ip && $by) { goto enough; }
1326              
1327             # Ok, we can't handle this header, go ahead and return that.
1328             return;
1329 2 50 33     12  
  2         9  
1330             # ---------------------------------------------------------------------------
1331              
1332 0         0 enough:
1333              
1334             # OK, line parsed (at least partially); now deal with the contents
1335              
1336             # flag handovers we couldn't get an IP address from at all
1337             if ($ip eq '') {
1338             dbg("received-header: could not parse IP address from: $_");
1339             }
1340              
1341 177 50       617 # DISABLED: if we cut out localhost-to-localhost SMTP handovers,
1342 0         0 # we will give FPs on SPF checks -- since the SMTP "MAIL FROM" addr
1343             # will be recorded, but we won't have the relays handover recorded
1344             # for that SMTP transaction, so we wind up checking the wrong IP
1345             # for the addr.
1346             if (0) {
1347             if ($ip eq '127.0.0.1') {
1348             dbg("received-header: ignoring localhost handover");
1349             return 0; # ignore localhost handovers
1350 177         245 }
1351             }
1352              
1353             # Strip ending dot, Bug 7810
1354             $rdns =~ s/\.+\z//;
1355              
1356             if ($rdns =~ /^unknown$/i || $rdns =~ /^\[/) {
1357             $rdns = ''; # some MTAs seem to do this
1358 177         488 }
1359            
1360 177 100 66     832 $ip =~ s/^ipv6://i; # remove "IPv6:" prefix
1361 1         3 $ip =~ s/^\[//; $ip =~ s/\]\z//;
1362              
1363             # IPv6 Scoped Address (RFC 4007, RFC 6874, RFC 3986 "unreserved" charset)
1364 177         377 $ip =~ s/%[A-Z0-9._~-]*\z//si; # scoped address? remove <zone_id>
1365 177         268  
  177         247  
1366             # remove "::ffff:" prefix from IPv4-mapped-in-IPv6 addresses,
1367             # so we can treat them simply as IPv4 addresses
1368 177         269 # (only handles 'alternative form', not 'preferred form' - to be improved)
1369             $ip =~ s/^0*:0*:(?:0*:)*ffff:(\d+\.\d+\.\d+\.\d+)$/$1/i;
1370              
1371             $by =~ s/\;$//;
1372              
1373 177         287 # ensure invalid chars are stripped. Replace with '!' to flag their
1374             # presence, though. NOTE: this means "[1.2.3.4]" IP addr HELO
1375 177         242 # strings, which are legit by RFC-2821, look like "!1.2.3.4!".
1376             # still useful though.
1377             my $strip_chars = qr/[\s\000\#\[\]\(\)\<\>\|]/;
1378             $ip =~ s/$strip_chars/!/gs;
1379             $rdns =~ s/$strip_chars/!/gs;
1380             $helo =~ s/$strip_chars/!/gs;
1381 177         714 $by =~ s/$strip_chars/!/gs;
1382 177         679 $ident =~ s/$strip_chars/!/gs;
1383 177         544 if (defined $envfrom) {
1384 177         479 $envfrom =~ s/^\s*<*//gs;
1385 177         450 $envfrom =~ s/>*\s*$//gs;
1386 177         378 $envfrom =~ s/$strip_chars/!/gs;
1387 177 100       353 }
1388 7         24  
1389 7         52 my $relay = {
1390 7         19 ip => $ip,
1391             by => $by,
1392             helo => $helo,
1393 177         1551 id => $id,
1394             ident => $ident,
1395             envfrom => $envfrom,
1396             lc_by => (lc $by),
1397             lc_helo => (lc $helo),
1398             auth => $auth
1399             };
1400              
1401             if ($rdns eq '') {
1402             if ($mta_looked_up_dns) {
1403             # we know the MTA always does lookups, so this means the host
1404             # really has no rDNS (rather than that the MTA didn't bother
1405 177 100       448 # looking it up for us).
1406 61 100       128 $relay->{no_reverse_dns} = 1;
1407             $rdns = '';
1408             } else {
1409             $relay->{rdns_not_in_headers} = 1;
1410 15         27 }
1411 15         26 }
1412              
1413 46         86 $relay->{rdns} = $rdns;
1414             $relay->{lc_rdns} = lc $rdns;
1415              
1416             $self->make_relay_as_string($relay);
1417 177         410  
1418 177         412 my $is_private = ($ip =~ /${IP_PRIVATE}/o);
1419             $relay->{ip_private} = $is_private;
1420 177         535  
1421             # add it to an internal array so Eval tests can use it
1422 177         1009 return $relay;
1423 177         357 }
1424              
1425             my ($self, $relay) = @_;
1426 177         1458  
1427             # as-string rep. use spaces so things like Bayes can tokenize them easily.
1428             # NOTE: when tokenizing or matching, be sure to note that new
1429             # entries may be added to this string later. However, the *order*
1430 288     288 0 936 # of entries must be preserved, so that regexps that assume that
1431             # e.g. "ip" comes before "helo" will still work.
1432             #
1433              
1434             # we could mark envfrom as "undef" if missing? dunno if needed?
1435             my $envfrom = $relay->{envfrom} || '';
1436             my $asstr = "[ ip=$relay->{ip} rdns=$relay->{rdns} helo=$relay->{helo} by=$relay->{by} ident=$relay->{ident} envfrom=$envfrom intl=0 id=$relay->{id} auth=$relay->{auth} msa=0 ]";
1437             dbg("received-header: parsed as $asstr");
1438             $relay->{as_string} = $asstr;
1439             }
1440 288   100     789  
1441 288         1390 # restart the parse if we find a fetchmail marker or similar.
1442 288         1032 # spamcop does this, and it's a great idea ;)
1443 288         766 my ($self) = @_;
1444             if ($self->{allow_mailfetch_markers}) {
1445             dbg("received-header: found mail fetcher marker, restarting parse");
1446             $self->{relays_trusted} = [ ];
1447             $self->{relays_internal} = [ ];
1448             $self->{relays_external} = [ ];
1449 4     4 0 11 } else {
1450 4 100       13 dbg("received-header: found mail fetcher marker outside trusted area, ignored");
1451 1         5 }
1452 1         4 }
1453 1         6  
1454 1         4 # ---------------------------------------------------------------------------
1455              
1456 3         10 1;