Rev 3 | Details | Compare with Previous | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line | 
|---|---|---|---|
| 1 | magnus | 1 | diff -urN SpamAssassin.orig/Conf.pm SpamAssassin/Conf.pm | 
| 2 | --- SpamAssassin.orig/Conf.pm   Mon Dec 15 22:41:57 2003 | ||
| 3 | +++ SpamAssassin/Conf.pm        Sun Feb 29 17:42:58 2004 | ||
| 4 | @@ -107,6 +107,10 @@ | ||
| 5 | use constant TYPE_URI_EVALS => 0x0011; | ||
| 6 | use constant TYPE_META_TESTS => 0x0012; | ||
| 7 | use constant TYPE_RBL_EVALS => 0x0013; | ||
| 8 | +# Need to reserve a number with the SA folks (needs to be odd as it is an | ||
| 9 | +# eval test) | ||
| 10 | +use constant TYPE_RES_EVALS    => 0x0021; | ||
| 11 | + | ||
| 12 | |||
| 13 | $VERSION = 'bogus'; # avoid CPAN.pm picking up version strings later | ||
| 14 | |||
| 15 | @@ -2000,12 +2004,15 @@ | ||
| 16 | |||
| 17 | =cut | ||
| 18 | |||
| 19 | -    if (/^header\s+(\S+)\s+(?:rbl)?eval:(.*)$/) { | ||
| 20 | +    if (/^header\s+(\S+)\s+(?:rbl|res)?eval:(.*)$/) { | ||
| 21 | my ($name, $fn) = ($1, $2); | ||
| 22 | |||
| 23 | if ($fn =~ /^check_rbl/) { | ||
| 24 | $self->add_test ($name, $fn, TYPE_RBL_EVALS); | ||
| 25 |        } | ||
| 26 | +      elsif (/^header\s+(\S+)\s+reseval:(.*)$/) { | ||
| 27 | +       $self->add_test ($name, $fn, TYPE_RES_EVALS); | ||
| 28 | +      } | ||
| 29 |        else { | ||
| 30 | $self->add_test ($name, $fn, TYPE_HEAD_EVALS); | ||
| 31 |        } | ||
| 32 | @@ -2603,6 +2610,9 @@ | ||
| 33 |         } | ||
| 34 | elsif ($type == TYPE_RBL_EVALS) { | ||
| 35 | $self->{rbl_evals}->{$name} = \@args; | ||
| 36 | +       } | ||
| 37 | +       elsif ($type == TYPE_RES_EVALS) { | ||
| 38 | +         $self->{res_evals}->{$name} = \@args; | ||
| 39 |         } | ||
| 40 | elsif ($type == TYPE_RAWBODY_EVALS) { | ||
| 41 | $self->{rawbody_evals}->{$name} = \@args; | ||
| 42 | diff -urN SpamAssassin.orig/EvalTests.pm SpamAssassin/EvalTests.pm | ||
| 43 | --- SpamAssassin.orig/EvalTests.pm      Sat Jan 17 15:56:08 2004 | ||
| 44 | +++ SpamAssassin/EvalTests.pm   Sun Aug 15 15:47:22 2004 | ||
| 45 | @@ -1941,6 +1941,234 @@ | ||
| 46 | return $self->{habeas_swe}; | ||
| 47 |  } | ||
| 48 | |||
| 49 | + | ||
| 50 | +# This was originally written to implement greylisting in SA-Exim, although | ||
| 51 | +# I have tried to make it more general and allow for reuse in other MTAs | ||
| 52 | +# (although they will need to | ||
| 53 | +# 1) be running SA at SMTP time | ||
| 54 | +# 2) Provide the list of rcpt to and env from in some headers for SA to read | ||
| 55 | +# 3) Provide the IP of the connecting host ) | ||
| 56 | +# | ||
| 57 | +# This rule should get a negative score so that if we've already seen the | ||
| 58 | +# greylisting tuplet before, we lower the score, which hopefully brings us from | ||
| 59 | +# a tempreject to an accept (at least that's how sa-exim does it) | ||
| 60 | +# -- Marc <marc_soft@merlins.org> 2004/01/19 | ||
| 61 | + | ||
| 62 | +sub greylisting { | ||
| 63 | +  my ($self, $optionhash) = @_; | ||
| 64 | + | ||
| 65 | +  $optionhash  =~ s/;/,/g; | ||
| 66 | +  # This is safe, right? (users shouldn't be able to set it in their config) | ||
| 67 | +  my %option=eval $optionhash; | ||
| 68 | +  my $connectip;  | ||
| 69 | +  my $envfrom; | ||
| 70 | +  my $rcptto; | ||
| 71 | +  my @rcptto; | ||
| 72 | +  my $iswhitelisted=0; | ||
| 73 | +  my $err; | ||
| 74 | +  my $mesgid = $self->get ('Message-Id')."\n";  | ||
| 75 | +  my $mesgidfn; | ||
| 76 | +  my $tuplet; | ||
| 77 | + | ||
| 78 | +  foreach my $reqoption (qw ( method greylistsecs dontgreylistthreshold  | ||
| 79 | +       connectiphdr envfromhdr rcpttohdr greylistnullfrom greylistfourthbyte )) | ||
| 80 | +  { | ||
| 81 | +    die "Greylist option $reqoption missing from SA config" unless (defined $option{$reqoption}); | ||
| 82 | +    #warn "found $reqoption -> $option{$reqoption}\n"; | ||
| 83 | +  } | ||
| 84 | +   | ||
| 85 | +  # No newlines, thank you (yes, you need this twice apparently) | ||
| 86 | +  chomp ($mesgid); | ||
| 87 | +  chomp ($mesgid); | ||
| 88 | +  # Newline in the middle mesgids, are you serious? Get rid of them here | ||
| 89 | +  $mesgid =~ s/\012/|/g; | ||
| 90 | + | ||
| 91 | +  # For stuff that we know is spam, don't greylist the host | ||
| 92 | +  # (that might help later spam with a lower score to come in) | ||
| 93 | +  if ($self->{hits} >= $option{'dontgreylistthreshold'}) | ||
| 94 | +  { | ||
| 95 | +    #warn "debug: skipping greylisting on $mesgid, since score is already ".$self->{hits}." and you configured greylisting to not bother with anything above $dontcheckscore\n"; | ||
| 96 | +    return 0; | ||
| 97 | +  } | ||
| 98 | + | ||
| 99 | + | ||
| 100 | +  if (not $connectip = $self->get($option{'connectiphdr'})) | ||
| 101 | +  { | ||
| 102 | +    warn "Couldn't get Connecting IP header $option{'connectiphdr'} for message $mesgid, skipping greylisting call\n"; | ||
| 103 | +    return 0; | ||
| 104 | +  } | ||
| 105 | +  chomp($connectip); | ||
| 106 | +  # Clean up input (for security, if you use files/dirs) | ||
| 107 | +  $connectip =~ s#/#-#g; | ||
| 108 | + | ||
| 109 | +  # Account for a null envelope from | ||
| 110 | +  if (not defined ($envfrom = $self->get($option{'envfromhdr'}))) | ||
| 111 | +  { | ||
| 112 | +    warn "Couldn't get Envelope From header $option{'envfromhdr'} for message $mesgid, skipping greylisting call\n"; | ||
| 113 | +    return 0; | ||
| 114 | +  } | ||
| 115 | +  chomp($envfrom); | ||
| 116 | +  # Clean up input (for security, if you use files/dirs) | ||
| 117 | +  $envfrom =~ s#/#-#g; | ||
| 118 | +  if (not $envfrom) | ||
| 119 | +  { | ||
| 120 | +      $envfrom="<>"; | ||
| 121 | +      return 0 if (not $option{'greylistnullfrom'}); | ||
| 122 | +  } | ||
| 123 | + | ||
| 124 | +  if (not $rcptto = $self->get($option{'rcpttohdr'})) | ||
| 125 | +  { | ||
| 126 | +    warn "Couldn't get Rcpt To header $option{'rcpttohdr'} for message $mesgid, skipping greylisting call\n"; | ||
| 127 | +    return 0; | ||
| 128 | +  } | ||
| 129 | +  chomp($rcptto); | ||
| 130 | +  # Clean up input (for security, if you use files/dirs) | ||
| 131 | +  $rcptto =~ s#/#-#g; | ||
| 132 | +  @rcptto = split(/, /, $rcptto); | ||
| 133 | + | ||
| 134 | + | ||
| 135 | +  umask 0007; | ||
| 136 | + | ||
| 137 | +  foreach $rcptto (@rcptto) | ||
| 138 | +  { | ||
| 139 | +    # The dir method is easy to fiddle with and expire records in (with | ||
| 140 | +    # a find | rm) but it's probably more I/O extensive than a real DB | ||
| 141 | +    # and suffers from directory size problems if a specific IP is sending | ||
| 142 | +    # generating tens of thousands of tuplets. -- Marc | ||
| 143 | +    # That said, I prefer formats I can easily tinker with, and not having to | ||
| 144 | +    # worry about buggy locking and so forth  | ||
| 145 | + | ||
| 146 | +    if ($option{'method'} eq "dir") | ||
| 147 | +    { | ||
| 148 | +      # The clean strings are hardcoded because it's hard to do a variable | ||
| 149 | +      # substitution within a tr (and using the eval solution is too resource | ||
| 150 | +      # expensive) | ||
| 151 | +      $envfrom =~ tr/!#%( )*+,-.0123456789:<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_abcdefghijklmnopqrstuvwxyz{|}~/_/c; | ||
| 152 | +      # clean variables to run properly under -T | ||
| 153 | +      $envfrom =~ /(.+)/; | ||
| 154 | +      $envfrom = $1; | ||
| 155 | +      $rcptto  =~ tr/!#%( )*+,-.0123456789:<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_abcdefghijklmnopqrstuvwxyz{|}~/_/c; | ||
| 156 | +      $rcptto =~ /(.+)/; | ||
| 157 | +      $rcptto = $1; | ||
| 158 | + | ||
| 159 | +      die "greylist option dir not passed, even though method was set to dir" unless ($option{'dir'}); | ||
| 160 | +      my ($ipbyte1, $ipbyte2, $ipbyte3, $ipbyte4) = split(/\./, $connectip);  | ||
| 161 | +      my $ipdir1 = "$option{'dir'}/$ipbyte1"; | ||
| 162 | +      my $ipdir2 = "$ipdir1/$ipbyte2"; | ||
| 163 | +      my $ipdir3 = "$ipdir2/$ipbyte3"; | ||
| 164 | +      my $ipdir4; | ||
| 165 | +      my $tupletdir; | ||
| 166 | + | ||
| 167 | +      $ipdir4 = "$ipdir3"; | ||
| 168 | +      $ipdir4 .= "/$ipbyte4" if ($option{'greylistfourthbyte'}); | ||
| 169 | +      $tupletdir = "$ipdir4/$envfrom"; | ||
| 170 | + | ||
| 171 | +      $tuplet = "$tupletdir/$rcptto"; | ||
| 172 | +   | ||
| 173 | +      # make directory whether it's there or not (faster than test and set) | ||
| 174 | +      mkdir $ipdir1; | ||
| 175 | +      mkdir $ipdir2; | ||
| 176 | +      mkdir $ipdir3; | ||
| 177 | +      mkdir $ipdir4; | ||
| 178 | +      mkdir $tupletdir; | ||
| 179 | + | ||
| 180 | +      if (not -e $tuplet)  | ||
| 181 | +      { | ||
| 182 | +        # If the tuplets aren't there, we create them and continue in | ||
| 183 | +        # case there are other ones (one of them might be whitelisted already) | ||
| 184 | +       $err="creating $tuplet"; | ||
| 185 | +       open (TUPLET, ">$tuplet") or goto greylisterror; | ||
| 186 | +       print TUPLET time."\n"; | ||
| 187 | +       print TUPLET "Status: Greylisted\n"; | ||
| 188 | +       print TUPLET "Last Message-Id: $mesgid\n"; | ||
| 189 | +       print TUPLET "Whitelisted Count: 0\n"; | ||
| 190 | +       print TUPLET "Query Count: 1\n"; | ||
| 191 | +       $err="closing first-written $tuplet"; | ||
| 192 | +       close TUPLET or goto greylisterror; | ||
| 193 | +      } | ||
| 194 | +      else | ||
| 195 | +      { | ||
| 196 | +       my $time; | ||
| 197 | +       my $status; | ||
| 198 | +       my $whitelistcount; | ||
| 199 | +       my $querycount; | ||
| 200 | + | ||
| 201 | +       # Take into account race condition of expiring deletes and us running | ||
| 202 | +       $err="reading $tuplet"; | ||
| 203 | +       open (TUPLET, "<$tuplet") or goto greylisterror; | ||
| 204 | +       $err="Couldn't read time"; | ||
| 205 | +       defined ($time=<TUPLET>) or goto greylisterror; | ||
| 206 | +       chomp ($time); | ||
| 207 | + | ||
| 208 | +       $err="Couldn't read status"; | ||
| 209 | +       defined ($status=<TUPLET>) or goto greylisterror; | ||
| 210 | +       chomp ($status); | ||
| 211 | +       $err="Couldn't extract Status from $status"; | ||
| 212 | +       $status =~ s/^Status: // or goto greylisterror; | ||
| 213 | + | ||
| 214 | +       # Skip Mesg-Id | ||
| 215 | +       $err="Couldn't skip Mesg-Id"; | ||
| 216 | +       defined ($_=<TUPLET>) or goto greylisterror; | ||
| 217 | + | ||
| 218 | +       $err="Couldn't read whitelistcount"; | ||
| 219 | +       defined ($whitelistcount=<TUPLET>) or goto greylisterror; | ||
| 220 | +       chomp ($whitelistcount); | ||
| 221 | +       $err="Couldn't extract Whitelisted Count from $whitelistcount"; | ||
| 222 | +       $whitelistcount =~ s/^Whitelisted Count: // or goto greylisterror; | ||
| 223 | + | ||
| 224 | +       $err="Couldn't read querycount"; | ||
| 225 | +       defined ($querycount=<TUPLET>) or goto greylisterror; | ||
| 226 | +       chomp ($querycount); | ||
| 227 | +       $err="Couldn't extract Query Count from $querycount"; | ||
| 228 | +       $querycount =~ s/^Query Count: // or goto greylisterror; | ||
| 229 | +       close (TUPLET); | ||
| 230 | + | ||
| 231 | +       $querycount++; | ||
| 232 | +       if ((time - $time) > $option{'greylistsecs'}) | ||
| 233 | +       { | ||
| 234 | +         $status="Whitelisted"; | ||
| 235 | +         $whitelistcount++; | ||
| 236 | +       } | ||
| 237 | + | ||
| 238 | +       $err="re-writing $tuplet"; | ||
| 239 | +       open (TUPLET, ">$tuplet") or goto greylisterror; | ||
| 240 | +       print TUPLET "$time\n"; | ||
| 241 | +       print TUPLET "Status: $status\n"; | ||
| 242 | +       print TUPLET "Last Message-Id: $mesgid\n"; | ||
| 243 | +       print TUPLET "Whitelisted Count: $whitelistcount\n"; | ||
| 244 | +       print TUPLET "Query Count: $querycount\n"; | ||
| 245 | +       $err="closing re-written $tuplet"; | ||
| 246 | +       close TUPLET or goto greylisterror; | ||
| 247 | + | ||
| 248 | +        # We continue processing the other recipients, to setup or | ||
| 249 | +       # update their counters | ||
| 250 | +       if ($status eq "Whitelisted") | ||
| 251 | +       { | ||
| 252 | +         $iswhitelisted=1; | ||
| 253 | +       } | ||
| 254 | +      } | ||
| 255 | +    } | ||
| 256 | +    elsif ($option{'method'} eq "file") | ||
| 257 | +    { | ||
| 258 | +      warn "codeme\n"; | ||
| 259 | +    } | ||
| 260 | +    elsif ($option{'method'} eq "db") | ||
| 261 | +    { | ||
| 262 | +      warn "codeme\n"; | ||
| 263 | +    } | ||
| 264 | +  } | ||
| 265 | +   | ||
| 266 | +  return $iswhitelisted; | ||
| 267 | +   | ||
| 268 | +  greylisterror: | ||
| 269 | +  warn "Reached greylisterror: $err / $!"; | ||
| 270 | +  # delete tuplet since it apparently had issues but don't check for errors | ||
| 271 | +  # in case it was a permission denied on write | ||
| 272 | +  unlink ($tuplet); | ||
| 273 | +  return $iswhitelisted; | ||
| 274 | +} | ||
| 275 | + | ||
| 276 | + | ||
| 277 | ########################################################################### | ||
| 278 | # BODY TESTS: | ||
| 279 | ########################################################################### | ||
| 280 | diff -urN SpamAssassin.orig/PerMsgStatus.pm SpamAssassin/PerMsgStatus.pm | ||
| 281 | --- SpamAssassin.orig/PerMsgStatus.pm   Tue Jan 20 13:40:04 2004 | ||
| 282 | +++ SpamAssassin/PerMsgStatus.pm        Sun Feb 29 19:01:19 2004 | ||
| 283 | @@ -184,6 +184,9 @@ | ||
| 284 | |||
| 285 | # add points from Bayes, before adjusting the AWL | ||
| 286 | $self->{hits} += $self->{learned_hits}; | ||
| 287 | +     | ||
| 288 | +    # Now, we can run rules that have to run last | ||
| 289 | +    $self->do_res_eval_tests(); | ||
| 290 | |||
| 291 | # Do AWL tests last, since these need the score to have already been | ||
| 292 | # calculated | ||
| 293 | @@ -2010,6 +2013,11 @@ | ||
| 294 |  } | ||
| 295 | |||
| 296 | ########################################################################### | ||
| 297 | + | ||
| 298 | +sub do_res_eval_tests { | ||
| 299 | +  my ($self) = @_; | ||
| 300 | +  $self->run_eval_tests ($self->{conf}->{res_evals}, ''); | ||
| 301 | +} | ||
| 302 | |||
| 303 |  sub do_head_eval_tests { | ||
| 304 | my ($self) = @_; |