blib/lib/Plack/Middleware/Greylist.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 93 | 95 | 97.8 |
branch | 28 | 36 | 77.7 |
condition | 19 | 25 | 76.0 |
subroutine | 15 | 15 | 100.0 |
pod | 2 | 2 | 100.0 |
total | 157 | 173 | 90.7 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
package Plack::Middleware::Greylist;

# ABSTRACT: throttle requests with different rates based on net blocks

# RECOMMEND PREREQ: Cache::FastMmap
# RECOMMEND PREREQ: Ref::Util::XS

use v5.12;
use warnings;

use parent qw( Plack::Middleware );

use HTTP::Status qw/ HTTP_FORBIDDEN HTTP_TOO_MANY_REQUESTS /;
use List::Util 1.29 qw/ pairs /;
use Module::Load qw/ load /;
use Net::IP::Match::Trie;
use Plack::Util;
use Plack::Util::Accessor qw/ default_rate rules cache file _match greylist retry_after /;
use Ref::Util qw/ is_plain_arrayref /;
use Time::Seconds qw/ ONE_MINUTE /;

our $VERSION = 'v0.5.0';


# Validates the configuration and builds the lookup structures used by call():
#
#   * default_rate - optional; when omitted it is stored as -1 (no limit),
#                    otherwise it must be a positive integer.
#   * retry_after  - optional; defaults to ONE_MINUTE + 1 and must be a
#                    positive integer greater than ONE_MINUTE.
#   * cache        - when not supplied, a Cache::FastMmap counter backed by
#                    the "file" attribute is created (dies if "file" is unset).
#   * greylist     - each netblock is added to the matcher and translated into
#                    a [ rate, tracking key ] pair stored in "rules".
#
# Dies with a message when an attribute fails validation.
sub prepare_app {
    my ($self) = @_;

    # Only validate a rate that the caller actually supplied. An omitted rate
    # is stored as -1, which call() treats as "do not rate limit"; running the
    # positive-integer check on that sentinel would always die.
    if ( defined $self->default_rate ) {
        die "default_rate must be a positive integer" unless $self->default_rate =~ /^[1-9][0-9]*$/;
    }
    else {
        $self->default_rate(-1);
    }

    $self->retry_after( ONE_MINUTE + 1 ) unless defined $self->retry_after;
    die "retry_after must be a positive integer greater than ${ \ONE_MINUTE} seconds"
      unless $self->retry_after =~ /^[1-9][0-9]*$/ && $self->retry_after > ONE_MINUTE;

    unless ( $self->cache ) {

        my $file = $self->file // die "No cache was set";

        # Cache::FastMmap is only a recommended prerequisite, so it is loaded
        # on demand rather than at compile time.
        load('Cache::FastMmap');

        my $cache = Cache::FastMmap->new(
            share_file  => "$file",
            init_file   => 1,
            serializer  => '',
            expire_time => ONE_MINUTE,
        );

        $self->cache(
            sub {
                my ($ip) = @_;
                return $cache->get_and_set(
                    $ip,
                    sub {
                        my ( $key, $count, $opts ) = @_;
                        $count //= 0;

                        # Pass the existing expire_on back so that incrementing
                        # the counter does not change when the entry expires.
                        return ( $count + 1, { expire_on => $opts->{expire_on} } );
                    }
                );
            }
        );

    }

    my $match = Net::IP::Match::Trie->new;

    $self->_match( sub { return $match->match_ip(@_) } );

    my @blocks;

    if ( my $greylist = $self->greylist ) {
        push @blocks, ( %{$greylist} );
    }

    $self->rules( my $rules = {} );

    # Symbolic rate names and the numeric rates that they stand for.
    my %codes = ( whitelist => -1, allowed => -1, blacklist => 0, rejected => 0, norobots => 0 );

    # Tracking types: "ip" counts each address separately (empty key, so that
    # call() falls back to the address), "netblock" counts the whole block
    # together. Any other value is used verbatim as a shared group name.
    my %types = ( ip => '', netblock => 1 );

    for my $line ( pairs @blocks ) {

        my ( $block, $rule ) = @{$line};
        $rule = [ split /\s+/, $rule ] unless is_plain_arrayref($rule);

        my ( $rate, $type ) = @{$rule};

        $type //= "ip";
        my $mask = $types{$type} // $type;
        $mask = $block if $mask eq "1";

        $rate //= "rejected";
        if ( exists $codes{$rate} ) {

            # Per-ip rules with a symbolic rate are keyed on the rate name;
            # call() relies on this to recognise "norobots" rules.
            $mask = $rate if $mask eq "";
            $rate = $codes{$rate};
        }

        $rules->{$block} = [ $rate, $mask ];
        $match->add( $block => [$block] );
    }

}

# Handles a request: looks up the rule for REMOTE_ADDR (falling back to the
# default rate), increments the hit counter through the cache callback, and
# either passes the request to the wrapped application or returns an error
# response.
#
# Returns the wrapped application's response when the request is allowed, a
# 403 response for blocks with a rate of 0, or a 429 response with a
# Retry-After header when a positive rate has been exceeded.
sub call {
    my ( $self, $env ) = @_;

    my $ip   = $env->{REMOTE_ADDR};
    my $name = $self->_match->($ip);
    my $rule = $name ? $self->rules->{$name} : [ $self->default_rate ];

    my $rate = $rule->[0];

    # "norobots" blocks are rejected, except for requests to /robots.txt.
    if ( $rate == 0 && $rule->[1] && $rule->[1] eq "norobots" ) {
        if ( $env->{PATH_INFO} eq "/robots.txt" ) {
            $rate = ONE_MINUTE;    # one request/second
        }
    }

    # A negative rate means that the client is never limited (and not counted).
    return $self->app->($env) if $rate < 0;

    # The counter is keyed on the rule's tracking key when it has one, or on
    # the client address otherwise.
    my ($hits) = $self->cache->( $rule->[1] || $ip );

    return $self->app->($env) unless $hits > $rate;

    my $block = $name || "default";
    my $msg   = "Rate limiting ${ip} after ${hits}/${rate} for ${block}";

    if ( my $log = $env->{'psgix.logger'} ) {
        $log->( { message => $msg, level => 'warn' } );
    }
    else {
        $env->{'psgi.errors'}->print($msg);
    }

    # PSGI requires a Content-Type header on these responses.
    if ( $rate == 0 ) {
        return [ HTTP_FORBIDDEN, [ "Content-Type" => "text/plain" ], ["Forbidden"] ];
    }

    return [
        HTTP_TOO_MANY_REQUESTS,
        [
            "Content-Type" => "text/plain",
            "Retry-After"  => $self->retry_after,
        ],
        ["Too Many Requests"]
    ];
}


1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Plack::Middleware::Greylist - throttle requests with different rates based on net blocks

=head1 VERSION

version v0.5.0

=head1 SYNOPSIS

  use Plack::Builder;

  builder {

    enable "Greylist",
      file         => sprintf('/run/user/%u/greylist', $>), # cache file
      default_rate => 250,
      greylist     => {
          '192.168.0.0/24' => 'whitelist',
          '172.16.1.0/25'  => [ 100, 'netblock' ],
      };

  }

=head1 DESCRIPTION

This middleware will apply rate limiting to requests, depending on the requestor netblock.

Hosts that exceed their configured per-minute request limit will be rejected with HTTP 429 errors.

=head2 Log Messages

Rejections will be logged with a message of the form

  Rate limiting $ip after $hits/$rate for $netblock

for example,

  Rate limiting 172.16.0.10 after 251/250 for 172.16.0.0/24

Note that the C<$netblock> for the default rate is simply "default", e.g.

  Rate limiting 192.168.0.12 after 101/100 for default

This will allow you to use something like L<fail2ban> to block repeat offenders, since bad
robots are like houseflies that repeatedly bump against closed windows.

=head1 ATTRIBUTES

=head2 default_rate

This is the default maximum number of hits per minute before requests are rejected, for any request not in the L</greylist>.

Omitting it will disable the global rate.

=head2 retry_after

This sets the C<Retry-After> header value, in seconds. It defaults to 61 seconds, which is the minimum allowed value.

Note that this does not enforce that a client has waited that amount of time before making a new request, as long as the
number of hits per minute is within the allowed rate.

=head2 greylist

This is a hash reference to the greylist configuration.

The keys are network blocks, and the values are an array reference of rates and the tracking type. (A string of space-
separated values can be used instead, to make it easier to directly use the configuration from something like
L<Config::General>.)

The rates are either the maximum number of requests per minute, or "whitelist" or "allowed" to not limit the network
block, or "blacklist" or "rejected" to always forbid a network block.

(The rate "-1" corresponds to "allowed", and the rate "0" corresponds to "rejected".)

A special rate code of "norobots" will reject all requests except for F</robots.txt>, which is allowed at a rate of 60
per minute. This will allow you to block a robot but still allow the robot to access the robot rules that say it is
disallowed.

The tracking type defaults to "ip", which applies limits to individual ips. You can also use "netblock" to apply the
limits to all hosts in that network block, or use a name so that limits are applied to all hosts in network blocks
with that name.

For example:

  {
      '127.0.0.1/32' => 'whitelist',

      '192.168.1.0/24' => 'blacklist',

      '192.168.2.0/24' => [ 100, 'ip' ],

      '192.168.3.0/24' => [ 60, 'netblock' ],

      # All requests from these blocks will be limited collectively

      '10.0.0.0/16'   => [ 60, 'group1' ],
      '172.16.0.0/16' => [ 60, 'group1' ],
  }

Note: the network blocks shown above are examples only.

The limit may be larger than L</default_rate>, to allow hosts to exceed the default limit.

=head2 file

This is the path of the throttle count file used by the L</cache>.

It is required unless you are defining your own L</cache>.

=head2 cache

This is a code reference to a function that increments the cache counter for a key (usually the IP address or net
block).

If you customise this, then you need to ensure that the counter resets or expires counts after a set period of time,
e.g. one minute. If you use a different time interval, then you may need to adjust the L</retry_after> time.

=head1 KNOWN ISSUES

This does not try to enforce any consistency or block overlapping netblocks. It trusts L<Net::IP::Match::Trie> to
handle any overlapping or conflicting network ranges, or to specify exceptions for larger blocks.

When configuring the L</greylist> netblocks from a configuration file using L<Config::General>, duplicate netblocks may
be merged in unexpected ways, for example

  10.0.0.0/16 60 group-1

  ...

  10.0.0.0/16 120 group-2

may be merged as something like

  '10.0.0.0/16' => [ '60 group-1', '120 group-2' ],

Some search engine robots may not respect HTTP 429 responses, and will treat these as errors. You may want to make an
exception for trusted networks that gives them a higher rate than the default.

This does not enforce consistent rates for named blocks. For example, if you specified

  '10.0.0.0/16'   => [ 60,  'named-group' ],
  '172.16.0.0/16' => [ 100, 'named-group' ],

Requests from both netblocks would be counted together, but requests from the 10.0.0.0/16 netblock would be rejected
after 60 requests. This is probably not something that you want.

=head1 SUPPORT FOR OLDER PERL VERSIONS

This module requires Perl v5.12 or later.

Future releases may only support Perl versions released in the last ten years.

=head1 SOURCE

The development version is on github at L<https://github.com/robrwo/Plack-Middleware-Greylist>
and may be cloned from L<git://github.com/robrwo/Plack-Middleware-Greylist.git>

=head1 BUGS

Please report any bugs or feature requests on the bugtracker website
L<https://github.com/robrwo/Plack-Middleware-Greylist/issues>

When submitting a bug or request, please include a test-file or a
patch to an existing test-file that illustrates the bug or desired
feature.

=head1 AUTHOR

Robert Rothenberg <rrwo@cpan.org>

The initial development of this module was sponsored by Science Photo
Library L<https://www.sciencephoto.com>.

=head1 CONTRIBUTOR

=for stopwords Gabor Szabo

Gabor Szabo <gabor@szabgab.com>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2022-2023 by Robert Rothenberg.

This is free software, licensed under:

  The Artistic License 2.0 (GPL Compatible)

=cut