| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package WWW::RobotRules::AnyDBM_File; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | require  WWW::RobotRules; | 
| 4 |  |  |  |  |  |  | @ISA = qw(WWW::RobotRules); | 
| 5 |  |  |  |  |  |  | $VERSION = "6.00"; | 
| 6 |  |  |  |  |  |  |  | 
| 7 | 1 |  |  | 1 |  | 567 | use Carp (); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 19 |  | 
| 8 | 1 |  |  | 1 |  | 781 | use AnyDBM_File; | 
|  | 1 |  |  |  |  | 5382 |  | 
|  | 1 |  |  |  |  | 54 |  | 
| 9 | 1 |  |  | 1 |  | 8 | use Fcntl; | 
|  | 1 |  |  |  |  | 6 |  | 
|  | 1 |  |  |  |  | 347 |  | 
| 10 | 1 |  |  | 1 |  | 6 | use strict; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 1247 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | =head1 NAME | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | WWW::RobotRules::AnyDBM_File - Persistent RobotRules | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | require WWW::RobotRules::AnyDBM_File; | 
| 19 |  |  |  |  |  |  | require LWP::RobotUA; | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | # Create a robot useragent that uses a diskcaching RobotRules | 
| 22 |  |  |  |  |  |  | my $rules = WWW::RobotRules::AnyDBM_File->new( 'my-robot/1.0', 'cachefile' ); | 
| 23 |  |  |  |  |  |  | my $ua = WWW::RobotUA->new( 'my-robot/1.0', 'me@foo.com', $rules ); | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | # Then just use $ua as usual | 
| 26 |  |  |  |  |  |  | $res = $ua->request($req); | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | This is a subclass of I that uses the AnyDBM_File | 
| 31 |  |  |  |  |  |  | package to implement persistent diskcaching of F and host | 
| 32 |  |  |  |  |  |  | visit information. | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | The constructor (the new() method) takes an extra argument specifying | 
| 35 |  |  |  |  |  |  | the name of the DBM file to use.  If the DBM file already exists, then | 
| 36 |  |  |  |  |  |  | you can specify undef as agent name as the name can be obtained from | 
| 37 |  |  |  |  |  |  | the DBM database. | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | =cut | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | sub new | 
| 42 |  |  |  |  |  |  | { | 
| 43 | 4 |  |  | 4 | 1 | 857 | my ($class, $ua, $file) = @_; | 
| 44 | 4 | 50 |  |  |  | 12 | Carp::croak('WWW::RobotRules::AnyDBM_File filename required') unless $file; | 
| 45 |  |  |  |  |  |  |  | 
| 46 | 4 |  |  |  |  | 18 | my $self = bless { }, $class; | 
| 47 | 4 |  |  |  |  | 20 | $self->{'filename'} = $file; | 
| 48 | 4 | 50 |  |  |  | 7 | tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_CREAT|O_RDWR, 0640 | 
|  | 4 |  |  |  |  | 444 |  | 
| 49 |  |  |  |  |  |  | or Carp::croak("Can't open $file: $!"); | 
| 50 |  |  |  |  |  |  |  | 
| 51 | 4 | 100 |  |  |  | 15 | if ($ua) { | 
| 52 | 2 |  |  |  |  | 8 | $self->agent($ua); | 
| 53 |  |  |  |  |  |  | } | 
| 54 |  |  |  |  |  |  | else { | 
| 55 |  |  |  |  |  |  | # Try to obtain name from DBM file | 
| 56 | 2 |  |  |  |  | 33 | $ua = $self->{'dbm'}{"|ua-name|"}; | 
| 57 | 2 | 100 |  |  |  | 472 | Carp::croak("No agent name specified") unless $ua; | 
| 58 |  |  |  |  |  |  | } | 
| 59 |  |  |  |  |  |  |  | 
| 60 | 3 |  |  |  |  | 10 | $self; | 
| 61 |  |  |  |  |  |  | } | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | sub agent { | 
| 64 | 5 |  |  | 5 | 1 | 16 | my($self, $newname) = @_; | 
| 65 | 5 |  |  |  |  | 63 | my $old = $self->{'dbm'}{"|ua-name|"}; | 
| 66 | 5 | 100 |  |  |  | 18 | if (defined $newname) { | 
| 67 | 2 |  |  |  |  | 20 | $newname =~ s!/?\s*\d+.\d+\s*$!!;  # loose version | 
| 68 | 2 | 50 | 66 |  |  | 19 | unless ($old && $old eq $newname) { | 
| 69 |  |  |  |  |  |  | # Old info is now stale. | 
| 70 | 2 |  |  |  |  | 6 | my $file = $self->{'filename'}; | 
| 71 | 2 |  |  |  |  | 3 | untie %{$self->{'dbm'}}; | 
|  | 2 |  |  |  |  | 46 |  | 
| 72 | 2 |  |  |  |  | 5 | tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_TRUNC|O_RDWR, 0640; | 
|  | 2 |  |  |  |  | 183 |  | 
| 73 | 2 |  |  |  |  | 4 | %{$self->{'dbm'}} = (); | 
|  | 2 |  |  |  |  | 17 |  | 
| 74 | 2 |  |  |  |  | 139 | $self->{'dbm'}{"|ua-name|"} = $newname; | 
| 75 |  |  |  |  |  |  | } | 
| 76 |  |  |  |  |  |  | } | 
| 77 | 5 |  |  |  |  | 15 | $old; | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | sub no_visits { | 
| 81 | 5 |  |  | 5 | 0 | 23 | my ($self, $netloc) = @_; | 
| 82 | 5 |  |  |  |  | 31 | my $t = $self->{'dbm'}{"$netloc|vis"}; | 
| 83 | 5 | 100 |  |  |  | 15 | return 0 unless $t; | 
| 84 | 4 |  |  |  |  | 29 | (split(/;\s*/, $t))[0]; | 
| 85 |  |  |  |  |  |  | } | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | sub last_visit { | 
| 88 | 2 |  |  | 2 | 0 | 13 | my ($self, $netloc) = @_; | 
| 89 | 2 |  |  |  |  | 11 | my $t = $self->{'dbm'}{"$netloc|vis"}; | 
| 90 | 2 | 50 |  |  |  | 13 | return undef unless $t; | 
| 91 | 2 |  |  |  |  | 13 | (split(/;\s*/, $t))[1]; | 
| 92 |  |  |  |  |  |  | } | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | sub fresh_until { | 
| 95 | 6 |  |  | 6 | 0 | 17 | my ($self, $netloc, $fresh) = @_; | 
| 96 | 6 |  |  |  |  | 79 | my $old = $self->{'dbm'}{"$netloc|exp"}; | 
| 97 | 6 | 100 |  |  |  | 25 | if ($old) { | 
| 98 | 3 |  |  |  |  | 14 | $old =~ s/;.*//;  # remove cleartext | 
| 99 |  |  |  |  |  |  | } | 
| 100 | 6 | 100 |  |  |  | 18 | if (defined $fresh) { | 
| 101 | 2 |  |  |  |  | 203 | $fresh .= "; " . localtime($fresh); | 
| 102 | 2 |  |  |  |  | 52 | $self->{'dbm'}{"$netloc|exp"} = $fresh; | 
| 103 |  |  |  |  |  |  | } | 
| 104 | 6 |  |  |  |  | 21 | $old; | 
| 105 |  |  |  |  |  |  | } | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | sub visit { | 
| 108 | 4 |  |  | 4 | 0 | 27 | my($self, $netloc, $time) = @_; | 
| 109 | 4 |  | 66 |  |  | 15 | $time ||= time; | 
| 110 |  |  |  |  |  |  |  | 
| 111 | 4 |  |  |  |  | 5 | my $count = 0; | 
| 112 | 4 |  |  |  |  | 22 | my $old = $self->{'dbm'}{"$netloc|vis"}; | 
| 113 | 4 | 100 |  |  |  | 12 | if ($old) { | 
| 114 | 2 |  |  |  |  | 3 | my $last; | 
| 115 | 2 |  |  |  |  | 9 | ($count,$last) = split(/;\s*/, $old); | 
| 116 | 2 | 100 |  |  |  | 9 | $time = $last if $last > $time; | 
| 117 |  |  |  |  |  |  | } | 
| 118 | 4 |  |  |  |  | 5 | $count++; | 
| 119 | 4 |  |  |  |  | 148 | $self->{'dbm'}{"$netloc|vis"} = "$count; $time; " . localtime($time); | 
| 120 |  |  |  |  |  |  | } | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | sub push_rules { | 
| 123 | 4 |  |  | 4 | 0 | 21 | my($self, $netloc, @rules) = @_; | 
| 124 | 4 |  |  |  |  | 6 | my $cnt = 1; | 
| 125 | 4 |  |  |  |  | 143 | $cnt++ while $self->{'dbm'}{"$netloc|r$cnt"}; | 
| 126 |  |  |  |  |  |  |  | 
| 127 | 4 |  |  |  |  | 24 | foreach (@rules) { | 
| 128 | 6 |  |  |  |  | 114 | $self->{'dbm'}{"$netloc|r$cnt"} = $_; | 
| 129 | 6 |  |  |  |  | 26 | $cnt++; | 
| 130 |  |  |  |  |  |  | } | 
| 131 |  |  |  |  |  |  | } | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | sub clear_rules { | 
| 134 | 4 |  |  | 4 | 0 | 21 | my($self, $netloc) = @_; | 
| 135 | 4 |  |  |  |  | 8 | my $cnt = 1; | 
| 136 | 4 |  |  |  |  | 43 | while ($self->{'dbm'}{"$netloc|r$cnt"}) { | 
| 137 | 4 |  |  |  |  | 133 | delete $self->{'dbm'}{"$netloc|r$cnt"}; | 
| 138 | 4 |  |  |  |  | 32 | $cnt++; | 
| 139 |  |  |  |  |  |  | } | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  |  | 
| 142 |  |  |  |  |  |  | sub rules { | 
| 143 | 4 |  |  | 4 | 0 | 18 | my($self, $netloc) = @_; | 
| 144 | 4 |  |  |  |  | 8 | my @rules = (); | 
| 145 | 4 |  |  |  |  | 4 | my $cnt = 1; | 
| 146 | 4 |  |  |  |  | 6 | while (1) { | 
| 147 | 12 |  |  |  |  | 63 | my $rule = $self->{'dbm'}{"$netloc|r$cnt"}; | 
| 148 | 12 | 100 |  |  |  | 35 | last unless $rule; | 
| 149 | 8 |  |  |  |  | 14 | push(@rules, $rule); | 
| 150 | 8 |  |  |  |  | 12 | $cnt++; | 
| 151 |  |  |  |  |  |  | } | 
| 152 | 4 |  |  |  |  | 127 | @rules; | 
| 153 |  |  |  |  |  |  | } | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | sub dump | 
| 156 | 0 |  |  | 0 | 0 |  | { | 
| 157 |  |  |  |  |  |  | } | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  | 1; | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | L, L | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | =head1 AUTHORS | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | Hakan Ardo Ehakan@munin.ub2.lu.se>, Gisle Aas Eaas@sn.no> | 
| 168 |  |  |  |  |  |  |  | 
| 169 |  |  |  |  |  |  | =cut | 
| 170 |  |  |  |  |  |  |  |