line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=head1 NAME |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
Schedule::SoftTime - Scheduling functions (designed) for link checking |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 SYNOPSIS |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
$sched = new Schedule::SoftTime, sched.db; |
8
|
|
|
|
|
|
|
$sched->schedule("last", 400); |
9
|
|
|
|
|
|
|
$sched->schedule("first", 200); |
10
|
|
|
|
|
|
|
my ($time, $name) = $sched->first_item(); |
11
|
|
|
|
|
|
|
"first" |
12
|
|
|
|
|
|
|
my ($time, $name) = $sched->next_item(); |
13
|
|
|
|
|
|
|
"last" |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 DESCRIPTION |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
This is a class to implement an `I'll get round to you when I can be |
18
|
|
|
|
|
|
|
bothered' scheduler. It's based on the queue system in our banks |
19
|
|
|
|
|
|
|
shops and some doctors I've been to. You turn up any time you want, |
20
|
|
|
|
|
|
|
but then you have to wait till everyone else who was there before you |
21
|
|
|
|
|
|
|
has been dealt with. The idea is to let the items being scheduled do |
22
|
|
|
|
|
|
|
so at any free time they wish and then worry about resource |
23
|
|
|
|
|
|
|
requirements later. If we can't handle some items when they were |
24
|
|
|
|
|
|
|
scheduled, they just queue until they can be handled. |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
The functions provided are |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
first_item() - give out the next object which should be checked if |
29
|
|
|
|
|
|
|
any (first on the queue) |
30
|
|
|
|
|
|
|
next_item() - give out the item after the last we gave out |
31
|
|
|
|
|
|
|
schedule(time, string) - schedule an object for testing |
32
|
|
|
|
|
|
|
unschedule(string) - unschedule an object |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
potentially you would want |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
schedule_priority - put an object in as soon as reasonable |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
(simulates an old person coming in and asking to skip to the |
39
|
|
|
|
|
|
|
front of the queue) |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
but we haven't implemented that... |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
To guarantee that eventually each queue member gets to the front, I |
44
|
|
|
|
|
|
|
suggest that you never schedule something in the past.. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
We allow prioritisation by putting identifiers in at whatever time they ask |
47
|
|
|
|
|
|
|
for. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
The time an object is scheduled for represents the first time it could |
50
|
|
|
|
|
|
|
be scheduled for checking. How close to reality it is depends on how |
51
|
|
|
|
|
|
|
bad the backlog is. We only allow one particular item to be scheduled |
52
|
|
|
|
|
|
|
per second. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
If you have sufficient resources, you should be able to clear the |
55
|
|
|
|
|
|
|
backlog no matter what and the schedule will match reality. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
If you are rude (always schedule identifiers for immediate checking) or |
58
|
|
|
|
|
|
|
underresourced this will degenerate to a queue in which the back end |
59
|
|
|
|
|
|
|
is a little disorganised (but in a helpful friendly kind of way). |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
If there is some level of lookahead into the queue (for example so |
62
|
|
|
|
|
|
|
that you can check identifiers on other sites whilst waiting for the |
63
|
|
|
|
|
|
|
longer robot exclusion period on one site), you should make sure that |
64
|
|
|
|
|
|
|
you don't make the situation of the first identifier worse. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 METHODS |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head2 new Schedule::SoftTime filename |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
The new function sets up a schedule object using the file given as an |
71
|
|
|
|
|
|
|
argument for it's storage. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=cut |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
package Schedule::SoftTime; |
76
|
|
|
|
|
|
|
$REVISION=q$Revision: 1.9 $ ; |
77
|
|
|
|
|
|
|
$VERSION='0.030'; |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
our ($silent); |
80
|
|
|
|
|
|
|
our ($no_warn); |
81
|
|
|
|
|
|
|
our ($verbose); |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
$silent=0 unless defined $silent; |
84
|
|
|
|
|
|
|
$no_warn=0 unless defined $no_warn; |
85
|
|
|
|
|
|
|
$verbose=0 unless defined $verbose; |
86
|
|
|
|
|
|
|
|
87
|
1
|
|
|
1
|
|
956
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
100
|
|
88
|
1
|
|
|
1
|
|
5
|
use Fcntl; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
405
|
|
89
|
1
|
|
|
1
|
|
2189
|
use DB_File; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
#FIXME. we should accept different options here in the new so that it |
92
|
|
|
|
|
|
|
#is possible to fail to create a schedule database. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
sub new ($$) { |
95
|
|
|
|
|
|
|
my $class=shift; |
96
|
|
|
|
|
|
|
my $filename=shift; |
97
|
|
|
|
|
|
|
my $self={}; |
98
|
|
|
|
|
|
|
my %hash; |
99
|
|
|
|
|
|
|
bless $self, $class; |
100
|
|
|
|
|
|
|
$self->{"schedule"} = tie %hash, DB_File, $filename, O_CREAT|O_RDWR, |
101
|
|
|
|
|
|
|
0666, $DB_BTREE |
102
|
|
|
|
|
|
|
or die "couldn't open $filename: " . $!; |
103
|
|
|
|
|
|
|
$self->{"sched_hash"} = \%hash; |
104
|
|
|
|
|
|
|
return $self; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
#$::verbose=1; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head2 schedule |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Schedule::SoftTime takes a identifier, and schedules it as soon after the time |
113
|
|
|
|
|
|
|
given as possible. We never schedule backwards in time.. That could |
114
|
|
|
|
|
|
|
be implemented by unscheduling then trying again with an earlier |
115
|
|
|
|
|
|
|
time.. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=cut |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub schedule { |
120
|
|
|
|
|
|
|
my $self=shift; |
121
|
|
|
|
|
|
|
my $time=shift; |
122
|
|
|
|
|
|
|
my $identifier=shift; |
123
|
|
|
|
|
|
|
my $hash=$self->{"sched_hash"}; |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
die "need to know when to schedule" unless defined $time; |
126
|
|
|
|
|
|
|
die "need an identifier to schedule" unless defined $identifier; |
127
|
|
|
|
|
|
|
print STDERR "trying to schedule $identifier at $time\n" |
128
|
|
|
|
|
|
|
if $verbose; |
129
|
|
|
|
|
|
|
while ( defined $self->{"sched_hash"}->{$time} ){ |
130
|
|
|
|
|
|
|
$time++; |
131
|
|
|
|
|
|
|
#in otherwords there is always a second between different |
132
|
|
|
|
|
|
|
#schedulings.. bit arbitrary huh? Well so is the resolution of |
133
|
|
|
|
|
|
|
#UNIX time. Don't blame me, just use a different kind of time. |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
$hash->{$time}=$identifier; |
136
|
|
|
|
|
|
|
print STDERR $hash->{$time}, |
137
|
|
|
|
|
|
|
" scheduled at $time (" . localtime($time) . ")\n" |
138
|
|
|
|
|
|
|
if $verbose; |
139
|
|
|
|
|
|
|
return $time; |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head2 unschedule |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Remove whatever identifier is in a schedule slot using the schedule time. |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=cut |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub unschedule { |
149
|
|
|
|
|
|
|
my $self=shift; |
150
|
|
|
|
|
|
|
my $time=shift; |
151
|
|
|
|
|
|
|
my $hash=$self->{"sched_hash"}; |
152
|
|
|
|
|
|
|
my $identifier=$hash->{$time}; |
153
|
|
|
|
|
|
|
if ( defined $identifier ) { |
154
|
|
|
|
|
|
|
print STDERR "using time $time (" . localtime($time) . |
155
|
|
|
|
|
|
|
") to unschedule $identifier\n" |
156
|
|
|
|
|
|
|
if $verbose; |
157
|
|
|
|
|
|
|
} else { |
158
|
|
|
|
|
|
|
print STDERR "no identifier scheduled at " . localtime($time) . |
159
|
|
|
|
|
|
|
" so can't unschedule\n" |
160
|
|
|
|
|
|
|
unless $no_warn; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
delete $hash->{$time}; |
163
|
|
|
|
|
|
|
return $identifier; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head2 first_item |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Give out the first item that should be scheduled (probably overdue) |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=cut |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
sub first_item { |
173
|
|
|
|
|
|
|
my $self=shift; |
174
|
|
|
|
|
|
|
my $key=0; #everything should be later than time 0 |
175
|
|
|
|
|
|
|
my $value=0; |
176
|
|
|
|
|
|
|
$self->{"schedule"}->seq($key, $value, R_CURSOR); |
177
|
|
|
|
|
|
|
if ($key==0) { |
178
|
|
|
|
|
|
|
carp "no entries in the schedule" unless $silent; |
179
|
|
|
|
|
|
|
return undef; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
$self->{"last_key"}=$key; |
182
|
|
|
|
|
|
|
print STDERR "Schedule first key: " . $key . " value: " . $value . "\n" |
183
|
|
|
|
|
|
|
if $verbose; |
184
|
|
|
|
|
|
|
return $key, $value; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=head2 next_item |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Give out the first item that should be scheduled (probably overdue) |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=cut |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
sub next_item { |
195
|
|
|
|
|
|
|
my $self=shift; |
196
|
|
|
|
|
|
|
my $key; |
197
|
|
|
|
|
|
|
my $value; |
198
|
|
|
|
|
|
|
my $stat=0; |
199
|
|
|
|
|
|
|
$key=$self->{"last_key"}; |
200
|
|
|
|
|
|
|
$key=0 unless defined $key; |
201
|
|
|
|
|
|
|
$key++; |
202
|
|
|
|
|
|
|
$stat=$self->{"schedule"}->seq( $key, $value, R_CURSOR); |
203
|
|
|
|
|
|
|
unless ($stat==0) { |
204
|
|
|
|
|
|
|
$self->{"last_key"}=$undef; |
205
|
|
|
|
|
|
|
print STDERR "Schedule didn't return a key\n" |
206
|
|
|
|
|
|
|
if $verbose; |
207
|
|
|
|
|
|
|
return undef; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
$self->{"last_key"}=$key; |
210
|
|
|
|
|
|
|
print STDERR "Schedule next key: " . $key . " value: " . $value . "\n" |
211
|
|
|
|
|
|
|
if $verbose; |
212
|
|
|
|
|
|
|
return $key, $value; |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=head1 THE FUTURE |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
What might be neat is an event dispatcher which interfaces with Cron. |
218
|
|
|
|
|
|
|
This would keep running when the next item in the schedule is within a |
219
|
|
|
|
|
|
|
few minutes, but would stop completely when there is a long time to |
220
|
|
|
|
|
|
|
wait and be restarted by cron. |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Also useful would be a way to create scheduled (sic) down time. This |
223
|
|
|
|
|
|
|
would allow us to not allow link checking during busy times of the |
224
|
|
|
|
|
|
|
day. A way to avoid a sudden start up at the time of the end of the |
225
|
|
|
|
|
|
|
sudden down time would also be useful. |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=cut |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
42; #bunny rabbits. Requires this. |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
|