line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WWW::Spider; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
WWW::Spider - flexible Internet spider for fetching and analyzing websites |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 VERSION |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
This document describes C version 0.01_10 |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 SYNOPSIS |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
#configuration |
14
|
|
|
|
|
|
|
my $spider=new WWW::Spider; |
15
|
|
|
|
|
|
|
$spider=new WWW::Spider({UASTRING=>"mybot"}); |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
print $spider->uastring; |
18
|
|
|
|
|
|
|
$spider->uastring('New UserAgent String'); |
19
|
|
|
|
|
|
|
$spider->user_agent(new LWP::UserAgent); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
#basic stuff |
22
|
|
|
|
|
|
|
print $spider->get_page_response('http://search.cpan.org/') -> content; |
23
|
|
|
|
|
|
|
print $spider->get_page_content('http://search.cpan.org/'); |
24
|
|
|
|
|
|
|
$spider->get_links_from('http://google.com/'); #get array of URLs |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#registering hooks |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
#crawling |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 DESCRIPTION |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
WWW::Spider is a customizable Internet spider intended to be used for |
33
|
|
|
|
|
|
|
fetching and analyzing websites. Features include: |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=over |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=item * basic methods for high-level html handling |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=item * the manner in which pages are retrieved is customizable |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=item * callbacks for when pages are fetched, errors caused, etc... |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=item * caching |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=item * thread-safe operation, and optional multithreading operation |
46
|
|
|
|
|
|
|
(faster) |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=item * a high-level implementation of a 'graph' of either pages or |
49
|
|
|
|
|
|
|
sites (as defined by the callback) which can be analyzed |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=back |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=cut |
54
|
|
|
|
|
|
|
|
55
|
1
|
|
|
1
|
|
28327
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
57
|
|
56
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
31
|
|
57
|
|
|
|
|
|
|
|
58
|
1
|
|
|
1
|
|
2964
|
use threads; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
use Carp; |
60
|
|
|
|
|
|
|
use LWP::UserAgent; |
61
|
|
|
|
|
|
|
use HTTP::Request; |
62
|
|
|
|
|
|
|
use Thread::Queue; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
use WWW::Spider::Graph; |
65
|
|
|
|
|
|
|
use WWW::Spider::Hooklist; |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
use vars qw( $VERSION ); |
68
|
|
|
|
|
|
|
$VERSION = '0.01_10'; |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=pod |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=head1 FUNCTIONS |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head2 PARAMETERS |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
Parameter getting and setting functions. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=over |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=item new WWW::Spider([%params]) |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Constructor for C |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=cut |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub new { |
87
|
|
|
|
|
|
|
my $class=shift; |
88
|
|
|
|
|
|
|
my $self={}; |
89
|
|
|
|
|
|
|
my $params=shift || {}; |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=pod |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Arguments include: |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=over |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=item * UASTRING |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
The useragent string to be used. The default is "WWW::Spider" |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=cut |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
my $uastring=$params->{UASTRING} || 'WWW::Spider'; |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=pod |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=item * USER_AGENT |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
The LWP::UserAgent to use. If this is specified, the UASTRING |
110
|
|
|
|
|
|
|
argument is ignored. |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=cut |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
my $ua=new LWP::UserAgent; |
115
|
|
|
|
|
|
|
$ua->agent($uastring); |
116
|
|
|
|
|
|
|
$ua=$params->{USER_AGENT} || $ua; |
117
|
|
|
|
|
|
|
$self->{USER_AGENT}=$ua; |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
$self->{HOOKS}=new WWW::Spider::Hooklist(['']); |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
bless $self,$class; |
122
|
|
|
|
|
|
|
return $self; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=pod |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=back |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=item ->user_agent [LWP::UserAgent] |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Returns/sets the user agent being used by this object. |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=cut |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
sub user_agent { |
136
|
|
|
|
|
|
|
my $self=shift; |
137
|
|
|
|
|
|
|
my $original=$self->{USER_AGENT}; |
138
|
|
|
|
|
|
|
$self->{USER_AGENT}=$_[0] if exists $_[0]; |
139
|
|
|
|
|
|
|
return $original |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=pod |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=item ->uastring [STRING] |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
Returns/sets the user agent string being used by this object. |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=cut |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
sub uastring { |
151
|
|
|
|
|
|
|
my $self=shift; |
152
|
|
|
|
|
|
|
return $self->{USER_AGENT}->agent($_[0]); |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=pod |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=back |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 GENERAL |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
These functions could be implemented anywhere - nothing about what |
162
|
|
|
|
|
|
|
they do is special do WWW::Spider. Mainly, they are just conveiniance |
163
|
|
|
|
|
|
|
functions for the rest of the code. |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=over |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=item ->get_page_content URL |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Returns the contents of the page at URL. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=cut |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
sub get_page_content { |
174
|
|
|
|
|
|
|
my ($self,$url)=@_; |
175
|
|
|
|
|
|
|
return $self->get_page_response($url)->content; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=pod |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=item ->get_page_response URL |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Returns the HTTP::Response object corresponding to URL |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=cut |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub get_page_response { |
187
|
|
|
|
|
|
|
my ($self,$url)=@_; |
188
|
|
|
|
|
|
|
return $self->{USER_AGENT}->get($url); |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=pod |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=back |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head2 SPIDER |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
These functions implement the spider functionality. |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=over |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=item ->crawl URL MAX_DEPTH |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
Crawls URL to the specified maxiumum depth. This is implemented as a |
204
|
|
|
|
|
|
|
breadth-first search. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
The default value for MAX_DEPTH is 0. |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=cut |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
sub crawl { |
211
|
|
|
|
|
|
|
(my $self,my $url,my $max_depth)=@_; |
212
|
|
|
|
|
|
|
$max_depth=$max_depth || 0; |
213
|
|
|
|
|
|
|
croak "fatal: crawl called with empty url string" unless $url; |
214
|
|
|
|
|
|
|
my $response=$self->get_page_response($url); |
215
|
|
|
|
|
|
|
$self->handle_response($response); |
216
|
|
|
|
|
|
|
$self->crawl_content($response->content,$max_depth,$url); |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=pod |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=item ->handle_url URL |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
The same as C. |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=cut |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
sub handle_url { |
228
|
|
|
|
|
|
|
my ($self,$url)=@_; |
229
|
|
|
|
|
|
|
croak "fatal: handle_url called with empty url string" unless $url; |
230
|
|
|
|
|
|
|
$self->handle_response($self->get_page_response($url)); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=pod |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=item ->crawl_content STRING [$MAX_DEPTH] [$SOURCE] |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
Treats STRING as if it was encountered during a crawl, with a |
238
|
|
|
|
|
|
|
remaining maximum depth of MAX_DEPTH. The crawl is implemented as a |
239
|
|
|
|
|
|
|
breadth-first search using C. |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
The default value for MAX_DEPTH is 0. |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
The assumption is made that handlers have already been called on this |
244
|
|
|
|
|
|
|
page (otherwise, implementation would be impossible). |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=cut |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
sub crawl_content { |
249
|
|
|
|
|
|
|
(my $self,my $content,my $max_depth,my $source)=@_; |
250
|
|
|
|
|
|
|
croak "fatal: crawl_content called with empty content string" unless $content; |
251
|
|
|
|
|
|
|
$max_depth=$max_depth || 0; |
252
|
|
|
|
|
|
|
my %urls_done; |
253
|
|
|
|
|
|
|
$urls_done{$source}=1; |
254
|
|
|
|
|
|
|
my @links=$self->get_links_from_content($content,$source); |
255
|
|
|
|
|
|
|
my $q=new Thread::Queue(@links); |
256
|
|
|
|
|
|
|
my $depth=0; |
257
|
|
|
|
|
|
|
$q->enqueue('--'); |
258
|
|
|
|
|
|
|
while($q->pending()>0 and $max_depth>$depth) { |
259
|
|
|
|
|
|
|
my $link=$q->dequeue; |
260
|
|
|
|
|
|
|
if($link eq '--') { |
261
|
|
|
|
|
|
|
$depth++; |
262
|
|
|
|
|
|
|
$q->enqueue('--'); |
263
|
|
|
|
|
|
|
next; |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
next if $urls_done{$link}; |
266
|
|
|
|
|
|
|
my $response=$self->get_page_response($link); |
267
|
|
|
|
|
|
|
next unless $response->header('Content-type')=~/^text/; |
268
|
|
|
|
|
|
|
my $tmp_content=$response->content; |
269
|
|
|
|
|
|
|
$self->handle_response($response); |
270
|
|
|
|
|
|
|
$urls_done{$link}=1; |
271
|
|
|
|
|
|
|
@links=$self->get_links_from_content($tmp_content,$link); |
272
|
|
|
|
|
|
|
for my $a (@links) { |
273
|
|
|
|
|
|
|
next if $urls_done{$a}; |
274
|
|
|
|
|
|
|
$q->enqueue($a); |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=pod |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=item ->handle_response HTTP::RESPONSE |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Handles the HTTP reponse, calling the appropriate hooks, without |
284
|
|
|
|
|
|
|
crawling any other pages. |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=cut |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
sub handle_response { |
289
|
|
|
|
|
|
|
my ($self, $content)=@_; |
290
|
|
|
|
|
|
|
carp "warning: handle_response called with empty content string" unless $content; |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=pod |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=item ->get_links_from URL |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
Returns a list of URLs linked to from URL. |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=cut |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
sub get_links_from { |
302
|
|
|
|
|
|
|
my ($self,$url)=@_; |
303
|
|
|
|
|
|
|
croak "fatal: get_links_from called with empty url string" unless $url; |
304
|
|
|
|
|
|
|
return $self->get_links_from_content($self->get_page_content($url),$url); |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=pod |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=item ->get_links_from_content $CONTENT [$SOURCE] |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
Returns a list of URLs linked to in STRING. When a URL is discovered |
312
|
|
|
|
|
|
|
that is not complete, it is fixed by assuming that is was found on |
313
|
|
|
|
|
|
|
SOURCE. If there is no source page specified, bad URLs are treated as |
314
|
|
|
|
|
|
|
if they were linked to from http://localhost/. |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
SOURCE must be a valid and complete url. |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=cut |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
sub get_links_from_content { |
321
|
|
|
|
|
|
|
(my $self,my $content,my $source)=@_; |
322
|
|
|
|
|
|
|
croak "fatal: get_links_from_content called with empty content string" unless $content; |
323
|
|
|
|
|
|
|
my @list; |
324
|
|
|
|
|
|
|
my $domain="http://localhost/"; |
325
|
|
|
|
|
|
|
my $root="http://localhost/"; |
326
|
|
|
|
|
|
|
if($source) { |
327
|
|
|
|
|
|
|
$source=~/^(https?:\/\/[^\/]+\/)(.*)$/g; |
328
|
|
|
|
|
|
|
$domain=$1; |
329
|
|
|
|
|
|
|
$root=$1.$2; |
330
|
|
|
|
|
|
|
if($root=~/^(.+\/)[^\/]+$/g) { |
331
|
|
|
|
|
|
|
$root=$1; |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
while($content=~/]* )?href *= *\"([^\"]*)\"/msg) { |
335
|
|
|
|
|
|
|
my $partial=$2; |
336
|
|
|
|
|
|
|
my $url; |
337
|
|
|
|
|
|
|
if($partial=~/^http:\/\/.*\//) { |
338
|
|
|
|
|
|
|
$url=$partial; |
339
|
|
|
|
|
|
|
} elsif($partial=~/^http:\/\//) { |
340
|
|
|
|
|
|
|
$url=$partial."/"; |
341
|
|
|
|
|
|
|
} elsif($partial=~/^\/(.*)$/g) { |
342
|
|
|
|
|
|
|
$url=$domain.$1; |
343
|
|
|
|
|
|
|
} else { |
344
|
|
|
|
|
|
|
$url=$root.$partial; |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
push @list,$url; |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
return @list; |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=pod |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=back |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=head2 CALLBACKS AND HOOKS |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
All hook registration and deletion functions are considered atomic. |
358
|
|
|
|
|
|
|
If five hooks have been registered, and then all of them are deleted |
359
|
|
|
|
|
|
|
in one operation, there will be no page for which fewer than five but |
360
|
|
|
|
|
|
|
more than zero of those hooks are called (unless some hooks are added |
361
|
|
|
|
|
|
|
afterwords). |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
The legal hook strings are: |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=over |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=item * handle-page |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
Called whenever a crawlable page is reached. |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
Arguments: CONTENT, URL |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
Return: |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=item * handle-response |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
Called on an HTTP response, successfull, crawlable, or otherwise. |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Arguments: |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
Return: |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=item * handle-failure |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Called on any failed HTTP response. |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
Arguments: |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
Return: |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=back |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Functions for handling callbacks are: |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
=over |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=item ->call_hooks HOOK-STRING, @ARGS |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Calls all of the registered HOOK-STRING callbacks with @ARGS. This |
400
|
|
|
|
|
|
|
function returns a list of all of the return values (in some |
401
|
|
|
|
|
|
|
unspecified order) which are to be handled appropriately by the |
402
|
|
|
|
|
|
|
caller. |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
=cut |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
sub call_hooks { |
407
|
|
|
|
|
|
|
my ($self,$name,@args)=@_; |
408
|
|
|
|
|
|
|
my @list=$self->get_hooks($name); |
409
|
|
|
|
|
|
|
my @ret; |
410
|
|
|
|
|
|
|
for my $hook (@list) { |
411
|
|
|
|
|
|
|
push @ret,&$hook(@args); |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
return @ret; |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=pod |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=item ->register_hook HOOK-STRING, SUB, [{OPTIONS}] |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
Registers a subroutine to be run on HOOK-STRING. Has no return value. |
421
|
|
|
|
|
|
|
Valid options are: |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=over |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=item * FORK |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Set to a non-zero value if you want this hook to be run in a separate |
428
|
|
|
|
|
|
|
thread. This means that, among other things, the return value will |
429
|
|
|
|
|
|
|
not have the same affect (or even a well defined affect). |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=back |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=cut |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
sub register_hook { |
436
|
|
|
|
|
|
|
my ($self,$name,$hook,$options)=@_; |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=pod |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
=item ->get_hooks [HOOK-STRING] |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
Returns all hooks corresponding to HOOK-STRING. If HOOK-STRING is not |
444
|
|
|
|
|
|
|
given, returns all hooks. |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=cut |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
sub get_hooks { |
449
|
|
|
|
|
|
|
my ($self,$name)=@_; |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=pod |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=item ->clear_hooks [HOOK-STRING] |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
Removes all hooks corresponding to HOOK-STRING. If HOOK-STRING is not |
457
|
|
|
|
|
|
|
given, it deletes all hooks. |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=cut |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
sub clear_hooks { |
462
|
|
|
|
|
|
|
my ($self,$name)=@_; |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
1; |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
__END__ |