line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MyCPAN::Indexer::Worker; |
2
|
1
|
|
|
1
|
|
1447
|
use strict; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
35
|
|
3
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
31
|
|
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
6
|
use parent qw(MyCPAN::Indexer::Component); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
6
|
1
|
|
|
1
|
|
71
|
use vars qw($VERSION $logger); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
73
|
|
7
|
|
|
|
|
|
|
$VERSION = '1.28_12'; |
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
6
|
use Cwd; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
82
|
|
10
|
1
|
|
|
1
|
|
5
|
use File::Basename; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
76
|
|
11
|
1
|
|
|
1
|
|
5
|
use File::Spec::Functions qw(catfile); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
42
|
|
12
|
1
|
|
|
1
|
|
6
|
use Log::Log4perl; |
|
1
|
|
|
|
|
45
|
|
|
1
|
|
|
|
|
8
|
|
13
|
1
|
|
|
1
|
|
83
|
use MyCPAN::Indexer; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
use Proc::ProcessTable; |
15
|
|
|
|
|
|
|
use YAML; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 NAME |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
MyCPAN::Indexer::Worker - Do the indexing |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
Use this in backpan_indexer.pl by specifying it as the queue class: |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# in backpan_indexer.config |
26
|
|
|
|
|
|
|
worker_class MyCPAN::Indexer::Worker |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 DESCRIPTION |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
This class takes a distribution and analyses it. This is what the dispatcher |
31
|
|
|
|
|
|
|
hands a disribution to for the actual indexing. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head2 Methods |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=over 4 |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=item get_task |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
C sets the C key in the notes. The |
40
|
|
|
|
|
|
|
value is a code reference that takes a distribution path as its only |
41
|
|
|
|
|
|
|
argument and indexes that distribution. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
See L for details about what C expects |
44
|
|
|
|
|
|
|
and should do. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=cut |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
BEGIN { |
49
|
|
|
|
|
|
|
$logger = Log::Log4perl->get_logger( 'Worker' ); |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=item component_type |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
This is a worker component. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=cut |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub component_type { $_[0]->worker_type } |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=item get_task |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=cut |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
sub get_task |
65
|
|
|
|
|
|
|
{ |
66
|
|
|
|
|
|
|
my( $self ) = @_; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
my $config = $self->get_config; |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
my $coordinator = $self->get_coordinator; |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
my $indexer = $coordinator->get_component( 'indexer' ); |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
$logger->debug( "Worker class is " . __PACKAGE__ ); |
75
|
|
|
|
|
|
|
$logger->debug( "Indexer class is " . $indexer->class ); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
my $child_task = sub { |
78
|
|
|
|
|
|
|
my $dist = shift; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
my $dist_basename = basename( $dist ); |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
my $basename = $coordinator->get_reporter->check_for_previous_successful_result( $dist ); |
83
|
|
|
|
|
|
|
$logger->debug( "Found successful report for $dist_basename" ) unless $basename; |
84
|
|
|
|
|
|
|
return bless { |
85
|
|
|
|
|
|
|
dist_info => { |
86
|
|
|
|
|
|
|
dist_path => $dist, |
87
|
|
|
|
|
|
|
dist_basename => $dist_basename |
88
|
|
|
|
|
|
|
}, |
89
|
|
|
|
|
|
|
skipped => 1, |
90
|
|
|
|
|
|
|
}, $indexer->class unless $basename; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
my $previous_error_basename = $coordinator->get_reporter->check_for_previous_error_result( $dist ) || ''; |
93
|
|
|
|
|
|
|
$logger->debug( "Error report returned [$previous_error_basename]" ); |
94
|
|
|
|
|
|
|
$logger->debug( "Found error report for $dist_basename" ) if $previous_error_basename; |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# we used to handle this by just deleting all the old error |
97
|
|
|
|
|
|
|
# reports in setup_dirs over in MyCPAN::App::BackPAN::Indexer |
98
|
|
|
|
|
|
|
# deleting all the reports before we got started made it |
99
|
|
|
|
|
|
|
# impossible to get a list of error reports to retry |
100
|
|
|
|
|
|
|
if( $previous_error_basename and ! $config->retry_errors ) |
101
|
|
|
|
|
|
|
{ |
102
|
|
|
|
|
|
|
$logger->debug( "By config, skipping $dist because I'm not retrying errors" ); |
103
|
|
|
|
|
|
|
return bless { |
104
|
|
|
|
|
|
|
dist_info => { |
105
|
|
|
|
|
|
|
dist_path => $dist, |
106
|
|
|
|
|
|
|
dist_basename => $dist_basename, |
107
|
|
|
|
|
|
|
}, |
108
|
|
|
|
|
|
|
skip_error => 1, |
109
|
|
|
|
|
|
|
}, $self->get_config->indexer_class; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
elsif( $previous_error_basename and $config->retry_errors ) |
112
|
|
|
|
|
|
|
{ |
113
|
|
|
|
|
|
|
# if we are re-trying errors and there is already a report |
114
|
|
|
|
|
|
|
# unlink the previous report |
115
|
|
|
|
|
|
|
my $report_full_path = $coordinator->get_reporter->get_error_report_path( $dist ); |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
$logger->debug( "Trying to unlink $report_full_path" ); |
118
|
|
|
|
|
|
|
my $rc = unlink $report_full_path; |
119
|
|
|
|
|
|
|
$logger->debug( ($rc ? 'unlinked ' : 'failed to unlink ') . $report_full_path ); |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
$logger->info( "Starting Worker for $dist_basename\n" ); |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
my $starting_dir = cwd(); |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
unless( chdir $config->temp_dir ) |
127
|
|
|
|
|
|
|
{ |
128
|
|
|
|
|
|
|
$logger->error( "Could not change to " . $config->temp_dir . " : $!\n" ); |
129
|
|
|
|
|
|
|
exit 255; |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
$logger->debug( sprintf "Setting alarm for %d seconds", $config->alarm ); |
133
|
|
|
|
|
|
|
local $SIG{ALRM} = sub { |
134
|
|
|
|
|
|
|
$logger->info( "Alarm rang for $dist_basename in process $$!\n" ); |
135
|
|
|
|
|
|
|
$self->_cleanup_children; |
136
|
|
|
|
|
|
|
$logger->info( "Cleaned up" ); |
137
|
|
|
|
|
|
|
die; |
138
|
|
|
|
|
|
|
}; |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
local $SIG{CHLD} = 'IGNORE'; |
141
|
|
|
|
|
|
|
alarm( $config->alarm || 15 ); |
142
|
|
|
|
|
|
|
$logger->debug( "Examining $dist_basename" ); |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
my $info = do { |
145
|
|
|
|
|
|
|
unless( -e $dist ) |
146
|
|
|
|
|
|
|
{ |
147
|
|
|
|
|
|
|
$logger->warn( "Dist $dist does not exist" ); |
148
|
|
|
|
|
|
|
undef; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
elsif( ! -s $dist ) |
151
|
|
|
|
|
|
|
{ |
152
|
|
|
|
|
|
|
$logger->warn( "Dist $dist has zero size" ); |
153
|
|
|
|
|
|
|
my $info = bless {}, $self->get_config->indexer_class; |
154
|
|
|
|
|
|
|
$info->setup_dist_info( $dist ); |
155
|
|
|
|
|
|
|
$info->set_dist_info( 'unindexable', 'zero size' ); |
156
|
|
|
|
|
|
|
$info->setup_run_info; |
157
|
|
|
|
|
|
|
$info->set_run_info( qw(completed 1) ); |
158
|
|
|
|
|
|
|
$info; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
else |
161
|
|
|
|
|
|
|
{ |
162
|
|
|
|
|
|
|
$logger->warn( "Indexing $dist" ); |
163
|
|
|
|
|
|
|
eval { $indexer->run( $dist ) }; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
}; |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
$logger->debug( "Done examining $dist_basename" ); |
169
|
|
|
|
|
|
|
my $at = $@; chomp $at; |
170
|
|
|
|
|
|
|
alarm 0; |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
chdir $starting_dir; |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
unless( defined $info ) |
175
|
|
|
|
|
|
|
{ |
176
|
|
|
|
|
|
|
$logger->error( "Indexing failed for $dist_basename: $at" ); |
177
|
|
|
|
|
|
|
$info = bless {}, $self->get_config->indexer_class; # XXX TODO make this a real class |
178
|
|
|
|
|
|
|
$info->setup_dist_info( $dist ); |
179
|
|
|
|
|
|
|
$info->setup_run_info; |
180
|
|
|
|
|
|
|
$info->set_run_info( qw(completed 0) ); |
181
|
|
|
|
|
|
|
$info->set_run_info( error => $at ); |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
elsif( ! eval { $info->run_info( 'completed' ) } ) |
184
|
|
|
|
|
|
|
{ |
185
|
|
|
|
|
|
|
$logger->error( "Indexing for $dist_basename did not complete" ); |
186
|
|
|
|
|
|
|
$self->_copy_bad_dist( $info ) if $config->copy_bad_dists; |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
$self->_add_run_info( $info ); |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
$coordinator->get_note('reporter')->( $info ); |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
$logger->debug( "Worker for $dist_basename done" ); |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# some things hang anyway, so just to be careful we'll cleanup |
196
|
|
|
|
|
|
|
# everything here. |
197
|
|
|
|
|
|
|
$self->_cleanup_children; |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
$logger->debug( "Cleaned up, returning..." ); |
200
|
|
|
|
|
|
|
$info; |
201
|
|
|
|
|
|
|
}; |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
$coordinator->set_note( 'child_task', $child_task ); |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
1; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
sub _cleanup_children |
209
|
|
|
|
|
|
|
{ |
210
|
|
|
|
|
|
|
$logger->warn( "Cleaning up after $$" ); |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
my %children = |
213
|
|
|
|
|
|
|
map { $_->{pid}, 1 } |
214
|
|
|
|
|
|
|
grep { $_->{'ppid'} == $$ } |
215
|
|
|
|
|
|
|
@{ Proc::ProcessTable->new->table }; |
216
|
|
|
|
|
|
|
$logger->debug( "Child processes are @{[keys %children]}" ); |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
my @grandchildren = |
219
|
|
|
|
|
|
|
map { $_->{pid} } |
220
|
|
|
|
|
|
|
grep { exists $children{ $_->{'ppid'} } } |
221
|
|
|
|
|
|
|
@{ Proc::ProcessTable->new->table }; |
222
|
|
|
|
|
|
|
$logger->debug( "Grandchild processes are @grandchildren" ); |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
my @processes = ( keys %children, @grandchildren ); |
225
|
|
|
|
|
|
|
$logger->debug( "There are " . @processes . " processes to clean up" ); |
226
|
|
|
|
|
|
|
return unless @processes; |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
$logger->debug( "Preparing to kill" ); |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
kill 9, @processes; |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
return; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
sub _copy_bad_dist |
236
|
|
|
|
|
|
|
{ |
237
|
|
|
|
|
|
|
my( $self, $info ) = @_; |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
my $config = $self->get_config; |
240
|
|
|
|
|
|
|
my $bad_dist_dir = $config->copy_bad_dists; |
241
|
|
|
|
|
|
|
return unless $bad_dist_dir; |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
unless( -d $bad_dist_dir and mkdir $bad_dist_dir ) { |
244
|
|
|
|
|
|
|
$logger->error( "Could not make dist dir [$bad_dist_dir]: $!" ); |
245
|
|
|
|
|
|
|
return; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
my $dist_file = $info->dist_info( 'dist_file' ); |
249
|
|
|
|
|
|
|
my $basename = $info->dist_info( 'dist_basename' ); |
250
|
|
|
|
|
|
|
my $new_name = catfile( $bad_dist_dir, $basename ); |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
unless( -e $new_name ) |
253
|
|
|
|
|
|
|
{ |
254
|
|
|
|
|
|
|
$logger->debug( "Copying bad dist" ); |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
my( $in, $out ); |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
unless( open $out, ">", $new_name ) |
259
|
|
|
|
|
|
|
{ |
260
|
|
|
|
|
|
|
$logger->fatal( "Could not copy bad dist to $new_name: $!" ); |
261
|
|
|
|
|
|
|
return; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
unless( open $in, "<", $dist_file ) |
265
|
|
|
|
|
|
|
{ |
266
|
|
|
|
|
|
|
$logger->fatal( "Could not open bad dist to $dist_file: $!" ); |
267
|
|
|
|
|
|
|
return; |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
while( <$in> ) { print { $out } $_ } |
271
|
|
|
|
|
|
|
close $in; |
272
|
|
|
|
|
|
|
close $out; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
sub _add_run_info |
277
|
|
|
|
|
|
|
{ |
278
|
|
|
|
|
|
|
my( $self, $info ) = @_; |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
my $config = $self->get_config; |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
return unless eval { $info->can( 'set_run_info' ) }; |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
$info->set_run_info( $_, $config->get( $_ ) ) |
285
|
|
|
|
|
|
|
foreach ( $config->directives ); |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
$info->set_run_info( 'uuid', $self->get_note( 'UUID' ) ); |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
$info->set_run_info( 'child_pid', $$ ); |
290
|
|
|
|
|
|
|
$info->set_run_info( 'parent_pid', eval { $config->indexer_class->getppid } ); |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
$info->set_run_info( 'ENV', \%ENV ); |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
return 1; |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=back |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=head1 SEE ALSO |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
MyCPAN::Indexer, MyCPAN::Indexer::Tutorial |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=head1 SOURCE AVAILABILITY |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
This code is in Github: |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
git://github.com/briandfoy/mycpan-indexer.git |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=head1 AUTHOR |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
brian d foy, C<< >> |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
Copyright (c) 2008-2013, brian d foy, All Rights Reserved. |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
You may redistribute this under the same terms as Perl itself. |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
=cut |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
1; |