line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MyCPAN::Indexer::Queue; |
2
|
1
|
|
|
1
|
|
1179
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
28
|
|
3
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
25
|
|
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
6
|
use parent qw(MyCPAN::Indexer::Component); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
6
|
|
6
|
1
|
|
|
1
|
|
51
|
use vars qw($VERSION $logger); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
64
|
|
7
|
|
|
|
|
|
|
$VERSION = '1.28_12'; |
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
5
|
use File::Basename; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
65
|
|
10
|
1
|
|
|
1
|
|
5
|
use File::Find; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
57
|
|
11
|
1
|
|
|
1
|
|
5
|
use File::Find::Closures qw( find_by_regex ); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
78
|
|
12
|
1
|
|
|
1
|
|
5
|
use File::Path qw( mkpath ); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
51
|
|
13
|
1
|
|
|
1
|
|
5
|
use File::Spec::Functions qw( catfile rel2abs ); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
71
|
|
14
|
1
|
|
|
1
|
|
12
|
use Log::Log4perl; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
BEGIN { |
17
|
1
|
|
|
1
|
|
53
|
$logger = Log::Log4perl->get_logger( 'Queue' ); |
18
|
|
|
|
|
|
|
} |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 NAME |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
MyCPAN::Indexer::Queue - Find distributions to index |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 SYNOPSIS |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
Use this in backpan_indexer.pl by specifying it as the queue class: |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# in backpan_indexer.config |
29
|
|
|
|
|
|
|
queue_class MyCPAN::Indexer::Queue |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 DESCRIPTION |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This class returns a list of Perl distributions for the BackPAN |
34
|
|
|
|
|
|
|
indexer to process. |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head2 Methods |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=over 4 |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=item component_type |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
This is a queue type. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=cut |
45
|
|
|
|
|
|
|
|
46
|
0
|
|
|
0
|
1
|
|
# Report this component's type by delegating to the object's
# queue_type method.
sub component_type
	{
	my( $self ) = @_;

	return $self->queue_type;
	}
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=item get_queue |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
C<get_queue> sets the key C<queue> in the C<$Notes> hash reference. It |
51
|
|
|
|
|
|
|
finds all of the tarballs or zip archives under the directories |
52
|
|
|
|
|
|
|
named in C<backpan_dir> and C<merge_dirs> in the configuration. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
It specifically skips files that end in C<.txt.gz> or C<.data.gz> |
55
|
|
|
|
|
|
|
since PAUSE creates those meta files near the actual module |
56
|
|
|
|
|
|
|
installations. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
If the C<organize_dists> configuration value is true, it also copies |
59
|
|
|
|
|
|
|
any distributions it finds into a PAUSE-like structure using the |
60
|
|
|
|
|
|
|
value of the C<pause_id> configuration to create the path. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
This queue component tries to skip any distributions that already have |
63
|
|
|
|
|
|
|
a report to make the list of distributions to examine much shorter. It |
64
|
|
|
|
|
|
|
relies on the C<report_exists_already> method to decide which distributions to skip. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=cut |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Build the list of distributions to index and store it in the
# 'queue' note.
#
# Candidate directories are the configured backpan_dir plus the
# NUL-separated entries of merge_dirs. Paths that do not exist are
# logged as errors, anything that is not a directory is dropped, and
# the method dies (through the logger) when nothing is left.
#
# When organize_dists is true, every queued file whose path does not
# already look like authors/id/X/XX/NAME/ is copied under the first
# directory and the queue entry is replaced with whatever _copy_file
# returns.
#
# Returns 1.
sub get_queue
	{
	my( $self ) = @_;

	my @candidates = $self->get_config->backpan_dir;
	push @candidates, split /\x00/, $self->get_config->merge_dirs || '';

	for my $candidate ( @candidates )
		{
		next if -e $candidate;
		$logger->error( "Distribution source directory does not exist: [$candidate]" );
		}

	my @source_dirs = grep { -d } @candidates;
	unless( @source_dirs )
		{
		$logger->logdie( "No directories to index!" );
		}

	my $queue = $self->_get_file_list( @source_dirs );

	if( $self->get_config->organize_dists )
		{
		my $primary_dir = $source_dirs[0];
		$self->_setup_organize_dists( $primary_dir );

		# The pattern is assembled with catfile, so it depends on the
		# platform's path separator. It is interpolated as a regex below.
		my $pause_layout = catfile( qw( authors id (.) .. .+? ), '' );

		# $entry aliases the queue element, so assigning to it updates
		# the queue in place.
		for my $entry ( @$queue )
			{
			$logger->debug( "Processing $entry" );

			if( $entry =~ m|$pause_layout| )
				{
				next;
				}

			$logger->debug( "Copying $entry into PAUSE structure" );
			$entry = $self->_copy_file( $entry, $primary_dir );
			}
		}

	$self->set_note( 'queue', $queue );

	return 1;
	}
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# Find the distribution archives under @dirs and return an array
# reference of absolute paths that still need to be indexed.
#
# Files are collected with File::Find using a closure pair from
# File::Find::Closures that matches names ending in .tar.gz, .tgz, or
# .zip. Names ending in .data.gz or .txt.gz, or containing
# "02packages", are dropped, the survivors are made absolute, and
# anything for which report_exists_already returns true is skipped.
#
# Along the way it logs how many archives were found, how many remain,
# and how many previous success and error reports were counted.
sub _get_file_list
	{
	my( $self, @dirs ) = @_;

	$logger->debug( "Taking dists from [@dirs]" );
	my( $wanted, $reporter ) =
		File::Find::Closures::find_by_regex( qr/\.(?:(?:tar\.|t)gz|zip)$/ );

	find( $wanted, @dirs );

	# Ask the reporter once and reuse the list for both the count and
	# the filtering.
	my @found      = $reporter->();
	my $dist_count = @found;
	$logger->info( "Found $dist_count distributions to possibly index" );

	# The dots are escaped so that only literal ".data.gz" / ".txt.gz"
	# suffixes are skipped.
	my $files_to_examine = [
		grep { ! $self->report_exists_already( $_ ) }
		map  { rel2abs($_) }
		grep { ! /\.(?:data|txt)\.gz$/ and ! /02packages/ }
		@found
		];

	my $examine_count = @$files_to_examine;
	$logger->info( "Found $examine_count distributions to actually index" );

	my $success_reports = $self->success_report_count || 0;
	my $error_reports   = $self->error_report_count   || 0;

	# Whole-number percentage of the archives found. When nothing was
	# found this is 0, without dividing by zero or multiplying undef.
	my $percent_of_found = sub {
		my( $count ) = @_;
		return 0 unless $dist_count;
		return sprintf "%d", 100 * $count / $dist_count;
		};

	my $success_percent = $percent_of_found->( $success_reports );
	my $error_percent   = $percent_of_found->( $error_reports );

	$logger->info( "Found $success_reports previous success reports ($success_percent%)" );
	$logger->info( "Found $error_reports previous error reports ($error_percent%)" );

	return $files_to_examine;
	}
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=item report_exists_already( DIST ) |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
This method goes through this process to decide what to return: |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=over 4 |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item Return false if the C<fresh_start> configuration is true |
158
|
|
|
|
|
|
|
(so existing reports don't matter). |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=item Return true if there is a successful report already. |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=item Return false if C<retry_errors> is true. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=item Return true if there is already an error report. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=item Return false as the default case. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=back |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=cut |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
BEGIN {
# Running tallies shared by the three subs below. They start out
# undefined and are only ever incremented.
my $seen_success;
my $seen_error;

# Decide whether DIST can be skipped because a report already exists.
# Returns 1 when an existing success report (or, unless retry_errors
# is set, an existing error report) is found on disk, and 0 otherwise.
# A true fresh_start configuration short-circuits everything to 0.
sub report_exists_already
	{
	my( $self, $dist ) = @_;

	if( $self->get_config->fresh_start )
		{
		return 0;
		}

	my $reporter = $self->get_coordinator->get_component( 'reporter' );

	my $success_path = $reporter->get_successful_report_path( $dist );
	if( -e $success_path )
		{
		$seen_success++;
		return 1;
		}

	if( $self->get_config->retry_errors )
		{
		return 0;
		}

	my $error_path = $reporter->get_error_report_path( $dist );
	if( -e $error_path )
		{
		$seen_error++;
		return 1;
		}

	return 0;
	}

# Number of existing success reports seen so far (undef if none).
sub success_report_count { return $seen_success }

# Number of existing error reports seen so far (undef if none).
sub error_report_count { return $seen_error }
}
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# Create the PAUSE-style author directory under BASE_DIR for the
# configured pause_id (falling back to 'MYCPAN' when the configuration
# lookup dies or yields a false value). A failure to create the
# directory is logged as an error, not propagated. Always returns 1.
sub _setup_organize_dists
	{
	my( $self, $base_dir ) = @_;

	my $author = eval { $self->get_config->pause_id } || 'MYCPAN';

	# Both the path construction and the mkpath call stay inside the
	# eval so that an exception from either one is caught.
	eval {
		mkpath(
			catfile( $base_dir, $self->_path_parts( $author ) ),
			{ mode => 0775 },
			);
		};

	if( $@ )
		{
		$logger->error( "Could not create PAUSE author path for [$author]: $@" );
		}

	return 1;
	}
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# Return the relative PAUSE-style author path for a PAUSE ID, joined
# with catfile: authors/id/<first letter>/<first two letters>/<ID>.
# The first argument (the invocant) is not used.
sub _path_parts
	{
	my( $self, $pause_id ) = @_;

	my $first_letter = substr( $pause_id, 0, 1 );
	my $first_two    = substr( $pause_id, 0, 2 );

	return catfile( 'authors', 'id', $first_letter, $first_two, $pause_id );
	}
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# if there is an error with the copy, return the original file name |
226
|
|
|
|
|
|
|
# Copy FILE into the PAUSE-style author directory under BASE_DIR.
#
# The destination is BASE_DIR/authors/id/X/XX/<pause_id>/<basename of
# FILE>, made absolute. The pause_id comes from the configuration and
# falls back to 'MYCPAN'.
#
# Returns the destination path when the copy succeeds or when an
# identical file is already there, and the original FILE path when the
# copy fails (the failure is logged as an error).
sub _copy_file
	{
	my( $self, $file, $base_dir ) = @_;

	require File::Copy;

	my $pause_id = eval { $self->get_config->pause_id } || 'MYCPAN';

	my $basename = basename( $file );
	$logger->debug( "Need to copy file $basename into $pause_id" );

	my $new_name = rel2abs(
		catfile( $base_dir, $self->_path_parts( $pause_id ), $basename )
		);

	if( -e $new_name )
		{
		my $existing_md5 = $self->_file_md5( $new_name );

		# _file_md5 returns '' when it cannot open a file, so an empty
		# digest must not count as a match. When the contents really are
		# the same, skip the copy as the log message says.
		if( length $existing_md5 and $existing_md5 eq $self->_file_md5( $file ) )
			{
			$logger->debug( "Files [$file] and [$new_name] are the same. Not copying" );
			return $new_name;
			}
		}

	my $rc = File::Copy::copy( $file => $new_name );
	$logger->error( "Could not copy [$file] to [$new_name]: $!" )
		unless $rc;

	return $rc ? $new_name : $file;
	}
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
# Return the hexadecimal MD5 digest of FILE's contents, or the empty
# string when the file cannot be opened. The invocant is not used.
sub _file_md5
	{
	my( $self, $file ) = @_;

	require Digest::MD5;

	open my $fh, '<', $file or return '';

	# Digest the raw bytes. Without binmode, platforms that translate
	# line endings would hash something other than the file's contents.
	binmode $fh;

	my $digest = Digest::MD5->new->addfile( $fh )->hexdigest;
	close $fh;

	return $digest;
	}
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
1; |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=back |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=head1 SEE ALSO |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
MyCPAN::Indexer, MyCPAN::Indexer::Tutorial |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=head1 SOURCE AVAILABILITY |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
This code is in Github: |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
git://github.com/briandfoy/mycpan-indexer.git |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head1 AUTHOR |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
brian d foy, C<< >> |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
Copyright (c) 2008-2013, brian d foy, All Rights Reserved. |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
You may redistribute this under the same terms as Perl itself. |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=cut |