| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package MyCPAN::Indexer::Queue; |
|
2
|
1
|
|
|
1
|
|
1179
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
28
|
|
|
3
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
25
|
|
|
4
|
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
6
|
use parent qw(MyCPAN::Indexer::Component); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
6
|
|
|
6
|
1
|
|
|
1
|
|
51
|
use vars qw($VERSION $logger); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
64
|
|
|
7
|
|
|
|
|
|
|
$VERSION = '1.28_12'; |
|
8
|
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
5
|
use File::Basename; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
65
|
|
|
10
|
1
|
|
|
1
|
|
5
|
use File::Find; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
57
|
|
|
11
|
1
|
|
|
1
|
|
5
|
use File::Find::Closures qw( find_by_regex ); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
78
|
|
|
12
|
1
|
|
|
1
|
|
5
|
use File::Path qw( mkpath ); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
51
|
|
|
13
|
1
|
|
|
1
|
|
5
|
use File::Spec::Functions qw( catfile rel2abs ); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
71
|
|
|
14
|
1
|
|
|
1
|
|
12
|
use Log::Log4perl; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
7
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
BEGIN { |
|
17
|
1
|
|
|
1
|
|
53
|
$logger = Log::Log4perl->get_logger( 'Queue' ); |
|
18
|
|
|
|
|
|
|
} |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 NAME |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
MyCPAN::Indexer::Queue - Find distributions to index |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
Use this in backpan_indexer.pl by specifying it as the queue class: |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# in backpan_indexer.config |
|
29
|
|
|
|
|
|
|
queue_class MyCPAN::Indexer::Queue |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This class returns a list of Perl distributions for the BackPAN |
|
34
|
|
|
|
|
|
|
indexer to process. |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head2 Methods |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=over 4 |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=item component_type |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
This is a queue type. |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=cut |
|
45
|
|
|
|
|
|
|
|
|
46
|
0
|
|
|
0
|
1
|
|
# Report this component's type by delegating to the object's queue_type method.
sub component_type {
    my( $self ) = @_;

    return $self->queue_type;
}
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=item get_queue |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
C<get_queue> sets the key C<queue> in the C<$Notes> hash reference. It |
|
51
|
|
|
|
|
|
|
finds all of the tarballs or zip archives under the directories |
|
52
|
|
|
|
|
|
|
named in C<backpan_dir> and C<merge_dirs> in the configuration. |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
It specifically skips files that end in C<.txt.gz> or C<.data.gz> |
|
55
|
|
|
|
|
|
|
since PAUSE creates those meta files near the actual module |
|
56
|
|
|
|
|
|
|
installations. |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
If the C<organize_dists> configuration value is true, it also copies |
|
59
|
|
|
|
|
|
|
any distributions it finds into a PAUSE-like structure using the |
|
60
|
|
|
|
|
|
|
value of the C<pause_id> configuration to create the path. |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
This queue component tries to skip any distributions that already have |
|
63
|
|
|
|
|
|
|
a report to make the list of distributions to examine much shorter. It |
|
64
|
|
|
|
|
|
|
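relies on the reporter component to say where a distribution's success and error reports would be (see C<report_exists_already>). |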
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=cut |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub get_queue {
    my( $self ) = @_;

    # Candidate source directories: backpan_dir first, then every
    # NUL-separated entry of merge_dirs (if any).
    my @candidates = ( $self->get_config->backpan_dir );
    push @candidates, split /\x00/, ( $self->get_config->merge_dirs || '' );

    # Complain about every missing candidate before filtering.
    foreach my $candidate ( @candidates ) {
        next if -e $candidate;
        $logger->error( "Distribution source directory does not exist: [$candidate]" );
    }

    # Only real directories can be searched; give up if none are left.
    my @dirs = grep { -d } @candidates;
    @dirs or $logger->logdie( "No directories to index!" );

    my $queue = $self->_get_file_list( @dirs );

    if( $self->get_config->organize_dists ) {
        # The first usable directory is where the PAUSE-like tree is built.
        my $pause_root = $dirs[0];
        $self->_setup_organize_dists( $pause_root );

        # Built with catfile so the separators are the platform's own; the
        # pieces are used as a regex and are not escaped. Presumably this
        # recognizes paths already inside an authors/id/X/XX/NAME/ layout.
        my $regex = catfile( qw( authors id (.) .. .+? ), '' );

        # $entry aliases the queue element, so assigning to it replaces
        # the queued path with whatever _copy_file returns.
        foreach my $entry ( @$queue ) {
            $logger->debug( "Processing $entry" );

            next if $entry =~ m|$regex|;
            $logger->debug( "Copying $entry into PAUSE structure" );

            $entry = $self->_copy_file( $entry, $pause_root );
        }
    }

    $self->set_note( 'queue', $queue );

    return 1;
}
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# Search the given directories for distribution archives (.tar.gz, .tgz,
# .zip) and return an array reference of absolute paths that still need
# to be indexed.
#
# Files ending in .data.gz or .txt.gz, and anything with "02packages" in
# its name, are dropped. So is every distribution for which
# report_exists_already() returns true. Counts of what was found, what is
# left, and how many previous reports exist are logged at info level.
sub _get_file_list {
    my( $self, @dirs ) = @_;

    $logger->debug( "Taking dists from [@dirs]" );

    # find_by_regex hands back a File::Find callback and a second closure
    # that reports what the callback collected.
    my( $wanted, $reporter ) =
        File::Find::Closures::find_by_regex( qr/\.(?:(?:tar\.|t)gz|zip)$/ );

    find( $wanted, @dirs );

    # Ask the reporter once and reuse the list for both the count and the
    # filtering below.
    my @found      = $reporter->();
    my $dist_count = @found;
    $logger->info( "Found $dist_count distributions to possibly index" );

    my $files_to_examine = [
        grep { ! $self->report_exists_already( $_ ) }
        map  { rel2abs( $_ ) }
        grep { ! /\.(?:data|txt)\.gz$/ and ! /02packages/ }
        @found
    ];

    my $examine_count = @$files_to_examine;
    $logger->info( "Found $examine_count distributions to actually index" );

    my $success_reports = $self->success_report_count || 0;
    my $error_reports   = $self->error_report_count   || 0;

    # Guard the division explicitly: with no distributions the percentage
    # is simply 0, with no eval and no multiplication of undef.
    my( $success_percent, $error_percent ) = map {
        $dist_count ? sprintf( "%d", 100 * ( $_ / $dist_count ) ) : 0
    } $success_reports, $error_reports;

    $logger->info( "Found $success_reports previous success reports ($success_percent%)" );
    $logger->info( "Found $error_reports previous error reports ($error_percent%)" );

    return $files_to_examine;
}
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=item report_exists_already( DIST ) |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
This method goes through this process to decide what to return: |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=over 4 |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item Return false if the C<fresh_start> configuration is true |
|
158
|
|
|
|
|
|
|
(so existing reports don't matter). |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=item Return true if there is a successful report already. |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=item Return false if C<retry_errors> is true. |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=item Return true if there is already an error report. |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=item Return false as the default case. |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=back |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=cut |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
BEGIN {
    # Running tallies of the reports found so far. They are lexical to
    # this block, so only the three subs below can see or change them.
    my $success_reports;
    my $error_reports;

    # Decide whether DIST can be skipped because a report for it already
    # exists. Returns 1 to skip it, 0 to index it.
    sub report_exists_already {
        my( $self, $dist ) = @_;

        # A fresh start ignores whatever reports are lying around.
        if( $self->get_config->fresh_start ) {
            return 0;
        }

        my $reporter = $self->get_coordinator->get_component( 'reporter' );

        my $success_path = $reporter->get_successful_report_path( $dist );
        if( -e $success_path ) {
            $success_reports++;
            return 1;
        }

        # When errors are being retried, an old error report does not count.
        if( $self->get_config->retry_errors ) {
            return 0;
        }

        my $error_path = $reporter->get_error_report_path( $dist );
        if( -e $error_path ) {
            $error_reports++;
            return 1;
        }

        return 0;
    }

    # Number of success reports seen so far (undef until the first one).
    sub success_report_count { return $success_reports }

    # Number of error reports seen so far (undef until the first one).
    sub error_report_count { return $error_reports }
}
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# Create the PAUSE-like author directory under $base_dir for the configured
# pause_id (falling back to 'MYCPAN'). A failure is logged, not fatal.
# Always returns 1.
sub _setup_organize_dists {
    my( $self, $base_dir ) = @_;

    my $pause_id = eval { $self->get_config->pause_id } || 'MYCPAN';

    # Building the path stays inside the eval so that a failure there is
    # reported the same way as a failure to create the directory.
    eval {
        my $author_dir = catfile( $base_dir, $self->_path_parts( $pause_id ) );
        mkpath( $author_dir, { mode => 0775 } );
    };

    if( $@ ) {
        $logger->error( "Could not create PAUSE author path for [$pause_id]: $@" );
    }

    return 1;
}
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# Build the relative PAUSE-style author path for a PAUSE ID:
# authors/id/<first letter>/<first two letters>/<full id>, joined with
# catfile. The ID is the second argument; the invocant is not used.
sub _path_parts {
    my( $self, $pause_id ) = @_;

    # The one- and two-character prefixes of the ID.
    my @prefixes = map { substr( $pause_id, 0, $_ ) } 1, 2;

    return catfile( qw(authors id), @prefixes, $pause_id );
}
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# Copy the distribution $file into the PAUSE-like author directory under
# $base_dir, using the configured pause_id (default 'MYCPAN').
#
# Returns the absolute path of the copy. If an identical copy is already
# in place, nothing is copied and that existing path is returned. If the
# copy fails, the error is logged and the original file name is returned.
sub _copy_file {
    my( $self, $file, $base_dir ) = @_;

    require File::Copy;

    my $pause_id = eval { $self->get_config->pause_id } || 'MYCPAN';

    my $basename = basename( $file );
    $logger->debug( "Need to copy file $basename into $pause_id" );

    my $new_name = rel2abs(
        catfile( $base_dir, $self->_path_parts( $pause_id ), $basename )
    );

    if( -e $new_name ) {
        # _file_md5 returns '' for a file it cannot open, so an empty
        # digest must not be taken as proof that the files match.
        my $existing_md5 = $self->_file_md5( $new_name );

        if( length $existing_md5 and $existing_md5 eq $self->_file_md5( $file ) ) {
            $logger->debug( "Files [$file] and [$new_name] are the same. Not copying" );
            return $new_name;
        }
    }

    my $rc = File::Copy::copy( $file => $new_name );
    $logger->error( "Could not copy [$file] to [$new_name]: $!" )
        unless $rc;

    return $rc ? $new_name : $file;
}
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
# Return the MD5 hex digest of the contents of $file, or the empty string
# if the file cannot be opened. The invocant is not used.
sub _file_md5 {
    my( $self, $file ) = @_;

    require Digest::MD5;

    open my $fh, '<', $file or return '';

    # Digest the raw bytes; without binmode some platforms translate line
    # endings or stop at an end-of-file marker.
    binmode $fh;

    my $digest = Digest::MD5->new->addfile( $fh )->hexdigest;
    close $fh;

    return $digest;
}
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
1; |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=back |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
MyCPAN::Indexer, MyCPAN::Indexer::Tutorial |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=head1 SOURCE AVAILABILITY |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
This code is in Github: |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
git://github.com/briandfoy/mycpan-indexer.git |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head1 AUTHOR |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
brian d foy, C<< >> |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
Copyright (c) 2008-2013, brian d foy, All Rights Reserved. |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
You may redistribute this under the same terms as Perl itself. |
|
290
|
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=cut |