File Coverage

blib/lib/CPAN/Site/Index.pm
Criterion Covered Total %
statement 186 284 65.4
branch 55 128 42.9
condition 31 56 55.3
subroutine 29 34 85.2
pod 0 18 0.0
total 301 520 57.8


line stmt bran cond sub pod time code
1             # Copyrights 1998,2005-2022 by [Mark Overmeer].
2             # For other contributors see ChangeLog.
3             # See the manual pages for details on the licensing terms.
4             # Pod stripped from pm file by OODoc 2.03.
5             # This code is part of distribution CPAN::Site.
6             # Copyright Mark Overmeer. Licensed under the same terms as Perl itself.
7              
8             package CPAN::Site::Index;
9 2     2   2043 use vars '$VERSION';
  2         4  
  2         101  
10             $VERSION = '1.17';
11              
12 2     2   11 use base 'Exporter';
  2         4  
  2         181  
13              
14 2     2   10 use warnings;
  2         4  
  2         43  
15 2     2   8 use strict;
  2         3  
  2         115  
16              
17             our @EXPORT_OK = qw/cpan_index cpan_mirror/;
18             our $VERSION; # required in test-env
19              
20 2     2   856 use Log::Report 'cpan-site', syntax => 'SHORT';
  2         184031  
  2         9  
21              
22 2     2   1163 use version;
  2         3102  
  2         9  
23 2     2   130 use File::Find qw/find/;
  2         5  
  2         100  
24 2     2   444 use File::Copy qw/copy/;
  2         2057  
  2         100  
25 2     2   13 use File::Basename qw/basename dirname/;
  2         5  
  2         107  
26 2     2   864 use HTTP::Date qw/time2str/;
  2         3199  
  2         115  
27 2     2   390 use File::Spec::Functions qw/catfile catdir splitdir/;
  2         641  
  2         100  
28 2     2   1112 use LWP::UserAgent ();
  2         70602  
  2         48  
29 2     2   1357 use Archive::Tar ();
  2         92987  
  2         65  
30 2     2   1249 use Archive::Zip qw(:ERROR_CODES :CONSTANTS);
  2         80446  
  2         286  
31 2     2   818 use CPAN::Checksums qw(updatedir); # horrible function name
  2         58704  
  2         131  
32 2     2   15 use IO::Zlib ();
  2         14  
  2         7219  
33              
34             my $tar_gz = qr/ \.tar\.gz$ | \.tar\.Z$ | \.tgz$/xi;
35             my $zip = qr/ \.zip$ /xi;
36             my $cpan_update = 0.04; # days between reload of full CPAN index
37             my $ua;
38              
39             sub safe_copy($$);
40             sub cpan_index($@);
41             sub register($$$);
42             sub package_inventory($$;$);
43             sub package_on_usual_location($);
44             sub inspect_archive;
45             sub inspect_tar_archive($$);
46             sub inspect_zip_archive($$);
47             sub collect_package_details($$$);
48             sub update_global_cpan($$);
49             sub load_file($$);
50             sub merge_global_cpan($$$);
51             sub create_details($$$$$);
52             sub calculate_checksums($$);
53             sub read_details($);
54             sub remove_expired_details($$$);
55             sub mkdirhier(@);
56             sub cpan_mirror($$$@);
57              
58             sub safe_copy($$)
59 2     2 0 8 { my ($from, $to) = @_;
60 2         25 trace "copy $from to $to";
61 2 50       86 copy $from, $to
62             or fault __x"cannot copy {from} to {to}", from => $from, to => $to;
63             }
64              
65             sub cpan_index($@)
66 1     1 0 2035 { my ($mycpan, $globalcpan, %opts) = @_;
67 1         3 my $lazy = $opts{lazy};
68 1         1 my $fallback = $opts{fallback};
69 1 50       4 my $undefs = exists $opts{undefs} ? $opts{undefs} : 1;
70              
71 1 50       3 unless($ua)
72 1         10 { $ua = LWP::UserAgent->new;
73 1 50       2464 $ua->env_proxy if $opts{env_proxy};
74             }
75              
76 1 50       17 -d $mycpan
77             or error __x"archive top '{dir}' is not a directory"
78             , dir => $mycpan;
79              
80 1         28 my $program = basename $0;
81 1   50     4 $VERSION ||= 'undef'; # test env at home
82 1         6 trace "$program version $VERSION";
83              
84 1         44 my $global = catdir $mycpan, 'global';
85 1         4 my $mods = catdir $mycpan, 'modules';
86 1         3 my $authors = catdir $mycpan, 'authors';
87 1         4 mkdirhier $global, $mods, $authors;
88              
89 1         3 my $globdetails = update_global_cpan $mycpan, $globalcpan;
90              
91             # Create mailrc and modlist
92              
93 1         20 safe_copy catfile($global, '01mailrc.txt.gz')
94             , catfile($authors, '01mailrc.txt.gz');
95              
96 1         1574 safe_copy catfile($global, '03modlist.data.gz')
97             , catfile($mods, '03modlist.data.gz');
98              
99             # Create packages details
100              
101 1         377 my $details = catfile $mods, '02packages.details.txt.gz';
102 1         6 my $newlist = catfile $mods, '02packages.details.tmp.gz';
103 1         4 my $newer;
104              
105 1         3 my $reuse_dists = {};
106 1 50 33     7 if($lazy && -f $details)
107 0         0 { $reuse_dists = read_details $details;
108 0         0 $newer = -M $details;
109 0         0 remove_expired_details $mycpan, $reuse_dists, $newer;
110             }
111              
112 1         7 my ($mypkgs, $distdirs)
113             = package_inventory $mycpan, $reuse_dists, $newer;
114              
115 1 50       3 merge_global_cpan $mycpan, $mypkgs, $globdetails
116             if $fallback;
117              
118 1         7 create_details $details, $newlist, $mypkgs, $lazy, $undefs;
119              
120 1 50       621 if(-f $details)
121 0         0 { trace "backup old details file to $details.bak";
122 0         0 safe_copy $details, "$details.bak";
123             }
124              
125 1 50       44 if(-f $newlist)
126 1         10 { trace "promoting $newlist to current";
127 1 50       94 rename $newlist, $details
128             or error __x"cannot rename '{from}' in '{to}'"
129             , from => $newlist, to => $details;
130             }
131              
132 1         15 calculate_checksums $distdirs, catdir($mycpan, 'authors', 'id');
133             }
134              
135             #
136             # Package Inventory
137             #
138              
139             # global variables for testing purposes (sorry)
140             our ($topdir, $findpkgs, %finddirs, $olddists, $index_age);
141              
142             sub register($$$)
143 31     31 0 73 { my ($package, $this_version, $dist) = @_;
144              
145             # warn "register $package, " . (defined $this_version ? $this_version : 'undef');
146              
147 31 50       58 if(ref $this_version)
148 0         0 { eval { $this_version = version->parse($this_version) };
  0         0  
149 0 0       0 if($@)
150 0         0 { alert __x"error when creating version object for {pkg}: {err}"
151             , pkg => $package, err => $@;
152 0         0 return;
153             }
154             }
155            
156 31 50       89 my $registered_version = exists $findpkgs->{$package} ? $findpkgs->{$package}[0] : undef;
157 31 100       57 $this_version =~ s/^v// if $this_version;
158              
159 31 50 33     83 return if defined $registered_version
160             && $registered_version > $this_version;
161              
162 31         234 $findpkgs->{$package} = [ $this_version, $dist ];
163             }
164              
165             sub package_inventory($$;$)
166 1     1 0 4 { (my $mycpan, $olddists, $index_age) = @_; #!!! see "my"
167 1         8 $topdir = catdir $mycpan, 'authors', 'id';
168 1         6 mkdirhier $topdir;
169              
170 1         3 $findpkgs = {};
171 1         8 trace "creating inventory from $topdir";
172              
173 1         174 find {wanted => \&inspect_archive, no_chdir => 1}, $topdir;
174 1         8 ($findpkgs, \%finddirs);
175             }
176              
177             sub package_on_usual_location($)
178 23     23 0 408 { my $file = shift;
179 23         84 my ($top, $subdir, @rest) = splitdir $file;
180 23 50       214 defined $subdir or return 0;
181              
182             !@rest # path is at top-level of distro
183 23 100       125 || $subdir eq 'lib'; # inside lib
184             }
185              
186             sub inspect_archive
187 8     8 0 16275 { my $fn = $File::Find::name;
188 8 100 33     672 return unless -f $fn && ($fn =~ $tar_gz || $fn =~ $zip);
      66        
189              
190 4         95 (my $dist = $fn) =~ s!^\Q$topdir\E[\\/]!!;
191              
192 4 50 33     20 if(defined $index_age && -M $fn > $index_age)
193             {
194 0 0       0 unless(exists $olddists->{$dist})
195 0         0 { trace "not the latest: $dist";
196 0         0 return;
197             }
198              
199 0         0 trace "latest older than index: $dist";
200 0         0 foreach (@{$olddists->{$dist}})
  0         0  
201 0         0 { my ($pkg, $version) = @$_;
202 0         0 register $pkg, $version, $dist;
203             }
204 0         0 return;
205             }
206              
207 4         30 trace "inspecting archive $fn";
208 4         137 $finddirs{$File::Find::dir}++;
209              
210 4 50       47 return inspect_tar_archive $dist, $fn
211             if $fn =~ $tar_gz;
212              
213 0 0       0 return inspect_zip_archive $dist, $fn
214             if $fn =~ $zip;
215             }
216              
217             sub inspect_tar_archive($$)
218 4     4 0 13 { my ($dist, $fn) = @_;
219              
220 4         66 my $arch = Archive::Tar->new;
221 4 50       60 $arch->read($fn, 1)
222             or error __x"no files in tar archive '{fn}': {err}"
223             , fn => $fn, err => $arch->error;
224              
225 4         60840 foreach my $file ($arch->get_files)
226 64         655 { my $fn = $file->full_path;
227 64 100 100     1499 $file->is_file && $fn =~ m/\.pm$/i && package_on_usual_location $fn
      100        
228             or next;
229 21         72 collect_package_details $fn, $dist, $file->get_content_by_ref;
230             }
231             }
232              
233             sub inspect_zip_archive($$)
234 0     0 0 0 { my ($dist, $fn) = @_;
235              
236 0         0 my $arch = Archive::Zip->new;
237 0 0       0 $arch->read($fn)==AZ_OK
238             or error __x"no files in zip archive '{fn}': {err}"
239             , fn => $fn, err => $arch->error;
240              
241 0         0 foreach my $member ($arch->membersMatching( qr/\.pm$/i ))
242 0         0 { my $fn = $member->fileName;
243 0 0 0     0 $member->isTextFile && package_on_usual_location $fn
244             or next;
245 0         0 my ($contents, $status) = $member->contents;
246 0 0       0 $status==AZ_OK
247             or error "error in zip file {fn}: {err}"
248             , fn => $fn, err => $status;
249 0         0 collect_package_details $fn, $dist, \$contents;
250             }
251             }
252              
253             sub collect_package_details($$$)
254 21     21 0 119 { my ($fn, $dist) = (shift, shift);
255 21         31 my @lines = split /\r?\n/, ${shift()};
  21         2527  
256 21         61 my $in_pod = 0;
257 21         25 my $package;
258 21         40 local $VERSION = undef; # may get destroyed by eval
259              
260 21         43 while(@lines)
261 4650         5537 { local $_ = shift @lines;
262 4650 50       6219 last if m/^__(?:END|DATA)__$/;
263              
264 4650 100       6307 $in_pod = ($1 ne 'cut') if m/^=(\w+)/;
265 4650 100 100     9619 next if $in_pod || m/^\s*#/;
266              
267 3409   100     9155 $_ .= shift @lines
      100        
268             while @lines && m/package|use|VERSION/ && !m/[;{]/;
269              
270 3409 100       5268 if( m/^\s* package \s* ((?:\w+\:\:)*\w+) (?:\s+ (\S*))? \s* [;{]/x )
271 31         111 { my ($thispkg, $v) = ($1, $2);
272 31         43 my $thisversion;
273 31 50       57 if($v)
274 0         0 { $thisversion = eval {qv($v)};
  0         0  
275 0 0       0 alert __x"illegal version for {pkg}, found '{version}': {err}"
276             , pkg => $thispkg, version => $v, err => $@ if $@;
277             }
278              
279             # second package in file?
280 31 100       79 register $package, $VERSION, $dist
281             if defined $package;
282              
283 31         53 ($package, $VERSION) = ($thispkg, $thisversion);
284 31         141 trace "pkg $package from $fn";
285             }
286              
287 3409 50       8999 if( m/^\s* \$ ${package}::VERSION \s* = \s* ["']?(\w+?)["']? \s* ;/x )
288 0         0 { $VERSION = $1;
289             }
290              
291 3409 100 100     8839 if( !$VERSION && m/^ (?:use\s+version\s*;\s*)?
292             (?:our)? \s* \$ ((?: \w+\:\:)*) VERSION \s* \= (.*)/x )
293 10 50       31 { defined $2 or next;
294 10         26 my ($ns, $vers) = ($1, $2);
295              
296             # some versions of CPAN.pm do contain lines like "$VERSION =~ ..."
297             # which also need to be processed.
298 10         776 eval "\$VERSION =$vers";
299 10 50       47 if(defined $VERSION)
300 10 100       43 { ($package = $ns) =~ s/\:\:$//
301             if length $ns;
302 10         41 trace "pkg $package version $VERSION";
303             }
304             }
305             }
306              
307 21 50       43 $VERSION = $VERSION->numify if ref $VERSION;
308 21 50       114 register $package, $VERSION, $dist
309             if defined $package;
310             }
311              
312             sub update_global_cpan($$)
313 1     1 0 3 { my ($mycpan, $globalcpan) = @_;
314              
315 1         4 my $global = catdir $mycpan, 'global';
316             my ($mailrc, $globdetails, $modlist) =
317 1         3 map { catfile $global, $_ }
  3         12  
318             qw/01mailrc.txt.gz 02packages.details.txt.gz 03modlist.data.gz/;
319              
320 1 0 33     21 return $globdetails
      33        
      0        
321             if -f $globdetails && -f $globdetails && -f $modlist
322             && -M $globdetails < $cpan_update;
323              
324 1         8 info "(re)loading global CPAN files";
325              
326 1         23 mkdirhier $global;
327 1         5 load_file "$globalcpan/authors/01mailrc.txt.gz", $mailrc;
328 1         54 load_file "$globalcpan/modules/02packages.details.txt.gz", $globdetails;
329 1         73 load_file "$globalcpan/modules/03modlist.data.gz", $modlist;
330 1         44 $globdetails;
331             }
332              
333             sub load_file($$)
334 3     3 0 10 { my ($from, $to) = @_;
335 3         15 my $response = $ua->get($from, ':content_file' => $to);
336 3 50       1352531 return if $response->is_success;
337              
338 0         0 unlink $to;
339 0         0 error __x"failed to get {uri} for {to}: {err}"
340             , uri => $from, to => $to, err => $response->status_line;
341             }
342              
343             sub merge_global_cpan($$$)
344 0     0 0 0 { my ($mycpan, $pkgs, $globdetails) = @_;
345              
346 0         0 trace "merge packages with CPAN core list in $globdetails";
347 0         0 my $cpan_pkgs = read_details $globdetails;
348              
349 0         0 while(my ($cpandist, $cpanpkgs) = each %$cpan_pkgs)
350 0         0 { foreach (@$cpanpkgs)
351 0         0 { my ($pkg, $version) = @$_;
352 0 0       0 next if exists $pkgs->{$pkg};
353 0         0 $pkgs->{$pkg} = [$version, $cpandist];
354             }
355             }
356             }
357              
358             sub create_details($$$$$)
359 2     2 0 2802 { my ($details, $filename, $pkgs, $lazy, $undefs) = @_;
360              
361 2         14 trace "creating package details in $filename";
362 2 50       68 my $fh = IO::Zlib->new($filename, 'wb')
363             or fault __x"generating gzipped {fn}", fn => $filename;
364              
365 2         4824 my $lines = keys %$pkgs;
366 2         17 my $date = time2str time;
367 2 100       57 my $how = $lazy ? "lazy" : "full";
368              
369 2         18 info "produced list of $lines packages $how";
370              
371 2         186 my $program = basename $0;
372 2         9 my $module = __PACKAGE__;
373 2         48 $fh->print (<<__HEADER);
374             File: 02packages.details.txt
375             URL: file://$details
376             Description: Packages listed in CPAN and local repository
377             Columns: package name, version, path
378             Intended-For: private CPAN
379             Line-Count: $lines
380             Written-By: $program with $module $CPAN::Site::Index::VERSION ($how)
381             Last-Updated: $date
382              
383             __HEADER
384              
385 2         486 foreach my $pkg (sort { lc($a) cmp lc($b) } keys %$pkgs)
  104         137  
386 32         2930 { my ($version, $path) = @{$pkgs->{$pkg}};
  32         79  
387              
388 32 100 66     110 $version = 'undef'
389             if !defined $version || $version eq '';
390              
391             next
392 32 50 66     90 if $version eq 'undef' && !$undefs;
393              
394 32         58 $path =~ s,\\,/,g;
395 32         155 $fh->printf("%-30s\t%s\t%s\n", $pkg, $version, $path);
396             }
397             }
398              
399             sub calculate_checksums($$)
400 1     1 0 2 { my $dirs = shift;
401 1         3 my $root = shift;
402 1         5 trace "updating checksums";
403              
404 1         25 foreach my $dir (keys %$dirs)
405 1         4 { trace "summing $dir";
406 1 50       24 updatedir($dir, $root)
407             or warning 'failed calculating checksums in {dir}', dir => $dir;
408             }
409             }
410              
411             sub read_details($)
412 0     0 0 0 { my $fn = shift;
413 0 0       0 -f $fn or return {};
414 0         0 trace "collecting all details from $fn";
415              
416 0 0       0 my $fh = IO::Zlib->new($fn, 'rb')
417             or fault __x"cannot read from {fn}", fn => $fn;
418              
419 0         0 my $line; # skip header, search first blank
420 0         0 do { $line = $fh->getline } until $line =~ m/^\s*$/;
  0         0  
421              
422 0         0 my $time_last_update = (stat $fn)[9];
423 0         0 my %dists;
424              
425 0         0 while(my $line = $fh->getline)
426 0         0 { chomp $line;
427 0         0 my ($pkg, $version, $dist) = split ' ', $line, 3;
428              
429 0 0       0 unless($dist)
430 0         0 { warning "$fn error line=\n $line";
431 0         0 next;
432             }
433              
434 0         0 push @{$dists{$dist}}, [$pkg, $version];
  0         0  
435             }
436              
437 0         0 \%dists;
438             }
439              
440             sub remove_expired_details($$$)
441 0     0 0 0 { my ($mycpan, $dists, $newer) = @_;
442 0         0 trace "extracting only existing local distributions";
443              
444 0         0 my $authors = catdir $mycpan, 'authors', 'id';
445 0         0 foreach my $dist (keys %$dists)
446 0         0 { my $fn = catfile $authors, $dist;
447 0 0       0 if(! -f $fn)
    0          
448             { # removed local or a global dist
449 0         0 delete $dists->{$dist};
450             }
451             elsif(-M $fn < $newer)
452 0         0 { trace "dist $dist file updated, reindexing";
453 0         0 delete $dists->{$dist};
454             }
455             }
456             }
457              
458             sub mkdirhier(@)
459 5     5 0 12 { foreach my $dir (@_)
460 7 100       149 { next if -d $dir;
461 2         65 mkdirhier dirname $dir;
462              
463 2 50       86 mkdir $dir, 0755
464             or fault __x"cannot create directory {dir}", dir => $dir;
465              
466 2         14 trace "created $dir";
467             }
468 5         35 1;
469             }
470              
471             sub cpan_mirror($$$@)
472 0     0 0   { my ($mycpan, $globalcpan, $mods, %opts) = @_;
473 0 0         @$mods or return;
474 0           my %need = map { ($_ => 1) } @$mods;
  0            
475 0           my $auth = catdir $mycpan, 'authors', 'id';
476              
477 0 0         unless($ua)
478 0           { $ua = LWP::UserAgent->new;
479 0 0         $ua->env_proxy if $opts{env_proxy};
480             }
481              
482 0           my $globdetails
483             = update_global_cpan $mycpan, $globalcpan;
484              
485 0 0         my $fh = IO::Zlib->new($globdetails, 'rb')
486             or fault __x"cannot read from {fn}", fn => $globdetails;
487              
488 0           while(my $line = $fh->getline) # skip header, search first blank
489 0 0         { last if $line =~ m/^\s*$/;
490             }
491              
492 0   0       $ua ||= LWP::UserAgent->new;
493 0           while(my $line = $fh->getline)
494 0           { my ($pkg, $version, $dist) = split ' ', $line;
495 0 0         delete $need{$pkg} or next;
496              
497 0           my $to = catfile $auth, split m#/#, $dist;
498 0 0         if(-f $to)
499 0           { trace __x"package {pkg} present in distribution {dist}"
500             , pkg => $pkg, dist => $dist;
501 0           next;
502             }
503              
504 0           my $source = "$globalcpan/authors/id/$dist";
505 0           mkdirhier dirname $to;
506 0           my $response = $ua->get($source, ':content_file' => $to);
507 0 0         unless($response->is_success)
508 0           { unlink $to;
509 0           error __x"failed to get {uri} for {to}: {err}"
510             , uri => $source, to => $to, err => $response->status_line;
511             }
512              
513 0           info __x"got {pkg} in {dist}", pkg => $pkg, dist => $dist;
514             }
515              
516             warning __x"package {pkg} does not exist", pkg => $_
517 0           for sort keys %need;
518             }
519              
520             1;