File Coverage

blib/lib/CPAN/ParseDistribution.pm
Criterion Covered Total %
statement 174 183 95.0
branch 65 88 73.8
condition 16 24 66.6
subroutine 24 24 100.0
pod 5 5 100.0
total 284 324 87.6


line stmt bran cond sub pod time code
1             package CPAN::ParseDistribution;
2              
3 4004     4004   1464522 use strict;
  4004         6938  
  4004         64320  
4 89     89   356 use warnings;
  89         178  
  89         2403  
5              
6 89     89   267 use vars qw($VERSION);
  89         534  
  89         3827  
7              
8             $VERSION = '1.53';
9              
10 89     89   267 use Cwd qw(getcwd abs_path);
  89         89  
  89         4183  
11 89     89   54201 use File::Temp qw(tempdir);
  89         1309190  
  89         4183  
12 89     89   94785 use File::Find::Rule;
  89         478731  
  89         445  
13 89     89   2937 use File::Path;
  89         89  
  89         3204  
14 89     89   44411 use Data::Dumper;
  89         399165  
  89         4094  
15 89     89   54557 use Archive::Tar;
  89         5177041  
  89         4361  
16 89     89   48505 use Archive::Zip;
  89         3069966  
  89         3026  
17 89     89   31684 use YAML qw(LoadFile);
  89         407442  
  89         4005  
18 89     89   39783 use Safe;
  89         1954173  
  89         3204  
19 89     89   40050 use Parallel::ForkManager;
  89         770651  
  89         2225  
20 89     89   890 use Devel::CheckOS qw(os_is);
  89         89  
  89         57138  
21              
22             $Archive::Tar::DO_NOT_USE_PREFIX = 1;
23             $Archive::Tar::CHMOD = 0;
24              
25             =head1 NAME
26              
27             CPAN::ParseDistribution - index a file from the BackPAN
28              
29             =head1 DESCRIPTION
30              
31             Given a file from the BackPAN, this will let you find out what versions
32             of what modules it contains, the distribution name and version
33              
34             =head1 SYNOPSIS
35              
36             my $dist = CPAN::ParseDistribution->new(
37             'A/AU/AUTHORID/subdirectory/Some-Distribution-1.23.tar.gz',
38             use_tar => '/bin/tar',
39             ...
40             );
41             my $modules = $dist->modules(); # hashref of modname => version
42             my $distname = $dist->dist();
43             my $distversion = $dist->distversion();
44              
45             =head1 METHODS
46              
47             =head2 new
48              
49             Constructor, takes a single mandatory argument, which should be a tarball
50             or zip file from the CPAN or BackPAN, and some optional named arguments:
51              
52             =over
53              
54             =item use_tar
55              
56             The full path to 'tar'. This is assumed to be GNU tar, and to be
57             sufficiently well-endowed as to be able to support bzip2 files.
58             Maybe I'll fix that at some point. If this isn't specified, then
59             Archive::Tar is used instead.
60              
61             You might want to use this if dealing with very large files, as
62             Archive::Tar is rather profligate with memory.
63              
64             =back
65              
66             =cut
67              
68             sub new {
69 2580     2580 1 1455908 my($class, $file, %extra_params) = @_;
70 2580 50       9781 die("file parameter is mandatory\n") unless($file);
71 2580 100       42808 die("$file doesn't exist\n") if(!-e $file);
72 2534 100       12257 die("$file looks like a ppm\n")
73             if($file =~ /\.ppm\.(tar(\.gz|\.bz2)?|tbz|tgz|zip)$/i);
74 2488 100       24322 die("$file isn't the right type\n")
75             if($file !~ /\.(tar(\.gz|\.bz2)?|tbz|tgz|zip)$/i);
76 2442         77977 $file = abs_path($file);
77              
78             # dist name and version
79 2442         25934 (my $dist = $file) =~ s{(^.*/|\.(tar(\.gz|\.bz2)?|tbz|tgz|zip)$)}{}gi;
80 2442         10680 $dist =~ /^(.*)-(\d.*)$/;
81 2442         14046 ($dist, my $distversion) = ($1, $2);
82 2442 100       12613 die("Can't index perl itself ($dist-$distversion)\n")
83             if($dist =~ /^(perl|ponie|kurila|parrot|Perl6-Pugs|v6-pugs)$/);
84              
85 2212         22806 bless {
86             file => $file,
87             modules => {},
88             dist => $dist,
89             distversion => $distversion,
90             extra_params => \%extra_params,
91             }, $class;
92             }
93              
94             # takes a filename, unarchives it, returns the directory it's been
95             # unarchived into
96             sub _unarchive {
97 1996     1996   5711 my($file, %extra_params) = @_;
98 1996         6829 my $olddir = getcwd();
99 1996         15582 my $tempdir = tempdir(TMPDIR => 1);
100 1996         635912 chdir($tempdir);
101 1996 100       24775 if($file =~ /\.zip$/i) {
    100          
102 122         1786 my $zip = Archive::Zip->new($file);
103 122 50       434677 $zip->extractTree() if($zip);
104             } elsif($file =~ /\.(tar(\.gz)?|tgz)$/i) {
105 1622 100       5009 if($extra_params{use_tar}) {
106             system(
107             $extra_params{use_tar},
108 393 100       3440327 (($file =~ /gz$/) ? 'xzf' : 'xf'),
109             $file
110             );
111 393         1337128 system("chmod -R u+r *"); # tar might preserve unreadable perms
112             } else {
113 1229         18254 my $tar = Archive::Tar->new($file, 1);
114 1229 50       18338496 $tar->extract() if($tar);
115             }
116             } else {
117 252 100       719 if($extra_params{use_tar}) {
118 82         654041 system( $extra_params{use_tar}, 'xjf', $file);
119 82         268176 system("chmod -R u+r *");
120             } else {
121 170 50       303256 open(my $fh, '-|', qw(bzip2 -dc), $file) || die("Can't unbzip2\n");
122 170         6054 my $tar = Archive::Tar->new($fh);
123 170 50       1041747 $tar->extract() if($tar);
124             }
125             }
126 1996         14099152 chdir($olddir);
127 1996         19247 return $tempdir;
128             }
129              
130             # adapted from PAUSE::pmfile::parse_version_safely in mldistwatch.pm
131             sub _parse_version_safely {
132 4098     4098   9719 my($parsefile) = @_;
133 4098         8550 my $result;
134             my $eval;
135 4098         15316 local $/ = "\n";
136 4098 50       138453 open(my $fh, $parsefile) or die "Could not open '$parsefile': $!";
137 4098         9822 my $inpod = 0;
138 4098         61064 while (<$fh>) {
139 89158 100       142985 $inpod = /^=(?!cut)/ ? 1 : /^=cut/ ? 0 : $inpod;
    100          
140 89158 100 100     220577 next if $inpod || /^\s*#/;
141 55194         47209 chop;
142 55194 100       144054 next unless /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/;
143 4004         17260 my($sigil, $var) = ($1, $2);
144 4004         6786 my $current_parsed_line = $_;
145             {
146 4004         9779 local $^W = 0;
  4004         15384  
147 89     89   445 no strict;
  89         89  
  89         82859  
148 4004         43101 my $c = Safe->new();
149 4004 50       3129368 $c->deny(qw(
150             tie untie tied chdir flock ioctl socket getpeername
151             ssockopt bind connect listen accept shutdown gsockopt
152             getsockname sleep alarm entereval reset dbstate
153             readline rcatline getc read formline enterwrite
154             leavewrite print sysread syswrite send recv eof
155             tell seek sysseek readdir telldir seekdir rewinddir
156             lock stat lstat readlink ftatime ftblk ftchr ftctime
157             ftdir fteexec fteowned fteread ftewrite ftfile ftis
158             ftlink ftmtime ftpipe ftrexec ftrowned ftrread ftsgid
159             ftsize ftsock ftsuid fttty ftzero ftrwrite ftsvtx
160             fttext ftbinary fileno ghbyname ghbyaddr ghostent
161             shostent ehostent gnbyname gnbyaddr gnetent snetent
162             enetent gpbyname gpbynumber gprotoent sprotoent
163             eprotoent gsbyname gsbyport gservent sservent
164             eservent gpwnam gpwuid gpwent spwent epwent
165             getlogin ggrnam ggrgid ggrent sgrent egrent msgctl
166             msgget msgrcv msgsnd semctl semget semop shmctl
167             shmget shmread shmwrite require dofile caller
168             syscall dump chroot link unlink rename symlink
169             truncate backtick system fork wait waitpid glob
170             exec exit kill time tms mkdir rmdir utime chmod
171             chown fcntl sysopen open close umask binmode
172             open_dir closedir
173             ), ($] >= 5.010 ? qw(say) : ()));
174 4004         241930 $c->share_from(__PACKAGE__, [qw(qv)]);
175 4004         157497 s/\buse\s+version\b.*?;//gs;
176             # qv broke some time between version.pm 0.74 and 0.82
177             # so just extract it and hope for the best
178 4004         9711 s/\bqv\s*\(\s*(["']?)([\d\.]+)\1\s*\)\s*/"$2"/;
179 4004         7875 s/\buse\s+vars\b//g;
180 4004         22806 $eval = qq{
181             local ${sigil}${var};
182             \$$var = undef; do {
183             $_
184             }; \$$var
185             };
186              
187 4004         10928 $result = _run_safely($c, $eval);
188             };
189             # stuff that's my fault because of the Safe compartment
190 3916 100 66     2733377 if($result->{error} && $result->{error} =~ /trapped by operation mask|safe compartment timed out/i) {
    50          
191 80         12820 warn("Unsafe code in \$VERSION\n".$result->{error}."\n$parsefile\n$eval");
192 80         466 $result = undef;
193             } elsif($result->{error}) {
194             warn "_parse_version_safely: ".Dumper({
195             eval => $eval,
196             line => $current_parsed_line,
197             file => $parsefile,
198             err => $result->{error},
199 0         0 });
200             }
201 3916         16242 last;
202             }
203 4010         48467 close $fh;
204              
205 4010 100       70908 return exists($result->{result}) ? $result->{result} : undef;
206             }
207              
208             sub _run_safely {
209 4004 50   4004   27031 if(os_is('Unix')) {
    0          
210 89     89   38359 eval 'use CPAN::ParseDistribution::Unix';
  89         178  
  89         1335  
  4004         9003498  
211 4004         29120 return CPAN::ParseDistribution::Unix->_run(@_);
212             } elsif(os_is('MicrosoftWindows')) {
213             # FIXME once someone supplies CPAN::ParseDistribution::Windows
214 0         0 warn("Windows is not fully supported by CPAN::ParseDistribution\n");
215 0         0 warn("See the LIMITATIONS section in the documentation\n");
216 0         0 eval 'use CPAN::ParseDistribution::Unix';
217 0         0 return CPAN::ParseDistribution::Unix->_run(@_);
218             }
219             }
220              
221             =head2 isdevversion
222              
223             Returns true or false depending on whether this is a developer-only
224             or trial release of a distribution. This is determined by looking for
225             an underscore in the distribution version or the string '-TRIAL' at the
226             end of the distribution version.
227              
228             =cut
229              
230             sub isdevversion {
231 326     326 1 2032 my $self = shift;
232 326 100       870 return 1 if($self->distversion() =~ /(_|-TRIAL$)/);
233 110         440 return 0;
234             }
235              
236             =head2 modules
237              
238             Returns a hashref whose keys are module names, and their values are
239             the versions of the modules. The version number is retrieved by
240             eval()ing what looks like a $VERSION line in the code. This is done
241             in a C compartment, but may be a security risk if you do this
242             with untrusted code. Caveat user!
243              
244             =cut
245              
246             sub modules {
247 2104     2104 1 14113 my $self = shift;
248 2104 100       2353 if(!(keys %{$self->{modules}})) {
  2104         8797  
249 1996         3968 $self->{_modules_runs}++;
250 1996         2580 my $tempdir = _unarchive($self->{file}, %{$self->{extra_params}});
  1996         6968  
251              
252 1996         124098 my $meta = (File::Find::Rule->file()->name('META.yml')->in($tempdir))[0];
253 1996         3060164 my $ignore = join('|', qw(t inc xt));
254 1996         3420 my %ignorefiles;
255             my %ignorepackages;
256 0         0 my %ignorenamespaces;
257 1996 100 66     31625 if($meta && -e $meta) {
258 1540         2943 my $yaml = eval { LoadFile($meta); };
  1540         10766  
259 1540 50 33     10809648 if(!$@ &&
      33        
      33        
260             UNIVERSAL::isa($yaml, 'HASH') &&
261             exists($yaml->{no_index}) &&
262             UNIVERSAL::isa($yaml->{no_index}, 'HASH')
263             ) {
264 1540 100       7199 if(exists($yaml->{no_index}->{directory})) {
265 1348 100       1938 if(eval { @{$yaml->{no_index}->{directory}} }) {
  1348 50       1698  
  1348         7969  
266             $ignore = join('|', $ignore,
267 1242         2993 map { "$_/" } @{$yaml->{no_index}->{directory}}
  2718         8552  
  1242         5125  
268             );
269             } elsif(!ref($yaml->{no_index}->{directory})) {
270 106         530 $ignore .= '|'.$yaml->{no_index}->{directory}.'/'
271             }
272             }
273 1540 100       5520 if(exists($yaml->{no_index}->{file})) {
274 242 50       626 if(eval { @{$yaml->{no_index}->{file}} }) {
  242 0       473  
  242         1390  
275 242         1023 %ignorefiles = map { $_, 1 }
276 242         478 @{$yaml->{no_index}->{file}};
  242         797  
277             } elsif(!ref($yaml->{no_index}->{file})) {
278 0         0 $ignorefiles{$yaml->{no_index}->{file}} = 1;
279             }
280             }
281 1540 100       4001 if(exists($yaml->{no_index}->{package})) {
282 288 50       481 if(eval { @{$yaml->{no_index}->{package}} }) {
  288 0       532  
  288         1301  
283 384         1638 %ignorepackages = map { $_, 1 }
284 288         486 @{$yaml->{no_index}->{package}};
  288         1039  
285             } elsif(!ref($yaml->{no_index}->{package})) {
286 0         0 $ignorepackages{$yaml->{no_index}->{package}} = 1;
287             }
288             }
289 1540 100       10191 if(exists($yaml->{no_index}->{namespace})) {
290 94 50       351 if(eval { @{$yaml->{no_index}->{namespace}} }) {
  94 0       326  
  94         470  
291 94         1084 %ignorenamespaces = map { $_, 1 }
292 94         119 @{$yaml->{no_index}->{namespace}};
  94         326  
293             } elsif(!ref($yaml->{no_index}->{namespace})) {
294 0         0 $ignorenamespaces{$yaml->{no_index}->{namespace}} = 1;
295             }
296             }
297             }
298             }
299             # find modules
300             my @PMs = grep {
301 1996         58945 my $pm = $_;
  7168         2527321  
302             $pm !~ m{^\Q$tempdir\E/[^/]+/($ignore)} &&
303 7168   100     138801 !grep { $pm =~ m{^\Q$tempdir\E/[^/]+/$_$} } (keys %ignorefiles)
304             } File::Find::Rule->file()->name('*.pm', '*.pm.PL')->in($tempdir);
305 1996         12173 foreach my $PM (@PMs) {
306 4098         16710 local $/ = undef;
307 4098         13773 my $version = _parse_version_safely($PM);
308 4010 50       145794 open(my $fh, $PM) || die("Can't read $PM\n");
309 4010         102994 $PM = <$fh>;
310 4010         22390 close($fh);
311              
312             # from PAUSE::pmfile::packages_per_pmfile in mldistwatch.pm
313 4010 100       55988 if($PM =~ /\bpackage[ \t]+([\w\:\']+)\s*($|[};])/) {
314 3952         15105 my $module = $1;
315             $self->{modules}->{$module} = $version unless(
316             exists($ignorepackages{$module}) ||
317 3952 100 100     97888 (grep { $module =~ /${_}::/ } keys %ignorenamespaces)
  594         20146  
318             );
319             }
320             }
321 1908         3200740 rmtree($tempdir);
322             }
323 2016         32203 return $self->{modules};
324             }
325              
326             =head2 dist
327              
328             Return the name of the distribution. eg, in the synopsis above, it would
329             return 'Some-Distribution'.
330              
331             =cut
332              
333             sub dist {
334 110     110 1 550 my $self = shift;
335 110         4697 return $self->{dist};
336             }
337              
338             =head2 distversion
339              
340             Return the version of the distribution. eg, in the synopsis above, it would
341             return 1.23.
342              
343             Strictly speaking, the CPAN doesn't have distribution versions -
344             Foo-Bar-1.23.tar.gz is not considered to have any relationship to
345             Foo-Bar-1.24.tar.gz, they just happen to coincidentally have rather
346             similar contents. But other tools, such as those used by the CPAN testers,
347             do treat distributions as being versioned.
348              
349             =cut
350              
351             sub distversion{
352 652     652 1 901 my $self = shift;
353 652         3725 return $self->{distversion};
354             }
355              
356             =head1 SECURITY
357              
358             This module executes a very small amount of code from each module that
359             it finds in a distribution. While every effort has been made to do
360             this safely, there are no guarantees that it won't let the distributions
361             you're examining do horrible things to your machine, such as email your
362             password file to strangers. You are strongly advised to read the source
363             code and to run it in a very heavily restricted user account.
364              
365             =head1 LIMITATIONS, BUGS and FEEDBACK
366              
367             I welcome feedback about my code, including constructive criticism.
368             Bug reports should be made using L
369             and should include the smallest possible chunk of code, along with
370             any necessary data, which demonstrates the bug. Ideally, this
371             will be in the form of files which I can drop in to the module's
372             test suite.
373              
374             There is a known problem with parsing some pathological distributions
375             on Windows, where CPAN::ParseDistribution may either hang or crash. This
376             is because Windows doesn't properly support fork()ing and signals. I can
377             not fix this, but welcome patches with tests.
378              
379             =cut
380              
381             =head1 SEE ALSO
382              
383             L
384              
385             L
386              
387             =head1 AUTHOR, COPYRIGHT and LICENCE
388              
389             Copyright 2009-2011 David Cantrell Edavid@cantrell.org.ukE
390              
391             Contains code originally from the PAUSE by Andreas Koenig.
392              
393             This software is free-as-in-speech software, and may be used,
394             distributed, and modified under the terms of either the GNU
395             General Public Licence version 2 or the Artistic Licence. It's
396             up to you which one you use. The full text of the licences can
397             be found in the files GPL2.txt and ARTISTIC.txt, respectively.
398              
399             =head1 CONSPIRACY
400              
401             This module is also free-as-in-mason software.
402              
403             =cut
404              
405             1;