File Coverage

blib/lib/CPAN/Mini/Extract.pm

Criterion	Covered	Total	%
statement	124	219	56.6
branch	19	74	25.6
condition	4	23	17.3
subroutine	25	32	78.1
pod	4	6	66.6
total	176	354	49.7

line	stmt	bran	cond	sub	pod	time	code
1							package CPAN::Mini::Extract;
2
3							=pod
4
5							=head1 NAME
6
7							CPAN::Mini::Extract - Create CPAN::Mini mirrors with the archives extracted
8
9							=head1 SYNOPSIS
10
11							# Create a CPAN extractor
12							my $cpan = CPAN::Mini::Extract->new(
13							remote => 'http://mirrors.kernel.org/cpan/',
14							local => '/home/adam/.minicpan',
15							trace => 1,
16							extract => '/home/adam/.cpanextracted',
17							extract_filter => sub { /\.pm$/ and ! /\b(inc\|t)\b/ },
18							extract_check => 1,
19							);
20
21							# Run the minicpan process
22							my $changes = $cpan->run;
23
24							=head1 DESCRIPTION
25
26							C provides a base for implementing systems that
27							download "all" of CPAN, extract the dists and then process the files
28							within.
29
30							It provides the same syncronisation functionality as L except
31							that it also maintains a parallel directory tree that contains a directory
32							located at an identical path to each archive file, with a controllable
33							subset of the files in the archive extracted below.
34
35							=head2 How does it work
36
37							C starts with a L local mirror, which it
38							will optionally update before each run. Once the L directory
39							is current, it will scan both directory trees, extracting any new archives
40							and removing any extracted archives no longer in the minicpan mirror.
41
42							=head1 EXTENDING
43
44							This class is relatively straight forward, but may evolve over time.
45
46							If you wish to write an extension, please stay in contact with the
47							maintainer while doing so.
48
49							=head1 METHODS
50
51							=cut
52
53	3			3		52452	use 5.006;
	3					10
	3					114
54	3			3		14	use strict;
	3					5
	3					76
55	3			3		24	use Carp ();
	3					5
	3					43
56	3			3		13	use File::Basename ();
	3					5
	3					60
57	3			3		14	use File::Path ();
	3					10
	3					66
58	3			3		14	use File::Spec 0.80 ();
	3					376
	3					96
59	3			3		2849	use File::Remove 0.34 ();
	3					6808
	3					74
60	3			3		20	use List::Util 1.15 ();
	3					67
	3					56
61	3			3		9228	use File::HomeDir 0.88 ();
	3					21706
	3					93
62	3			3		4361	use File::Temp 0.21 ();
	3					84308
	3					99
63	3			3		2882	use URI 1.37 ();
	3					33698
	3					94
64	3			3		3228	use URI::file ();
	3					20803
	3					93
65	3			3		3633	use IO::File 1.14 ();
	3					3518
	3					80
66	3			3		3619	use IO::Uncompress::Gunzip 2.017 ();
	3					150627
	3					110
67	3			3		4352	use Archive::Tar 1.22 ();
	3					231377
	3					101
68	3			3		3401	use Params::Util 1.00 ();
	3					18020
	3					96
69	3			3		3693	use LWP::Online 0.03 ();
	3					226215
	3					109
70	3			3		2949	use File::Find::Rule 0.30 ();
	3					28193
	3					124
71	3			3		3521	use CPAN::Mini 1.111004 ();
	3					26525
	3					100
72
73	3			3		107	use vars qw{$VERSION @ISA};
	3					8
	3					257
74							BEGIN {
75	3			3		7	$VERSION = '1.23';
76	3					6426	@ISA = 'CPAN::Mini';
77							}
78
79
80
81
82
83							#####################################################################
84							# Constructor and Accessors
85
86							=pod
87
88							=head2 new
89
90							The C constructor is used to create and configure a new CPAN
91							Processor. It takes a set of named params something like the following.
92
93							# Create a CPAN processor
94							my $Object = CPAN::Mini::Extract->new(
95							# The normal CPAN::Mini params
96							remote => 'ftp://cpan.pair.com/pub/CPAN/',
97							local => '/home/adam/.minicpan',
98							trace => 1,
99
100							# Additional params
101							extract => '/home/adam/explosion',
102							extract_filter => sub { /\.pm$/ and ! /\b(inc\|t)\b/ },
103							extract_check => 1,
104							);
105
106							=over
107
108							=item minicpan args
109
110							C inherits from L, so all of the arguments
111							that can be used with L will also work with
112							C.
113
114							Please note that C applies some additional defaults
115							beyond the normal ones, like turning C on.
116
117							=item offline
118
119							Although useless with L itself, the C flag will
120							cause the CPAN synchronisation step to be skipped, and only any
121							extraction tasks to be done. (False by default)
122
123							=item extract
124
125							Provides the directory (which must exist and be writable, or be creatable)
126							that the tarball dists should be extracted to.
127
128							=item extract_filter
129
130							C allows you to specify a filter controlling which
131							types of files are extracted from the Archive. Please note that ONLY
132							normal files are ever considered for extraction from an archive, with
133							any directories needed created automatically.
134
135							Although by default C only extract files of type .pm,
136							.t and .pl from the archives, you can add a list of additional things you
137							do not want to be extracted.
138
139							The filter should be provided as a subroutine reference. This sub will
140							be called with $_ set to the path of the file. The subroutine should
141							return true if the file is to be extracted, or false if not.
142
143							# Extract all .pm files, except those in an include directory
144							extract_filter => sub { /\.pm$/ and ! /\binc\b/ },
145
146							=item extract_check
147
148							The main extraction process is done as each new archive is downloaded,
149							but occasionally in a process this long-running something may go wrong
150							and you can end up with archives not extracted.
151
152							In addition, sometimes the processing of the extracted archives is
153							destructive and will result in them being deleted each run.
154
155							Once the mirror update has been completed, the C keyword
156							forces the processor to go back over every tarball in the mirror and
157							double check that it has a corrosponding extracted directory.
158
159							=item extract_force
160
161							For cases in which the filter has been changed, the C
162							boolean flag can be used to forcefully delete and re-extract every
163							extracted directory.
164
165							=back
166
167							Returns a new C object, or dies on error.
168
169							=cut
170
171							sub new {
172	3			3	1	531563	my $class = shift;
173
174							# Use the CPAN::Mini settings as defaults, and add any
175							# additional explicit params.
176	3					39	my %config = ( CPAN::Mini->read_config, @_ );
177
178							# Unless provided auto-detect offline mode
179	3	100				613	unless ( defined $config{offline} ) {
180	1					6	$config{offline} = LWP::Online::offline();
181							}
182
183							# Fake a remote URI if CPAN::Mini can't handle offline mode
184	3					222286	my %fake = ();
185	3	50	66			65	if ( $config{offline} and $CPAN::Mini::VERSION < 0.570 ) {
186	0					0	my $tempdir = File::Temp::tempdir();
187	0					0	my $tempuri = URI::file->new( $tempdir )->as_string;
188	0					0	$fake{remote} = $tempuri;
189							}
190
191							# Use a default local path if none provided
192	3	50				14	unless ( defined $config{local} ) {
193	0					0	my $local = File::Spec->catdir(
194							File::HomeDir->my_data, 'minicpan',
195							);
196							}
197
198							# Call our superclass to create the object
199	3					71	my $self = $class->SUPER::new( %config, %fake );
200
201							# Check the extract param
202	3	50				412546	$self->{extract} or Carp::croak(
203							"Did not provide an 'extract' path"
204							);
205	3	50				77	if ( -e $self->{extract} ) {
206	0	0	0			0	unless ( -d _ and -w _ ) {
207	0					0	Carp::croak(
208							"The 'extract' path is not a writable directory"
209							);
210							}
211							} else {
212	3	50				898	File::Path::mkpath( $self->{extract}, $self->{trace}, $self->{dirmode} )
213							or Carp::croak("The 'extract' path could not be created");
214							}
215
216							# Set defaults and apply rules
217	3	50				104	unless ( defined $self->{extract_check} ) {
218	3					15	$self->{extract_check} = 1;
219							}
220	3	100				16	if ( $self->{extract_force} ) {
221	1					13	$self->{extract_check} = 1;
222							}
223
224							# Compile file_filters if needed
225	3					26	$self->_compile_filter('extract_filter');
226
227							# We'll need a temp directory for expansions
228	3					31	$self->{tempdir} = File::Temp::tempdir( CLEANUP => 1 );
229
230	3					5555	$self;
231							}
232
233
234
235
236
237							#####################################################################
238							# Main Methods
239
240							=pod
241
242							=head2 run
243
244							The C methods starts the main process, updating the minicpan mirror
245							and extracted version, and then launching the PPI Processor to process the
246							files in the source directory.
247
248							Returns the number of changes made to the local minicpan and extracted
249							directories, or dies on error.
250
251							=cut
252
253							sub run {
254	0			0	1	0	my $self = shift;
255
256							# Prepare to start
257	0					0	local $\| = 1;
258	0					0	my $changes;
259	0					0	$self->{added} = {};
260	0					0	$self->{cleaned} = {};
261
262							# If we want to force re-expansion,
263							# remove all current expansion dirs.
264	0	0				0	if ( $self->{extract_force} ) {
265	0					0	$self->log("Flushing all expansion directories (extract_force enabled)\n");
266	0					0	my $authors_dir = File::Spec->catfile( $self->{extract}, 'authors' );
267	0	0				0	if ( -e $authors_dir ) {
268	0					0	$self->log("Removing $authors_dir...");
269	0	0				0	File::Remove::remove( \1, $authors_dir ) or Carp::croak(
270							"Failed to remove previous expansion directory '$authors_dir'"
271							);
272	0					0	$self->log(" removed\n");
273							}
274							}
275
276							# Update the CPAN::Mini local mirror
277	0	0	0			0	if ( $self->{offline} and $CPAN::Mini::VERSION < 0.570 ) {
278	0					0	$self->log("Skipping minicpan update (offline mode enabled)\n");
279							} else {
280	0					0	$self->log("Updating minicpan local mirror...\n");
281	0					0	$self->update_mirror;
282							}
283
284	0		0			0	$changes \|\|= 0;
285	0	0	0			0	if ( $self->{extract_check} or $self->{extract_force} ) {
286							# Expansion checking is enabled, and we didn't do a normal
287							# forced check, so find the full list of files to check.
288	0					0	$self->log("Tarball expansion checking enabled\n");
289	0					0	my @files = File::Find::Rule->new
290							->name('*.tar.gz')
291							->file
292							->relative
293							->in( $self->{local} );
294
295							# Filter to just those we need to extract
296	0					0	$self->log("Checking " . scalar(@files) . " tarballs\n");
297	0					0	@files = grep { ! -d File::Spec->catfile( $self->{extract}, $_ ) } @files;
	0					0
298	0	0				0	if ( @files ) {
299	0					0	$self->log("Scheduling " . scalar(@files) . " tarballs for expansion\n");
300							} else {
301	0					0	$self->log("No tarballs need to be extracted\n");
302							}
303
304							# Expand each of the tarballs
305	0					0	foreach my $file ( sort @files ) {
306	0					0	$self->mirror_extract( $file );
307	0					0	$changes++;
308							}
309							}
310
311	0					0	$self->log("Completed minicpan extraction\n");
312	0					0	$changes;
313							}
314
315
316
317
318
319							#####################################################################
320							# CPAN::Mini Methods
321
322							# Track what we have added
323							sub mirror_file {
324	0			0	1	0	my $self = shift;
325	0					0	my $file = shift;
326
327							# Do the normal stuff
328	0					0	my $rv = $self->SUPER::mirror_file($file, @_);
329
330							# Expand the tarball if needed
331	0	0				0	unless ( -d File::Spec->catfile( $self->{extract}, $file ) ) {
332	0					0	$self->{current_file} = $file;
333	0	0				0	$self->mirror_extract( $file ) or return undef;
334	0					0	delete $self->{current_file};
335							}
336
337	0					0	$self->{added}->{$file} = 1;
338	0					0	delete $self->{current_file};
339	0					0	$rv;
340							}
341
342							sub mirror_extract {
343	0			0	0	0	my ($self, $file) = @_;
344
345							# Don't try to extract anything other than normal tarballs for now.
346	0	0				0	return 1 unless $file =~ /\.t(ar\.)?gz$/;
347
348							# Extract the new file to the matching directory in
349							# the processor source directory.
350	0					0	my $local_file = File::Spec->catfile( $self->{local}, $file );
351	0					0	my $extract_dir = File::Spec->catfile( $self->{extract}, $file );
352
353							# Do the actual extraction
354	0					0	$self->_extract_archive( $local_file, $extract_dir );
355							}
356
357							# Also remove any processing directory.
358							# And track what we have removed.
359							sub clean_file {
360	0			0	1	0	my $self = shift;
361	0					0	my $file = shift; # Absolute path
362
363							# Convert to relative path, and clear the expansion directory
364	0					0	my $relative = File::Spec->abs2rel( $file, $self->{local} );
365	0					0	$self->clean_extract( $relative );
366
367							# We are doing this in the reverse order to when we created it.
368	0					0	my $rv = $self->SUPER::clean_file($file, @_);
369
370	0					0	$self->{cleaned}->{$file} = 1;
371	0					0	$rv;
372							}
373
374							# Remove a processing directory
375							sub clean_extract {
376	0			0	0	0	my ($self, $file) = @_;
377
378							# Remove the source directory, if it exists
379	0					0	my $source_path = File::Spec->catfile( $self->{extract}, $file );
380	0	0				0	if ( -e $source_path ) {
381	0	0				0	File::Remove::remove( \1, $source_path ) or Carp::carp(
382							"Cannot remove $source_path $!"
383							);
384							}
385
386	0					0	1;
387							}
388
389
390
391
392
393							#####################################################################
394							# Support Methods and Error Handling
395
396							# Compile a set of filters
397							sub _compile_filter {
398	3			3		7	my $self = shift;
399	3					12	my $name = shift;
400
401							# Shortcut for "no filters"
402	3	100				15	return 1 unless $self->{$name};
403
404							# If the filter is already a code ref, shortcut
405	2	50				132	return 1 if Params::Util::_CODELIKE($self->{$name});
406
407							# Allow a single Regexp object for the filter
408	0	0				0	if ( Params::Util::_INSTANCE($self->{$name}, 'Regexp') ) {
409	0					0	$self->{$name} = [ $self->{$name} ];
410							}
411
412							# Check for bad cases
413	0	0				0	Params::Util::_ARRAY0($self->{$name}) or Carp::croak(
414							"$name is not an ARRAY reference"
415							);
416	0	0				0	unless ( @{$self->{$name}} ) {
	0					0
417	0					0	delete $self->{$name};
418	0					0	return 1;
419							}
420
421							# Check we only got Regexp objects
422	0					0	my @filters = @{$self->{$name}};
	0					0
423	0	0				0	if ( scalar grep { ! Params::Util::_INSTANCE($_, 'Regexp') } @filters ) {
	0					0
424	0					0	return $self->_error("$name can only contains Regexp filters");
425							}
426
427							# Build the anonymous sub
428							$self->{$name} = sub {
429	0			0		0	foreach my $regexp ( @filters ) {
430	0	0				0	return 1 if $_ =~ $regexp;
431							}
432	0					0	return '';
433	0					0	};
434
435	0					0	1;
436							}
437
438							# Encapsulate the actual extraction mechanism
439							sub _extract_archive {
440	1			1		1057	my ($self, $gz, $to) = @_;
441
442							# Do a one-shot separate decompression because for some reason
443							# the default on-the-fly decompression is horridly memory
444							# innefficientm, allocating and freeing massive blocks of memory
445							# for every single block that gets read in.
446	1					6	my $archive = $self->_extract_gz( $gz );
447
448							# IO::Zlib::tell will cause problems and Archive::Tar
449							# tries to use it by default, so invoke it with a
450							# file handle to MAKE it do the right thing.
451	1	50				11	my $io = IO::File->new( $archive, "r" )
452							or die "Failed to open $archive";
453
454							# Some hints to Archive::Tar to make it behave to make it
455							# work better on Win32, and to ignore the ownership crap
456							# that we don't care about.
457	1					137	local $Archive::Tar::WARN = 0;
458	1					4	local $Archive::Tar::CHOWN = 0;
459	1					3	local $Archive::Tar::CHMOD = 0;
460
461							# Load the archive
462	1					3	my $tar = eval {
463	1					12	Archive::Tar->new( $io );
464							};
465	1	50	33			2400	if ( $@ or ! $tar ) {
466	0					0	return $self->_tar_error("Loading of $archive failed");
467							}
468
469							# Get the complete list of files
470	1					4	my @files = eval {
471	1					8	$tar->list_files( [ 'name', 'size' ] )
472							};
473	1	50				137	return $self->_tar_error("Loading of $archive failed") if $@;
474
475							# Filter to get just the ones we want
476	1					4	@files = map { $_->{name} } grep { $_->{size} } @files;
	2					10
	5					11
477	1	50				7	if ( $self->{extract_filter} ) {
478	1					4	@files = grep &{$self->{extract_filter}}, @files;
	2					25
479							}
480
481							# Iterate and extract each file
482	1					292	File::Path::mkpath( $to, $self->{trace}, $self->{dirmode} );
483	1					5	foreach my $wanted ( sort @files ) {
484							# Where to extract to
485	1					28	my $to_file = File::Spec->catfile( $to, $wanted );
486	1					40	my $to_dir = File::Basename::dirname( $to_file );
487	1					179	File::Path::mkpath( $to_dir, $self->{trace}, $self->{dirmode} );
488	1					24	$self->log("write $to_file\n");
489
490	1					142	my $rv;
491							SCOPE: {
492	1					5	$rv = eval {
	1					2
493	1					8	$tar->extract_file( $wanted, $to_file );
494							};
495							}
496	1	50	33			6947	if ( $@ or ! $rv ) {
497							# There was an error during the extraction
498	0					0	$self->_tar_error( " ... failed" );
499	0	0				0	if ( -e $to_file ) {
500							# Remove any partial file left behind
501	0					0	File::Remove::remove( $to_file );
502							}
503	0					0	return 1;
504							}
505							}
506
507							# Clean up
508	1					27	$tar->clear;
509	1					139	undef $tar;
510	1					54	$io->close;
511	1					35	File::Remove::remove( $archive );
512
513	1					340	return 1;
514							}
515
516							# Extract a gz-compressed file to a temp file
517							my $counter = 0;
518							sub _extract_gz {
519	1			1		2	my $self = shift;
520	1					3	my $gz = shift;
521	1					4	my $tar = ++$counter . '.tar';
522	1					13	my $file = File::Spec->catfile(
523							$self->{tempdir}, $tar,
524							);
525	1	50				10	IO::Uncompress::Gunzip::gunzip( $gz => $file )
526							or die "Failed to uncompress $gz";
527	1					4923	return $file;
528							}
529
530							sub _tar_error {
531	0			0			my $self = shift;
532
533							# Get and clean up the message
534	0						my $message = shift;
535	0	0	0				if ( ! $message and $self->{current_file} ) {
536	0						$message = "Expansion of $self->{current_file} failed";
537							}
538	0	0					if ( ! $message ) {
539	0						$message = "Expansion of file failed";
540							}
541	0	0					$message .= " (Archive::Tar warning)" if $@ =~ /Archive::Tar warning/;
542	0						$message .= "\n";
543
544	0						$self->log($message);
545							}
546
547							1;
548
549							=pod
550
551							=head1 SUPPORT
552
553							Bugs should always be submitted via the CPAN bug tracker
554
555							L
556
557							For other issues, contact the maintainer
558
559							=head1 AUTHOR
560
561							Adam Kennedy Eadamk@cpan.orgE
562
563							=head1 SEE ALSO
564
565							L
566
567							=head1 COPYRIGHT
568
569							Funding provided by The Perl Foundation.
570
571							Copyright 2005 - 2012 Adam Kennedy.
572
573							This program is free software; you can redistribute
574							it and/or modify it under the same terms as Perl itself.
575
576							The full text of the license can be found in the
577							LICENSE file included with this module.
578
579							=cut