File Coverage

blib/lib/CPAN/Metrics.pm
Criterion Covered Total %
statement 13 15 86.6
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 20 90.0


line stmt bran cond sub pod time code
1             package CPAN::Metrics;
2              
3             =pod
4              
5             =head1 NAME
6              
7             CPAN::Metrics - Create and maintain a Perl::Metrics database for all of CPAN
8              
9             =head1 SYNOPSIS
10              
11             # Do a CPAN::Metrics run
12             my $metrics = CPAN::Metrics->new(
13             remote => 'http://mirrors.kernel.org/cpan/',
14             local => '/home/adam/.minicpan',
15             extract => '/home/adam/.cpanmetrics',
16             metrics => '/home/adam/.cpanmetrics/metrics.sqlite',
17             )->run;
18              
19             =head1 DESCRIPTION
20              
21             C<CPAN::Metrics> is a combination of L<CPAN::Mini::Extract> and L<Perl::Metrics>.
22              
23             It lets you pull out all of CPAN (for various definitions of "all") and
24             run L<Perl::Metrics> on it to generate massive amounts of metrics data
25             on the 16,000,000 lines of code in CPAN.
26              
27             =head2 Resource Usage
28              
29             While it might make it relatively easy to write the B<code> to "process
30             all of CPAN", make no mistake that it's going to take you a LOT of
31             computing resources to do it. And especially so the first time.
32              
33             To do a single run should require 1-10 gigabytes of disk space, up to
34             several hundred megabytes of memory, and hours (or days) of CPU time.
35              
36             The result will be a SQLite database containing somewhere between several
37             hundred thousand and several million rows of metrics data.
38              
39             What you do with the metrics after B<that> is up to you.
40              
41             =head1 METHODS
42              
43             =cut
44              
45 1     1   1159 use 5.005;
  1         4  
  1         44  
46 1     1   4 use strict;
  1         1  
  1         31  
47 1     1   13 use base 'CPAN::Mini::Extract';
  1         2  
  1         962  
48 1     1   1071029 use Carp 'croak';
  1         2  
  1         53  
49 1     1   440 use Perl::Metrics ();
  0            
  0            
50              
51             use vars qw{$VERSION}; # declare $VERSION as a package global
52             BEGIN { # assign inside BEGIN so the value is set at compile time
53             $VERSION = '0.08';
54             }
55              
56              
57              
58              
59              
60             #####################################################################
61             # Constructor
62              
63             =pod
64              
65             =head2 new
66              
67             The C<new> constructor creates a new CPAN metrics processor.
68              
69             Although it is created as an object, due to L<Perl::Metrics> you can
70             only create a single object within a single process. (I think)
71              
72             It takes a variety of different parameters.
73              
74             =over
75              
76             =item minicpan arguments
77              
78             =back
79              
80             Returns a new C<CPAN::Metrics> object, or dies on error.
81              
82             =cut
83              
84             sub new {
               # Constructor. Passes this class's default options, followed by the
               # caller's arguments, to the parent constructor; then requires a
               # 'metrics' value on the resulting object and hands it to
               # Perl::Metrics->import. Returns the object, or croaks when
               # 'metrics' is missing.
85             my $class = ref $_[0] ? ref shift : shift; # invocant may be an object or a class name
86              
87             # Call up to get the base object
               # The defaults below are listed first and the caller's @_ last;
               # presumably later pairs take precedence in the parent -- TODO confirm.
88             my $self = $class->SUPER::new(
89             force => 1,
90             skip_perl => 1,
91             extract_check => 1,
92             path_filters => [
93             qr/\bAcme\b/i,
94             qr/\bPDF\-API2\b/i,
95             qr/\bPerl6\b/i,
96             ],
97             # Remove some known troublemakers
98             module_filters => [
99             qr/^Acme::/i,
100             qr/^Meta::/i,
101             qr/\bPerl6\b/i,
102             ],
                # The filter below matches against $_ and returns 1 or 0; presumably
                # 1 means "extract this file" -- confirm against CPAN::Mini::Extract.
                # The rules are order-dependent: the first matching return wins.
103             extract_filter =>
104             sub {
105             return 0 if /\:/; # 0 for anything containing a colon
106             return 0 if /\binc\b/; # 0 for anything containing the word "inc"
107             return 1 if /\.pl$/; # 1 for .pl files (tested before the examples rule)
108             return 0 if /\bexamples?\b/; # 0 for the word "example" or "examples"
109             if ( /\bt\b/ ) { # the word "t" is present: only .t files get 1
110             return 1 if /\.t$/;
111             } else { # no word "t": only .pm files get 1
112             return 1 if /\.pm$/;
113             }
114             return 0; # everything else gets 0
115             },
116             @_,
117             );
118              
119             # Check and set the metrics database
120             unless ( $self->{metrics} ) { # a missing or false value is fatal
121             croak("Metrics database param 'metrics' was not provided");
122             }
123             Perl::Metrics->import( $self->{metrics} ); # presumably selects the database for Perl::Metrics -- confirm
124              
125             $self; # the sub's value is the constructed object
126             }
127              
128             =pod
129              
130             =head2 run
131              
132             The C<run> method launches the CPAN metrics processor. It will
133             synchronize its L<CPAN::Mini> mirror from the remote server, expanding
134             any new archives, and removing old ones. Once updated, the directory
135             will be reindexed to update it in the metrics database, and any required
136             processing done to add the resulting metrics needed.
137              
138             And then (a C<very> long time later) it will stop. :)
139              
140             Oh, and return true. Any errors will cause an exception (i.e. die)
141              
142             =cut
143              
144             sub run {
                # Runs the parent class's run() with the caller's arguments, then
                # processes the extraction directory via process_index().
145             my $self = shift;
146             $self->SUPER::run( @_ ); # remaining arguments are forwarded unchanged
147             $self->process_index; # last statement, so its result (1) is what run() returns
148             }
149              
150             sub process_index {
                # Passes the object's 'extract' directory to Perl::Metrics->process_index
                # with $Perl::Metrics::TRACE switched on, and returns 1.
151             my $self = shift;
152              
153             # Process the extraction directory
154             local $Perl::Metrics::TRACE = 1; # 'local' restores the previous value when this sub exits
155             $self->trace("Indexing and processing documents in $self->{extract}...\n");
156             Perl::Metrics->process_index( $self->{extract} );
157              
158             return 1;
159             }
160              
161             1;
162              
163             =pod
164              
165             =head1 TO DO
166              
167             - Improve Perl::Metrics to add needed things
168              
169             - Improve CPAN::Mini::Extract to add needed things
170              
171             - Improve CPAN::Metrics to add needed things
172              
173             - Get all three of the above to use accessors
174              
175             - Possibly consider intentionally B<not> caching so that
176             we don't end up with a multi-multi-gigabyte parse cache.
177              
178             =head1 SUPPORT
179              
180             Bugs should be reported via the CPAN bug tracker at
181              
182             L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Metrics>
183              
184             For other issues, contact the author.
185              
186             =head1 AUTHOR
187              
188             Adam Kennedy E<lt>adamk@cpan.orgE<gt>, L<http://ali.as/>
189              
190             =head1 COPYRIGHT
191              
192             Copyright 2005 - 2008 Adam Kennedy.
193              
194             This program is free software; you can redistribute
195             it and/or modify it under the same terms as Perl itself.
196              
197             The full text of the license can be found in the
198             LICENSE file included with this module.
199              
200             =cut