File Coverage

blib/lib/BackPAN/Version/Discover.pm
Criterion Covered Total %
statement 45 160 28.1
branch 0 32 0.0
condition 0 8 0.0
subroutine 15 23 65.2
pod 2 2 100.0
total 62 225 27.5


line stmt bran cond sub pod time code
1             package BackPAN::Version::Discover;
2              
3             our $VERSION = '0.01';
4              
5 2     2   34114 use warnings;
  2         7  
  2         85  
6 2     2   13 use strict;
  2         4  
  2         86  
7              
8 2     2   1824 use Path::Class qw( dir );
  2         185640  
  2         199  
9 2     2   3215 use File::Spec::Functions qw( splitdir );
  2         3604  
  2         368  
10 2     2   9354 use File::Find::Rule qw();
  2         35662  
  2         63  
11 2     2   2645 use Module::Info qw();
  2         20294  
  2         57  
12 2     2   11905 use CPAN qw();
  2         785132  
  2         337  
13 2     2   5222 use List::MoreUtils qw( uniq );
  2         8050  
  2         369  
14 2     2   2958 use ExtUtils::Installed qw();
  2         18483  
  2         77  
15 2     2   2773 use Module::Extract::VERSION qw();
  2         1932  
  2         47  
16 2     2   2209 use Module::Build::ModuleInfo qw();
  2         21650  
  2         83  
17 2     2   21 use Config;
  2         5  
  2         101  
18              
19 2     2   2208 use BackPAN::Version::Discover::Results qw();
  2         6  
  2         136  
20              
21             # private patched version of BackPAN::Index. Just a hack for
22             # now, the next release may have the needed functionality.
23 2     2   1987 use BackPAN::Version::Discover::_BackPAN::Index qw();
  2         7  
  2         53  
24              
25 2     2   4232 use Data::Dumper;
  2         13790  
  2         3894  
26              
27             # yeah, it's only "OO" because that's what all the cool kids do.
28             sub new {
29 0     0 1   my ($class) = @_;
30              
31 0           return bless {}, $class;
32             }
33              
34              
35             # scan the given directories (@INC by default) for modules,
36             # analyze them, then use the CPAN and BackPAN indices to
37             # guess exactly which version of which distribution each
38             # one came from.
39             sub scan {
40 0     0 1   my ($self, %args) = @_;
41              
42 0 0         my @search_dirs = $self->_cleanup_dirs( @{ $args{dirs} || \@INC } );
  0            
43              
44 0           my @pm_files = $self->_find_pm_files( @search_dirs );
45              
46 0           my @module_names = $self->_get_module_names( @pm_files );
47              
48             #print Dumper \@search_dirs, \@pm_files, \@module_names; exit;
49              
50             # get a hash of {mod_name => Module::Info obj} and an array with the
51             # names of modules that module::info couldn't load/parse.
52 0           my ($mi_objs, $non_loadable_mod_names) =
53             $self->_get_module_info_objs( \@module_names, \@search_dirs );
54              
55             # try to match installed modules to their CPAN distributions and get a
56             # bunch of other info on the possible dists and the modules. also, track
57             # modules that will be skipped for various reasons. (eg, no matching dist,
58             # couldn't parse out needed info, core/vendor-packaged modules, etc...)
59 0           my ($dist_info, $skipped_modules) =
60             $self->_get_dist_info( $mi_objs, \@search_dirs );
61              
62             # add non-loadable to skipped 'cause, well, they're skipped, too, right?
63 0           $skipped_modules->{bad_mod_info} = $non_loadable_mod_names;
64              
65             # finally, use all this data to try to guess which releases we actualy
66             # have installed. Some dists will have no matching release. $dist_info
67             # will also have various new bits of info added, if we need it later.
68 0           my ($bp_releases, $no_release_matched) =
69             $self->_guess_backpan_releases( $dist_info );
70              
71             # the results object will have facilities to get the intresting
72             # info from the results, plus info needed to re-run the same scan
73             # any anything else people may ask for :)
74 0           return BackPAN::Version::Discover::Results->new(
75             releases_matched => $bp_releases,
76             skipped_modules => $skipped_modules,
77             dists_not_matched => $no_release_matched,
78             searched_dirs => \@search_dirs,
79             dist_info => $dist_info,
80             scan_args => \%args,
81             );
82             }
83              
84              
85             # we want to only search dirs that will be useful... therefore, we need to
86             # a. weed out obvious dead-ends like duplicate and non-existent paths.
87             # b. resolve all paths and make them absolute, so the output data is sane.
88             sub _cleanup_dirs {
89 0     0     my ($self, @dirs) = @_;
90              
91 0           my @search_dirs =
92 0           grep { -e } uniq
93 0           map { dir($_)->absolute } #->resolve }
94             @dirs;
95              
96             # no need to return Path::Class: objects
97 0           return map { "$_" } @search_dirs;
  0            
98             }
99              
100              
101             # find all the pm files, relative to each directory
102             # (for easier translation into module/package names)
103             sub _find_pm_files {
104 0     0     my ($self, @search_dirs) = @_;
105              
106 0           my @pm_files;
107 0           for my $dir ( @search_dirs ) {
108 0           push @pm_files, File::Find::Rule
109             ->extras( { follow => 1 } )
110             ->relative
111             ->file
112             ->name( '*.pm' )
113             ->in( $dir );
114             }
115              
116 0           return @pm_files;
117             }
118              
119              
120             # turn pm file paths into module/package names. there may be duplicate
121             # entries, for example from modules installed both as a vendor package
122             # and from CPAN (and therefore in different paths), but we just need
123             # the name for now and will decide later *which* one is important,
124             # specifically, the one that perl would load when used in a script.
125             sub _get_module_names {
126 0     0     my ($self, @pm_files) = @_;
127              
128 0           my @module_names =
129             uniq
130 0           map { join( '::', splitdir( substr( $_, 0, -3 ) ) ) }
131             @pm_files;
132              
133 0           return @module_names;
134             }
135              
136              
137             # construct a Module::Info object for each module but keep
138             # track of invalid names and mods M::I couldn't load (we won't
139             # do anything with them, but it may be useful to know)
140             sub _get_module_info_objs {
141 0     0     my ($self, $module_names, $search_dirs) = @_;
142              
143 0           my @bad_modules;
144 0           my %module_data =
145 0 0         map { @$_ }
146             # filter out modules that couldn't be found/parsed
147 0           grep { defined $_->[1]{mod_info} ? 1 : 0 * push @bad_modules, $_->[0] }
148             # get info on the specific module that would be loaded by perl
149 0 0         map { [ $_, { mod_info => Module::Info->new_from_module( $_, @$search_dirs ) } ] }
150             # filter out invalid module names
151 0           grep { $_ !~ /[^\w\:]/ ? 1 : 0 * push @bad_modules, $_ }
152             @$module_names;
153              
154 0           return \%module_data, \@bad_modules;
155             }
156              
157              
158             # extract/guess a bunch of info from each module, then try to match
159             # each module to a cpan distribution. also, exclude modules from
160             # analysis based on various criteria.
161             # NOTE: this could probably be split into two or more smaller subs.
162             sub _get_dist_info {
163 0     0     my ($self, $module_info_objs, $search_dirs) = @_;
164              
165 0           my %dist_info; # info on dists that our modules match
166 0           my %skipped_modules = ( # info on modules we skip
167             is_core => [], # perl core, not from CPAN
168             is_vendor => [], # NOTE: right now these end up in no_dist_found.
169             no_dist_found => [], # CPAN couldn't match a dist
170             bad_dist_name => [], # dist found, but bad/weird file name (rare)
171             );
172              
173             # an ExtUtils::Installed object for getting info on modules
174             # installed via CPAN or manually with the "mantra". not
175             # sure if the info we get from this is useful yet.
176 0           my $eui_obj = ExtUtils::Installed->new( inc_override => $search_dirs );
177              
178             # note: $mod_data will have various bits of additional
179             # info added to it as we inspect the module.
180             MODULE:
181 0           for my $mod_data ( values %$module_info_objs ) {
182              
183 0           my $mi_obj = $mod_data->{mod_info};
184 0           my $mod_name = $mi_obj->name;
185 0           my $mod_file = $mi_obj->file;
186              
187             # make mi use version.pm objects (or not)
188 0           $mi_obj->use_version( 1 );
189              
190             # skip core mods
191 0 0         if ( $mi_obj->is_core ) {
192 0           push @{ $skipped_modules{is_core} }, $mod_name;
  0            
193 0           next MODULE;
194             }
195              
196             # skip vendor mods
197 0           for my $dir ( @Config{ qw( installvendorarch installvendorlib ) } ) {
198 0 0         if ( dir($dir)->subsumes($mod_file) ) {
199 0           push @{ $skipped_modules{is_vendor} }, $mod_name;
  0            
200 0           next MODULE;
201             }
202             }
203              
204             # look for a version in the module file
205 0           $mod_data->{mi_mod_inst_ver} = $mi_obj->version;
206              
207             # supposedly this uses the same method of version
208             # extraction as mldistwatch
209 0           $mod_data->{mev_mod_inst_ver} =
210             Module::Extract::VERSION->parse_version_safely( $mod_file );
211              
212             # this is (kinda) how Module::Build gets the version...
213             # but messier and not really.
214 0 0 0       if ( eval { "$mod_data->{mi_mod_inst_ver}" } and my $pm_info =
  0            
215 0           eval { Module::Build::ModuleInfo->new_from_file( $mi_obj->file ) }
216             ) {
217 0 0         if (my $ver = $pm_info->version() ) {
218 0 0         $mod_data->{mb_mod_inst_ver} =
    0          
219             ! UNIVERSAL::can($ver, 'is_qv') ?
220             $ver : $ver->is_qv ?
221             $ver->normal : $ver->stringify;
222             }
223             }
224              
225             # what does EU(I|MM) think the version is?
226 0           $mod_data->{eu_mod_inst_ver} = eval { $eui_obj->version( $mod_name ) };
  0            
227              
228              
229             # see if the cpan client knows about this module
230 0           my $cpan_mod = CPAN::Shell->expand( "Module", $mod_name );
231 0 0         if ( ! $cpan_mod ) {
232 0           push @{ $skipped_modules{no_dist_found} }, $mod_name;
  0            
233 0           next MODULE;
234             }
235 0           $mod_data->{cpan_mod_obj} = $cpan_mod;
236              
237             # the cpan client may have yet another way of getting the version.
238 0           $mod_data->{cpan_mod_inst_ver} = $cpan_mod->inst_version;
239              
240             # what does cpan think the latest version of this module is?
241 0           $mod_data->{cpan_mod_latest_ver} = $cpan_mod->cpan_version;
242              
243             # also see what dist file cpan thinks this module belongs to
244 0           my $cpan_file = $cpan_mod->cpan_file;
245 0           $mod_data->{cpan_dist_latest_file} = $cpan_file;
246              
247             # try parse the dist file path for more info...
248             # NOTE: seems that some values of $cpan_file are arbitrary text, but $di
249             # objs are still created? perhaps the CPAN::Module docs can tell me more.
250 0           my $di = CPAN::DistnameInfo->new( $cpan_file );
251 0 0 0       if ( ! $di || ! $di->dist ) {
252             # if no val for dist(), probably hit a weird filename.
253 0           push @{ $skipped_modules{bad_dist_name} }, $mod_name;
  0            
254 0           next MODULE;
255             }
256 0           $mod_data->{cpan_dist_info} = $di;
257              
258 0           my $dist_name = $di->dist;
259 0           my $latest_dist_ver = $di->version;
260 0           $mod_data->{cpan_dist_name} = $dist_name;
261 0           $mod_data->{cpan_dist_latest_ver} = $latest_dist_ver;
262              
263 0           push @{ $dist_info{$dist_name}{$latest_dist_ver}{module_data} }, $mod_data;
  0            
264             }
265              
266 0           return \%dist_info, \%skipped_modules;
267             }
268              
269              
270             # use the info we have to:
271             # a. guess which version of the dist is installed
272             # b. look for a release (dist-name + version) in the backpan index.
273             sub _guess_backpan_releases {
274 0     0     my ($self, $dist_info) = @_;
275              
276             # releases we were able to match are hits
277             # dists where we couldn't find a match are misses
278 0           my %bp_hits; # dist_name => file_path
279             my @bp_misses; # dist_name
280              
281 0           my $bp = BackPAN::Index->new();
282              
283             DIST:
284 0           for my $dist_name ( keys %$dist_info ) {
285             #next unless $dist_name eq "CPAN-Mini";
286 0           for my $latest_dist_ver ( keys %{ $dist_info->{$dist_name} } ) {
  0            
287              
288 0           my $release_data = $dist_info->{$dist_name}{$latest_dist_ver};
289              
290             # THEORY: if we look at all the latest CPAN::Module objects
291             # in this release (supposedly the latest from CPAN) and at
292             # least one module has the same version as the dist, then
293             # we can assume that the "installed version" of that same
294             # module is the version of the dist we want to find as a
295             # release on the backpan.
296              
297 0           my @version_types = qw(
298             mb_mod_inst_ver mev_mod_inst_ver
299             cpan_mod_inst_ver mi_mod_inst_ver
300             );
301              
302 0           my @version_guesses;
303 0           for my $mod_data ( @{ $release_data->{module_data} } ) {
  0            
304 0 0         next unless $latest_dist_ver eq $mod_data->{cpan_mod_latest_ver};
305 0 0         push @version_guesses, grep { $_ and $_ ne 'undef' }
  0            
306 0           @{$mod_data}{@version_types};
307             }
308 0           @version_guesses = reverse uniq sort @version_guesses;
309              
310 0 0         if ( ! @version_guesses ) {
311             # if we couldn't find a correlation between the dist version
312             # and any of the module versions, move on.
313 0           push @bp_misses, $dist_name;
314 0           next DIST;
315             }
316              
317             # maybe we have to latest CPAN version?
318 0 0         if( grep { $_ eq $latest_dist_ver } @version_guesses ) {
  0            
319 0           next DIST;
320             }
321              
322 0           for my $ver_guess ( @version_guesses ) {
323 0 0         if( my $bp_release = $bp->release( $dist_name, $ver_guess ) ) {
324 0           my $rel_path = $bp_release->path;
325 0   0       push @{ $bp_hits{ $dist_name } ||= [] }, "$rel_path";
  0            
326 0           next DIST;
327             }
328             }
329              
330 0           push @bp_misses, $dist_name;
331             }
332             }
333 0           return \%bp_hits, \@bp_misses;
334             }
335              
336              
337             ###
338             1;
339              
340             __END__