File Coverage

lib/Module/Dependency/Indexer.pm
Criterion Covered Total %
statement 131 175 74.8
branch 46 90 51.1
condition 11 29 37.9
subroutine 17 21 80.9
pod 3 8 37.5
total 208 323 64.4


line stmt bran cond sub pod time code
1             package Module::Dependency::Indexer;
2              
3 2     2   9940 use strict;
  2         6  
  2         82  
4              
5 2     2   13 use Cwd;
  2         5  
  2         189  
6 2     2   12 use File::Find;
  2         4  
  2         160  
7 2     2   10 use File::Spec;
  2         4  
  2         48  
8 2     2   15 use File::Basename;
  2         4  
  2         160  
9 2     2   405 use Module::Dependency::Info;
  2         12  
  2         109  
10              
11 2     2   10 use vars qw/$VERSION $UNIFIED @NOINDEX $check_shebang/;
  2         10  
  2         8738  
12              
13             $VERSION = (q$Revision: 6643 $ =~ /(\d+)/g)[0];
14              
15             @NOINDEX = qw(.AppleDouble);
16             my %ignore_names = map { $_ => 1 } qw(
17             CVS
18             .svn
19             .cpan
20             );
21             $check_shebang = 1;
22              
23             our $index_dir;
24              
25             sub setShebangCheck {
26 0     0 1 0 $check_shebang = shift;
27             }
28              
29             sub setIndex {
30 2     2 1 6622 my $file = _makeAbsolute(shift);
31 2         12 return Module::Dependency::Info::setIndex($file);
32             }
33              
34             sub buildIndex {
35 2     2 0 8 my @dirs = map { _makeAbsolute($_) } @_;
  2         7  
36              
37 2         10 TRACE("Running search to build indexes on @dirs");
38 2         6 $UNIFIED = {};
39 2         77 my $find_options = {
40             wanted => \&_wanted,
41             follow => 1, # follow symbolic links
42             follow_skip => 2, # visit everything only once
43             no_chdir => 1,
44             };
45 2         20 my $cwd = getcwd();
46 2         9 for $index_dir (@dirs) {
47 2 50       68 chdir $index_dir or die "Can't chdir $index_dir: $!";
48 2         9 TRACE("Indexing directory $index_dir");
49 2         325 File::Find::find( $find_options, $index_dir);
50             }
51 2 50       35 chdir $cwd or die "Can't return to $cwd dir: $!";
52 2         7 _reverseDepend();
53             }
54              
55             sub makeIndex {
56 2     2 1 9 buildIndex(@_);
57 2         10 Module::Dependency::Info::storeIndex($UNIFIED);
58 2         169147 return 1;
59             }
60              
61             ######### PRIVATE
62              
63             sub _makeAbsolute {
64 4     4   18 my $dir = $_[0];
65 4 50       97 if ( File::Spec->file_name_is_absolute($dir) ) {
66 4         28 TRACE("$dir is an absolute path");
67 4         21 return $dir;
68             }
69             else {
70 0         0 my $abs = File::Spec->rel2abs($dir);
71 0         0 TRACE("$dir is relative - changed to $abs");
72 0         0 return $abs;
73             }
74             }
75              
76             # work out and install reverse dependencies
77             sub _reverseDepend {
78 2     2   4 foreach my $Obj ( values( %{ $UNIFIED->{'allobjects'} } ) ) {
  2         9  
79 22         33 my $item = $Obj->{'package'};
80 22         43 TRACE("Resolving dependencies for $item");
81              
82             # iterate over dependencies...
83 22         21 foreach my $dep ( @{ $Obj->{'depends_on'} } ) {
  22         34  
84             # XXX disabled check for existing item
85             # that way packages that are used but not indexed get an obect
86             # created for them that captures what depends on them, which is
87             # often very useful information
88 58         48 if ( 1 or exists $UNIFIED->{'allobjects'}->{$dep} ) {
89              
90             # put reverse dependencies into packages
91 58         104 TRACE("Installing reverse dependency in $dep");
92 58   100     179 my $obj = $UNIFIED->{'allobjects'}->{$dep} ||= { key => $dep };
93 58         49 push @{ $obj->{'depended_upon_by'} }, $item;
  58         174  
94             }
95             }
96             }
97             }
98              
99             sub _wanted {
100 24     24   35 my $fname = $File::Find::name;
101             # strip off the current start directory (see buildIndex) to give a relative path
102 24         180 $fname =~ s/^\Q$index_dir\E\/?//;
103              
104 24         3305 my ($name, $path, $suffix) = fileparse($fname, qr{\..*});
105 24         91 local $_ = "$name$suffix";
106              
107 24 50       61 if ( $ignore_names{$_} ) {
108 0         0 TRACE("Ignoring $_ ($fname)");
109 0         0 $File::Find::prune = 1;
110 0         0 return;
111             }
112             # XXX generalize into a compiled regex from patterns defined at start/externally
113 24 50       108 if (m/(\~|\.bak)$/) {
114 0         0 TRACE("Ignoring $_ ($fname)");
115 0         0 return;
116             }
117              
118             # ignore anything that's not a plain file
119 24 100       631 return unless -f $fname;
120              
121 22         35 my $is = '';
122 22 100 33     2759 if (m/\.pm$/) {
    100          
    50          
123 16         23 $is = 'module';
124             }
125             elsif (m/\.plx?$/) {
126 4         10 $is = 'script';
127             }
128             elsif ( $check_shebang && -s $fname ) {
129 2 50       55 if ( open( F, "<$fname" ) ) {
130 2   50     25 my $first_line = || '';
131 2         24 close F;
132 2 50       15 $is = 'script' if $first_line =~ /^#!.*perl/;
133             # XXX temp hack to pick up most test script - needs something better
134 2 50 33     10 $is = 'script' if m/\.t$/ && $first_line =~ /^\s*(use\s+|#|package|$)/;
135             }
136             else {
137 0         0 warn "Can't open $fname: $!\n";
138             }
139             }
140              
141 22 100       52 if ($is eq 'script') {
    50          
142 6         19 TRACE("script $fname");
143 6   50     18 my $obj = _parseScript($fname, $index_dir) || return;
144 6         10 my $key = $obj->{'filename'};
145 6         12 $obj->{key} = $key;
146              
147 6 50       18 if (my $prev = $UNIFIED->{'allobjects'}->{ $key }) {
148 0         0 warn_duplicate($prev, $obj, "Filename $key");
149             }
150             else {
151 6         7 push @{ $UNIFIED->{'scripts'} }, $key;
  6         14  
152             }
153 6         283 $UNIFIED->{'allobjects'}->{ $key } = $obj;
154             }
155             elsif ($is eq 'module') {
156 16         49 TRACE("module $fname");
157 16   50     32 my $obj = _parseModule($fname, $index_dir) || return;
158 16         28 my $key = $obj->{'package'};
159 16         35 $obj->{key} = $key;
160 16 50       48 if (my $prev = $UNIFIED->{'allobjects'}->{ $key }) {
161 0         0 warn_duplicate($prev, $obj, "Package $key");
162             }
163 16         650 $UNIFIED->{'allobjects'}->{ $key } = $obj;
164             }
165             else {
166 0         0 TRACE("ignored $fname");
167             }
168             }
169              
170             sub warn_duplicate {
171 0     0 0 0 my ($prev_obj, $curr_obj, $what) = @_;
172 0         0 my $prev_file = $prev_obj->{filename};
173 0         0 my $curr_file = $curr_obj->{filename};
174 0 0       0 if ($prev_file eq $curr_file) {
175             # were we're indexing multiple top-level dirs (not recommended) there might
176             # be duplicate filenames found - disambiguate this case:
177 0         0 $prev_file = "$prev_obj->{filerootdir}/$prev_file";
178 0         0 $curr_file = "$curr_obj->{filerootdir}/$curr_file";
179             }
180 0 0       0 my $cmp = files_indentical($prev_obj->{filerootdir},$curr_file) ? "files differ" : "files indentical";
181 0         0 warn "$what seen multiple times ($prev_file superseded by $curr_file, $cmp)\n";
182             }
183              
184             sub files_indentical {
185 0     0 0 0 my ($f1, $f2) = @_;
186 0 0       0 return 1 if $f1 eq $f2;
187 0 0       0 warn "File $f1: $!" unless defined( my $s1 = -s $f1 );
188 0 0       0 warn "File $f2: $!" unless defined( my $s2 = -s $f2 );
189 0 0       0 return 0 if $s1 != $s2;
190 0         0 return system('cmp', '-s', $f1, $f2) == 0;
191             }
192              
193              
194             # Get data from a module file, returns a dependency unit object
195             sub _parseFile {
196 22     22   27 my ($file, $rootdir) = @_;
197              
198             # ensure key contains a slash so we can use the rule that
199             # "if it has a slash in the name then it's not a package"
200 22 50       77 $file = "./$file" unless $file =~ m:/:;
201              
202 22         108 my $self = {
203             'filename' => $file,
204             'filerootdir' => $rootdir,
205             'depends_on' => [],
206             'depended_upon_by' => [],
207             };
208              
209 22         24 my %seen;
210              
211             # go through the file and try to find out some things
212 22         37 local *FILE;
213 22 50       772 open( FILE, $file ) or do { warn("Can't open file $file for read: $!"); return undef; };
  0         0  
  0         0  
214              
215 22         27 my $in_pod;
216 22         302 while () {
217 142         501 s/\r?\n$//;
218 142 100       314 if ($in_pod) {
219 12 100       31 $in_pod = 0 if /^=cut/;
220 12         32 next;
221             }
222              
223             # get the package name
224 130 100       267 if (m/^\s*package\s+([\w\:]+)\s*;/) {
225             # XXX currently only record the first package seen
226 16 50       47 if (exists $self->{'package'}) {
227 0         0 warn "Can only index one package per file currently, ignoring $1 at line $. in $file\n";
228 0         0 next;
229             }
230 16         48 $self->{'package'} = $1;
231             }
232              
233             # get the dependencies
234 130 100       351 if (m/^\s*use\s+([\w\:]+)/) {
235 56 50       183 push( @{ $self->{'depends_on'} }, $1 ) unless ( $seen{$1}++ );
  56         152  
236             }
237              
238             # get the dependencies
239 130 100       268 if (m/^\s*require\s+([^\s;]+)/) { # "require Bar;" or "require 'Foo/Bar.pm' if $wibble;'
240 2         4 my $required = $1;
241 2 50       11 if ($required =~ m/^([\w\:]+)$/) {
    0          
242 2 50       8 push @{ $self->{'depends_on'} }, $required unless $seen{$required}++;
  2         6  
243             }
244             elsif ($required =~ m/^["'](.*?\.pm)["']$/) { # simple Foo/Bar.pm case
245 0         0 ($required = $1) =~ s/\.pm$//;
246 0         0 $required =~ s!/!::!g;
247 0 0       0 push @{ $self->{'depends_on'} }, $required unless $seen{$required}++;
  0         0  
248             }
249             else {
250 0 0 0     0 warn "Can't interpret $_ at line $. in $file\n"
      0        
251             unless m!sys/syscall.ph!
252             or m!dumpvar.pl!
253             or $required =~ /^5\./;
254             }
255             }
256              
257             # the 'base' pragma - SREZIC
258 130 100       240 if (m/^\s*use\s+base\s+(.*)/) {
259 2         268972 require Safe;
260 2         40140 my $safe = new Safe;
261 2         2676 ( my $list = $1 ) =~ s/\s+\#.*//;
262 2         6 $list =~ s/[\r\n]//;
263 2   33     21 while ( $list !~ /;\s*$/ && ( $_ = ) ) {
264 0         0 s/\s+#.*//;
265 0         0 s/[\r\n]//;
266 0         0 $list .= $_;
267             }
268 2         10 $list =~ s/;\s*$//;
269 2         11 my (@mods) = $safe->reval($list);
270 2 50       3631 warn "Unable to eval $_ at line $. in $file: $@\n" if $@;
271 2         13 foreach my $mod (@mods) {
272 0 0       0 push( @{ $self->{'depends_on'} }, $mod ) unless ( $seen{$mod}++ );
  0         0  
273             }
274             }
275              
276 130 100 66     525 $in_pod = 1 if m/^=\w+/ && !m/^=cut/;
277 130 50       562 last if m/^\s*__(END|DATA)__/;
278             }
279 22         212 close FILE;
280              
281 22         133 return $self;
282             }
283              
284             # Get data from a module file, returns a dependency unit object
285             sub _parseModule {
286 16     16   26 my ($file, $rootdir) = @_;
287 16 50       27 my $self = _parseFile($file, $rootdir)
288             or return;
289 16 50       46 if ( !$self->{'package'} ) {
290 0         0 warn "No package found in $file\n";
291 0         0 return undef;
292             }
293 16         46 return $self;
294             }
295              
296             # Get data from a program file, returns a dependency unit object
297             sub _parseScript {
298 6     6   9 my ($file, $rootdir) = @_;
299 6 50       15 my $self = _parseFile($file, $rootdir)
300             or return;
301              
302             # XXX force package for script file to be the filename
303 6 50 33     19 warn "Ignored package ($self->{'package'}) within script $file\n"
304             if $self->{'package'} && $self->{'package'} ne 'main';
305 6         16 $self->{'package'} = $self->{filename};
306              
307 6         18 return $self;
308             }
309              
310 110     110 0 133 sub TRACE { }
311 0     0 0   sub LOG { }
312              
313             1;
314              
315             =head1 NAME
316              
317             Module::Dependency::Indexer - creates the databases used by the dependency mapping module
318              
319             =head1 SYNOPSIS
320              
321             use Module::Dependency::Indexer;
322             Module::Dependency::Indexer::setIndex( '/var/tmp/dependency/unified.dat' );
323             Module::Dependency::Indexer::makeIndex( $directory, [ $another, $andanother... ] );
324             Module::Dependency::Indexer::setShebangCheck( 0 );
325              
326             =head1 DESCRIPTION
327              
328             This module looks at all .pm, .pl and .plx files within and below a given directory/directories
329             (found with File::Find), reads through them and extracts some information about them.
330             If the shebang check is turned on then it also looks at the first line of all
331             other files, to see if they're perl programs too. We extract this information:
332              
333             =over 4
334              
335             =item *
336              
337             The name of the package (e.g. 'Foo::Bar') or the name of the script (e.g. 'chat.pl')
338              
339             =item *
340              
341             The full filesystem location of the file.
342              
343             =item *
344              
345             The dependencies of the file - i.e. the packages that it 'use's or 'require's
346              
347             =item *
348              
349             The reverse dependencies - i.e. what other scripts and modules B use or require
350             the file. It can't, of course, know about 'use' statements in files it hasn't examined.
351              
352             =back
353              
354             When it has extracted all this information it uses Storable to write the data to disk in the indexfile location.
355              
356             This search is quite an expensive operation, taking around 10 seconds for the site_perl directory here.
357             However once the information has been gathered it's extremely fast to use.
358              
359             =head1 FUNCTIONS
360              
361             =over 4
362              
363             =item setIndex( $filename )
364              
365             This function tells the module where to write out the datafile. You can set this, make an index
366             of some directory of perl stuff, set it to something else, index a different folder, etc., in order
367             to build up many indices. This only affects this module - you need to tell ...::Info where to look
368             for datafiles independently of this module.
369              
370             Default is /var/tmp/dependence/unified.dat
371              
372             =item makeIndex( $directory, [ $another, $andanother... ] )
373              
374             Builds, and stores to the current data file, a SINGLE database for all the files found under
375             all of the supplied directories. To create multiple indexes, run this method many times with a setIndex
376             inbetween each so that you don't clobber the previous run's datafile.
377              
378             =item setShebangCheck( BOOLEAN )
379              
380             Turns on or off the checking of #! lines for all files that are not .pl, .plx or .pm filenames.
381             By default we do check the #! lines.
382              
383             =back
384              
385             =head1 NOTE ABOUT WHAT IS INDEXED
386              
387             A database entry is made for B. This makes the generally good assumption that a .pl file is
388             a script that is not use/required by anything else, and a .pm file is a package file which may be use/required
389             by many other files. Database entries ARE NOT made just because a file is use/required - hence the database
390             will not contain an entry for 'strict' or 'File::Find' (for example) unless you explicitly index your perl's lib/ folder.
391              
392             E.g., if 'Local::Foo.pm' uses strict and File::Find and we index it, its entry in the database will show that it
393             depends on strict and File::Find, as you'd expect. It's just that we won't create an entry for 'strict' on that basis alone.
394              
395             In practice this behaviour is what you want - you want to see how the mass of perl in your cgi-bin and site_perl folders
396             fits together (for example), or maybe just a single project in CVS.
397             You may of course include your perl lib directory in the database should you want to see the dependencies involving
398             the standard modules, but generally that's not relevant.
399              
400             =head1 USE OF THE DATA
401              
402             Now you've got a datafile which links all the scripts and modules in a set of directories. Use ...::Info to get at the data.
403             Note that the data is stored using Storable's nstore method which _should_ make these indexes portable across platforms.
404             Not tested though.
405              
406             =head1 ADVICE, GETTING AT DATA
407              
408             As Storable is so fast, you may want to make one big index of all folders where perl things are. Then you can load this
409             datafile back up, extract the entry for, say, Local::Foo and examine its dependencies (and reverse dependencies).
410             Based on what you find, you can get the entries for Local::Foo::Bar and Local::Foo::Baz (things used by Local::Foo) or
411             perhaps Local::Stuff (which uses Local::Foo). Then you can examine those records, etc. This is how ...::Grapher builds
412             the tree of dependencies, basically.
413              
414             You use Module::Dependency::Info to get at these records using a nice simple API. If you're feeling keen you can just
415             grab the entire object - but that's in the ...::Info module.
416              
417             Here we have a single index for all our local perl code, and that lives in /var/tmp/dependence/unified.dat - the default
418             location. Other applications just use that file.
419              
420             =head1 DEBUGGING
421              
422             There is a TRACE stub function, and the module uses TRACE() to log activity. Override our TRACE with your own routine, e.g.
423             one that prints to STDERR, to see these messages.
424              
425             =head1 SEE ALSO
426              
427             Module::Dependency and the README files.
428              
429             =head1 VERSION
430              
431             $Id: Indexer.pm 6643 2006-07-12 20:23:31Z timbo $
432              
433             =cut
434              
435