File Coverage

blib/lib/File/CachingFind.pm
Criterion Covered Total %
statement 114 122 93.4
branch 59 74 79.7
condition 3 6 50.0
subroutine 14 14 100.0
pod 7 7 100.0
total 197 223 88.3


line stmt bran cond sub pod time code
1             package File::CachingFind;
2             #
3             # Copyright 2002 Thomas Dorner
4             #
5             # Author: see end of file
6             # Created: 9. April 2002
7             #
8             # This program is free software; you can redistribute it and/or modify
9             # it under the same terms as Perl itself.
10              
11             =head1 NAME
12              
13             File::CachingFind - find files within cached search paths (e.g. include files)
14              
15             =head1 SYNOPSIS
16              
17             use File::CachingFind;
18              
19             $includes = File::CachingFind->new(Path => ['/usr/local/include',
20             '/usr/include']);
21             $stdio = $includes->findFirstInPath('stdio.h');
22              
23              
24             =head1 DESCRIPTION
25              
26             C is useful for repeated file searches within a
27             path of directories. It caches the contents of its search and
28             supports two different methods of fuzzy search, a normalize function
29             and regular expressions. See the different METHODS for details.
30              
31             =head1 METHODS
32              
33             =over 4
34              
35             =cut
36              
37             #########################################################################
38              
39             require 5.006;
40 3     3   13816 use strict;
  3         7  
  3         102  
41 3     3   16 use warnings;
  3         5  
  3         108  
42              
43 3     3   27 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
  3         9  
  3         382  
44              
45             require Exporter;
46              
47             @ISA = qw(Exporter);
48             @EXPORT = qw();
49             @EXPORT_OK = qw();
50             $VERSION = '0.67';
51              
52 3     3   18 use Carp;
  3         5  
  3         279  
53 3     3   16 use Cwd 'abs_path';
  3         12  
  3         148  
54 3     3   3419 use DirHandle;
  3         7238  
  3         5432  
55              
56             #########################################################################
57              
58             =item B - create a new File::CachingFind object
59              
60             $obj = File::CachingFind->new(Path =>
61             $reference_to_list_of_directories,
62             Normalize => $reference_to_function,
63             Filter => $regular_expression,
64             NoSoftlinks => $true_or_false);
65              
66             Example:
67              
68             $win32_includes =
69             File::CachingFind->new
70             (Path =>
71             ['.!', '/cygdrive/C/Programme/DevStudio/VC/include'],
72             Normalize => sub{lc @_},
73             Filter => '\.h$');
74              
75             This is the constructor for a cache to the filenames of one or more
76             directories. It has one mandatory and three optional parameters. The
77             cache build is a hash using the normalized filename without any
78             directory parts in it as a key for retrieval. Each key of course can
79             point to one or more real, full filenames.
80              
81             =over 4
82              
83             =item B< Path>
84              
85             is the mandatory parameter. It must contain a reference to list of
86             directories. Both relative and absolute paths are possible. Normally
87             the directory itself and all its subdirectories are cached. If the
88             directory name is followed by (ends with) an exclamation mark, the
89             subdirectories are ignored.
90              
91             =item B< Normalize>
92              
93             is an optional code reference. The function referenced to must take
94             exactly one string parameter (the filename withot its directory parts)
95             as input and returns the string in a normalized fashion. If this
96             result is not the empty string it's used as key for the cache
97             (otherwise the filename is ignored). If no code reference is given,
98             the unmodified filename is used as key for the cache.
99              
100             =item B< Filter>
101              
102             is an optional regular expression used for caching only certain files
103             of the directories (those matching the regular expression). If no
104             filter is given, every file is cached.
105              
106             =item B< NoSoftlinks>
107              
108             is an optional flag telling if the caching of softlinks should be
109             inhibited. Normally the names of ordinary files as well as the name
110             of softlinks are cached. Set the flag to true, if this is not wanted.
111              
112             =back
113              
114             =cut
115              
116             # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
117             sub new
118             {
119 9     9 1 1946 my $this = shift;
120 9   33     61 my $class = ref($this) || $this;
121 9         25 my %newObject = ();
122 9         13 local $_;
123              
124             # clone object (if applicable):
125 9 50       34 if (ref($this))
126             {
127 0         0 $newObject{Path} = $this->{Path};
128 0         0 $newObject{Norm} = $this->{Norm};
129 0         0 $newObject{Filter} = $this->{Filter};
130 0         0 $newObject{NoLink} = $this->{NoLink};
131             }
132              
133             # analyze parameters:
134 9         35 my %args = @_;
135 9         42 foreach (keys %args)
136             {
137 17 100       107 if (/^Path$/i)
    100          
    100          
    50          
138             {
139 9 50       30 croak $_, ' is not a reference to an array'
140             unless 'ARRAY' eq ref($args{$_});
141 9         32 $newObject{Path} = $args{$_};
142             }
143             elsif (/^Normali[zs]e$/i)
144             {
145 1 50       5 croak $_, ' is not a reference to a function'
146             unless 'CODE' eq ref($args{$_});
147 1         4 $newObject{Norm} = $args{$_};
148             }
149             elsif (/^Filter$/i)
150             {
151 6 50       21 croak $_, ' is not scalar' unless '' eq ref($args{$_});
152 6         21 $newObject{Filter} = $args{$_};
153             }
154             elsif (/^NoSoftlinks$/i)
155             {
156 1 50       10 croak $_, ' is not scalar' unless '' eq ref($args{$_});
157 1         5 $newObject{NoLink} = $args{$_};
158             }
159             else
160             {
161 0         0 croak 'unknown parameter ', $_, ' passed to ', __PACKAGE__;
162             }
163             }
164              
165             # check for completeness:
166 9 50       40 croak 'no path defined' unless defined $newObject{Path};
167              
168             # cache files with full names and priorities in object:
169 9         17 my %fullname = ();
170 9         21 $newObject{Fullname} = \%fullname;
171 9         14 my %priority = ();
172 9         19 $newObject{Priority} = \%priority;
173 9         13 my $priority = 0;
174 9         14 foreach (@{$newObject{Path}})
  9         21  
175             {
176 10         67 my $recursive = ! s/!$//; # handle no-recursive flag
177 10 100       139 next unless -d $_;
178 9         134 _parse_directory(\%newObject, abs_path($_), $recursive, ++$priority);
179             }
180              
181             # now we're finished:
182 9         317 bless \%newObject, $class;
183             }
184              
185              
186             #########################################################################
187              
188             =item B - locate all files with a given (normalized) name
189              
190             @list = $obj->findInPath($a_file_name);
191              
192             Example:
193              
194             @time_h = $includes->findInPath('time.h');
195              
196             This method returns all full filenames (including the directory parts)
197             of all files in the cache of the object, which have the same
198             normalized filename as the parameter passed to this method. The
199             parameter itself will be normalized as well before comparizion.
200              
201             On a standard Unix system the list in aboves example should at least
202             contain /usr/include/time.h and /usr/include/sys/time.h, provided
203             $includes is similar to the one defined at the very beginning of this
204             documentation.
205              
206             If no file is found, an empty list is returned.
207              
208             =cut
209              
210             # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
211             sub findInPath
212             {
213 13     13 1 2281 my ($this, $name) = @_;
214             # apply normalization:
215 13 100       57 $name = &{$this->{Norm}}($name) if $this->{Norm};
  1         5  
216             # return list:
217 13 100       90 if (! defined $this->{Fullname}->{$name})
    100          
    50          
218             {
219 3         14 return ();
220             }
221             elsif ('' eq ref($this->{Fullname}->{$name}))
222             {
223 3         15 return ($this->{Fullname}->{$name});
224             }
225             elsif ('ARRAY' eq ref($this->{Fullname}->{$name}))
226             {
227 7         11 return @{$this->{Fullname}->{$name}};
  7         59  
228             }
229             else
230             {
231 0         0 confess('internal error in ', __PACKAGE__,
232             '(please report this bug): unexpected reference type "',
233             ref($this->{Fullname}->{$name}), '"');
234             }
235             }
236              
237             #########################################################################
238              
239             =item B - locate first file with a given (normalized) name
240              
241             @list = $obj->findFirstInPath($a_file_name);
242              
243             Example:
244              
245             $includes2 =
246             File::CachingFind->new(Path => ['/usr/include!',
247             '/usr/include/sys!']);
248             $time_h = $includes2->findFirstInPath('time.h');
249              
250             This method returns the first full filename (including the directory
251             parts) of all files in the cache of the object. The search is similar
252             to the one in the method B. The function will search the
253             cache in the order of the paths given to the constructor (B).
254              
255             On a standard Unix system above example returns /usr/include/time.h.
256             A call to C<$includes-EfindFirstInPath('time.h')> (see
257             B) would return either /usr/include/time.h or
258             /usr/include/sys/time.h (indeterministic).
259              
260             If no file is found, undef is returned.
261              
262             =cut
263              
264             # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
265             sub findFirstInPath
266             {
267 2     2 1 317 my ($this) = @_;
268 2         16 my @list = findInPath(@_);
269 2 100       10 return undef if 0 == @list;
270 1         7 @list = sort {$this->{Priority}->{$a} <=> $this->{Priority}->{$b}} @list;
  3         9  
271 1         3 return $list[0];
272             }
273              
274             #########################################################################
275              
276             =item B - locate best file with a given (normalized) name
277              
278             @list = $obj->findBestInPath($a_file_name,
279             $reference_to_comparison_function);
280              
281             Example:
282              
283             $time_h =
284             $includes2->findBestInPath
285             ('time.h',
286             sub{ length($_[1]) <=> length($_[0]) });
287              
288             This method returns the best full filename (including the directory
289             parts) of all files in the cache of the object. The search is similar
290             to the one in the method B. All files found are compared
291             using the given comparision function (similar to comparision functions
292             given to sort, except that it uses real parameters). If more than one
293             file remains, the order of the paths given to the constructor (B)
294             will be considered as well (as in B).
295              
296             On a standard Unix system above example returns
297             /usr/include/sys/time.h as it has a longer full filename than
298             /usr/include/time.h.
299              
300             If no file is found, undef is returned.
301              
302             =cut
303              
304             # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
305             sub findBestInPath
306             {
307 1     1 1 141 my ($this, $name, $rCompare) = @_;
308 1 50       6 croak 'third parameter is not a reference to a function'
309             unless 'CODE' eq ref($rCompare);
310 1         4 my @list = findInPath($this, $name);
311 1 50       4 return undef if 0 == @list;
312 3         7 @list =
313             sort {
314 1         4 my $order = &$rCompare($a, $b);
315             return
316 3 100       24 $order != 0 ? $order :
317             $this->{Priority}->{$a} <=> $this->{Priority}->{$b}
318             } @list;
319 1         3 return $list[0];
320             }
321              
322             #########################################################################
323              
324             =item B - locate all files matching a regular expression
325              
326             @list = $obj->findMatch($regular_expression);
327              
328             Example:
329              
330             @std_h = $includes2->findMatch('^(?i:std)');
331              
332             This method returns all full filenames (including the directory parts)
333             of all files in the cache of the object, which match the given regular
334             expression. Note, that the regular expression won't be normalized,
335             I have to make sure that it matches the normalized filenames.
336              
337             On a standard Unix system the list in aboves example should at least
338             contain /usr/include/stdio.h and /usr/include/stdlib.h, provided
339             $includes2 is similar to the used in prior examples. Your mileage may
340             vary, especially on different systems. Note that the example uses a
341             case insensitive match.
342              
343             If no file is found, an empty list is returned.
344              
345             =cut
346              
347             # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
348             sub findMatch
349             {
350 4     4 1 289 my ($this, $regexp) = @_;
351 4         7 my @result = ();
352             # loop all files:
353 4         5 while (my ($name, $files) = each %{$this->{Fullname}})
  36         101  
354             {
355 32 100       139 next unless $name =~ m/$regexp/;
356 5 100       16 if ('' eq ref($files)) { push @result, $files; }
  2 50       5  
357 3         5 elsif ('ARRAY' eq ref($files)) { push @result, @{$files}; }
  3         8  
358             else
359             {
360 0         0 confess('internal error in ', __PACKAGE__,
361             '(please report this bug): unexpected reference type "',
362             ref($files), '"');
363             }
364             }
365 4         17 return @result;
366             }
367              
368             #########################################################################
369              
370             =item B - locate first file matching a regular expression
371              
372             @list = $obj->findFirstMatch($regular_expression);
373              
374             Example:
375              
376             $std_h = $includes2->findFirstMatch('^std');
377              
378             This method returns the first full filename (including the directory
379             parts) of all files in the cache of the object matching the given
380             regular expression. It works similar to B and will
381             search the cache in the order of the paths given to the constructor
382             (B). Thus it may be of limited use as the algorithm chosing
383             between more than one file of the same path is indeterministic.
384             B would be a better choice in most circumstances though
385             it is a bit slower most of the times.
386              
387             On a standard Unix system above example returns /usr/include/stdio.h
388             or /usr/include/stdlib.h or another matching file (indeterministic).
389              
390             If no file is found, undef is returned.
391              
392             =cut
393              
394             # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
395             sub findFirstMatch
396             {
397 1     1 1 139 my ($this) = @_;
398 1         4 my @list = findMatch(@_);
399 1 50       5 return undef if 0 == @list;
400 1         4 @list = sort {$this->{Priority}->{$a} <=> $this->{Priority}->{$b}} @list;
  3         9  
401 1         3 return $list[0];
402             }
403              
404             #########################################################################
405              
406             =item B - locate best file matching a regular expression
407              
408             @list = $obj->findBestMatch($regular_expression,
409             $reference_to_comparison_function);
410              
411             Example:
412              
413             $std_h =
414             $includes2->findBestMatch
415             ('^std',
416             sub{ length($_[0]) <=> length($_[1]) });
417              
418             This method returns the best full filename (including the directory
419             parts) of all files in the cache of the object matching the given
420             regular expression. As in B all files found are
421             compared using the given comparision function followed by the order of
422             the paths given to the constructor (B).
423              
424             On a standard Unix system above example returns /usr/include/stdio.h
425             unless there is another include with an even shorter name beginning
426             with /usr/include/std.
427              
428             If no file is found, undef is returned.
429              
430             =cut
431              
432             # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
433             sub findBestMatch
434             {
435 1     1 1 338 my ($this, $regexp, $rCompare) = @_;
436 1 50       6 croak 'third parameter is not a reference to a function'
437             unless 'CODE' eq ref($rCompare);
438 1         4 my @list = findMatch($this, $regexp);
439 1 50       9 return undef if 0 == @list;
440 2         8 @list =
441             sort {
442 1         7 my $order = &$rCompare($a, $b);
443             return
444 2 100       16 $order != 0 ? $order :
445             $this->{Priority}->{$a} <=> $this->{Priority}->{$b}
446             } @list;
447 1         5 return $list[0];
448             }
449              
450             #########################################################################
451             #########################################################################
452             ######### internal methods / functions following #########
453             #########################################################################
454             #########################################################################
455              
456             #########################################################################
457             # call: (recursive, only used in new) #
458             # _parse_directory($rNewObject, $directory, $recursive, #
459             # $priority); #
460             # parameters: #
461             # $rNewObject reference to (yet) unblessed new object #
462             # $dir directory (full absolute path!) to parse #
463             # $recursive flag, if subdirectories should be parsed as well#
464             # $priority priority of the current path #
465             # description: #
466             # The function parses the directory $directory and puts its #
467             # relevant filenames and directories into $rNewObject->{Fullname}.#
468             # The priority is cached in $rNewObject->{Priority}. #
469             # global variables used: #
470             # - #
471             # returns: #
472             # - #
473             #########################################################################
474             sub _parse_directory
475             {
476 90     90   170 my ($rNewObject, $directory, $recursive, $priority) = @_;
477 90         102 local $_;
478             # loop directory:
479 90         377 my $dirh = new DirHandle $directory;
480 90         4631 while (defined($_ = $dirh->read))
481             {
482 460 100       4624 next if m/^\.\.?$/o; # ignore . and ..
483 280         515 my $fullname = $directory.'/'.$_;
484             # handle directories:
485 280 100       5884 if (-d $fullname)
486             {
487 87 100       289 _parse_directory($rNewObject, $fullname, $recursive, $priority)
488             if $recursive;
489 87         2780 next;
490             }
491 193         3542 lstat $fullname;
492             # filter non-files / non-links (if applicable):
493 193 100       399 if (! -f _)
494             {
495 2 100 66     16 next if -l _ and $rNewObject->{NoLink};
496             }
497             # apply filter:
498 192 100       540 if (defined $rNewObject->{Filter})
499             {
500 136 100       807 next unless m/$rNewObject->{Filter}/;
501             }
502             # apply normalization:
503 74 100       146 $_ = &{$rNewObject->{Norm}}($_) if $rNewObject->{Norm};
  3         11  
504             # put filename/fullname in cache:
505 74 100       259 if (! defined $rNewObject->{Fullname}->{$_})
    100          
    50          
506             {
507 62         187 $rNewObject->{Fullname}->{$_} = $fullname;
508             }
509             elsif ('' eq ref($rNewObject->{Fullname}->{$_}))
510             {
511 9         37 $rNewObject->{Fullname}->{$_} =
512             [ $rNewObject->{Fullname}->{$_}, $fullname ];
513             }
514             elsif ('ARRAY' eq ref($rNewObject->{Fullname}->{$_}))
515             {
516 3         5 push @{$rNewObject->{Fullname}->{$_}}, $fullname;
  3         12  
517             }
518             else
519             {
520 0         0 confess('internal error in ', __PACKAGE__,
521             '(please report this bug): unexpected reference type "',
522             ref($rNewObject->{Fullname}->{$_}), '"');
523             }
524             # cache priority:
525 74         335 $rNewObject->{Priority}->{$fullname} = $priority;
526             }
527             }
528              
529             1;
530             __END__