File Coverage

blib/lib/LaTeXML/Util/Pathname.pm
Criterion Covered Total %
statement 35 230 15.2
branch 5 136 3.6
condition 0 35 0.0
subroutine 9 33 27.2
pod 18 25 72.0
total 67 459 14.6


line stmt bran cond sub pod time code
1             # /=====================================================================\ #
2             # | LaTeXML::Util::Pathname | #
3             # | Pathname Utilities for LaTeXML | #
4             # |=====================================================================| #
5             # | Part of LaTeXML: | #
6             # | Public domain software, produced as part of work done by the | #
7             # | United States Government & not subject to copyright in the US. | #
8             # |---------------------------------------------------------------------| #
9             # | Bruce Miller #_# | #
10             # | http://dlmf.nist.gov/LaTeXML/ (o o) | #
11             # \=========================================================ooo==U==ooo=/ #
12             #======================================================================
13             # Sanely combine features of File::Spec and File::Basename
14             # Somehow, both modules tend to bite me at random times.
15             # eg. sometimes Basename's fileparse doesn't extract extension.
16             # sometimes File::Spec seems to do too many filesystem checks (gets slow!)
17             # File::Spec->splitpath "may or may not ... trailing '/'" ... Huh?
18             #======================================================================
19             # My first instinct is that this should bless the pathnames,
20             # but strings as pathnames come so naturally in perl;
21             # But I may still do it...
22             #======================================================================
23             # Some portability changes for Windows, thanks to Ioan Sucan.
24             #======================================================================
25             # Packages in the LaTeXML::Util package set have no dependence on LaTeXML
26             # objects or context.
27             #======================================================================
28             package LaTeXML::Util::Pathname;
29 21     21   80 use strict;
  21         21  
  21         520  
30 21     21   72 use warnings;
  21         23  
  21         460  
31 21     21   67 use File::Spec;
  21         23  
  21         339  
32 21     21   9073 use File::Copy;
  21         70613  
  21         1139  
33 21     21   7831 use File::Which;
  21         13844  
  21         938  
34 21     21   93 use Cwd;
  21         24  
  21         1308  
35 21     21   86 use base qw(Exporter);
  21         21  
  21         8828  
36             our @EXPORT = qw( &pathname_find &pathname_findall &pathname_kpsewhich
37             &pathname_make &pathname_canonical
38             &pathname_split &pathname_directory &pathname_name &pathname_type
39             &pathname_timestamp
40             &pathname_concat
41             &pathname_relative &pathname_absolute
42             &pathname_is_absolute &pathname_is_contained
43             &pathname_is_url &pathname_is_literaldata
44             &pathname_protocol
45             &pathname_cwd &pathname_chdir &pathname_mkdir &pathname_copy
46             &pathname_installation);
47              
48             # NOTE: For absolute pathnames, the directory component starts with
49             # whatever File::Spec considers to be the volume, or "/".
50             #======================================================================
51             # Ioan Sucan suggests switching this to '\\' for windows, but notes
52             # that it works as it is, so we'll leave it (for now).
53             ### my $SEP = '/'; # [CONSTANT]
54             # Some indicators that this is not sufficient? (calls to libraries/externals???)
55             # PRELIMINARY test, probably need to be even more careful
56             my $ISWINDOWS = $^O =~ /^(MSWin|NetWare|cygwin)/i;
57             my $SEP = ($ISWINDOWS ? '\\' : '/'); # [CONSTANT]
58             my $KPATHSEP = ($ISWINDOWS ? ';' : ':'); # [CONSTANT]
59             my $LITERAL_RE = '(?:literal)(?=:)'; # [CONSTANT]
60             my $PROTOCOL_RE = '(?:https|http|ftp)(?=:)'; # [CONSTANT]
61              
62             #======================================================================
63             # pathname_make(dir=>dir, name=>name, type=>type);
64             # Returns a pathname. This will be an absolute path if
65             # dir (or the first, if dir is an array), is absolute.
66             sub pathname_make {
67 0     0 1 0 my (%pieces) = @_;
68 0         0 my $pathname = '';
69 0 0       0 if (my $dir = $pieces{dir}) {
70 0 0       0 my @dirs = (ref $dir eq 'ARRAY' ? @$dir : ($dir));
71 0         0 $pathname = shift(@dirs);
72 0         0 foreach my $d (@dirs) {
73 0         0 $pathname =~ s|\Q$SEP\E$||; $dir =~ s|^\Q$SEP\E||;
  0         0  
74 0         0 $pathname .= $SEP . $dir; } }
75 0 0 0     0 $pathname .= $SEP if $pathname && $pieces{name} && $pathname !~ m|\Q$SEP\E$|;
      0        
76 0 0       0 $pathname .= $pieces{name} if $pieces{name};
77 0 0       0 $pathname .= '.' . $pieces{type} if $pieces{type};
78 0         0 return pathname_canonical($pathname); }
79              
80             # Split the pathname into components (dir,name,type).
81             # If pathname is absolute, dir starts with volume or '/'
82             sub pathname_split {
83 0     0 1 0 my ($pathname) = @_;
84 0         0 $pathname = pathname_canonical($pathname);
85 0         0 my ($vol, $dir, $name) = File::Spec->splitpath($pathname);
86             # Hmm, for /, we get $dir = / but we want $vol='/' ?????
87 0 0 0     0 if ($vol) { $dir = $vol . $dir; }
  0 0       0  
88 0         0 elsif (File::Spec->file_name_is_absolute($pathname) && !File::Spec->file_name_is_absolute($dir)) { $dir = $SEP . $dir; }
89             # $dir shouldn't end with separator, unless it is root.
90 0 0       0 $dir =~ s/\Q$SEP\E$// unless $dir eq $SEP;
91 0         0 my $type = '';
92 0 0       0 if ($name =~ s/\.([^\.]+)$//) { $type = $1; }
  0         0  
93 0         0 return ($dir, $name, $type); }
94              
95 21     21   94 use Carp;
  21         21  
  21         41086  
96              
97             # This likely needs portability work!!! (particularly regarding urls, separators, ...)
98             # AND, care about symbolic links and collapsing ../ !!!
99             sub pathname_canonical {
100 231     231 1 180 my ($pathname) = @_;
101 231 50       696 if ($pathname =~ /^($LITERAL_RE)/) {
102 0         0 return $pathname; }
103             # Don't call pathname_is_absolute, etc, here, cause THEY call US!
104 231 50       293 confess "Undefined pathname!" unless defined $pathname;
105             # File::Spec->canonpath($pathname); }
106 231         158 $pathname =~ s|^~|$ENV{HOME}|;
107             # We CAN canonicalize urls, but we need to be careful about the // before host!
108             # OHHH, but we DON'T want \ for separator!
109 231         160 my $urlprefix = undef;
110 231 50       919 if ($pathname =~ s|^($PROTOCOL_RE//[^/]*)/|/|) {
111 0         0 $urlprefix = $1; }
112              
113 231 50       310 if ($pathname =~ m|//+/|) {
114 0         0 Carp::cluck "Recursive pathname? : $pathname\n"; }
115             ## $pathname =~ s|//+|/|g;
116 231         176 $pathname =~ s|/\./|/|g;
117             # Collapse any foo/.. patterns, but not ../..
118 231         560 while ($pathname =~ s|/(?!\.\./)[^/]+/\.\.(/\|$)|$1|) { }
119 231         180 $pathname =~ s|^\./||;
120 231 50       448 return (defined $urlprefix ? $urlprefix . $pathname : $pathname); }
121              
122             # Convenient extractors;
123             sub pathname_directory {
124 0     0 1   my ($pathname) = @_;
125 0           my ($dir, $name, $type) = pathname_split($pathname);
126 0           return $dir; }
127              
128             sub pathname_name {
129 0     0 1   my ($pathname) = @_;
130 0           my ($dir, $name, $type) = pathname_split($pathname);
131 0           return $name; }
132              
133             sub pathname_type {
134 0     0 1   my ($pathname) = @_;
135 0           my ($dir, $name, $type) = pathname_split($pathname);
136 0           return $type; }
137              
138             # Note that this returns ONLY recognized protocols!
139             sub pathname_protocol {
140 0     0 0   my ($pathname) = @_;
141 0 0         return ($pathname =~ /^($PROTOCOL_RE|$LITERAL_RE)/ ? $1 : 'file'); }
142              
143             #======================================================================
144             sub pathname_concat {
145 0     0 1   my ($dir, $file) = @_;
146 0 0         return $file unless $dir;
147 0 0 0       return $dir if !defined $file || ($file eq '.');
148 0   0       return pathname_canonical(File::Spec->catpath('', $dir || '', $file)); }
149              
150             #======================================================================
151             # Is $pathname an absolute pathname ?
152             # pathname_is_absolute($pathname) => (0|1)
153             sub pathname_is_absolute {
154 0     0 1   my ($pathname) = @_;
155 0   0       return $pathname && File::Spec->file_name_is_absolute(pathname_canonical($pathname)); }
156              
157             sub pathname_is_url {
158 0     0 1   my ($pathname) = @_;
159 0   0       return $pathname && $pathname =~ /^($PROTOCOL_RE)/ && $1; } # Other protocols?
160              
161             sub pathname_is_literaldata {
162 0     0 0   my ($pathname) = @_;
163 0 0         if ($pathname =~ /^($LITERAL_RE)/) { return $1; } else { return; } }
  0            
  0            
164              
165             # Check whether $pathname is contained in (ie. underneath) $base
166             # Returns the relative pathname if it is underneath; undef otherwise.
167             sub pathname_is_contained {
168 0     0 1   my ($pathname, $base) = @_;
169             # after assuring that both paths are absolute,
170             # get $pathname relative to $base
171 0           my $rel = pathname_canonical(pathname_relative(pathname_absolute($pathname),
172             pathname_absolute($base)));
173             # If the relative pathname starts with "../" that it apparently is NOT underneath base!
174 0 0         return ($rel =~ m|^\.\.(?:/\|\Q$SEP\E)| ? undef : $rel); }
175              
176             # pathname_relative($pathname,$base) => $relativepathname
177             # If $pathname is an absolute, non-URL pathname,
178             # return the pathname relative to $base,
179             # else just return its canonical form.
180             # Actually, if it's a url and $base is also url, to SAME host! & protocol...
181             # we _could_ make relative...
182             sub pathname_relative {
183 0     0 1   my ($pathname, $base) = @_;
184 0           $pathname = pathname_canonical($pathname);
185 0 0 0       return ($base && pathname_is_absolute($pathname) && !pathname_is_url($pathname)
186             ? File::Spec->abs2rel($pathname, pathname_canonical($base))
187             : $pathname); }
188              
189             sub pathname_absolute {
190 0     0 1   my ($pathname, $base) = @_;
191 0           $pathname = pathname_canonical($pathname);
192 0 0 0       return (!pathname_is_absolute($pathname) && !pathname_is_url($pathname)
    0          
193             ? File::Spec->rel2abs($pathname, ($base ? pathname_canonical($base) : pathname_cwd()))
194             : $pathname); }
195              
196             #======================================================================
197             # Actual file system operations.
198             sub pathname_timestamp {
199 0     0 1   my ($pathname) = @_;
200 0 0         return -f $pathname ? (stat($pathname))[9] : 0; }
201              
202             sub pathname_cwd {
203 0 0   0 1   if (my $cwd = cwd()) {
204 0           return pathname_canonical($cwd); }
205             else {
206             # Fatal not imported
207 0           die "INTERNAL: Could not determine current working directory (cwd)"
208             . "Perhaps a problem with Perl's locale settings?"; } }
209              
210             sub pathname_chdir {
211 0     0 0   my ($directory) = @_;
212 0           return chdir($directory); }
213              
214             sub pathname_mkdir {
215 0     0 1   my ($directory) = @_;
216 0 0         return unless $directory;
217 0           $directory = pathname_canonical($directory);
218 0           my ($volume, $dirs, $last) = File::Spec->splitpath($directory);
219 0           my (@dirs) = (File::Spec->splitdir($dirs), $last);
220 0           for (my $i = 0 ; $i <= $#dirs ; $i++) {
221 0           my $dir = File::Spec->catpath($volume, File::Spec->catdir(@dirs[0 .. $i]), '');
222 0 0         if (!-d $dir) {
223 0 0         mkdir($dir) or return; } }
224 0           return $directory; }
225              
226             # copy a file, preserving attributes, if possible.
227             # Why doesn't File::Copy preserve attributes on Unix !?!?!?
228             sub pathname_copy {
229 0     0 1   my ($source, $destination) = @_;
230             # If it _needs_ to be copied:
231 0           $source = pathname_canonical($source);
232 0           $destination = pathname_canonical($destination);
233 0 0 0       if ((!-f $destination) || (pathname_timestamp($source) > pathname_timestamp($destination))) {
234 0 0         if (my $destdir = pathname_directory($destination)) {
235 0 0         pathname_mkdir($destdir) or return; }
236             ### if($^O =~ /^(MSWin32|NetWare)$/){ # Windows
237             ### # According to Ioan, this should work:
238             ### system("xcopy /P $source $destination")==0 or return; }
239             ### else { # Unix
240             ### system("cp --preserve=timestamps $source $destination")==0 or return; }
241             # Hopefully this portably copies, preserving timestamp.
242 0 0         copy($source, $destination) or return;
243 0           my ($atime, $mtime) = (stat($source))[8, 9];
244 0           utime $atime, $mtime, $destination; # And set the modification time
245             }
246 0           return $destination; }
247              
248             #======================================================================
249             # pathname_find($pathname, paths=>[...], types=>[...]) => $absolute_pathname;
250             # Find a file corresponding to $pathname returning the absolute,
251             # completed pathname if found, else undef
252             # * If $pathname is a not an absolute pathname
253             # (although it may still have directory components)
254             # then if search $paths are given, search for it relative to
255             # each of the directories in $paths,
256             # else search for it relative to the current working directory.
257             # * If types is given, then search (in each searched directory)
258             # for the first file with the given extension.
259             # The extension "" (empty string) means to search for the exact name.
260             # * If types is not given, search for the exact named file
261             # without additional extension.
262             # * If installation_subdir is given, look in that subdirectory of where LaTeXML
263             # was installed, by appending it to the paths.
264              
265             # This is presumably daemon safe...
266             my @INSTALLDIRS = grep { (-f "$_.pm") && (-d $_) }
267             map { pathname_canonical($_ . $SEP . 'LaTeXML') } @INC; # [CONSTANT]
268              
269             sub pathname_installation {
270 0     0 0   return $INSTALLDIRS[0]; }
271              
272             sub pathname_find {
273 0     0 1   my ($pathname, %options) = @_;
274 0 0         return unless $pathname;
275 0           my @paths = candidate_pathnames($pathname, %options);
276 0           foreach my $path (@paths) {
277 0 0         return $path if -f $path; }
278 0           return; }
279              
280             sub pathname_findall {
281 0     0 1   my ($pathname, %options) = @_;
282 0 0         return unless $pathname;
283 0           my @paths = candidate_pathnames($pathname, %options);
284 0           return grep { -f $_ } @paths; }
  0            
285              
286             # It's presumably cheep to concatinate all the pathnames,
287             # relative to the cost of testing for files,
288             # and this simplifies overall.
289             sub candidate_pathnames {
290 0     0 0   my ($pathname, %options) = @_;
291 0           my @dirs = ();
292 0 0         $pathname = pathname_canonical($pathname) unless $pathname eq '*';
293 0 0         my ($pathdir, $name, $type) = ($pathname eq '*' ? (undef, '*', undef) : pathname_split($pathname));
294 0 0 0       $name .= '.' . $type if (defined $type) && ($type ne '');
295             # generate the set of search paths we'll use.
296 0 0         if (pathname_is_absolute($pathname)) {
297 0           push(@dirs, $pathdir); }
298             else {
299 0           my $cwd = pathname_cwd();
300 0 0         if ($options{paths}) {
301 0           foreach my $p (@{ $options{paths} }) {
  0            
302             # Complete the search paths by prepending current dir to relative paths,
303 0 0         my $pp = pathname_concat((pathname_is_absolute($p) ? pathname_canonical($p) : pathname_concat($cwd, $p)),
304             $pathdir);
305 0 0         push(@dirs, $pp) unless grep { $pp eq $_ } @dirs; } } # but only include each dir ONCE
  0            
306 0 0         push(@dirs, pathname_concat($cwd, $pathdir)) unless @dirs; # At least have the current directory!
307             # And, if installation dir specified, append it.
308 0 0         if (my $subdir = $options{installation_subdir}) {
309 0           push(@dirs, map { pathname_concat($_, $subdir) } @INSTALLDIRS); } }
  0            
310              
311             # extract the desired extensions.
312 0           my @exts = ();
313 0 0         if ($options{type}) {
314 0           push(@exts, '.' . $options{type}); }
315 0 0         if ($options{types}) {
316 0           foreach my $ext (@{ $options{types} }) {
  0            
317 0 0         if ($ext eq '') { push(@exts, ''); }
  0 0          
    0          
318             elsif ($ext eq '*') {
319 0           push(@exts, '.*', ''); }
320             elsif ($pathname =~ /\.\Q$ext\E$/i) {
321 0           push(@exts, ''); }
322             else {
323 0           push(@exts, '.' . $ext); } } }
324 0 0         push(@exts, '') unless @exts;
325              
326 0           my @paths = ();
327             # Now, combine; precedence to leading directories.
328 0           foreach my $dir (@dirs) {
329 0           foreach my $ext (@exts) {
330 0 0         if ($name eq '*') { # Unfortunately, we've got to test the file system NOW...
    0          
331 0 0         if ($ext eq '.*') { # everything
332 0 0         opendir(DIR, $dir) or next;
333 0           push(@paths, map { pathname_concat($dir, $_) } grep { !/^\./ } readdir(DIR));
  0            
  0            
334 0           closedir(DIR); }
335             else {
336 0 0         opendir(DIR, $dir) or next; # ???
337 0           push(@paths, map { pathname_concat($dir, $_) } grep { /\Q$ext\E$/ } readdir(DIR));
  0            
  0            
338 0           closedir(DIR); } }
339             elsif ($ext eq '.*') { # Unfortunately, we've got to test the file system NOW...
340 0 0         opendir(DIR, $dir) or next; # ???
341 0           push(@paths, map { pathname_concat($dir, $_) } grep { /^\Q$name\E\.\w+$/ } readdir(DIR));
  0            
  0            
342 0           closedir(DIR); }
343             else {
344 0           push(@paths, pathname_concat($dir, $name . $ext)); } } }
345 0           return @paths; }
346              
347             #======================================================================
348             our $kpsewhich = which($ENV{LATEXML_KPSEWHICH} || 'kpsewhich');
349             our $kpse_cache = undef;
350              
351             sub pathname_kpsewhich {
352 0     0 0   my (@candidates) = @_;
353 0 0         return unless $kpsewhich;
354 0 0         build_kpse_cache() unless $kpse_cache;
355 0           foreach my $file (@candidates) {
356 0 0         if (my $result = $$kpse_cache{$file}) {
357 0           return $result; } }
358             # If we've failed to read the cache, try directly calling kpsewhich
359             # For multiple calls, this is slower in general. But MiKTeX, eg., doesn't use texmf ls-R files!
360 0           my $files = join(' ', @candidates);
361 0 0 0       if ($kpsewhich && (my $result = `"$kpsewhich" $files`)) {
362 0 0         if ($result =~ /^\s*(.+?)\s*\n/s) {
363 0           return $1; } }
364 0           return; }
365              
366             sub build_kpse_cache {
367 0     0 0   $kpse_cache = {}; # At least we've tried.
368 0 0         return unless $kpsewhich;
369             # This finds ALL the directories looked for for any purposes, including docs, fonts, etc
370 0           my $texmf = `"$kpsewhich" --expand-var \'\\\$TEXMF\'`; chomp($texmf);
  0            
371             # These are directories which contain the tex related files we're interested in.
372             # (but they're typically below where the ls-R indexes are!)
373 0           my $texpaths = `"$kpsewhich" --show-path tex`; chomp($texpaths);
  0            
374 0           my @filters = ();
375 0           foreach my $path (split(/$KPATHSEP/, $texpaths)) {
376 0           $path =~ s/^!!//; $path =~ s|//+$|/|;
  0            
377 0 0         push(@filters, $path) if -d $path; }
378 0           $texmf =~ s/^["']//; $texmf =~ s/["']$//;
  0            
379 0           $texmf =~ s/^\s*\\\{(.+?)}\s*/$1/s;
380 0           my @dirs = split(/,/, $texmf);
381 0           foreach my $dir (@dirs) {
382 0           $dir =~ s/^!!//;
383             # Presumably if no ls-R, we can ignore the directory?
384 0 0         if (-f "$dir/ls-R") {
385 0           my $LSR;
386             my $subdir;
387 0           my $skip = 0; # whether to skip entries in the current subdirectory.
388 0 0         open($LSR, '<', "$dir/ls-R") or die "Cannot read $dir/ls-R: $!";
389 0           while (<$LSR>) {
390 0           chop;
391 0 0         next unless $_;
392 0 0         if (/^%/) { }
    0          
    0          
393             elsif (/^(.*?):$/) { # Move to a new subdirectory
394 0           $subdir = $1;
395 0           $subdir =~ s|^\./||; # remove prefix
396 0           my $d = $dir . '/' . $subdir; # Hopefully OS safe, for comparison?
397 0           $skip = !grep { $d =~ /^\Q$_\E/ } @filters; } # check if one of the TeX paths
  0            
398             elsif (!$skip) {
399             # Is it safe to use '/' here?
400 0           my $sep = '/';
401 0           $$kpse_cache{$_} = join($sep, $dir, $subdir, $_); } }
402 0           close($LSR); } }
403 0           return; }
404              
405             #======================================================================
406             1;
407              
408             __END__