File Coverage

blib/lib/URL/Checkout.pm
Criterion Covered Total %
statement 24 145 16.5
branch 0 76 0.0
condition 0 26 0.0
subroutine 8 21 38.1
pod 12 12 100.0
total 44 280 15.7


line stmt bran cond sub pod time code
1             package URL::Checkout;
2              
3 2     2   74178 use warnings;
  2         6  
  2         87  
4 2     2   13 use strict;
  2         5  
  2         79  
5 2     2   2366 use String::ShellQuote;
  2         2254  
  2         183  
6 2     2   2092 use Text::Sprintf::Named;
  2         5563  
  2         120  
7 2     2   15 use Cwd;
  2         4  
  2         144  
8 2     2   14 use File::Path;
  2         6  
  2         113  
9 2     2   3135 use File::Temp;
  2         77276  
  2         209  
10 2     2   21 use Carp;
  2         4  
  2         5730  
11              
12             =head1 NAME
13              
14             URL::Checkout - Get one or multiple files from a remote location
15              
16             =head1 VERSION
17              
18             Version 1.05
19              
20             =cut
21              
22             our $VERSION = '1.05';
23              
24              
25             =head1 SYNOPSIS
26              
27             Retrieve contents from a URL, no matter if the URL specifies a simple file via
28             ftp or http, or a Repository of one of the well known VCS systems, cvs, svn, git, hg,
29             Unlike LWP, this module makes no attempts to be perlish. We liberally call shell
30             commands to do the real work. The author especially likes to call C.
31              
32             use URL::Checkout;
33              
34             my $f = URL::Checkout->new(dest => '/tmp/outdir', verbose => 1);
35             $f->auth($user, $pass);
36             $f->dest($outdir);
37             $f->method('*');
38              
39             # obs://api.opensuse.org/source/home:jnweiger/fate?rev=19
40             # https://svn.suse.de/svn/inttools/trunk/features/fate
41             $url = "ssh://user:pass@scm.somewhere.org/git/repo.git";
42             $f->get($url);
43              
44             $m = $f->find_method($url);
45             $cmd = $f->fmt_cmd($m, $url);
46             chdir($f->dest());
47             system $cmd;
48              
49              
50             =head1 SUBROUTINES/METHODS
51              
52             =head2 new
53              
54             Create a checkout object.
55             It can be configured through several parameters to new, or through similarly named methods.
56             If no destination directory is specified via dest, File::Temp is consulted to create a
57             temporary directory.
58              
59             =head2 auth($user, $pass)
60              
61             An alternative to specifying user, pass with C.
62             Provide authentication credentials for the remote access.
63              
64             =head2 dest($directory)
65              
66             =head2 dest()
67              
68             Set and/or get the destination directory. The directory need not be created ahead of time.
69              
70             =head2 list_methods()
71              
72             Return a hash with method names as keys, detection patterns and retrieval commands.
73             The values in this hash are aliases to the internal values. You can change them to e.g.
74             add a -q flag if you find a command to be too noisy.
75              
76             =head2 describe()
77              
78             Returns a verbal description of the matching rules.
79              
80             =head2 add_method(name, qr{url-match-pattern}, cmd_fmt_string, "Some descriptive text")
81              
82             Multiple commands can be specified for each name. Commands should be written in bourne shell
83             syntax, with the following named sprintf templates: %(user)s, %(pass)s, %(url)s, %(dest)s.
84             Commands that contain %(user)s and/or %(pass)s are ignored, if username and/or password
85             credentials are not given. Example:
86              
87             add_method('git', qr{^(git://.*|\.git/?)$}, "git clone --depth 1 %(url)s");
88              
89             The destination directory is the current working directory while the command runs.
90             The templates are expanded using String::ShellQuote and Text::Sprintf::Named.
91              
92             If an array-ref of patterns is specified instead of a pattern, the patterns
93             should be ordered by decreasing reliability. Methods are tested breadth-first.
94              
95             If a subroutine reference is specified as third parameter, it is called with the URL and the
96             return value of find_method(), and is expected to return a command or an array of commands.
97              
98             =head2 method('*')
99              
100             Limit the method by name. The default '*' means no limitation. An array of
101             method names can be specified, which denotes a first match choice.
102             This is helpful for URLs that do not match anything specific.
103             This is harmless, as it still allows other methods if the URL matches there.
104              
105             =cut
106              
107             sub new
108             {
109 0     0 1   my $self = shift;
110 0   0       my $class = ref($self) || $self;
111 0 0         my %obj = (ref $_[0] eq 'HASH') ? %{$_[0]} : @_;
  0            
112              
113             $obj{_methods} =
114             [
115             { name => 'obs', pat => [qr{^(obs://|https://api\.(opensuse\.org|suse\.de)/(public/)?source/)}],
116             osc => ['osc'], co => ['co', '--current-dir', '--expand-link'],
117             desc => "OpenSUSE Build Service(obs): URLs starting with obs://, https://api.opensuse.org/, https://api.suse.de are handled by 'osc checkout'. Path components /public and /source are stripped, the remaining path components are Project, Package, and optionally File. Project can be written as either a:/b:/c: or a:b:c",
118              
119 0     0     cmd => sub { my ($url, $m) = @_;
120 0 0         my $api = $1 if $url =~ s{^\w+://([^/]+)/+}{};
121 0           $url =~ s{^(public/+)?sources?/+}{};
122 0 0         my $rev = $1 if $url =~ s{[\?&]rev=(\w+)}{};
123 0           $url =~ s{\?.*}{};
124 0           $url =~ s{:/}{:}g;
125 0           my @pp = split m{/+}, $url;
126              
127 0           my @cmd = (@{$m->{osc}}, '-A', "https://$api", @{$m->{co}});
  0            
  0            
128 0 0         push @cmd, '-r', $rev if defined $rev;
129             ## -S aka --server-side-source-service-files, what an ugly name!
130 0           return [ shell_quote(@cmd, '-S', @pp), shell_quote(@cmd, @pp)];
131             },
132 0           fake_home => { '.oscrc' => q{
133             [general]
134             apiurl = https://$api
135              
136             [https://$api]
137             user = %(user)s
138             pass = %(pass)s
139             keyring=0
140             } } },
141              
142             { name => 'git', pat => [qr{(^git://|\.git/?$)}],
143             desc => "git: URLs starting with git:// or ending in .git are handled by 'git clone'",
144             cmd => ["git clone --depth 1 %(url)s"] },
145              
146             { name => 'svn', pat => [qr{^svn://}, qr{[/@]svn(root)?[\./].*/(trunk|branches)/}, qr{[/@]svn(root)?[\./]}],
147             desc => "Subversion(svn): URLs starting with git:// or containing /svn. followed by /trunk/ or /branches/ or containing /svn/ followed by /trunk/ or /branches/ are handled by 'svn checkout'. Second Prio: URLs containing only /svn. or /svn/",
148             cmd => ["svn --no-auth-cache --non-interactive --trust-server-cert co -q --force %(url)s",
149             "svn --no-auth-cache --non-interactive --trust-server-cert --username %(user)s --password %(pass)s co -q --force %(url)s" ] },
150              
151             { name => 'http', pat => [undef, undef, qr{^https?://}],
152             desc => "WWW(http): URLs starting with http:// or https:// are handled as third priority with 'wget -m', this third priority is a fallback, if no first or second priority commands match",
153             cmd => ["wget -m -np -nd -nH --no-check-certificate -e robots=off %(url)s"] },
154             ];
155              
156 0           $obj{_sel} = ['*'];
157              
158 0           return bless \%obj, $class;
159             }
160              
161             sub dest
162             {
163 0     0 1   my ($self, $dir) = @_;
164 0 0         $self->{dest} = $dir if defined $dir;
165 0 0         $self->{dest} = File::Temp::tempdir( "co_XXXXXX", TMPDIR => 1)
166             unless $self->{dest};
167 0           return $self->{dest};
168             }
169              
170             sub auth
171             {
172 0     0 1   my ($self, $user, $pass) = @_;
173 0 0         $self->{user} = $user if defined $user;
174 0 0         $self->{pass} = $pass if defined $pass;
175 0           return ($self->{user}, $self->{pass});
176             }
177              
178             sub list_methods
179             {
180 0     0 1   return $_[0]->{_methods};
181             }
182              
183             sub describe
184             {
185 0     0 1   my @d = map { $_->{desc} } @{$_[0]->{_methods}};
  0            
  0            
186 0 0         return (wantarray ? @d : join("\n\n", @d)."\n");
187             }
188              
189             sub method
190             {
191 0     0 1   my ($self, @sel) = @_;
192 0 0         $sel[0] = '*' unless @sel;
193 0 0         $self->{_sel} = (ref $sel[0]) ? $sel[0] : [@sel];
194             }
195              
196             =head2 find_method($url)
197              
198             Tests $url against the regexp patterns stored with each method. The first match is returned.
199             If multiple patterns are specified per method, all other methods are tested,
200             before the next set of patterns is tested.
201              
202             Unless a method name was specified with C, we return undef, if no pattern matches.
203             With one or multiple method names specified, the first available method by that
204             name is returned, when there is no pattern match.
205              
206             =cut
207              
208             sub find_method
209             {
210 0     0 1   my ($self, $url) = @_;
211              
212 0           my $max_pat_idx = 0;
213 0           for my $m (@{$self->{_methods}})
  0            
214             {
215 0 0         $max_pat_idx = $#{$m->{pat}} if $#{$m->{pat}} > $max_pat_idx;
  0            
  0            
216             }
217              
218             # match method patterns, breadth first
219 0           for my $sel (@{$self->{_sel}})
  0            
220             {
221 0           for my $pat_idx (0 .. $max_pat_idx)
222             {
223 0           for my $m (@{$self->{_methods}})
  0            
224             {
225 0 0 0       next if $sel ne '*' and $sel ne $m->{name};
226 0 0         next unless defined (my $pat = $m->{pat}[$pat_idx]);
227 0 0         return $m if $url =~ m{$pat};
228             }
229             }
230             }
231              
232             # if a name was give in sel, try hard to use it, even if no pattern matched.
233 0           for my $sel (@{$self->{_sel}})
  0            
234             {
235 0 0         next if $sel eq '*';
236 0           for my $m (@{$self->{_methods}})
  0            
237             {
238 0 0         return $m if $sel eq $m->{name};
239             }
240             }
241              
242 0           return undef; # sorry, really nothing matched.
243             }
244              
245             =head2 fmt_cmd($meth_hash, $url)
246              
247             Use a method hash as returned by C and prepare all possible commands from it with the given url. One or multiple commands are returned suitable for use with system or backticks.
248              
249             =cut
250              
251             sub fmt_cmd
252             {
253 0     0 1   my ($self, $m, $url) = @_;
254              
255 0           my $list;
256 0 0         if (ref $m->{cmd} eq 'CODE')
257             {
258 0           $list = $m->{cmd}->($url, $m);
259             }
260             else
261             {
262 0           $list = $m->{cmd};
263             }
264             # use Data::Dumper; die Dumper $m, $list, $url;
265              
266 0           my @cmd;
267 0           for my $cmd (@$list)
268             {
269 0           my $need_user = 0;
270 0           my $need_pass = 0;
271 0           my $need_dest = 0;
272 0 0         $need_user++ if $cmd =~ m{%\(user\)};
273 0 0         $need_pass++ if $cmd =~ m{%\(pass\)};
274 0 0         $need_dest++ if $cmd =~ m{%\(dest\)};
275              
276 0 0         $self->dest() if $need_dest; # creates tempdir
277 0 0 0       next if $need_pass and !defined($self->{pass});
278 0 0 0       next if $need_user and !defined($self->{user});
279              
280 0           my $fmt = Text::Sprintf::Named->new({fmt => $cmd});
281 0   0       push @cmd, $fmt->format({ args =>
      0        
282             {
283             url => shell_quote($url),
284             user => shell_quote($self->{user}||''),
285             pass => shell_quote($self->{pass}||''),
286             dest => shell_quote($self->{dest})
287             }});
288             }
289              
290 0 0         return wantarray ? @cmd : $cmd[0];
291             }
292              
293             =head2 get($url)
294              
295             Similar to this code:
296              
297             $m = $f->find_method($url);
298             system "".$f->fmt_cmd($m, $url);
299              
300             Except that it tries further commands from C if if the first fails.
301             It also assures that the current working directory is C<< $f->dest() >> while executing a command.
302             Command names are printed to stdout, if verbose is set.
303              
304             =cut
305              
306             sub add_method
307             {
308 0     0 1   my ($self, $name, $pat, $cmd, $desc) = @_;
309 0 0         $pat = [$pat] unless ref $pat eq 'ARRAY';
310 0 0         $cmd = [$cmd] unless ref $cmd eq 'ARRAY';
311 0   0       $desc ||= $cmd->[0];
312 0           unshift @{$self->{_methods}}, { name => $name, desc => $desc, pat => $pat, cmd => $cmd };
  0            
313             }
314              
315              
316             =head2 pre_cmd($method)
317              
318             Helper function run by C. This prepares temporary files if the method has a 'fake_home' and
319             at least a username credential was given to C.
320             This also creates the destination directory and changes into it.
321              
322             =cut
323              
324             sub pre_cmd
325             {
326 0     0 1   my ($self, $m) = @_;
327              
328 0           my $cwd = getcwd();
329 0 0         $cwd = $1 if $cwd =~ m{^(.*)$};
330              
331 0           my $dest = $self->dest();
332 0           File::Path::mkpath($dest);
333 0 0         chdir($dest) or croak "cannot chdir('$dest')\n";
334 0 0 0       if ($m->{fake_home} and defined($self->{user}))
335             {
336 0           my $fake_home = File::Temp::tempdir("co_fake_home_XXXXXX", TMPDIR => 1, UNLINK => 1);
337 0           chmod 0700, $fake_home;
338 0           for my $f (keys %{$m->{fake_home}})
  0            
339             {
340 0           my $fmt = Text::Sprintf::Named->new({ fmt => $m->{fake_home}{$f} });
341 0 0         open O, ">", "$fake_home/$f" or croak "pre_cmd: failed to populate fake_home: $f: $!";
342 0   0       print O $fmt->format({ args => { user => $self->{user}||'', pass => $self->{pass}||'' } });
      0        
343 0           close O;
344             }
345 0           $self->{saved_fake_home} = $fake_home;
346             }
347 0           $self->{saved_cwd} = $cwd;
348             }
349              
350             =head2 post_cmd()
351              
352             Cleanup handler run by C. This removes any temporary
353             files and restores the current working directory.
354              
355             =cut
356              
357             sub post_cmd
358             {
359 0     0 1   my ($self) = @_;
360 0           my $cwd = $self->{saved_cwd};
361 0           croak "no {saved_cwd}. Called post_cmd() without pre_cmd() ??\n";
362              
363 0 0         if ($self->{saved_fake_home})
364             {
365             # cleanup that home recursively
366 0           File::Path::remove_tree($self->{saved_fake_home});
367 0           delete $self->{saved_fake_home};
368             }
369 0 0         chdir($cwd) or croak "cannot chdir back to '$cwd'\n";
370             }
371              
372             sub get
373             {
374 0     0 1   my ($self, $url) = @_;
375              
376 0           my $m = $self->find_method($url);
377 0 0         croak "get: no method known for '$url', try add_method()\n" unless $m;
378              
379 0           my @cmd = $self->fmt_cmd($m, $url);
380 0 0         croak "no method usable for this url. Need auth?\n" unless @cmd;
381              
382 0           my $cwd = $self->pre_cmd();
383              
384 0           my $success = 0;
385 0           for my $c (@cmd)
386             {
387 0 0         print STDOUT "[$c]\n" if $self->{verbose};
388 0 0         if (system $c)
389             {
390 0 0         carp $self->{verbose} ? "--: r=$?, $!\n" : "[$c]: r=$?, $!\n";
391             }
392             else
393             {
394 0           $success++;
395 0           last;
396             }
397             }
398 0           $self->post_cmd();
399              
400 0           return $success;
401             }
402              
403             =head1 AUTHOR
404              
405             Juergen Weigert, C<< >>
406              
407             =head1 BUGS
408              
409             Please report any bugs or feature requests to C, or through
410             the web interface at L. I will be notified, and then you'll
411             automatically be notified of progress on your bug as I make changes.
412              
413              
414              
415              
416             =head1 SUPPORT
417              
418             You can find documentation for this module with the perldoc command.
419              
420             perldoc URL::Checkout
421              
422              
423             You can also look for information at:
424              
425             =over 4
426              
427             =item * RT: CPAN's request tracker
428              
429             L
430              
431             =item * AnnoCPAN: Annotated CPAN documentation
432              
433             L
434              
435             =item * CPAN Ratings
436              
437             L
438              
439             =item * Search CPAN
440              
441             L
442              
443             =back
444              
445              
446             =head1 ACKNOWLEDGEMENTS
447              
448              
449             =head1 LICENSE AND COPYRIGHT
450              
451             Copyright 2010 Juergen Weigert.
452              
453             This program is free software; you can redistribute it and/or modify it
454             under the terms of either: the GNU General Public License as published
455             by the Free Software Foundation; or the Artistic License.
456              
457             See http://dev.perl.org/licenses/ for more information.
458              
459              
460             =cut
461              
462             1; # End of URL::Checkout