File Coverage

blib/lib/App/GitWorkspaceScanner.pm
Criterion Covered Total %
statement 36 157 22.9
branch 0 82 0.0
condition 0 41 0.0
subroutine 12 18 66.6
pod 4 4 100.0
total 52 302 17.2


line stmt bran cond sub pod time code
1             package App::GitWorkspaceScanner;
2              
3 1     1   15074 use strict;
  1         2  
  1         34  
4 1     1   4 use warnings;
  1         1  
  1         26  
5              
6             # External dependencies.
7 1     1   3 use Carp qw( croak );
  1         5  
  1         32  
8 1     1   562 use Data::Dumper;
  1         6732  
  1         61  
9 1     1   8 use File::Spec;
  1         1  
  1         26  
10 1     1   658 use Getopt::Long;
  1         9542  
  1         7  
11 1     1   723 use Git::Repository;
  1         23058  
  1         6  
12 1     1   641 use Log::Any qw( $log );
  1         1741  
  1         5  
13 1     1   74 use Pod::Find qw();
  1         1  
  1         23  
14 1     1   647 use Pod::Usage qw();
  1         44575  
  1         26  
15 1     1   578 use Readonly;
  1         2877  
  1         66  
16 1     1   625 use Try::Tiny;
  1         1356  
  1         1671  
17              
18              
19             =head1 NAME
20              
21             App::GitWorkspaceScanner - Scan git repositories in your workspace for local changes not synced up.
22              
23              
24             =head1 VERSION
25              
26             Version 1.0.1
27              
28             =cut
29              
30             our $VERSION = '1.0.1';
31              
32              
33             =head1 DESCRIPTION
34              
35             This module scans a workspace to find git repositories that are not in sync
36             with their remotes or that are not on an expected branch. This gives you a
37             snapshot of all outstanding changes in your entire workspace.
38              
39              
40             =head1 SYNOPSIS
41              
42             sudo nice ./scan_git_repositories
43              
44              
45             =head1 OPTIONS
46              
47             C provides a C utility as a command
48             line interface to the module. It supports the following command line options:
49              
50             =over 4
51              
52             =item * C<--verbose>
53              
54             Print out information about the analysis performed. Off by default.
55              
56             # Print out information.
57             ./scan_git_repositories --verbose
58              
59             =item * C<--workspace>
60              
61             Root of the workspace to search git repositories into. By default, the search
62             is performed on '/'.
63              
64             ./scan_git_repositories --workspace=$HOME
65              
66             =item * C<--allow_untracked_files>
67              
68             Set whether untracked files should generate a warning in the report. Currently
69             on by default, but this is likely to change in the near future as we add/clean
70             up our .gitignore files.
71              
72             # Do not warn on untracked files (default).
73             ./scan_git_repositories --allow_untracked_files=0
74              
75             # Warn on untracked files.
76             ./scan_git_repositories --allow_untracked_files=1
77              
78             =item * C<--allowed_branches>
79              
80             Generate a warning if the current branch doesn't match one of the branches
81             specified. Set to C default.
82              
83             # Allow only using the master branch.
84             ./scan_git_repositories
85              
86             # Allow only using the master branch.
87             ./scan_git_repositories --allowed_branches=master
88              
89             # Allow only using the master and production branches.
90             ./scan_git_repositories --allowed_branches=master,production
91              
92             =item * C<--allow_any_branches>
93              
94             Disable the check performed by C<--allowed_branches>, which is set to force
95             using the C branch by default.
96              
97             # Don't check the branch the repository is on.
98             ./scan_git_repositories --allow_any_branches=1
99              
100             =item * C<--whitelist_repositories>
101              
102             Excludes specific repositories from the checks performed by this script. The
103             argument accepts a comma-separated list of paths to ignore, but by default no
104             repositories are whitelisted.
105              
106             # Whitelist /root/my_custom_repo
107             ./scan_git_repositories --whitelist_repositories=/root/my_custom_repo
108              
109             =back
110              
111              
112             =head1 CAVEATS
113              
114             =over 4
115              
116             =item *
117              
118             This script currently uses C to scan the current machine for git
119             repositories, so this only works for Linux/Unix machines.
120              
121             =item *
122              
123             If you are not using C<--workspace> to limit the scan to files on which you
124             have read permissions, this script needs to be run as root.
125              
126             =item *
127              
128             You should have C in your crontab running daily, to ensure that new
129             repositories are picked up.
130              
131             =item *
132              
133             You should run this script using C. While it uses C, it still has
134             an impact on the file cache and using C will help mitigate any potential
135             issues.
136              
137             =back
138              
139             =cut
140              
141             Readonly::Scalar my $FILE_STATUS_PARSER =>
142             {
143             '??' => 'untracked',
144             'A' => 'added',
145             'D' => 'deleted',
146             'M' => 'modified',
147             'R' => 'moved',
148             };
149              
150              
151             =head1 FUNCTIONS
152              
153             =head2 new()
154              
155             Create a new C object.
156              
157             my $scanner = Git::WorkspaceScanner->new(
158             arguments => \@arguments,
159             );
160              
161             Arguments:
162              
163             =over 4
164              
165             =item * arguments I<(mandatory)>
166              
167             An arrayref of arguments passed originally to the command line utility.
168              
169             =back
170              
171             =cut
172              
173             sub new
174             {
175 0     0 1   my ( $class, %args ) = @_;
176              
177             # Verify arguments.
178 0           my $arguments = delete( $args{'arguments'} );
179 0 0         croak 'The following argument(s) are not valid: ' . join( ', ', keys %args )
180             if scalar( keys %args ) != 0;
181              
182             # Create the object.
183 0           my $self = bless( {}, $class );
184              
185             # Parse the arguments provided.
186 0           $self->parse_arguments( $arguments );
187              
188             # If --help was passed, print out usage info and exit.
189 0 0         if ( $self->{'help'} )
190             {
191 0           Pod::Usage::pod2usage(
192             '-verbose' => 99,
193             '-sections' => 'NAME|SYNOPSIS|OPTIONS',
194             '-input' => Pod::Find::pod_where(
195             {-inc => 1},
196             __PACKAGE__,
197             ),
198             );
199             }
200              
201 0           return $self;
202             }
203              
204              
205             =head2 parse_arguments()
206              
207             Parse the options passed via the command line arguments and make sure there is
208             no conflict or invalid settings.
209              
210             my $options = $scanner->parse_arguments();
211              
212             =cut
213              
214             sub parse_arguments
215             {
216 0     0 1   my ( $self, $arguments ) = @_;
217              
218             # Parse arguments.
219 0 0         Getopt::Long::GetOptionsFromArray(
220             $arguments,
221             $self,
222             'verbose',
223             'allowed_branches=s',
224             'allow_any_branches=i',
225             'allow_untracked_files=i',
226             'whitelist_repositories=s',
227             'workspace=s',
228             'help',
229             ) || croak "Error parsing command line arguments";
230              
231             # --help is off by default.
232 0   0       $self->{'help'} //= 0;
233              
234             # --verbose is off by default.
235 0   0       $self->{'verbose'} = $self->{'verbose'} // 0;
236 0 0         croak "Invalid value for --verbose\n"
237             if $self->{'verbose'} !~ /\A[01]\z/;
238              
239             # Set '/' as the default for --workspace.
240 0   0       $self->{'workspace'} //= '/';
241              
242             # Force a trailing slash.
243 0           $self->{'workspace'} =~ s|/+$|/|;
244              
245             # --allowed_branches cannot be combined with --allow_any_branches.
246 0 0 0       croak "--allowed_branches cannot be combined with --allow_any_branches\n"
247             if defined( $self->{'allowed_branches'} ) && defined( $self->{'allow_any_branches'} );
248              
249             # --allow_any_branches is off by default.
250 0   0       $self->{'allow_any_branches'} //= 0;
251 0 0         croak "--allow_any_branches must be set to either 0 or 1"
252             if $self->{'allow_any_branches'} !~ /\A[01]\z/;
253              
254             # --allow_untracked_files is off by default.
255 0   0       $self->{'allow_untracked_files'} //= 0;
256 0 0         croak "--allow_untracked_files must be set to either 0 or 1"
257             if $self->{'allow_untracked_files'} !~ /\A[01]\z/;
258              
259             # Specific logic when we restrict which branches are valid.
260 0 0         if ( !$self->{'allow_any_branches'} )
261             {
262             # It doesn't matter whether it was an explicit choice or not to
263             # restrict valid branches, set the option to 0 for future tests.
264 0           $self->{'allow_any_branches'} = 0;
265              
266             # Default --allowed_branches to master.
267 0   0       $self->{'allowed_branches'} //= 'master';
268             }
269              
270             # Check that the paths provided to --whitelist_repositories are valid.
271 0   0       $self->{'whitelist_repositories'} //= '';
272 0           my @whitelist_repositories = ();
273 0           foreach my $path ( split( /,/, $self->{'whitelist_repositories'} ) )
274             {
275 0 0         if ( -d $path )
276             {
277             # Ensure a trailing slash.
278 0           $path =~ s/\/$//;
279 0           push( @whitelist_repositories, "$path/" );
280             }
281             else
282             {
283 0           print "Warning: the path >$path< provided via --whitelist_repositories is not valid and will be skipped.\n";
284             }
285             }
286 0           $self->{'whitelist_repositories'} = \@whitelist_repositories;
287              
288 0 0         $self->{'verbose'} && $log->info( 'Finished parsing arguments.' );
289              
290 0           return;
291             }
292              
293              
294             =head2 get_git_repositories()
295              
296             Return a list of all the git repositories on the machine.
297              
298             my $git_repositories = get_git_repositories();
299              
300             =cut
301              
302             sub get_git_repositories
303             {
304 0     0 1   my ( $self ) = @_;
305              
306 0 0         if ( !defined( $self->{'git_repositories'} ) )
307             {
308 0 0         if ( $self->{'verbose'} )
309             {
310 0           $log->infof( "Running as user '%s'.", getpwuid( $< ) );
311 0           $log->infof( "Scanning workspace '%s'.", $self->{'workspace'} );
312             }
313              
314             # Find .git directories.
315             # TODO: convert to not use backticks.
316             # TODO: find a way to generalize to non-Unix systems.
317             # TODO: generalize to handle .git repositories that are outside of their
318             # repos (rare).
319 0 0         $self->{'verbose'} && $log->info( "Locate .git directories." );
320 0           my @locate_results = `locate --basename '\\.git'`; ## no critic (InputOutput::ProhibitBacktickOperators)
321 0 0         $self->{'verbose'} && $log->infof( "Found %s potential directories.", scalar( @locate_results ) );
322              
323 0           $self->{'git_repositories'} = [];
324 0           foreach my $scanned_path ( @locate_results )
325             {
326 0           chomp( $scanned_path );
327 0 0         $self->{'verbose'} && $log->infof( "Evaluating path %s.", $scanned_path );
328              
329             # Parse the path.
330 0           my ( $volume, $git_repository, $file ) = File::Spec->splitpath( $scanned_path );
331 0 0         if ( $file ne '.git' )
332             {
333 0 0         $self->{'verbose'} && $log->infof( " -> '%s' is not a .git directory after all.", $file );
334 0           next;
335             }
336 0 0         if ( ! -d $git_repository )
337             {
338 0 0         $self->{'verbose'} && $log->infof( " -> '%s' is not a directory.", $git_repository );
339 0           next;
340             }
341              
342             # Skip paths outside of the workspace.
343 0 0         if ( $git_repository !~ /^\Q$self->{'workspace'}\E/x )
344             {
345 0 0         $self->{'verbose'} && $log->infof( " -> '%s' is not inside the scanned space.", $git_repository );
346 0           next;
347             }
348              
349             # Skip whitelisted repositories.
350 0 0         if ( scalar( grep { $_ eq $git_repository } @{ $self->{'whitelist_repositories'} } ) != 0 )
  0            
  0            
351             {
352 0 0         $self->{'verbose'} && $log->infof( " -> '%s' is whitelisted.", $git_repository );
353 0           next;
354             }
355              
356 0           push( @{ $self->{'git_repositories'} }, $git_repository );
  0            
357 0 0         $self->{'verbose'} && $log->info( " -> Added to the list of repositories!" );
358             }
359             }
360              
361 0           $self->{'verbose'} && $log->infof(
362             '%s relevant git directories.',
363 0 0         scalar( @{ $self->{'git_repositories'} } ),
364             );
365              
366 0           return $self->{'git_repositories'};
367             }
368              
369              
370             =head2 get_unclean_repositories()
371              
372             Return a list of repositories with local modifications not reflected on the
373             origin repository.
374              
375             my $unclean_repositories = $app->get_unclean_repositories( $git_repositories );
376              
377             The return value is a hashref, with the key being the path to the git
378             repository and the value the git status for that git repository.
379              
380             =cut
381              
382             sub get_unclean_repositories ## no critic (Subroutines::ProhibitExcessComplexity)
383             {
384 0     0 1   my ( $self ) = @_;
385              
386             # Get the list of repositories on the machine.
387 0           my $git_repositories = $self->get_git_repositories();
388              
389 0           my $report = {};
390 0           foreach my $git_repository ( @$git_repositories )
391             {
392 0 0         $self->{'verbose'} && $log->infof( 'Analyzing %s.', $git_repository );
393              
394             # Detect whether we're in a submodule. Submodules behave differently for
395             # branch detection in particular.
396 0 0         my $is_submodule = -d File::Spec->catfile( $git_repository, '.git' ) ? 0 : 1;
397              
398             # Retrieve the status for that repository.
399             # --untracked-files=all will show all the individual untracked files in
400             # untracked directories, for the purpose of counting accurately untracked
401             # files.
402             # --branch adds branch tracking information with the prefix ##.
403 0           my $git = Git::Repository->new( work_tree => $git_repository );
404 0           my $git_status = $git->run( 'status', '--porcelain', '--untracked-files=all', '--branch' );
405              
406             # Parse the output of the git status command.
407 0           my $files_stats = { map { $_ => 0 } ( values %$FILE_STATUS_PARSER, 'unknown' ) };
  0            
408 0           my $local_branch;
409             my $commits_ahead;
410 0           foreach my $line ( split( /\n/, $git_status ) )
411             {
412             try
413             {
414             # Detect and parse branch information.
415 0     0     my ( $branch_info ) = $line =~ /^##\s(.*?)$/;
416 0 0         if ( defined( $branch_info ) )
417             {
418 0           my ( $remote_branch, $status );
419 0           ( $local_branch, $remote_branch, $status ) = $branch_info =~ /
420             \A
421             ([^\. ]+) # Local branch name.
422             (?:
423             \.\.\. # Three dots indicate a remote branch name following next.
424             ([^\. ]+) # Remote branch name.
425             (?:
426             \s+ # Space before more information optionally follows about the respective
427             # advancement of local and remote branches.
428             \[([^\]]+)\]
429             )?
430             )?
431             \z
432             /x;
433 0 0 0       $self->{'verbose'} && $log->infof(
      0        
434             " (B) %s...%s: %s",
435             $local_branch,
436             ( $remote_branch // '(no remote)' ),
437             ( $status // '(no status)' ),
438             );
439              
440             # If the branch is in sync with its remote, skip.
441             return
442 0 0         if !defined( $status );
443              
444             # It's only an issue if the local branch is ahead of its remote,
445             # since it means we have local changes.
446 0 0 0       ( $commits_ahead ) = $status =~ /^ahead\s+([0-9]+)$/
447             if !defined( $commits_ahead ) || ( $commits_ahead == 0 );
448 0           return;
449             }
450              
451             # Review the status of each file.
452 0           my ( $status, $file ) = $line =~ /^\s*(\S{1,2})\s+(.*?)$/x;
453 0 0         die "The format of line >$line< is not recognized.\n"
454             if !defined( $file );
455 0 0         $self->{'verbose'} && $log->infof( ' (F) %s: %s.', $file, $status );
456              
457 0           foreach my $code ( keys %$FILE_STATUS_PARSER )
458             {
459 0 0         next if $status !~ /\Q$code\E/;
460 0           my $key = $FILE_STATUS_PARSER->{ $code };
461 0           $files_stats->{ $key }++;
462 0           $status =~ s/\Q$code\E//g;
463             }
464              
465 0 0         if ( $status ne '' )
466             {
467 0           $files_stats->{'unknown'}++;
468 0           die "Unknown status code >$status< for file >$file<.\n";
469             }
470             }
471             catch
472             {
473 0     0     chomp( $_ );
474 0           push( @{ $report->{'errors'} }, "$git_repository: $_" );
  0            
475 0           };
476             }
477              
478             # If the --allow_untracked_files option is active, delete that status
479             # from the stats so that it doesn't get reported upon.
480 0 0         delete( $files_stats->{'untracked'} )
481             if $self->{'allow_untracked_files'};
482              
483             # Tally the number of uncommitted file changes.
484 0           my $total_file_issues = 0;
485 0           foreach my $count ( values %$files_stats )
486             {
487 0           $total_file_issues += $count;
488             }
489              
490 0 0 0       $log->infof( ' => %s.', join( ', ', map { "$_: $files_stats->{$_}" } keys %$files_stats ) )
  0            
491             if $self->{'verbose'} && ( $total_file_issues > 0 );
492              
493             # Add to the report if we have uncommitted files or unpushed commits.
494 0 0 0       if ( ( $total_file_issues > 0 ) || ( ( $commits_ahead // 0 ) > 0 ) )
      0        
495             {
496 0   0       $report->{ $git_repository } //= {};
497 0           $report->{ $git_repository }->{'files_stats'} = $files_stats;
498 0           $report->{ $git_repository }->{'files_total'} = $total_file_issues;
499 0   0       $report->{ $git_repository }->{'commits_ahead'} = $commits_ahead // 0;
500             }
501              
502             # Check if the branch name is authorized.
503 0 0 0       if ( !$self->{'allow_any_branches'} && !$is_submodule )
504             {
505 0 0         if ( defined( $local_branch ) )
506             {
507 0 0         if ( scalar( grep { $local_branch eq $_ } split( /\s*,\s*/, $self->{'allowed_branches'} ) ) == 0 )
  0            
508             {
509 0   0       $report->{ $git_repository } //= {};
510 0           $report->{ $git_repository }->{'is_branch_allowed'} = 0;
511 0           $report->{ $git_repository }->{'local_branch'} = $local_branch;
512             }
513             }
514             else
515             {
516 0           $log->warnf( "Failed to detect the local branch name for >%s<.", $git_repository );
517             }
518             }
519             }
520              
521 0           return $report;
522             }
523              
524              
525             =head1 BUGS
526              
527             Please report any bugs or feature requests through the web interface at
528             L.
529             I will be notified, and then you'll automatically be notified of progress on
530             your bug as I make changes.
531              
532              
533             =head1 SUPPORT
534              
535             You can find documentation for this module with the perldoc command.
536              
537             perldoc App::GitWorkspaceScanner
538              
539              
540             You can also look for information at:
541              
542             =over
543              
544             =item * GitHub's request tracker
545              
546             L
547              
548             =item * AnnoCPAN: Annotated CPAN documentation
549              
550             L
551              
552             =item * CPAN Ratings
553              
554             L
555              
556             =item * MetaCPAN
557              
558             L
559              
560             =back
561              
562              
563             =head1 AUTHOR
564              
565             L,
566             C<< >>.
567              
568              
569             =head1 COPYRIGHT & LICENSE
570              
571             Copyright 2014-2015 Guillaume Aubert.
572              
573             This program is free software: you can redistribute it and/or modify it under
574             the terms of the GNU General Public License version 3 as published by the Free
575             Software Foundation.
576              
577             This program is distributed in the hope that it will be useful, but WITHOUT ANY
578             WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
579             PARTICULAR PURPOSE. See the GNU General Public License for more details.
580              
581             You should have received a copy of the GNU General Public License along with
582             this program. If not, see http://www.gnu.org/licenses/
583              
584             =cut
585              
586             1;