File Coverage

blib/lib/App/GitWorkspaceScanner.pm
Criterion Covered Total %
statement 36 157 22.9
branch 0 82 0.0
condition 0 41 0.0
subroutine 12 18 66.6
pod 4 4 100.0
total 52 302 17.2


line stmt bran cond sub pod time code
1             package App::GitWorkspaceScanner;
2              
3 1     1   14317 use strict;
  1         2  
  1         29  
4 1     1   3 use warnings;
  1         1  
  1         22  
5              
6             # External dependencies.
7 1     1   3 use Carp qw( croak );
  1         4  
  1         27  
8 1     1   506 use Data::Dumper;
  1         7846  
  1         61  
9 1     1   6 use File::Spec;
  1         1  
  1         20  
10 1     1   766 use Getopt::Long;
  1         7624  
  1         4  
11 1     1   591 use Git::Repository;
  1         17344  
  1         31  
12 1     1   460 use Log::Any qw( $log );
  1         1209  
  1         4  
13 1     1   47 use Pod::Find qw();
  1         2  
  1         15  
14 1     1   451 use Pod::Usage qw();
  1         33547  
  1         22  
15 1     1   482 use Readonly;
  1         2038  
  1         38  
16 1     1   458 use Try::Tiny;
  1         1077  
  1         1312  
17              
18              
19             =head1 NAME
20              
21             App::GitWorkspaceScanner - Scan git repositories in your workspace for local changes not synced up.
22              
23              
24             =head1 VERSION
25              
26             Version 1.0.2
27              
28             =cut
29              
30             our $VERSION = '1.0.2';
31              
32              
33             =head1 DESCRIPTION
34              
35             This module scans a workspace to find git repositories that are not in sync
36             with their remotes or that are not on an expected branch. This gives you a
37             snapshot of all outstanding changes in your entire workspace.
38              
39              
40             =head1 SYNOPSIS
41              
42             sudo nice ./scan_git_repositories
43              
44              
45             =head1 OPTIONS
46              
47             C provides a C utility as a command
48             line interface to the module. It supports the following command line options:
49              
50             =over 4
51              
52             =item * C<--verbose>
53              
54             Print out information about the analysis performed. Off by default.
55              
56             # Print out information.
57             ./scan_git_repositories --verbose
58              
59             =item * C<--workspace>
60              
61             Root of the workspace to search git repositories into. By default, the search
62             is performed on '/', but you can use any absolute path.
63              
64             ./scan_git_repositories --workspace=$HOME
65              
66              
67             =item * C<--allow_untracked_files>
68              
69             Set whether untracked files should generate a warning in the report. Currently
70             on by default, but this is likely to change in the near future as we add/clean
71             up our .gitignore files.
72              
73             # Do not warn on untracked files (default).
74             ./scan_git_repositories --allow_untracked_files=0
75              
76             # Warn on untracked files.
77             ./scan_git_repositories --allow_untracked_files=1
78              
79             =item * C<--allowed_branches>
80              
81             Generate a warning if the current branch doesn't match one of the branches
82             specified. Set to C default.
83              
84             # Allow only using the master branch.
85             ./scan_git_repositories
86              
87             # Allow only using the master branch.
88             ./scan_git_repositories --allowed_branches=master
89              
90             # Allow only using the master and production branches.
91             ./scan_git_repositories --allowed_branches=master,production
92              
93             =item * C<--allow_any_branches>
94              
95             Disable the check performed by C<--allowed_branches>, which is set to force
96             using the C branch by default.
97              
98             # Don't check the branch the repository is on.
99             ./scan_git_repositories --allow_any_branches=1
100              
101             =item * C<--whitelist_repositories>
102              
103             Excludes specific repositories from the checks performed by this script. The
104             argument accepts a comma-separated list of paths to ignore, but by default no
105             repositories are whitelisted.
106              
107             # Whitelist /root/my_custom_repo
108             ./scan_git_repositories --whitelist_repositories=/root/my_custom_repo
109              
110             =back
111              
112              
113             =head1 CAVEATS
114              
115             =over 4
116              
117             =item *
118              
119             This script currently uses C to scan the current machine for git
120             repositories, so this only works for Linux/Unix machines.
121              
122             =item *
123              
124             If you are not using C<--workspace> to limit the scan to files on which you
125             have read permissions, this script needs to be run as root.
126              
127             =item *
128              
129             You should have C in your crontab running daily, to ensure that new
130             repositories are picked up.
131              
132             =item *
133              
134             You should run this script using C. While it uses C, it still has
135             an impact on the file cache and using C will help mitigate any potential
136             issues.
137              
138             =back
139              
140             =cut
141              
142             Readonly::Scalar my $FILE_STATUS_PARSER =>
143             {
144             '??' => 'untracked',
145             'A' => 'added',
146             'D' => 'deleted',
147             'M' => 'modified',
148             'R' => 'moved',
149             };
150              
151              
152             =head1 FUNCTIONS
153              
154             =head2 new()
155              
156             Create a new C object.
157              
158             my $scanner = Git::WorkspaceScanner->new(
159             arguments => \@arguments,
160             );
161              
162             Arguments:
163              
164             =over 4
165              
166             =item * arguments I<(mandatory)>
167              
168             An arrayref of arguments passed originally to the command line utility.
169              
170             =back
171              
172             =cut
173              
174             sub new
175             {
176 0     0 1   my ( $class, %args ) = @_;
177              
178             # Verify arguments.
179 0           my $arguments = delete( $args{'arguments'} );
180 0 0         croak 'The following argument(s) are not valid: ' . join( ', ', keys %args )
181             if scalar( keys %args ) != 0;
182              
183             # Create the object.
184 0           my $self = bless( {}, $class );
185              
186             # Parse the arguments provided.
187 0           $self->parse_arguments( $arguments );
188              
189             # If --help was passed, print out usage info and exit.
190 0 0         if ( $self->{'help'} )
191             {
192 0           Pod::Usage::pod2usage(
193             '-verbose' => 99,
194             '-sections' => 'NAME|SYNOPSIS|OPTIONS',
195             '-input' => Pod::Find::pod_where(
196             {-inc => 1},
197             __PACKAGE__,
198             ),
199             );
200             }
201              
202 0           return $self;
203             }
204              
205              
206             =head2 parse_arguments()
207              
208             Parse the options passed via the command line arguments and make sure there is
209             no conflict or invalid settings.
210              
211             my $options = $scanner->parse_arguments();
212              
213             =cut
214              
215             sub parse_arguments
216             {
217 0     0 1   my ( $self, $arguments ) = @_;
218              
219             # Parse arguments.
220 0 0         Getopt::Long::GetOptionsFromArray(
221             $arguments,
222             $self,
223             'verbose',
224             'allowed_branches=s',
225             'allow_any_branches=i',
226             'allow_untracked_files=i',
227             'whitelist_repositories=s',
228             'workspace=s',
229             'help',
230             ) || croak "Error parsing command line arguments";
231              
232             # --help is off by default.
233 0   0       $self->{'help'} //= 0;
234              
235             # --verbose is off by default.
236 0   0       $self->{'verbose'} = $self->{'verbose'} // 0;
237 0 0         croak "Invalid value for --verbose\n"
238             if $self->{'verbose'} !~ /\A[01]\z/;
239              
240             # Set '/' as the default for --workspace.
241 0   0       $self->{'workspace'} //= '/';
242              
243             # Force a trailing slash.
244 0           $self->{'workspace'} =~ s|/+$|/|;
245              
246             # --allowed_branches cannot be combined with --allow_any_branches.
247 0 0 0       croak "--allowed_branches cannot be combined with --allow_any_branches\n"
248             if defined( $self->{'allowed_branches'} ) && defined( $self->{'allow_any_branches'} );
249              
250             # --allow_any_branches is off by default.
251 0   0       $self->{'allow_any_branches'} //= 0;
252 0 0         croak "--allow_any_branches must be set to either 0 or 1"
253             if $self->{'allow_any_branches'} !~ /\A[01]\z/;
254              
255             # --allow_untracked_files is off by default.
256 0   0       $self->{'allow_untracked_files'} //= 0;
257 0 0         croak "--allow_untracked_files must be set to either 0 or 1"
258             if $self->{'allow_untracked_files'} !~ /\A[01]\z/;
259              
260             # Specific logic when we restrict which branches are valid.
261 0 0         if ( !$self->{'allow_any_branches'} )
262             {
263             # It doesn't matter whether it was an explicit choice or not to
264             # restrict valid branches, set the option to 0 for future tests.
265 0           $self->{'allow_any_branches'} = 0;
266              
267             # Default --allowed_branches to master.
268 0   0       $self->{'allowed_branches'} //= 'master';
269             }
270              
271             # Check that the paths provided to --whitelist_repositories are valid.
272 0   0       $self->{'whitelist_repositories'} //= '';
273 0           my @whitelist_repositories = ();
274 0           foreach my $path ( split( /,/, $self->{'whitelist_repositories'} ) )
275             {
276 0 0         if ( -d $path )
277             {
278             # Ensure a trailing slash.
279 0           $path =~ s/\/$//;
280 0           push( @whitelist_repositories, "$path/" );
281             }
282             else
283             {
284 0           print "Warning: the path >$path< provided via --whitelist_repositories is not valid and will be skipped.\n";
285             }
286             }
287 0           $self->{'whitelist_repositories'} = \@whitelist_repositories;
288              
289 0 0         $self->{'verbose'} && $log->info( 'Finished parsing arguments.' );
290              
291 0           return;
292             }
293              
294              
295             =head2 get_git_repositories()
296              
297             Return a list of all the git repositories on the machine.
298              
299             my $git_repositories = get_git_repositories();
300              
301             =cut
302              
303             sub get_git_repositories
304             {
305 0     0 1   my ( $self ) = @_;
306              
307 0 0         if ( !defined( $self->{'git_repositories'} ) )
308             {
309 0 0         if ( $self->{'verbose'} )
310             {
311 0           $log->infof( "Running as user '%s'.", getpwuid( $< ) );
312 0           $log->infof( "Scanning workspace '%s'.", $self->{'workspace'} );
313             }
314              
315             # Find .git directories.
316             # TODO: convert to not use backticks.
317             # TODO: find a way to generalize to non-Unix systems.
318             # TODO: generalize to handle .git repositories that are outside of their
319             # repos (rare).
320 0 0         $self->{'verbose'} && $log->info( "Locate .git directories." );
321 0           my @locate_results = `locate --basename '\\.git'`; ## no critic (InputOutput::ProhibitBacktickOperators)
322 0 0         $self->{'verbose'} && $log->infof( "Found %s potential directories.", scalar( @locate_results ) );
323              
324 0           $self->{'git_repositories'} = [];
325 0           foreach my $scanned_path ( @locate_results )
326             {
327 0           chomp( $scanned_path );
328 0 0         $self->{'verbose'} && $log->infof( "Evaluating path %s.", $scanned_path );
329              
330             # Parse the path.
331 0           my ( $volume, $git_repository, $file ) = File::Spec->splitpath( $scanned_path );
332 0 0         if ( $file ne '.git' )
333             {
334 0 0         $self->{'verbose'} && $log->infof( " -> '%s' is not a .git directory after all.", $file );
335 0           next;
336             }
337 0 0         if ( ! -d $git_repository )
338             {
339 0 0         $self->{'verbose'} && $log->infof( " -> '%s' is not a directory.", $git_repository );
340 0           next;
341             }
342              
343             # Skip paths outside of the workspace.
344 0 0         if ( $git_repository !~ /^\Q$self->{'workspace'}\E/x )
345             {
346 0 0         $self->{'verbose'} && $log->infof( " -> '%s' is not inside the scanned space.", $git_repository );
347 0           next;
348             }
349              
350             # Skip whitelisted repositories.
351 0 0         if ( scalar( grep { $_ eq $git_repository } @{ $self->{'whitelist_repositories'} } ) != 0 )
  0            
  0            
352             {
353 0 0         $self->{'verbose'} && $log->infof( " -> '%s' is whitelisted.", $git_repository );
354 0           next;
355             }
356              
357 0           push( @{ $self->{'git_repositories'} }, $git_repository );
  0            
358 0 0         $self->{'verbose'} && $log->info( " -> Added to the list of repositories!" );
359             }
360             }
361              
362 0           $self->{'verbose'} && $log->infof(
363             '%s relevant git directories.',
364 0 0         scalar( @{ $self->{'git_repositories'} } ),
365             );
366              
367 0           return $self->{'git_repositories'};
368             }
369              
370              
371             =head2 get_unclean_repositories()
372              
373             Return a list of repositories with local modifications not reflected on the
374             origin repository.
375              
376             my $unclean_repositories = $app->get_unclean_repositories( $git_repositories );
377              
378             The return value is a hashref, with the key being the path to the git
379             repository and the value the git status for that git repository.
380              
381             =cut
382              
383             sub get_unclean_repositories ## no critic (Subroutines::ProhibitExcessComplexity)
384             {
385 0     0 1   my ( $self ) = @_;
386              
387             # Get the list of repositories on the machine.
388 0           my $git_repositories = $self->get_git_repositories();
389              
390 0           my $report = {};
391 0           foreach my $git_repository ( @$git_repositories )
392             {
393 0 0         $self->{'verbose'} && $log->infof( 'Analyzing %s.', $git_repository );
394              
395             # Detect whether we're in a submodule. Submodules behave differently for
396             # branch detection in particular.
397 0 0         my $is_submodule = -d File::Spec->catfile( $git_repository, '.git' ) ? 0 : 1;
398              
399             # Retrieve the status for that repository.
400             # --untracked-files=all will show all the individual untracked files in
401             # untracked directories, for the purpose of counting accurately untracked
402             # files.
403             # --branch adds branch tracking information with the prefix ##.
404 0           my $git = Git::Repository->new( work_tree => $git_repository );
405 0           my $git_status = $git->run( 'status', '--porcelain', '--untracked-files=all', '--branch' );
406              
407             # Parse the output of the git status command.
408 0           my $files_stats = { map { $_ => 0 } ( values %$FILE_STATUS_PARSER, 'unknown' ) };
  0            
409 0           my $local_branch;
410             my $commits_ahead;
411 0           foreach my $line ( split( /\n/, $git_status ) )
412             {
413             try
414             {
415             # Detect and parse branch information.
416 0     0     my ( $branch_info ) = $line =~ /^##\s(.*?)$/;
417 0 0         if ( defined( $branch_info ) )
418             {
419 0           my ( $remote_branch, $status );
420 0           ( $local_branch, $remote_branch, $status ) = $branch_info =~ /
421             \A
422             ([^\. ]+) # Local branch name.
423             (?:
424             \.\.\. # Three dots indicate a remote branch name following next.
425             ([^\. ]+) # Remote branch name.
426             (?:
427             \s+ # Space before more information optionally follows about the respective
428             # advancement of local and remote branches.
429             \[([^\]]+)\]
430             )?
431             )?
432             \z
433             /x;
434 0 0 0       $self->{'verbose'} && $log->infof(
      0        
435             " (B) %s...%s: %s",
436             $local_branch,
437             ( $remote_branch // '(no remote)' ),
438             ( $status // '(no status)' ),
439             );
440              
441             # If the branch is in sync with its remote, skip.
442             return
443 0 0         if !defined( $status );
444              
445             # It's only an issue if the local branch is ahead of its remote,
446             # since it means we have local changes.
447 0 0 0       ( $commits_ahead ) = $status =~ /^ahead\s+([0-9]+)$/
448             if !defined( $commits_ahead ) || ( $commits_ahead == 0 );
449 0           return;
450             }
451              
452             # Review the status of each file.
453 0           my ( $status, $file ) = $line =~ /^\s*(\S{1,2})\s+(.*?)$/x;
454 0 0         die "The format of line >$line< is not recognized.\n"
455             if !defined( $file );
456 0 0         $self->{'verbose'} && $log->infof( ' (F) %s: %s.', $file, $status );
457              
458 0           foreach my $code ( keys %$FILE_STATUS_PARSER )
459             {
460 0 0         next if $status !~ /\Q$code\E/;
461 0           my $key = $FILE_STATUS_PARSER->{ $code };
462 0           $files_stats->{ $key }++;
463 0           $status =~ s/\Q$code\E//g;
464             }
465              
466 0 0         if ( $status ne '' )
467             {
468 0           $files_stats->{'unknown'}++;
469 0           die "Unknown status code >$status< for file >$file<.\n";
470             }
471             }
472             catch
473             {
474 0     0     chomp( $_ );
475 0           push( @{ $report->{'errors'} }, "$git_repository: $_" );
  0            
476 0           };
477             }
478              
479             # If the --allow_untracked_files option is active, delete that status
480             # from the stats so that it doesn't get reported upon.
481 0 0         delete( $files_stats->{'untracked'} )
482             if $self->{'allow_untracked_files'};
483              
484             # Tally the number of uncommitted file changes.
485 0           my $total_file_issues = 0;
486 0           foreach my $count ( values %$files_stats )
487             {
488 0           $total_file_issues += $count;
489             }
490              
491 0 0 0       $log->infof( ' => %s.', join( ', ', map { "$_: $files_stats->{$_}" } keys %$files_stats ) )
  0            
492             if $self->{'verbose'} && ( $total_file_issues > 0 );
493              
494             # Add to the report if we have uncommitted files or unpushed commits.
495 0 0 0       if ( ( $total_file_issues > 0 ) || ( ( $commits_ahead // 0 ) > 0 ) )
      0        
496             {
497 0   0       $report->{ $git_repository } //= {};
498 0           $report->{ $git_repository }->{'files_stats'} = $files_stats;
499 0           $report->{ $git_repository }->{'files_total'} = $total_file_issues;
500 0   0       $report->{ $git_repository }->{'commits_ahead'} = $commits_ahead // 0;
501             }
502              
503             # Check if the branch name is authorized.
504 0 0 0       if ( !$self->{'allow_any_branches'} && !$is_submodule )
505             {
506 0 0         if ( defined( $local_branch ) )
507             {
508 0 0         if ( scalar( grep { $local_branch eq $_ } split( /\s*,\s*/, $self->{'allowed_branches'} ) ) == 0 )
  0            
509             {
510 0   0       $report->{ $git_repository } //= {};
511 0           $report->{ $git_repository }->{'is_branch_allowed'} = 0;
512 0           $report->{ $git_repository }->{'local_branch'} = $local_branch;
513             }
514             }
515             else
516             {
517 0           $log->warnf( "Failed to detect the local branch name for >%s<.", $git_repository );
518             }
519             }
520             }
521              
522 0           return $report;
523             }
524              
525              
526             =head1 SEE ALSO
527              
528             =over 4
529              
530             =item * L
531              
532             =back
533              
534              
535             =head1 BUGS
536              
537             Please report any bugs or feature requests through the web interface at
538             L.
539             I will be notified, and then you'll automatically be notified of progress on
540             your bug as I make changes.
541              
542              
543             =head1 SUPPORT
544              
545             You can find documentation for this module with the perldoc command.
546              
547             perldoc App::GitWorkspaceScanner
548              
549              
550             You can also look for information at:
551              
552             =over
553              
554             =item * GitHub's request tracker
555              
556             L
557              
558             =item * AnnoCPAN: Annotated CPAN documentation
559              
560             L
561              
562             =item * CPAN Ratings
563              
564             L
565              
566             =item * MetaCPAN
567              
568             L
569              
570             =back
571              
572              
573             =head1 AUTHOR
574              
575             L,
576             C<< >>.
577              
578              
579             =head1 COPYRIGHT & LICENSE
580              
581             Copyright 2014-2015 Guillaume Aubert.
582              
583             This program is free software: you can redistribute it and/or modify it under
584             the terms of the GNU General Public License version 3 as published by the Free
585             Software Foundation.
586              
587             This program is distributed in the hope that it will be useful, but WITHOUT ANY
588             WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
589             PARTICULAR PURPOSE. See the GNU General Public License for more details.
590              
591             You should have received a copy of the GNU General Public License along with
592             this program. If not, see http://www.gnu.org/licenses/
593              
594             =cut
595              
596             1;