File Coverage

blib/lib/Devel/Git/MultiBisect.pm
Criterion Covered Total %
statement 26 145 17.9
branch 0 58 0.0
condition 0 3 0.0
subroutine 9 19 47.3
pod 6 6 100.0
total 41 231 17.7


line stmt bran cond sub pod time code
1             package Devel::Git::MultiBisect;
2 8     8   5668 use v5.14.0;
  8         31  
3 8     8   46 use warnings;
  8         16  
  8         566  
4 8     8   3849 use Devel::Git::MultiBisect::Init;
  8         25  
  8         388  
5 8         800 use Devel::Git::MultiBisect::Auxiliary qw(
6             clean_outputfile
7             hexdigest_one_file
8             validate_list_sequence
9 8     8   3994 );
  8         27  
10 8     8   102 use Carp;
  8         17  
  8         553  
11 8     8   79 use Cwd;
  8         14  
  8         460  
12 8     8   57 use File::Spec;
  8         17  
  8         143  
13 8     8   34 use File::Temp;
  8         45  
  8         644  
14 8     8   50 use List::Util qw(sum);
  8         42  
  8         17959  
15              
16             our $VERSION = '0.21';
17             $VERSION = eval $VERSION;
18              
19             =head1 NAME
20              
21             Devel::Git::MultiBisect - Study build and test output over a range of F<git> commits
22              
23             =head1 SYNOPSIS
24              
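25             You will typically construct an object of a class which is a child of
26             F<Devel::Git::MultiBisect>, such as F<Devel::Git::MultiBisect::AllCommits>,
27             F<Devel::Git::MultiBisect::Transitions> or
28             F<Devel::Git::MultiBisect::BuildTransitions>. All methods documented in this
29             parent package may be called from any of these child classes.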
30              
31             use Devel::Git::MultiBisect::AllCommits;
32             $self = Devel::Git::MultiBisect::AllCommits->new(\%parameters);
33              
34             ... or
35              
36             use Devel::Git::MultiBisect::Transitions;
37             $self = Devel::Git::MultiBisect::Transitions->new(\%parameters);
38              
39             ... or
40              
41             use Devel::Git::MultiBisect::BuildTransitions;
42             $self = Devel::Git::MultiBisect::BuildTransitions->new(\%parameters);
43              
44             ... and then:
45              
46             $commit_range = $self->get_commits_range();
47              
48             $full_targets = $self->set_targets(\@target_args);
49              
50             ... or, under certain circumstances:
51              
52             $full_targets = $self->set_outside_targets(\@target_args);
53              
54             $outputs = $self->run_test_files_on_one_commit($commit_range->[0]);
55              
56             ... followed by methods specific to the child class.
57              
58             ... and then perhaps also:
59              
60             $timings = $self->get_timings();
61              
62             =head1 DESCRIPTION
63              
64             Given a Perl library or application kept in F<git> for version control, it is
65             often useful to be able to compare the output collected from running one or
66             more test files over a range of F<git> commits. If that range is sufficiently
67             large, a test may fail in B<more than one way> over that range.
68              
69             If that is the case, then simply asking, I<"When did this file start to
70             fail?"> -- a question which C<git bisect> is designed to answer -- is
71             insufficient. In order to identify more than one point of failure, we may
72             need to (a) capture the test output for each commit; or, (b) capture the test
73             output only at those commits where the output changed. The output of a run of
74             a test file may change for a variety of reasons: test failures, segfaults,
75             changes in the number or content of tests, etc.
76              
77             F<Devel::Git::MultiBisect> provides methods to achieve that objective. Its
78             child classes, F<Devel::Git::MultiBisect::AllCommits> and
79             F<Devel::Git::MultiBisect::Transitions>, provide different flavors of that
80             functionality for objectives (a) and (b), respectively. Please refer to their
81             documentation for further discussion.
82              
83             Child class F<Devel::Git::MultiBisect::BuildTransitions> focuses on failures
84             during the B<build> process rather than during testing. It can handle three
85             different types of problems which arise when you run F<make> to build a Perl
86             library or to build Perl itself:
87              
88             =over 4
89              
90             =item * Exceptions detected by the C-compiler
91              
92             =item * Warnings emitted by the C-compiler
93              
94             =item * Warnings emitted by F or other languages invoked during F
95              
96             =back
97              
98             See the documentation for further details.
99              
100             =head2 GLOSSARY
101              
102             =over 4
103              
104             =item * B<commit>
105              
106             A source code change set entered ("committed") to a F<git> repository. Each
107             commit is denoted by a SHA. In this library, whenever a commit is called for
108             as the argument to a function, you can also use a F<git tag>.
109              
110             =item * B<commit range>
111              
112             The range of sequential commits (determined by F<git log>) requested for analysis.
113              
114             =item * B<target>
115              
116             A test file from the test suite of the application or library under study.
117              
118             =item * B<outside target>
119              
120             A test file outside the test suite of the application or library under study.
121              
122             =item * B<test output>
123              
124             What is sent to STDOUT or STDERR as a result of calling a test program such as
125             F<prove> or F<t/harness> on an individual target file. Currently we assume
126             that all such test programs are written based on the
127             L<Test Anything Protocol (TAP)|https://en.wikipedia.org/wiki/Test_Anything_Protocol>.
128              
129             =item * B<transitional commit>
130              
131             A commit at which the test output for a given target (or outside target)
132             changes from that of the commit immediately preceding.
133              
134             =item * B<digest>
135              
136             A string holding the output of a cryptographic process run on test output
137             which uniquely identifies that output. (Currently, we use the
138             C<Digest::MD5> algorithm.) We assume that if the digest of the test output
139             does not change from one commit to the next, then the later commit is not a
140             transitional commit.
141              
142             Note: Before taking a digest on a particular test output, we exclude text
143             such as timings which are highly likely to change from one run to the next and
144             which would introduce spurious variability into the digest calculations.
145              
146             =item * B<multisection> or B<multiple bisection>
147              
148             A series of configure-build-test process sequences at those commits within the
149             commit range which are selected by a bisection algorithm.
150              
151             Normally, when we bisect (via F<git bisect>, F<Porting/bisect.pl> or
152             otherwise), we are seeking a I<single> point where a Boolean result -- yes/no,
153             true/false, pass/fail -- is returned. What the test run outputs to STDOUT or
154             STDERR is a lesser concern.
155              
156             B<In multisection we are seeking I<multiple> points where the output
157             of the test command changes> -- regardless of whether that change is a C<PASS>,
158             C<FAIL> or whatever. We capture the output for later human or programmatic
159             examination.
160              
161             =back
162              
163             =head1 METHODS
164              
165             =head2 C<new()>
166              
167             =over 4
168              
169             =item * Purpose
170              
171             Constructor.
172              
173             =item * Arguments
174              
175             $self = Devel::Git::MultiBisect::AllCommits->new(\%params);
176              
177             or
178              
179             $self = Devel::Git::MultiBisect::Transitions->new(\%params);
180              
181             or
182              
183             $self = Devel::Git::MultiBisect::BuildTransitions->new(\%params);
184              
185             Reference to a hash, typically the return value of
186             C<Devel::Git::MultiBisect::Opts::process_options()>.
187              
188             The hashref passed as argument must contain key-value pairs for C<gitdir> and
189             C<outputdir>. C<new()> tests for the existence of each of these directories.
190              
191             =item * Return Value
192              
193             Object of Devel::Git::MultiBisect child class.
194              
195             =back
196              
197             =cut
198              
199             sub new {
200 0     0 1   my ($class, $params) = @_;
201              
202 0           my $data = Devel::Git::MultiBisect::Init::init($params);
203              
204 0           return bless $data, $class;
205             }
206              
207             =head2 C<get_commits_range()>
208              
209             =over 4
210              
211             =item * Purpose
212              
213             Identify the SHAs of each F<git> commit identified by C<new()>.
214              
215             =item * Arguments
216              
217             $commit_range = $self->get_commits_range();
218              
219             None; all data needed is already in the object.
220              
221             =item * Return Value
222              
223             Array reference, each element of which is a SHA.
224              
225             =back
226              
227             =cut
228              
229             sub get_commits_range {
230 0     0 1   my $self = shift;
231 0           return [ map { $_->{sha} } @{$self->{commits}} ];
  0            
  0            
232             }
233              
234             =head2 C<set_targets()>
235              
236             =over 4
237              
238             =item * Purpose
239              
240             Identify the test files which will be run at different points in the commits
241             range. We shall assume that each such test file has existed with its name
242             unchanged over the entire commit range. We further assume that each such test
243             file resides in or under the top-level directory of the F<git> checkout,
244             I<i.e.,> that the file can be specified by its relative path from the
245             top-level directory. (Should the latter assumption not be valid, use
246             C<set_outside_targets()>.)
247              
248             =item * Arguments
249              
250             $target_args = [
251             't/44_func_hashes_mult_unsorted.t',
252             't/45_func_hashes_alt_dual_sorted.t',
253             ];
254             $full_targets = $self->set_targets($target_args);
255              
256             Reference to an array holding the relative paths beneath the C<gitdir> to the
257             test files selected for examination.
258              
259             =item * Return Value
260              
261             Reference to an array holding hash references with these elements:
262              
263             =over 4
264              
265             =item * C<path>
266              
267             Absolute paths to the test files selected for examination. Test file is
268             tested for its existence.
269              
270             =item * C<stub>
271              
272             String composed by taking an element in the array ref passed as argument and
273             substituting underscores (C<_>) for forward slash (C</>) and dot (C<.>)
274             characters. So,
275              
276             t/44_func_hashes_mult_unsorted.t
277              
278             ... becomes:
279              
280             t_44_func_hashes_mult_unsorted_t
281              
282             =back
283              
284             =back
285              
286             =cut
287              
288             sub set_targets {
289 0     0 1   my ($self, $explicit_targets) = @_;
290              
291 0           my @raw_targets = @{$self->{targets}};
  0            
292              
293             # If set_targets() is provided with an appropriate argument
294             # ($explicit_targets), override whatever may have been stored in the
295             # object by new().
296              
297 0 0         if (defined $explicit_targets) {
298 0 0         croak "Explicit targets passed to set_targets() must be in array ref"
299             unless ref($explicit_targets) eq 'ARRAY';
300 0           @raw_targets = @{$explicit_targets};
  0            
301             }
302              
303 0           my @full_targets = ();
304 0           my @missing_files = ();
305 0           for my $rt (@raw_targets) {
306 0           my $ft = File::Spec->catfile($self->{gitdir}, $rt);
307 0 0         if (! -e $ft) { push @missing_files, $ft; next }
  0            
  0            
308 0           my $stub;
309 0           ($stub = $rt) =~ s{[./]}{_}g;
310 0           push @full_targets, {
311             path => $ft,
312             stub => $stub,
313             };
314             }
315 0 0         if (@missing_files) {
316 0           croak "Cannot find file(s) to be tested: @missing_files";
317             }
318 0           $self->{targets} = [ @full_targets ];
319 0           return \@full_targets;
320             }
321              
322             =head2 C<set_outside_targets()>
323              
324             =over 4
325              
326             =item * Purpose
327              
328             Identify the test files which will be run at different points in the commits
329             range. This method differs from C<set_targets()> in that it assumes that the
330             targeted test file sits I<outside> the F<git> repository in which the source
331             code resides and, consequently, must be specified with an absolute path.
332              
333             =item * Arguments
334              
335             $target_args = [
336             '/tmp/gh-22159-class.t',
337             ];
338             $full_targets = $self->set_outside_targets($target_args);
339              
340             Reference to an array holding the absolute paths to the test files selected
341             for examination. B<Note:> This method has not yet been tested with more than
342             one file in C<$target_args>.
343              
344             =item * Return Value
345              
346             Reference to an array holding hash references with these elements:
347              
348             =over 4
349              
350             =item * C<path>
351              
352             Absolute paths to the test files selected for examination. Test file is
353             tested for its existence.
354              
355             =item * C<stub>
356              
357             String composed by taking an element in the array ref passed as argument and
358             substituting underscores (C<_>) for forward slash (C</>) and dot (C<.>)
359             characters. So,
360              
361             /tmp/gh-22159-class.t
362              
363             ... becomes:
364              
365             _tmp_gh-22159-class_t
366              
367             =back
368              
369             =back
370              
371             =cut
372              
373             sub set_outside_targets {
374 0     0 1   my ($self, $explicit_targets) = @_;
375              
376 0           my @raw_targets = @{$self->{targets}};
  0            
377              
378             # If set_targets() is provided with an appropriate argument
379             # ($explicit_targets), override whatever may have been stored in the
380             # object by new().
381              
382 0 0         if (defined $explicit_targets) {
383 0 0         croak "Explicit targets passed to set_targets() must be in array ref"
384             unless ref($explicit_targets) eq 'ARRAY';
385 0           @raw_targets = @{$explicit_targets};
  0            
386             }
387              
388 0           my @full_targets = ();
389 0           my @missing_files = ();
390 0           for my $rt (@raw_targets) {
391 0           my $ft = $rt;
392 0 0         if (! -e $ft) { push @missing_files, $ft; next }
  0            
  0            
393 0           my $stub;
394 0           ($stub = $rt) =~ s{[./]}{_}g;
395 0           push @full_targets, {
396             path => $ft,
397             stub => $stub,
398             };
399             }
400 0 0         if (@missing_files) {
401 0           croak "Cannot find file(s) to be tested: @missing_files";
402             }
403 0           $self->{targets} = [ @full_targets ];
404 0           return \@full_targets;
405             }
406              
407             =head2 C<run_test_files_on_one_commit()>
408              
409             =over 4
410              
411             =item * Purpose
412              
413             Capture the output from running the selected test files at one specific F<git>
414             checkout.
415              
416             =item * Arguments
417              
418             $outputs = $self->run_test_files_on_one_commit("2a2e54a");
419              
420             or
421              
422             $excluded_targets = [
423             't/45_func_hashes_alt_dual_sorted.t',
424             ];
425             $outputs = $self->run_test_files_on_one_commit("2a2e54a", $excluded_targets);
426              
427             =over 4
428              
429             =item 1
430              
431             String holding the SHA from a single commit in the repository. This string
432             would typically be one of the elements in the array reference returned by
433             C<$self->get_commits_range()>. If no argument is provided, the method will
434             default to using the first element in the array reference returned by
435             C<$self->get_commits_range()>.
436              
437             =item 2
438              
439             Reference to array of target test files to be excluded from a particular
440             invocation of this method. Optional, but will die if argument is not an array
441             reference.
442              
443             =back
444              
445             =item * Return Value
446              
447             Reference to an array, each element of which is a hash reference with the
448             following elements:
449              
450             =over 4
451              
452             =item * C<commit>
453              
454             String holding the SHA from the commit passed as argument to this method (or
455             the default described above).
456              
457             =item * C<commit_short>
458              
459             String holding the value of C<commit> (above) truncated to the number of characters
460             specified in the C<short> element passed to the constructor; defaults to 7.
461              
462             =item * C<file_stub>
463              
464             String holding a rewritten version of the relative path beneath C<gitdir> of
465             the test file being run. In this relative path forward slash (C</>) and dot
466             (C<.>) characters are changed to underscores (C<_>). So,
467              
468             t/44_func_hashes_mult_unsorted.t
469              
470             ... becomes:
471              
472             t_44_func_hashes_mult_unsorted_t
473              
474             =item * C<file>
475              
476             String holding the full path to the file holding the TAP output collected
477             while running one test file at the given commit. The following example shows
478             how that path is calculated. Given:
479              
480             output directory (outputdir) => '/tmp/DQBuT_SRAY/'
481             SHA (commit) => '2a2e54af709f17cc6186b42840549c46478b6467'
482             shortened SHA (commit_short) => '2a2e54a'
483             test file (target->[$i]) => 't/44_func_hashes_mult_unsorted.t'
484              
485             ... the file is placed in the directory specified by C<outputdir>. We then
486             join C<commit_short> (the shortened SHA), C<file_stub> (the rewritten relative
487             path) and the strings C<output> and C<txt> with a dot to yield this value for
488             the C<file> element:
489              
490             2a2e54a.t_44_func_hashes_mult_unsorted_t.output.txt
491              
492             =item * C<md5_hex>
493              
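494             String holding the return value of
495             C<Devel::Git::MultiBisect::Auxiliary::hexdigest_one_file()> run with the file
496             designated by the C<file> element as an argument. (More precisely, the file
497             as modified by C<Devel::Git::MultiBisect::Auxiliary::clean_outputfile()>.)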
498              
499             =back
500              
501             Example:
502              
503             [
504             {
505             commit => "2a2e54af709f17cc6186b42840549c46478b6467",
506             commit_short => "2a2e54a",
507             file => "/tmp/1mVnyd59ee/2a2e54a.t_44_func_hashes_mult_unsorted_t.output.txt",
508             file_stub => "t_44_func_hashes_mult_unsorted_t",
509             md5_hex => "31b7c93474e15a16d702da31989ab565",
510             },
511             {
512             commit => "2a2e54af709f17cc6186b42840549c46478b6467",
513             commit_short => "2a2e54a",
514             file => "/tmp/1mVnyd59ee/2a2e54a.t_45_func_hashes_alt_dual_sorted_t.output.txt",
515             file_stub => "t_45_func_hashes_alt_dual_sorted_t",
516             md5_hex => "6ee767b9d2838e4bbe83be0749b841c1",
517             },
518             ]
519              
520             =item * Comment
521              
522             In this method's current implementation, we start with a C<git checkout> from
523             the repository at the specified C<commit>. We configure (I<e.g.,> C<perl
524             Makefile.PL>) and build (I<e.g.,> C<make>) the source code. We then test each
525             of the test files we have targeted (I<e.g.,> C<prove -vb
526             relative/path/to/test_file.t>). We redirect both STDOUT and STDERR to
527             C<outputfile>, clean up the outputfile to remove the line containing timings
528             (as that introduces unwanted variability in the C<md5_hex> values) and compute
529             the digest.
530              
531             This implementation is very much subject to change.
532              
533             If a true value for C<verbose> has been passed to the constructor, the method
534             prints C<Created [outputfile]> to STDOUT before returning.
535              
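536             B<Note:> While this method is publicly documented, in actual use you probably
537             will not need to call it directly. Instead, you will probably use either
538             C<Devel::Git::MultiBisect::AllCommits::run_test_files_on_all_commits()> or
539             C<Devel::Git::MultiBisect::Transitions::multisect_all_targets()>.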
540              
541             =back
542              
543             =cut
544              
545             sub run_test_files_on_one_commit {
546 0     0 1   my ($self, $commit, $excluded_targets) = @_;
547 0   0       $commit //= $self->{commits}->[0]->{sha};
548 0 0         say "Testing commit: $commit" if ($self->{verbose});
549              
550 0 0         if (defined $excluded_targets) {
551 0 0         if (ref($excluded_targets) ne 'ARRAY') {
552 0           croak "excluded_targets, if defined, must be in array reference";
553             }
554             }
555             else {
556 0           $excluded_targets = [];
557             }
558 0           my %excluded_targets;
559 0           for my $t (@{$excluded_targets}) {
  0            
560 0           my $ft = File::Spec->catfile($self->{gitdir}, $t);
561 0           $excluded_targets{$ft}++;
562             }
563              
564             my $current_targets = [
565 0           grep { ! exists $excluded_targets{$_->{path}} }
566 0           @{$self->{targets}}
  0            
567             ];
568              
569 0           my $starting_branch = $self->_configure_build_one_commit($commit);
570              
571 0           my $outputsref = $self->_test_one_commit($commit, $current_targets);
572             say "Tested commit: $commit; returning to: $starting_branch"
573 0 0         if ($self->{verbose});
574              
575             # We want to return to our basic branch (e.g., 'master', 'blead')
576             # before checking out a new commit.
577              
578 0 0         system(qq|git checkout --quiet $starting_branch|)
579             and croak "Unable to 'git checkout --quiet $starting_branch";
580              
581 0           $self->{commit_counter}++;
582 0 0         say "Commit counter: $self->{commit_counter}" if $self->{verbose};
583              
584 0           return $outputsref;
585             }
586              
587             sub _configure_one_commit {
588 0     0     my ($self, $commit) = @_;
589 0 0         chdir $self->{gitdir} or croak "Unable to change to $self->{gitdir}";
590 0 0         system(qq|git clean --quiet -dfx|) and croak "Unable to 'git clean --quiet -dfx'";
591 0           my $starting_branch = $self->{branch};
592              
593 0 0         system(qq|git checkout --quiet $commit|) and croak "Unable to 'git checkout --quiet $commit'";
594 0 0         say "Running '$self->{configure_command}'" if $self->{verbose};
595 0 0         system($self->{configure_command}) and croak "Unable to run '$self->{configure_command})'";
596 0           return $starting_branch;
597             }
598              
599             sub _configure_build_one_commit {
600 0     0     my ($self, $commit) = @_;
601              
602 0           my $starting_branch = $self->_configure_one_commit($commit);
603              
604 0 0         say "Running '$self->{make_command}'" if $self->{verbose};
605 0 0         system($self->{make_command}) and croak "Unable to run '$self->{make_command})'";
606              
607 0           return $starting_branch;
608             }
609              
610             sub _test_one_commit {
611 0     0     my ($self, $commit, $current_targets) = @_;
612 0           my $short = substr($commit,0,$self->{short});
613 0           my @outputs;
614 0           for my $target (@{$current_targets}) {
  0            
615             my $outputfile = File::Spec->catfile(
616             $self->{outputdir},
617             join('.' => (
618             $short,
619             $target->{stub},
620 0           'output',
621             'txt'
622             )),
623             );
624 0           my $command_raw = $self->{test_command};
625 0           my $cmd;
626 0 0         unless ($command_raw eq 'harness') {
627 0           $cmd = qq|$command_raw $target->{path} >$outputfile 2>&1|;
628             }
629             else {
630 0           $cmd = qq|cd t; ./perl harness -v $target->{path} >$outputfile 2>&1; cd -|;
631             }
632 0 0         say "Running '$cmd'" if $self->{verbose};
633 0 0         system($cmd) and croak "Unable to run test_command";
634 0           $outputfile = clean_outputfile($outputfile);
635             push @outputs, {
636             commit => $commit,
637             commit_short => $short,
638             file => $outputfile,
639             file_stub => $target->{stub},
640 0           md5_hex => hexdigest_one_file($outputfile),
641             };
642 0 0         say "Created $outputfile" if $self->{verbose};
643             }
644 0           return \@outputs;
645             }
646              
647             sub _bisection_decision {
648 0     0     my ($self, $target_h_md5_hex, $current_start_md5_hex, $h, $relevant_self,
649             $overall_end_md5_hex, $current_start_idx, $current_end_idx, $max_idx, $n) = @_;
650 0 0         if ($target_h_md5_hex ne $current_start_md5_hex) {
651 0           my $g = $h - 1;
652 0           $self->_run_one_commit_and_assign($g);
653 0           my $target_g_md5_hex = $relevant_self->[$g]->{md5_hex};
654 0 0         if ($target_g_md5_hex eq $current_start_md5_hex) {
655 0 0         if ($target_h_md5_hex eq $overall_end_md5_hex) {
656             }
657             else {
658 0           $current_start_idx = $h;
659 0           $current_end_idx = $max_idx;
660             }
661 0           $n++;
662             }
663             else {
664             # Bisection should continue downwards
665 0           $current_end_idx = $h;
666 0           $n++;
667             }
668             }
669             else {
670             # Bisection should continue upwards
671 0           $current_start_idx = $h;
672 0           $n++;
673             }
674 0           return ($current_start_idx, $current_end_idx, $n);
675             }
676              
677             =head2 C<get_timings()>
678              
679             =over 4
680              
681             =item * Purpose
682              
683             Get information on the time a multisection took to run.
684              
685             =item * Arguments
686              
687             None; all data needed is already in the object.
688              
689             =item * Return Value
690              
691             Hash reference. The selection of elements in this hashref will depend on
692             which subclass of F<Devel::Git::MultiBisect> you are using and may differ among
693             subclasses. Example:
694              
695             { elapsed => 4297, mean => 186.83, runs => 23 }
696              
697             In this example (taken from a run of one test file over 220 commits in Perl 5
698             blead), 23 runs were needed to achieve a result. These took 4297 seconds
699             (approximately 71 minutes) with a mean run time of approximately 3 minutes
700             each.
701              
702             Method will return undefined value if timings are not yet available within the
703             object.
704              
705             =back
706              
707             =cut
708              
709             sub get_timings {
710 0     0 1   my $self = shift;
711 0 0         return unless exists $self->{timings};
712 0           return $self->{timings};
713             }
714              
715             =head1 SUPPORT
716              
717             Please report any bugs by mail to C<bug-Devel-Git-MultiBisect@rt.cpan.org>
718             or through the web interface at L<http://rt.cpan.org>.
719              
720             =head1 AUTHOR
721              
722             James E. Keenan (jkeenan at cpan dot org). When sending correspondence, please
723             include 'Devel::Git::MultiBisect' or 'Devel-Git-MultiBisect' in your subject line.
724              
725             Creation date: October 12 2016. Last modification date: April 23 2024.
726              
727             Development repository: L<https://github.com/jkeenan/devel-git-multibisect>
728              
729             =head1 ACKNOWLEDGEMENTS
730              
731             Thanks to the following contributors and reviewers:
732              
733             =over 4
734              
735             =item * Smylers
736              
737             For naming suggestion: L
738              
739             =item * Ricardo Signes
740              
741             For feedback during initial development.
742              
743             =item * Eily and Monk::Thomas
744              
745             For diagnosis of regex problems in L.
746              
747             =item * Max Maischein
748              
749             For diagnosis of File::Temp problems in L.
750              
751             =back
752              
753             =head1 COPYRIGHT
754              
755             Copyright (c) 2016-2021 James E. Keenan. United States. All rights reserved.
756             This is free software and may be distributed under the same terms as Perl
757             itself.
758              
759             =cut
760              
761             1;
762