File Coverage

blib/lib/Perl/Tidy/VerticalAligner.pm
Criterion Covered Total %
statement 1912 2065 92.5
branch 668 826 80.8
condition 423 544 77.7
subroutine 98 109 89.9
pod 0 71 0.0
total 3101 3615 85.7


line stmt bran cond sub pod time code
1             package Perl::Tidy::VerticalAligner;
2 39     39   299 use strict;
  39         90  
  39         1496  
3 39     39   212 use warnings;
  39         93  
  39         1328  
4 39     39   219 use Carp;
  39         74  
  39         2879  
5 39     39   277 use English qw( -no_match_vars );
  39         88  
  39         365  
6             our $VERSION = '20230909';
7 39     39   30807 use Perl::Tidy::VerticalAligner::Alignment;
  39         110  
  39         1386  
8 39     39   16051 use Perl::Tidy::VerticalAligner::Line;
  39         114  
  39         1404  
9              
10 39     39   265 use constant DEVEL_MODE => 0;
  39         91  
  39         2397  
11 39     39   247 use constant EMPTY_STRING => q{};
  39         97  
  39         1760  
12 39     39   212 use constant SPACE => q{ };
  39         115  
  39         17671  
13              
14             # The Perl::Tidy::VerticalAligner package collects output lines and
15             # attempts to line up certain common tokens, such as => and #, which are
16             # identified by the calling routine.
17             #
18             # Usage:
19             # - Initiate an object with a call to new().
20             # - Write lines one-by-one with calls to valign_input().
21             # - Make a final call to flush() to empty the pipeline.
22             #
23             # The sub valign_input collects lines into groups. When a group reaches
24             # the maximum possible size it is processed for alignment and output.
25             # The maximum group size is reached whenever there is a change in indentation
26             # level, a blank line, a block comment, or an external flush call. The calling
27             # routine may also force a break in alignment at any time.
28             #
29             # If the calling routine needs to interrupt the output and send other text to
30             # the output, it must first call flush() to empty the output pipeline. This
31             # might occur for example if a block of pod text needs to be sent to the output
32             # between blocks of code.
33              
34             # It is essential that a final call to flush() be made. Otherwise some
35             # final lines of text will be lost.
36              
37             # Index...
38             # CODE SECTION 1: Preliminary code, global definitions and sub new
39             # sub new
40             # CODE SECTION 2: Some Basic Utilities
41             # CODE SECTION 3: Code to accept input and form groups
42             # sub valign_input
43             # CODE SECTION 4: Code to process comment lines
44             # sub _flush_comment_lines
45             # CODE SECTION 5: Code to process groups of code lines
46             # sub _flush_group_lines
47             # CODE SECTION 6: Output Step A
48             # sub valign_output_step_A
49             # CODE SECTION 7: Output Step B
50             # sub valign_output_step_B
51             # CODE SECTION 8: Output Step C
52             # sub valign_output_step_C
53             # CODE SECTION 9: Output Step D
54             # sub valign_output_step_D
55             # CODE SECTION 10: Summary
56             # sub report_anything_unusual
57              
58             ##################################################################
59             # CODE SECTION 1: Preliminary code, global definitions and sub new
60             ##################################################################
61              
62             sub AUTOLOAD {
63              
64             # Catch any undefined sub calls so that we are sure to get
65             # some diagnostic information. This sub should never be called
66             # except for a programming error.
67 0     0   0 our $AUTOLOAD;
68 0 0       0 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
69 0         0 my ( $pkg, $fname, $lno ) = caller();
70 0         0 my $my_package = __PACKAGE__;
71 0         0 print {*STDERR} <<EOM;
  0         0  
72             ======================================================================
73             Error detected in package '$my_package', version $VERSION
74             Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
75             Called from package: '$pkg'
76             Called from File '$fname' at line '$lno'
77             This error is probably due to a recent programming change
78             ======================================================================
79             EOM
80 0         0 exit 1;
81             } ## end sub AUTOLOAD
82              
83       0     sub DESTROY {
84              
85             # required to avoid call to AUTOLOAD in some versions of perl
86             }
87              
88             sub Die {
89 0     0 0 0 my ($msg) = @_;
90 0         0 Perl::Tidy::Die($msg);
91 0         0 croak "unexpected return from Perl::Tidy::Die";
92             }
93              
94             sub Fault {
95 0     0 0 0 my ($msg) = @_;
96              
97             # This routine is called for errors that really should not occur
98             # except if there has been a bug introduced by a recent program change.
99             # Please add comments at calls to Fault to explain why the call
100             # should not occur, and where to look to fix it.
101 0         0 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
102 0         0 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
103 0         0 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
104 0         0 my $pkg = __PACKAGE__;
105              
106 0         0 my $input_stream_name = get_input_stream_name();
107              
108 0         0 Die(<<EOM);
109             ==============================================================================
110             While operating on input stream with name: '$input_stream_name'
111             A fault was detected at line $line0 of sub '$subroutine1'
112             in file '$filename1'
113             which was called from line $line1 of sub '$subroutine2'
114             Message: '$msg'
115             This is probably an error introduced by a recent programming change.
116             $pkg reports VERSION='$VERSION'.
117             ==============================================================================
118             EOM
119              
120             # We shouldn't get here, but this return is to keep Perl-Critic from
121             # complaining.
122 0         0 return;
123             } ## end sub Fault
124              
125             my %valid_LINE_keys;
126              
127             BEGIN {
128              
129             # define valid keys in a line object
130 39     39   330 my @q = qw(
131             jmax
132             rtokens
133             rfields
134             rfield_lengths
135             rpatterns
136             indentation
137             leading_space_count
138             outdent_long_lines
139             list_type
140             list_seqno
141             is_hanging_side_comment
142             maximum_line_length
143             rvertical_tightness_flags
144             is_terminal_ternary
145             j_terminal_match
146             end_group
147             Kend
148             ci_level
149             level
150             level_end
151             imax_pair
152              
153             ralignments
154             );
155              
156 39         5889 @valid_LINE_keys{@q} = (1) x scalar(@q);
157             } ## end BEGIN
158              
159             BEGIN {
160              
161             # Define the fixed indexes for variables in $self, which is an array
162             # reference. Note the convention of leading and trailing underscores to
163             # keep them unique.
164             # Do not combine with other BEGIN blocks (c101).
165 39     39   192 my $i = 0;
166             use constant {
167 39         11217 _file_writer_object_ => $i++,
168             _logger_object_ => $i++,
169             _diagnostics_object_ => $i++,
170              
171             _rOpts_ => $i++,
172             _rOpts_indent_columns_ => $i++,
173             _rOpts_tabs_ => $i++,
174             _rOpts_entab_leading_whitespace_ => $i++,
175             _rOpts_fixed_position_side_comment_ => $i++,
176             _rOpts_minimum_space_to_comment_ => $i++,
177             _rOpts_valign_code_ => $i++,
178             _rOpts_valign_block_comments_ => $i++,
179             _rOpts_valign_side_comments_ => $i++,
180              
181             _last_level_written_ => $i++,
182             _last_side_comment_column_ => $i++,
183             _last_side_comment_line_number_ => $i++,
184             _last_side_comment_length_ => $i++,
185             _last_side_comment_level_ => $i++,
186             _outdented_line_count_ => $i++,
187             _first_outdented_line_at_ => $i++,
188             _last_outdented_line_at_ => $i++,
189             _consecutive_block_comments_ => $i++,
190              
191             _rgroup_lines_ => $i++,
192             _group_level_ => $i++,
193             _group_type_ => $i++,
194             _group_maximum_line_length_ => $i++,
195             _zero_count_ => $i++,
196             _last_leading_space_count_ => $i++,
197             _comment_leading_space_count_ => $i++,
198 39     39   338 };
  39         86  
199              
200             # Debug flag. This is a relic from the original program development
201             # looking for problems with tab characters. Caution: this debug flag can
202             # produce a lot of output It should be 0 except when debugging small
203             # scripts.
204              
205 39     39   290 use constant DEBUG_TABS => 0;
  39         76  
  39         4731  
206              
207             my $debug_warning = sub {
208 0         0 print {*STDOUT} "VALIGN_DEBUGGING with key $_[0]\n";
  0         0  
209 0         0 return;
210 39         500 };
211              
212 39         53473 DEBUG_TABS && $debug_warning->('TABS');
213             } ## end BEGIN
214              
215             # GLOBAL variables
216             my (
217              
218             %valign_control_hash,
219             $valign_control_default,
220              
221             );
222              
223             sub check_options {
224              
225             # This routine is called to check the user-supplied run parameters
226             # and to configure the control hashes to them.
227 559     559 0 1843 my ($rOpts) = @_;
228              
229             # All alignments are done by default
230 559         1586 %valign_control_hash = ();
231 559         1350 $valign_control_default = 1;
232              
233             # If -vil=s is entered without -vxl, assume -vxl='*'
234 559 50 66     4642 if ( !$rOpts->{'valign-exclusion-list'}
235             && $rOpts->{'valign-inclusion-list'} )
236             {
237 0         0 $rOpts->{'valign-exclusion-list'} = '*';
238             }
239              
240             # See if the user wants to exclude any alignment types ...
241 559 100       2386 if ( $rOpts->{'valign-exclusion-list'} ) {
242              
243             # The inclusion list is only relevant if there is an exclusion list
244 3 100       18 if ( $rOpts->{'valign-inclusion-list'} ) {
245 1         7 my @vil = split /\s+/, $rOpts->{'valign-inclusion-list'};
246 1         6 @valign_control_hash{@vil} = (1) x scalar(@vil);
247             }
248              
249             # Note that the -vxl list is done after -vil, so -vxl has priority
250             # in the event of duplicate entries.
251 3         18 my @vxl = split /\s+/, $rOpts->{'valign-exclusion-list'};
252 3         18 @valign_control_hash{@vxl} = (0) x scalar(@vxl);
253              
254             # Optimization: revert to defaults if no exclusions.
255             # This could happen with -vxl=' ' and any -vil list
256 3 50       17 if ( !@vxl ) {
257 0         0 %valign_control_hash = ();
258             }
259              
260             # '$valign_control_default' applies to types not in the hash:
261             # - If a '*' was entered then set it to be that default type
262             # - Otherwise, leave it set it to 1
263 3 100       18 if ( defined( $valign_control_hash{'*'} ) ) {
264 1         3 $valign_control_default = $valign_control_hash{'*'};
265             }
266              
267             # Side comments are controlled separately and must be removed
268             # if given in a list.
269 3 50       12 if (%valign_control_hash) {
270 3         12 $valign_control_hash{'#'} = 1;
271             }
272             }
273              
274 559         1414 return;
275             } ## end sub check_options
276              
277             sub check_keys {
278 0     0 0 0 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
279              
280             # Check the keys of a hash:
281             # $rtest = ref to hash to test
282             # $rvalid = ref to hash with valid keys
283              
284             # $msg = a message to write in case of error
285             # $exact_match defines the type of check:
286             # = false: test hash must not have unknown key
287             # = true: test hash must have exactly same keys as known hash
288             my @unknown_keys =
289 0         0 grep { !exists $rvalid->{$_} } keys %{$rtest};
  0         0  
  0         0  
290             my @missing_keys =
291 0         0 grep { !exists $rtest->{$_} } keys %{$rvalid};
  0         0  
  0         0  
292 0         0 my $error = @unknown_keys;
293 0 0 0     0 if ($exact_match) { $error ||= @missing_keys }
  0         0  
294 0 0       0 if ($error) {
295 0         0 local $LIST_SEPARATOR = ')(';
296 0         0 my @expected_keys = sort keys %{$rvalid};
  0         0  
297 0         0 @unknown_keys = sort @unknown_keys;
298 0         0 Fault(<<EOM);
299             ------------------------------------------------------------------------
300             Program error detected checking hash keys
301             Message is: '$msg'
302             Expected keys: (@expected_keys)
303             Unknown key(s): (@unknown_keys)
304             Missing key(s): (@missing_keys)
305             ------------------------------------------------------------------------
306             EOM
307             }
308 0         0 return;
309             } ## end sub check_keys
310              
311             sub new {
312              
313 560     560 0 2844 my ( $class, @args ) = @_;
314              
315 560         3774 my %defaults = (
316             rOpts => undef,
317             file_writer_object => undef,
318             logger_object => undef,
319             diagnostics_object => undef,
320             );
321 560         3703 my %args = ( %defaults, @args );
322              
323             # Initialize other caches and buffers
324 560         3696 initialize_step_B_cache();
325 560         3042 initialize_valign_buffer();
326 560         3020 initialize_decode();
327 560         3244 set_logger_object( $args{logger_object} );
328              
329             # Initialize all variables in $self.
330             # To add an item to $self, first define a new constant index in the BEGIN
331             # section.
332 560         1669 my $self = [];
333              
334             # objects
335 560         2082 $self->[_file_writer_object_] = $args{file_writer_object};
336 560         1809 $self->[_logger_object_] = $args{logger_object};
337 560         1505 $self->[_diagnostics_object_] = $args{diagnostics_object};
338              
339             # shortcuts to user options
340 560         1382 my $rOpts = $args{rOpts};
341              
342 560         1515 $self->[_rOpts_] = $rOpts;
343 560         1743 $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
344 560         1712 $self->[_rOpts_tabs_] = $rOpts->{'tabs'};
345             $self->[_rOpts_entab_leading_whitespace_] =
346 560         1581 $rOpts->{'entab-leading-whitespace'};
347             $self->[_rOpts_fixed_position_side_comment_] =
348 560         1401 $rOpts->{'fixed-position-side-comment'};
349             $self->[_rOpts_minimum_space_to_comment_] =
350 560         1635 $rOpts->{'minimum-space-to-comment'};
351 560         1546 $self->[_rOpts_valign_code_] = $rOpts->{'valign-code'};
352 560         1488 $self->[_rOpts_valign_block_comments_] = $rOpts->{'valign-block-comments'};
353 560         1572 $self->[_rOpts_valign_side_comments_] = $rOpts->{'valign-side-comments'};
354              
355             # Batch of lines being collected
356 560         1724 $self->[_rgroup_lines_] = [];
357 560         1451 $self->[_group_level_] = 0;
358 560         1441 $self->[_group_type_] = EMPTY_STRING;
359 560         1584 $self->[_group_maximum_line_length_] = undef;
360 560         1495 $self->[_zero_count_] = 0;
361 560         1442 $self->[_comment_leading_space_count_] = 0;
362 560         1365 $self->[_last_leading_space_count_] = 0;
363              
364             # Memory of what has been processed
365 560         1294 $self->[_last_level_written_] = -1;
366 560         1236 $self->[_last_side_comment_column_] = 0;
367 560         1266 $self->[_last_side_comment_line_number_] = 0;
368 560         1287 $self->[_last_side_comment_length_] = 0;
369 560         1226 $self->[_last_side_comment_level_] = -1;
370 560         1220 $self->[_outdented_line_count_] = 0;
371 560         1290 $self->[_first_outdented_line_at_] = 0;
372 560         1263 $self->[_last_outdented_line_at_] = 0;
373 560         1251 $self->[_consecutive_block_comments_] = 0;
374              
375 560         1439 bless $self, $class;
376 560         3021 return $self;
377             } ## end sub new
378              
379             #################################
380             # CODE SECTION 2: Basic Utilities
381             #################################
382              
383             sub flush {
384              
385             # flush() is the external call to completely empty the pipeline.
386 1817     1817 0 3864 my ($self) = @_;
387              
388             # push things out the pipeline...
389              
390             # push out any current group lines
391 1817         5818 $self->_flush_group_lines();
392              
393             # then anything left in the cache of step_B
394 1817         7300 $self->_flush_step_B_cache();
395              
396             # then anything left in the buffer of step_C
397 1817         5806 $self->dump_valign_buffer();
398              
399 1817         3692 return;
400             } ## end sub flush
401              
402             sub initialize_for_new_group {
403 2236     2236 0 4922 my ($self) = @_;
404              
405 2236         5203 $self->[_rgroup_lines_] = [];
406 2236         4699 $self->[_group_type_] = EMPTY_STRING;
407 2236         4158 $self->[_zero_count_] = 0;
408 2236         3617 $self->[_comment_leading_space_count_] = 0;
409 2236         3605 $self->[_last_leading_space_count_] = 0;
410 2236         3868 $self->[_group_maximum_line_length_] = undef;
411              
412             # Note that the value for _group_level_ is
413             # handled separately in sub valign_input
414 2236         3746 return;
415             } ## end sub initialize_for_new_group
416              
417             sub group_line_count {
418 73     73 0 118 return +@{ $_[0]->[_rgroup_lines_] };
  73         368  
419             }
420              
421             # interface to Perl::Tidy::Diagnostics routines
422             # For debugging; not currently used
423             sub write_diagnostics {
424 0     0 0 0 my ( $self, $msg ) = @_;
425 0         0 my $diagnostics_object = $self->[_diagnostics_object_];
426 0 0       0 if ($diagnostics_object) {
427 0         0 $diagnostics_object->write_diagnostics($msg);
428             }
429 0         0 return;
430             } ## end sub write_diagnostics
431              
432             { ## begin closure for logger routines
433             my $logger_object;
434              
435             # Called once per file to initialize the logger object
436             sub set_logger_object {
437 560     560 0 21704 $logger_object = shift;
438 560         1610 return;
439             }
440              
441             sub get_logger_object {
442 0     0 0 0 return $logger_object;
443             }
444              
445             sub get_input_stream_name {
446 0     0 0 0 my $input_stream_name = EMPTY_STRING;
447 0 0       0 if ($logger_object) {
448 0         0 $input_stream_name = $logger_object->get_input_stream_name();
449             }
450 0         0 return $input_stream_name;
451             } ## end sub get_input_stream_name
452              
453             sub warning {
454 0     0 0 0 my ($msg) = @_;
455 0 0       0 if ($logger_object) {
456 0         0 $logger_object->warning($msg);
457             }
458 0         0 return;
459             } ## end sub warning
460              
461             sub write_logfile_entry {
462 91     91 0 189 my ($msg) = @_;
463 91 50       220 if ($logger_object) {
464 91         210 $logger_object->write_logfile_entry($msg);
465             }
466 91         205 return;
467             } ## end sub write_logfile_entry
468             }
469              
470             sub get_cached_line_count {
471 1     1 0 3 my $self = shift;
472 1 50       5 return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
473             }
474              
475             sub get_recoverable_spaces {
476              
477             # return the number of spaces (+ means shift right, - means shift left)
478             # that we would like to shift a group of lines with the same indentation
479             # to get them to line up with their opening parens
480 4113     4113 0 6954 my $indentation = shift;
481 4113 100       14874 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
482             } ## end sub get_recoverable_spaces
483              
484             ######################################################
485             # CODE SECTION 3: Code to accept input and form groups
486             ######################################################
487              
488 39     39   415 use constant DEBUG_VALIGN => 0;
  39         118  
  39         2617  
489 39     39   282 use constant SC_LONG_LINE_DIFF => 12;
  39         114  
  39         3575  
490              
491             my %is_closing_token;
492              
493             BEGIN {
494 39     39   252 my @q = qw< } ) ] >;
495 39         65308 @is_closing_token{@q} = (1) x scalar(@q);
496             }
497              
498             #--------------------------------------------
499             # VTFLAGS: Vertical tightness types and flags
500             #--------------------------------------------
501             # Vertical tightness is controlled by a 'type' and associated 'flags' for each
502             # line. These values are set by sub Formatter::set_vertical_tightness_flags.
503             # These are defined as follows:
504              
505             # Vertical Tightness Line Type Codes:
506             # Type 0, no vertical tightness condition
507             # Type 1, last token of this line is a non-block opening token
508             # Type 2, first token of next line is a non-block closing
509             # Type 3, isolated opening block brace
510             # type 4, isolated closing block brace
511              
512             # Opening token flag values are the vertical tightness flags
513             # 0 do not join with next line
514             # 1 just one join per line
515             # 2 any number of joins
516              
517             # Closing token flag values indicate spacing:
518             # 0 = no space added before closing token
519             # 1 = single space added before closing token
520              
521             sub valign_input {
522              
523             #---------------------------------------------------------------------
524             # This is the front door of the vertical aligner. On each call
525             # we receive one line of specially marked text for vertical alignment.
526             # We compare the line with the current group, and either:
527             # - the line joins the current group if alignments match, or
528             # - the current group is flushed and a new group is started otherwise
529             #---------------------------------------------------------------------
530             #
531             # The key input parameters describing each line are:
532             # $level = indentation level of this line
533             # $rfields = ref to array of fields
534             # $rpatterns = ref to array of patterns, one per field
535             # $rtokens = ref to array of tokens starting fields 1,2,..
536             # $rfield_lengths = ref to array of field display widths
537             #
538             # Here is an example of what this package does. In this example,
539             # we are trying to line up both the '=>' and the '#'.
540             #
541             # '18' => 'grave', # \`
542             # '19' => 'acute', # `'
543             # '20' => 'caron', # \v
544             # <-tabs-><f1-><--field 2 ---><-f3->
545             # | | | |
546             # | | | |
547             # col1 col2 col3 col4
548             #
549             # The calling routine has already broken the entire line into 3 fields as
550             # indicated. (So the work of identifying promising common tokens has
551             # already been done).
552             #
553             # In this example, there will be 2 tokens being matched: '=>' and '#'.
554             # They are the leading parts of fields 2 and 3, but we do need to know
555             # what they are so that we can dump a group of lines when these tokens
556             # change.
557             #
558             # The fields contain the actual characters of each field. The patterns
559             # are like the fields, but they contain mainly token types instead
560             # of tokens, so they have fewer characters. They are used to be
561             # sure we are matching fields of similar type.
562             #
563             # In this example, there will be 4 column indexes being adjusted. The
564             # first one is always at zero. The interior columns are at the start of
565             # the matching tokens, and the last one tracks the maximum line length.
566             #
567             # Each time a new line comes in, it joins the current vertical
568             # group if possible. Otherwise it causes the current group to be flushed
569             # and a new group is started.
570             #
571             # For each new group member, the column locations are increased, as
572             # necessary, to make room for the new fields. When the group is finally
573             # output, these column numbers are used to compute the amount of spaces of
574             # padding needed for each field.
575             #
576             # Programming note: the fields are assumed not to have any tab characters.
577             # Tabs have been previously removed except for tabs in quoted strings and
578             # side comments. Tabs in these fields can mess up the column counting.
579             # The log file warns the user if there are any such tabs.
580              
581 7376     7376 0 16254 my ( $self, $rcall_hash ) = @_;
582              
583             # Unpack the call args. This form is significantly faster than getting them
584             # one-by-one.
585             my (
586              
587             $Kend,
588             $break_alignment_after,
589             $break_alignment_before,
590             $ci_level,
591             $forget_side_comment,
592             $indentation,
593             $is_terminal_ternary,
594             $level,
595             $level_end,
596             $list_seqno,
597             $maximum_line_length,
598             $outdent_long_lines,
599             $rline_alignment,
600             $rvertical_tightness_flags,
601              
602             ) =
603              
604 7376         28592 @{$rcall_hash}{
605 7376         14181 qw(
606             Kend
607             break_alignment_after
608             break_alignment_before
609             ci_level
610             forget_side_comment
611             indentation
612             is_terminal_ternary
613             level
614             level_end
615             list_seqno
616             maximum_line_length
617             outdent_long_lines
618             rline_alignment
619             rvertical_tightness_flags
620             )
621             };
622              
623             my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
624 7376         12053 @{$rline_alignment};
  7376         14268  
625              
626             # The index '$Kend' is a value which passed along with the line text to sub
627             # 'write_code_line' for a convergence check.
628              
629             # number of fields is $jmax
630             # number of tokens between fields is $jmax-1
631 7376         10569 my $jmax = @{$rfields} - 1;
  7376         12435  
632              
633 7376 100       16221 my $leading_space_count =
634             ref($indentation) ? $indentation->get_spaces() : $indentation;
635              
636             # set outdented flag to be sure we either align within statements or
637             # across statement boundaries, but not both.
638 7376         13943 my $is_outdented =
639             $self->[_last_leading_space_count_] > $leading_space_count;
640 7376         12109 $self->[_last_leading_space_count_] = $leading_space_count;
641              
642             # Identify a hanging side comment. Hanging side comments have an empty
643             # initial field.
644 7376   100     25124 my $is_hanging_side_comment =
645             ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
646              
647             # Undo outdented flag for a hanging side comment
648 7376 100       14918 $is_outdented = 0 if $is_hanging_side_comment;
649              
650             # Identify a block comment.
651 7376   100     22925 my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
652              
653             # Block comment .. update count
654 7376 100       14078 if ($is_block_comment) {
655 632         1273 $self->[_consecutive_block_comments_]++;
656             }
657              
658             # Not a block comment ..
659             # Forget side comment column if we saw 2 or more block comments,
660             # and reset the count
661             else {
662              
663 6744 100       14899 if ( $self->[_consecutive_block_comments_] > 1 ) {
664 67         396 $self->forget_side_comment();
665             }
666 6744         11090 $self->[_consecutive_block_comments_] = 0;
667             }
668              
669             # Reset side comment location if we are entering a new block from level 0.
670             # This is intended to keep them from drifting too far to the right.
671 7376 100       14329 if ($forget_side_comment) {
672 44         292 $self->forget_side_comment();
673             }
674              
675 7376         12363 my $is_balanced_line = $level_end == $level;
676              
677 7376         11860 my $group_level = $self->[_group_level_];
678 7376         11632 my $group_maximum_line_length = $self->[_group_maximum_line_length_];
679              
680 7376         9960 DEBUG_VALIGN && do {
681             my $nlines = $self->group_line_count();
682             print {*STDOUT}
683             "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level=$level, group_level=$group_level, level_end=$level_end\n";
684             };
685              
686             # Validate cached line if necessary: If we can produce a container
687             # with just 2 lines total by combining an existing cached opening
688             # token with the closing token to follow, then we will mark both
689             # cached flags as valid.
690 7376         17715 my $cached_line_type = get_cached_line_type();
691 7376 100       15571 if ($cached_line_type) {
692 224         599 my $cached_line_opening_flag = get_cached_line_opening_flag();
693 224 50       587 if ($rvertical_tightness_flags) {
694 224         590 my $cached_seqno = get_cached_seqno();
695 224 100 100     1088 if ( $cached_seqno
      100        
696             && $rvertical_tightness_flags->{_vt_seqno}
697             && $rvertical_tightness_flags->{_vt_seqno} == $cached_seqno )
698             {
699              
700             # Fix for b1187 and b1188: Normally this step is only done
701             # if the number of existing lines is 0 or 1. But to prevent
702             # blinking, this range can be controlled by the caller.
703             # If zero values are given we fall back on the range 0 to 1.
704 4         22 my $line_count = $self->group_line_count();
705 4         9 my $min_lines = $rvertical_tightness_flags->{_vt_min_lines};
706 4         10 my $max_lines = $rvertical_tightness_flags->{_vt_max_lines};
707 4 50       22 $min_lines = 0 if ( !$min_lines );
708 4 50       16 $max_lines = 1 if ( !$max_lines );
709 4 100 66     29 if ( ( $line_count >= $min_lines )
710             && ( $line_count <= $max_lines ) )
711             {
712 3   50     18 $rvertical_tightness_flags->{_vt_valid_flag} ||= 1;
713 3         39 set_cached_line_valid(1);
714             }
715             }
716             }
717              
718             # do not join an opening block brace (type 3, see VTFLAGS)
719             # with an unbalanced line unless requested with a flag value of 2
720 224 50 100     691 if ( $cached_line_type == 3
      66        
      66        
721             && !$self->group_line_count()
722             && $cached_line_opening_flag < 2
723             && !$is_balanced_line )
724             {
725 0         0 set_cached_line_valid(0);
726             }
727             }
728              
729             # shouldn't happen:
730 7376 50       15819 if ( $level < 0 ) { $level = 0 }
  0         0  
731              
732             # do not align code across indentation level changes
733             # or changes in the maximum line length
734             # or if vertical alignment is turned off
735 7376 100 66     57626 if (
      66        
      66        
      100        
      100        
      100        
      100        
736             $level != $group_level
737             || ( $group_maximum_line_length
738             && $maximum_line_length != $group_maximum_line_length )
739             || $is_outdented
740             || ( $is_block_comment && !$self->[_rOpts_valign_block_comments_] )
741             || ( !$is_block_comment
742             && !$self->[_rOpts_valign_side_comments_]
743             && !$self->[_rOpts_valign_code_] )
744             )
745             {
746              
747 2837         11282 $self->_flush_group_lines( $level - $group_level );
748              
749 2837         5170 $group_level = $level;
750 2837         5094 $self->[_group_level_] = $group_level;
751 2837         4646 $self->[_group_maximum_line_length_] = $maximum_line_length;
752              
753             # Update leading spaces after the above flush because the leading space
754             # count may have been changed if the -icp flag is in effect
755 2837 100       6460 $leading_space_count =
756             ref($indentation) ? $indentation->get_spaces() : $indentation;
757             }
758              
759             # --------------------------------------------------------------------
760             # Collect outdentable block COMMENTS
761             # --------------------------------------------------------------------
762 7376 100       17178 if ( $self->[_group_type_] eq 'COMMENT' ) {
763 558 100 66     3146 if ( $is_block_comment
      66        
764             && $outdent_long_lines
765             && $leading_space_count == $self->[_comment_leading_space_count_] )
766             {
767              
768             # Note that for a comment group we are not storing a line
769             # but rather just the text and its length.
770 77         173 push @{ $self->[_rgroup_lines_] },
  77         345  
771             [ $rfields->[0], $rfield_lengths->[0], $Kend ];
772 77         273 return;
773             }
774             else {
775 481         1973 $self->_flush_group_lines();
776             }
777             }
778              
779 7299         11605 my $rgroup_lines = $self->[_rgroup_lines_];
780 7299 100 100     16854 if ( $break_alignment_before && @{$rgroup_lines} ) {
  111         537  
781 27         91 $rgroup_lines->[-1]->{'end_group'} = 1;
782             }
783              
784             # --------------------------------------------------------------------
785             # add dummy fields for terminal ternary
786             # --------------------------------------------------------------------
787 7299         10690 my $j_terminal_match;
788              
789 7299 100 100     16147 if ( $is_terminal_ternary && @{$rgroup_lines} ) {
  16         96  
790 13         124 $j_terminal_match =
791             fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
792             $rpatterns, $rfield_lengths, $group_level, );
793 13         29 $jmax = @{$rfields} - 1;
  13         37  
794             }
795              
796             # --------------------------------------------------------------------
797             # add dummy fields for else statement
798             # --------------------------------------------------------------------
799              
800             # Note the trailing space after 'else' here. If there were no space between
801             # the else and the next '{' then we would not be able to do vertical
802             # alignment of the '{'.
803 7299 100 100     18420 if ( $rfields->[0] eq 'else '
      66        
804 12         124 && @{$rgroup_lines}
805             && $is_balanced_line )
806             {
807              
808 9         194 $j_terminal_match =
809             fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
810             $rpatterns, $rfield_lengths );
811 9         19 $jmax = @{$rfields} - 1;
  9         24  
812             }
813              
814             # --------------------------------------------------------------------
815             # Handle simple line of code with no fields to match.
816             # --------------------------------------------------------------------
817 7299 100       14681 if ( $jmax <= 0 ) {
818 4278         7785 $self->[_zero_count_]++;
819              
820 4278 100 100     6079 if ( @{$rgroup_lines}
  4278         12857  
821             && !get_recoverable_spaces( $rgroup_lines->[0]->{'indentation'} ) )
822             {
823              
824             # flush the current group if it has some aligned columns..
825             # or we haven't seen a comment lately
826 337 100 100     1738 if ( $rgroup_lines->[0]->{'jmax'} > 1
827             || $self->[_zero_count_] > 3 )
828             {
829 308         1238 $self->_flush_group_lines();
830              
831             # Update '$rgroup_lines' - it will become a ref to empty array.
832             # This allows avoiding a call to get_group_line_count below.
833 308         967 $rgroup_lines = $self->[_rgroup_lines_];
834             }
835             }
836              
837             # start new COMMENT group if this comment may be outdented
838 4278 100 100     12419 if ( $is_block_comment
      66        
839             && $outdent_long_lines
840 531         1936 && !@{$rgroup_lines} )
841             {
842 531         1350 $self->[_group_type_] = 'COMMENT';
843 531         1117 $self->[_comment_leading_space_count_] = $leading_space_count;
844 531         1015 $self->[_group_maximum_line_length_] = $maximum_line_length;
845 531         949 push @{$rgroup_lines},
  531         1933  
846             [ $rfields->[0], $rfield_lengths->[0], $Kend ];
847 531         1994 return;
848             }
849              
850             # just write this line directly if no current group, no side comment,
851             # and no space recovery is needed.
852 3747 100 100     5623 if ( !@{$rgroup_lines}
  3747         13042  
853             && !get_recoverable_spaces($indentation) )
854             {
855              
856 3703         33677 $self->valign_output_step_B(
857             {
858             leading_space_count => $leading_space_count,
859             line => $rfields->[0],
860             line_length => $rfield_lengths->[0],
861             side_comment_length => 0,
862             outdent_long_lines => $outdent_long_lines,
863             rvertical_tightness_flags => $rvertical_tightness_flags,
864             level => $level,
865             level_end => $level_end,
866             Kend => $Kend,
867             maximum_line_length => $maximum_line_length,
868             }
869             );
870 3703         16413 return;
871             }
872             }
873             else {
874 3021         5987 $self->[_zero_count_] = 0;
875             }
876              
877             # --------------------------------------------------------------------
878             # It simplifies things to create a zero length side comment
879             # if none exists.
880             # --------------------------------------------------------------------
881 3065 100 100     13558 if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
882 2740         4678 $jmax += 1;
883 2740         5804 $rtokens->[ $jmax - 1 ] = '#';
884 2740         5419 $rfields->[$jmax] = EMPTY_STRING;
885 2740         4843 $rfield_lengths->[$jmax] = 0;
886 2740         5208 $rpatterns->[$jmax] = '#';
887             }
888              
889             # --------------------------------------------------------------------
890             # create an object to hold this line
891             # --------------------------------------------------------------------
892              
893             # The hash keys below must match the list of keys in %valid_LINE_keys.
894             # Values in this hash are accessed directly, except for 'ralignments',
895             # rather than with get/set calls for efficiency.
896 3065         52191 my $new_line = Perl::Tidy::VerticalAligner::Line->new(
897             {
898             jmax => $jmax,
899             rtokens => $rtokens,
900             rfields => $rfields,
901             rpatterns => $rpatterns,
902             rfield_lengths => $rfield_lengths,
903             indentation => $indentation,
904             leading_space_count => $leading_space_count,
905             outdent_long_lines => $outdent_long_lines,
906             list_seqno => $list_seqno,
907             list_type => EMPTY_STRING,
908             is_hanging_side_comment => $is_hanging_side_comment,
909             rvertical_tightness_flags => $rvertical_tightness_flags,
910             is_terminal_ternary => $is_terminal_ternary,
911             j_terminal_match => $j_terminal_match,
912             end_group => $break_alignment_after,
913             Kend => $Kend,
914             ci_level => $ci_level,
915             level => $level,
916             level_end => $level_end,
917             imax_pair => -1,
918             maximum_line_length => $maximum_line_length,
919              
920             ralignments => [],
921             }
922             );
923              
924 3065         5408 DEVEL_MODE
925             && check_keys( $new_line, \%valid_LINE_keys,
926             "Checking line keys at line definition", 1 );
927              
928             # --------------------------------------------------------------------
929             # Decide if this is a simple list of items.
930             # We use this to be less restrictive in deciding what to align.
931             # --------------------------------------------------------------------
932 3065 100       8818 decide_if_list($new_line) if ($list_seqno);
933              
934             # --------------------------------------------------------------------
935             # Append this line to the current group (or start new group)
936             # --------------------------------------------------------------------
937              
938 3065         4812 push @{ $self->[_rgroup_lines_] }, $new_line;
  3065         7525  
939 3065         5919 $self->[_group_maximum_line_length_] = $maximum_line_length;
940              
941             # output this group if it ends in a terminal else or ternary line
942 3065 100 100     17553 if ( defined($j_terminal_match) ) {
    100          
943 20         119 $self->_flush_group_lines();
944             }
945              
946             # Force break after jump to lower level
947             elsif ($level_end < $level
948             || $is_closing_token{ substr( $rfields->[0], 0, 1 ) } )
949             {
950 119         524 $self->_flush_group_lines(-1);
951             }
952              
953             else {
954             ##ok: no output needed
955             }
956              
957             # --------------------------------------------------------------------
958             # Some old debugging stuff
959             # --------------------------------------------------------------------
960 3065         4798 DEBUG_VALIGN && do {
961             print {*STDOUT} "exiting valign_input fields:";
962             dump_array( @{$rfields} );
963             print {*STDOUT} "exiting valign_input tokens:";
964             dump_array( @{$rtokens} );
965             print {*STDOUT} "exiting valign_input patterns:";
966             dump_array( @{$rpatterns} );
967             };
968              
969 3065         9450 return;
970             } ## end sub valign_input
971              
972             sub join_hanging_comment {
973              
974             # Add dummy fields to a hanging side comment to make it look
975             # like the first line in its potential group. This simplifies
976             # the coding.
977 38     38 0 98 my ( $new_line, $old_line ) = @_;
978              
979 38         91 my $jmax = $new_line->{'jmax'};
980              
981             # must be 2 fields
982 38 50       116 return 0 unless $jmax == 1;
983 38         82 my $rtokens = $new_line->{'rtokens'};
984              
985             # the second field must be a comment
986 38 50       119 return 0 unless $rtokens->[0] eq '#';
987 38         82 my $rfields = $new_line->{'rfields'};
988              
989             # the first field must be empty
990 38 50       215 return 0 if ( $rfields->[0] !~ /^\s*$/ );
991              
992             # the current line must have fewer fields
993 38         92 my $maximum_field_index = $old_line->{'jmax'};
994 38 100       123 return 0
995             if ( $maximum_field_index <= $jmax );
996              
997             # looks ok..
998 3         8 my $rpatterns = $new_line->{'rpatterns'};
999 3         10 my $rfield_lengths = $new_line->{'rfield_lengths'};
1000              
1001 3         8 $new_line->{'is_hanging_side_comment'} = 1;
1002              
1003 3         6 $jmax = $maximum_field_index;
1004 3         6 $new_line->{'jmax'} = $jmax;
1005 3         8 $rfields->[$jmax] = $rfields->[1];
1006 3         8 $rfield_lengths->[$jmax] = $rfield_lengths->[1];
1007 3         11 $rtokens->[ $jmax - 1 ] = $rtokens->[0];
1008 3         8 $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
1009              
1010 3         11 foreach my $j ( 1 .. $jmax - 1 ) {
1011 3         7 $rfields->[$j] = EMPTY_STRING;
1012 3         8 $rfield_lengths->[$j] = 0;
1013 3         7 $rtokens->[ $j - 1 ] = EMPTY_STRING;
1014 3         8 $rpatterns->[ $j - 1 ] = EMPTY_STRING;
1015             }
1016 3         8 return 1;
1017             } ## end sub join_hanging_comment
1018              
1019             { ## closure for sub decide_if_list
1020              
1021             my %is_comma_token;
1022              
1023             BEGIN {
1024              
1025 39     39   268 my @q = qw( => );
1026 39         147 push @q, ',';
1027 39         10470 @is_comma_token{@q} = (1) x scalar(@q);
1028             } ## end BEGIN
1029              
1030             sub decide_if_list {
1031              
1032 1032     1032 0 1971 my $line = shift;
1033              
1034             # A list will be taken to be a line with a forced break in which all
1035             # of the field separators are commas or comma-arrows (except for the
1036             # trailing #)
1037              
1038 1032         2131 my $rtokens = $line->{'rtokens'};
1039 1032         2207 my $test_token = $rtokens->[0];
1040 1032         2759 my ( $raw_tok, $lev, $tag, $tok_count ) =
1041             decode_alignment_token($test_token);
1042 1032 100       3188 if ( $is_comma_token{$raw_tok} ) {
1043 930         1736 my $list_type = $test_token;
1044 930         1682 my $jmax = $line->{'jmax'};
1045              
1046 930         2767 foreach ( 1 .. $jmax - 2 ) {
1047 871         1742 ( $raw_tok, $lev, $tag, $tok_count ) =
1048             decode_alignment_token( $rtokens->[$_] );
1049 871 100       2403 if ( !$is_comma_token{$raw_tok} ) {
1050 26         1051 $list_type = EMPTY_STRING;
1051 26         86 last;
1052             }
1053             }
1054 930         2212 $line->{'list_type'} = $list_type;
1055             }
1056 1032         1979 return;
1057             } ## end sub decide_if_list
1058             }
1059              
1060             sub fix_terminal_ternary {
1061              
1062             # Add empty fields as necessary to align a ternary term
1063             # like this:
1064             #
1065             # my $leapyear =
1066             # $year % 4 ? 0
1067             # : $year % 100 ? 1
1068             # : $year % 400 ? 0
1069             # : 1;
1070             #
1071             # returns the index of the terminal question token, if any
1072              
1073 13     13 0 57 my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
1074             $group_level )
1075             = @_;
1076              
1077 13 50       52 return if ( !$old_line );
1078 39     39   345 use constant EXPLAIN_TERNARY => 0;
  39         104  
  39         55541  
1079              
1080 13 50       62 if (%valign_control_hash) {
1081 0         0 my $align_ok = $valign_control_hash{'?'};
1082 0 0       0 $align_ok = $valign_control_default unless defined($align_ok);
1083 0 0       0 return if ( !$align_ok );
1084             }
1085              
1086 13         30 my $jmax = @{$rfields} - 1;
  13         44  
1087 13         41 my $rfields_old = $old_line->{'rfields'};
1088              
1089 13         40 my $rpatterns_old = $old_line->{'rpatterns'};
1090 13         33 my $rtokens_old = $old_line->{'rtokens'};
1091 13         33 my $maximum_field_index = $old_line->{'jmax'};
1092              
1093             # look for the question mark after the :
1094 13         42 my ($jquestion);
1095             my $depth_question;
1096 13         48 my $pad = EMPTY_STRING;
1097 13         29 my $pad_length = 0;
1098 13         55 foreach my $j ( 0 .. $maximum_field_index - 1 ) {
1099 14         52 my $tok = $rtokens_old->[$j];
1100 14         71 my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
1101 14 100       63 if ( $raw_tok eq '?' ) {
1102 13         33 $depth_question = $lev;
1103              
1104             # depth must be correct
1105 13 50       76 next if ( $depth_question ne $group_level );
1106              
1107 13         44 $jquestion = $j;
1108 13 50       118 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
1109 13         54 $pad_length = length($1);
1110 13         55 $pad = SPACE x $pad_length;
1111             }
1112             else {
1113 0         0 return; # shouldn't happen
1114             }
1115 13         41 last;
1116             }
1117             }
1118 13 50       56 return if ( !defined($jquestion) ); # shouldn't happen
1119              
1120             # Now splice the tokens and patterns of the previous line
1121             # into the else line to insure a match. Add empty fields
1122             # as necessary.
1123 13         29 my $jadd = $jquestion;
1124              
1125             # Work on copies of the actual arrays in case we have
1126             # to return due to an error
1127 13         39 my @fields = @{$rfields};
  13         46  
1128 13         31 my @patterns = @{$rpatterns};
  13         46  
1129 13         39 my @tokens = @{$rtokens};
  13         46  
1130 13         31 my @field_lengths = @{$rfield_lengths};
  13         39  
1131              
1132 13         28 EXPLAIN_TERNARY && do {
1133             local $LIST_SEPARATOR = '><';
1134             print {*STDOUT} "CURRENT FIELDS=<@{$rfields_old}>\n";
1135             print {*STDOUT} "CURRENT TOKENS=<@{$rtokens_old}>\n";
1136             print {*STDOUT} "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
1137             print {*STDOUT} "UNMODIFIED FIELDS=<@{$rfields}>\n";
1138             print {*STDOUT} "UNMODIFIED TOKENS=<@{$rtokens}>\n";
1139             print {*STDOUT} "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
1140             };
1141              
1142             # handle cases of leading colon on this line
1143 13 50       93 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
1144              
1145 13         66 my ( $colon, $therest ) = ( $1, $2 );
1146              
1147             # Handle sub-case of first field with leading colon plus additional code
1148             # This is the usual situation as at the '1' below:
1149             # ...
1150             # : $year % 400 ? 0
1151             # : 1;
1152 13 50       47 if ($therest) {
1153              
1154             # Split the first field after the leading colon and insert padding.
1155             # Note that this padding will remain even if the terminal value goes
1156             # out on a separate line. This does not seem to look to bad, so no
1157             # mechanism has been included to undo it.
1158 13         55 my $field1 = shift @fields;
1159 13         48 my $field_length1 = shift @field_lengths;
1160 13         57 my $len_colon = length($colon);
1161 13         60 unshift @fields, ( $colon, $pad . $therest );
1162 13         45 unshift @field_lengths,
1163             ( $len_colon, $pad_length + $field_length1 - $len_colon );
1164              
1165             # change the leading pattern from : to ?
1166 13 50       130 return if ( $patterns[0] !~ s/^\:/?/ );
1167              
1168             # install leading tokens and patterns of existing line
1169 13         1140 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
  13         64  
1170 13         53 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
  13         46  
1171              
1172             # insert appropriate number of empty fields
1173 13 100       53 splice( @fields, 1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1174 13 100       63 splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
1175             }
1176              
1177             # handle sub-case of first field just equal to leading colon.
1178             # This can happen for example in the example below where
1179             # the leading '(' would create a new alignment token
1180             # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
1181             # : ( $mname = $name . '->' );
1182             else {
1183              
1184 0 0 0     0 return if ( $jmax <= 0 || $tokens[0] eq '#' ); # shouldn't happen
1185              
1186             # prepend a leading ? onto the second pattern
1187 0         0 $patterns[1] = "?b" . $patterns[1];
1188              
1189             # pad the second field
1190 0         0 $fields[1] = $pad . $fields[1];
1191 0         0 $field_lengths[1] = $pad_length + $field_lengths[1];
1192              
1193             # install leading tokens and patterns of existing line, replacing
1194             # leading token and inserting appropriate number of empty fields
1195 0         0 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
  0         0  
1196 0         0 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
  0         0  
1197 0 0       0 splice( @fields, 1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1198 0 0       0 splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
1199             }
1200             }
1201              
1202             # Handle case of no leading colon on this line. This will
1203             # be the case when -wba=':' is used. For example,
1204             # $year % 400 ? 0 :
1205             # 1;
1206             else {
1207              
1208             # install leading tokens and patterns of existing line
1209 0         0 $patterns[0] = '?' . 'b' . $patterns[0];
1210 0         0 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
  0         0  
1211 0         0 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
  0         0  
1212              
1213             # insert appropriate number of empty fields
1214 0         0 $jadd = $jquestion + 1;
1215 0         0 $fields[0] = $pad . $fields[0];
1216 0         0 $field_lengths[0] = $pad_length + $field_lengths[0];
1217 0 0       0 splice( @fields, 0, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1218 0 0       0 splice( @field_lengths, 0, 0, (0) x $jadd ) if $jadd;
1219             }
1220              
1221 13         28 EXPLAIN_TERNARY && do {
1222             local $LIST_SEPARATOR = '><';
1223             print {*STDOUT} "MODIFIED TOKENS=<@tokens>\n";
1224             print {*STDOUT} "MODIFIED PATTERNS=<@patterns>\n";
1225             print {*STDOUT} "MODIFIED FIELDS=<@fields>\n";
1226             };
1227              
1228             # all ok .. update the arrays
1229 13         38 @{$rfields} = @fields;
  13         58  
1230 13         58 @{$rtokens} = @tokens;
  13         51  
1231 13         28 @{$rpatterns} = @patterns;
  13         51  
1232 13         33 @{$rfield_lengths} = @field_lengths;
  13         47  
1233              
1234             # force a flush after this line
1235 13         52 return $jquestion;
1236             } ## end sub fix_terminal_ternary
1237              
1238             sub fix_terminal_else {
1239              
1240             # Add empty fields as necessary to align a balanced terminal
1241             # else block to a previous if/elsif/unless block,
1242             # like this:
1243             #
1244             # if ( 1 || $x ) { print "ok 13\n"; }
1245             # else { print "not ok 13\n"; }
1246             #
1247             # returns a positive value if the else block should be indented
1248             #
1249 9     9 0 39 my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
1250              
1251 9 50       43 return if ( !$old_line );
1252 9         19 my $jmax = @{$rfields} - 1;
  9         33  
1253 9 50       48 return if ( $jmax <= 0 );
1254              
1255 9 50       37 if (%valign_control_hash) {
1256 0         0 my $align_ok = $valign_control_hash{'{'};
1257 0 0       0 $align_ok = $valign_control_default unless defined($align_ok);
1258 0 0       0 return if ( !$align_ok );
1259             }
1260              
1261             # check for balanced else block following if/elsif/unless
1262 9         32 my $rfields_old = $old_line->{'rfields'};
1263              
1264             # TBD: add handling for 'case'
1265 9 100       94 return if ( $rfields_old->[0] !~ /^(?:if|elsif|unless)\s*$/ );
1266              
1267             # look for the opening brace after the else, and extract the depth
1268 7         27 my $tok_brace = $rtokens->[0];
1269 7         12 my $depth_brace;
1270 7 50       43 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
  7         38  
1271              
1272             # probably: "else # side_comment"
1273 0         0 else { return }
1274              
1275 7         19 my $rpatterns_old = $old_line->{'rpatterns'};
1276 7         20 my $rtokens_old = $old_line->{'rtokens'};
1277 7         28 my $maximum_field_index = $old_line->{'jmax'};
1278              
1279             # be sure the previous if/elsif is followed by an opening paren
1280 7         17 my $jparen = 0;
1281 7         19 my $tok_paren = '(' . $depth_brace;
1282 7         18 my $tok_test = $rtokens_old->[$jparen];
1283 7 50       26 return if ( $tok_test ne $tok_paren ); # shouldn't happen
1284              
1285             # Now find the opening block brace
1286 7         15 my ($jbrace);
1287 7         28 foreach my $j ( 1 .. $maximum_field_index - 1 ) {
1288 8         38 my $tok = $rtokens_old->[$j];
1289 8 100       43 if ( $tok eq $tok_brace ) {
1290 7         17 $jbrace = $j;
1291 7         16 last;
1292             }
1293             }
1294 7 50       30 return if ( !defined($jbrace) ); # shouldn't happen
1295              
1296             # Now splice the tokens and patterns of the previous line
1297             # into the else line to insure a match. Add empty fields
1298             # as necessary.
1299 7         29 my $jadd = $jbrace - $jparen;
1300 7         17 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
  7         32  
  7         28  
1301 7         16 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
  7         25  
  7         24  
1302 7         17 splice( @{$rfields}, 1, 0, (EMPTY_STRING) x $jadd );
  7         25  
1303 7         16 splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
  7         21  
1304              
1305             # force a flush after this line if it does not follow a case
1306 7 50       29 if ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
  0         0  
1307 7         26 else { return $jbrace }
1308             } ## end sub fix_terminal_else
1309              
1310             my %is_closing_block_type;
1311              
1312             BEGIN {
1313 39     39   244 my @q = qw< } ] >;
1314 39         1219 @is_closing_block_type{@q} = (1) x scalar(@q);
1315             }
1316              
1317             # This is a flag for testing alignment by sub sweep_left_to_right only.
1318             # This test can help find problems with the alignment logic.
1319             # This flag should normally be zero.
1320 39     39   403 use constant TEST_SWEEP_ONLY => 0;
  39         127  
  39         2879  
1321              
1322 39     39   340 use constant EXPLAIN_CHECK_MATCH => 0;
  39         107  
  39         3506  
1323              
1324             sub check_match {
1325              
1326             # See if the current line matches the current vertical alignment group.
1327              
1328 1139     1139 0 3716 my ( $self, $new_line, $base_line, $prev_line, $group_line_count ) = @_;
1329              
1330             # Given:
1331             # $new_line = the line being considered for group inclusion
1332             # $base_line = the first line of the current group
1333             # $prev_line = the line just before $new_line
1334             # $group_line_count = number of lines in the current group
1335              
1336             # returns a flag and a value as follows:
1337             # return (0, $imax_align) if the line does not match
1338             # return (1, $imax_align) if the line matches but does not fit
1339             # return (2, $imax_align) if the line matches and fits
1340              
1341 39     39   359 use constant NO_MATCH => 0;
  39         114  
  39         2480  
1342 39     39   312 use constant MATCH_NO_FIT => 1;
  39         84  
  39         2255  
1343 39     39   245 use constant MATCH_AND_FIT => 2;
  39         102  
  39         69567  
1344              
1345 1139         2223 my $return_value;
1346              
1347             # Returns '$imax_align' which is the index of the maximum matching token.
1348             # It will be used in the subsequent left-to-right sweep to align as many
1349             # tokens as possible for lines which partially match.
1350 1139         1921 my $imax_align = -1;
1351              
1352             # variable $GoToMsg explains reason for no match, for debugging
1353 1139         2125 my $GoToMsg = EMPTY_STRING;
1354              
1355 1139         2043 my $jmax = $new_line->{'jmax'};
1356 1139         2024 my $maximum_field_index = $base_line->{'jmax'};
1357              
1358 1139         1985 my $jlimit = $jmax - 2;
1359 1139 100       2892 if ( $jmax > $maximum_field_index ) {
1360 82         251 $jlimit = $maximum_field_index - 2;
1361             }
1362              
1363 1139 100       2429 if ( $new_line->{'is_hanging_side_comment'} ) {
1364              
1365             # HSC's can join the group if they fit
1366             }
1367              
1368             # Everything else
1369             else {
1370              
1371             # A group with hanging side comments ends with the first non hanging
1372             # side comment.
1373 1101 50       2452 if ( $base_line->{'is_hanging_side_comment'} ) {
1374 0         0 $GoToMsg = "end of hanging side comments";
1375 0         0 $return_value = NO_MATCH;
1376             }
1377             else {
1378              
1379             # The number of tokens that this line shares with the previous
1380             # line has been stored with the previous line. This value was
1381             # calculated and stored by sub 'match_line_pair'.
1382 1101         1847 $imax_align = $prev_line->{'imax_pair'};
1383              
1384             # Only the following ci sequences are accepted (issue c225):
1385             # 0 0 0 ... OK
1386             # 0 1 1 ... OK but marginal*
1387             # 1 1 1 ... OK
1388             # This check is rarely activated, but for example we want
1389             # to avoid something like this 'tail wag dog' situation:
1390             # $tag =~ s/\b([a-z]+)/\L\u$1/gio;
1391             # $tag =~ s/\b([b-df-hj-np-tv-z]+)\b/\U$1/gio
1392             # if $tag =~ /-/;
1393             # *Note: we could set a flag for the 0 1 marginal case and
1394             # use it to prevent alignment of selected token types.
1395 1101         1816 my $ci_prev = $prev_line->{'ci_level'};
1396 1101         1825 my $ci_new = $new_line->{'ci_level'};
1397 1101 50 100     4808 if ( $ci_prev != $ci_new
    100 33        
      66        
1398             && $imax_align >= 0
1399             && ( $ci_new == 0 || $group_line_count > 1 ) )
1400             {
1401 0         0 $imax_align = -1;
1402 0         0 $GoToMsg =
1403             "Rejected ci: ci_prev=$ci_prev ci_new=$ci_new num=$group_line_count\n";
1404 0         0 $return_value = NO_MATCH;
1405             }
1406             elsif ( $imax_align != $jlimit ) {
1407 27         203 $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
1408 27         71 $return_value = NO_MATCH;
1409             }
1410             else {
1411             ##ok: continue
1412             }
1413             }
1414             }
1415              
1416 1139 100       2628 if ( !defined($return_value) ) {
1417              
1418             # The tokens match, but the lines must have identical number of
1419             # tokens to join the group.
1420 1112 100 100     4005 if ( $maximum_field_index != $jmax ) {
    100          
1421 118         320 $GoToMsg = "token count differs";
1422 118         245 $return_value = NO_MATCH;
1423             }
1424              
1425             # The tokens match. Now See if there is space for this line in the
1426             # current group.
1427             elsif ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY )
1428             {
1429              
1430 981         2982 $GoToMsg = "match and fit, imax_align=$imax_align, jmax=$jmax\n";
1431 981         1607 $return_value = MATCH_AND_FIT;
1432 981         1799 $imax_align = $jlimit;
1433             }
1434             else {
1435 13         61 $GoToMsg = "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
1436 13         25 $return_value = MATCH_NO_FIT;
1437 13         25 $imax_align = $jlimit;
1438             }
1439             }
1440              
1441             EXPLAIN_CHECK_MATCH
1442 1139         1688 && print
1443             "returning $return_value because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
1444              
1445 1139         3022 return ( $return_value, $imax_align );
1446             } ## end sub check_match
1447              
1448             sub check_fit {
1449              
1450 994     994 0 2195 my ( $self, $new_line, $old_line ) = @_;
1451              
1452             # The new line has alignments identical to the current group. Now we have
1453             # to fit the new line into the group without causing a field to exceed the
1454             # line length limit.
1455             # return true if successful
1456             # return false if not successful
1457              
1458 994         1837 my $jmax = $new_line->{'jmax'};
1459 994         1738 my $leading_space_count = $new_line->{'leading_space_count'};
1460 994         1665 my $rfield_lengths = $new_line->{'rfield_lengths'};
1461 994         3498 my $padding_available = $old_line->get_available_space_on_right();
1462 994         1856 my $jmax_old = $old_line->{'jmax'};
1463              
1464             # Safety check ... only lines with equal array sizes should arrive here
1465             # from sub check_match. So if this error occurs, look at recent changes in
1466             # sub check_match. It is only supposed to check the fit of lines with
1467             # identical numbers of alignment tokens.
1468 994 50       3462 if ( $jmax_old ne $jmax ) {
1469              
1470 0         0 warning(<<EOM);
1471             Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
1472             unexpected difference in array lengths: $jmax != $jmax_old
1473             EOM
1474 0         0 return;
1475             }
1476              
1477             # Save current columns in case this line does not fit.
1478 994         1696 my @alignments = @{ $old_line->{'ralignments'} };
  994         2440  
1479 994         2100 foreach my $alignment (@alignments) {
1480 3451         7267 $alignment->save_column();
1481             }
1482              
1483             # Loop over all alignments ...
1484 994         2990 for my $j ( 0 .. $jmax ) {
1485              
1486 3435         8277 my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
1487              
1488 3435 100       7180 if ( $j == 0 ) {
1489 994         1579 $pad += $leading_space_count;
1490             }
1491              
1492             # Keep going if this field does not need any space.
1493 3435 100       6600 next if ( $pad < 0 );
1494              
1495             # Revert to the starting state if does not fit
1496 2376 100       4868 if ( $pad > $padding_available ) {
1497              
1498             #----------------------------------------------
1499             # Line does not fit -- revert to starting state
1500             #----------------------------------------------
1501 13         28 foreach my $alignment (@alignments) {
1502 39         100 $alignment->restore_column();
1503             }
1504 13         67 return;
1505             }
1506              
1507             # make room for this field
1508 2363         6505 $old_line->increase_field_width( $j, $pad );
1509 2363         3984 $padding_available -= $pad;
1510             }
1511              
1512             #-------------------------------------
1513             # The line fits, the match is accepted
1514             #-------------------------------------
1515 981         5075 return 1;
1516              
1517             } ## end sub check_fit
1518              
1519             sub install_new_alignments {
1520              
1521 2084     2084 0 4179 my ($new_line) = @_;
1522              
1523 2084         4117 my $jmax = $new_line->{'jmax'};
1524 2084         3755 my $rfield_lengths = $new_line->{'rfield_lengths'};
1525 2084         3717 my $col = $new_line->{'leading_space_count'};
1526              
1527 2084         3590 my @alignments;
1528 2084         5040 for my $j ( 0 .. $jmax ) {
1529 6963         10535 $col += $rfield_lengths->[$j];
1530              
1531             # create initial alignments for the new group
1532 6963         22656 my $alignment =
1533             Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
1534 6963         14624 push @alignments, $alignment;
1535             }
1536 2084         5894 $new_line->{'ralignments'} = \@alignments;
1537 2084         4600 return;
1538             } ## end sub install_new_alignments
1539              
1540             sub copy_old_alignments {
1541 981     981 0 1923 my ( $new_line, $old_line ) = @_;
1542 981         1525 my @new_alignments = @{ $old_line->{'ralignments'} };
  981         2615  
1543 981         2341 $new_line->{'ralignments'} = \@new_alignments;
1544 981         2285 return;
1545             } ## end sub copy_old_alignments
1546              
1547             sub dump_array {
1548              
1549             # debug routine to dump array contents
1550 0     0 0 0 local $LIST_SEPARATOR = ')(';
1551 0         0 print {*STDOUT} "(@_)\n";
  0         0  
1552 0         0 return;
1553             } ## end sub dump_array
1554              
1555             sub level_change {
1556              
1557             # compute decrease in level when we remove $diff spaces from the
1558             # leading spaces
1559 10     10 0 23 my ( $self, $leading_space_count, $diff, $level ) = @_;
1560              
1561 10         20 my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
1562 10 50       24 if ($rOpts_indent_columns) {
1563 10         26 my $olev =
1564             int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
1565 10         18 my $nlev = int( $leading_space_count / $rOpts_indent_columns );
1566 10         18 $level -= ( $olev - $nlev );
1567 10 50       25 if ( $level < 0 ) { $level = 0 }
  0         0  
1568             }
1569 10         23 return $level;
1570             } ## end sub level_change
1571              
1572             ###############################################
1573             # CODE SECTION 4: Code to process comment lines
1574             ###############################################
1575              
1576             sub _flush_comment_lines {
1577              
1578             # Output a group consisting of COMMENT lines
1579              
1580 531     531   1256 my ($self) = @_;
1581 531         1166 my $rgroup_lines = $self->[_rgroup_lines_];
1582 531 50       860 return if ( !@{$rgroup_lines} );
  531         1558  
1583 531         1082 my $group_level = $self->[_group_level_];
1584 531         1047 my $group_maximum_line_length = $self->[_group_maximum_line_length_];
1585 531         1070 my $leading_space_count = $self->[_comment_leading_space_count_];
1586              
1587             # look for excessively long lines
1588 531         2302 my $max_excess = 0;
1589 531         1014 foreach my $item ( @{$rgroup_lines} ) {
  531         1465  
1590 608         1009 my ( $str, $str_len ) = @{$item};
  608         1544  
1591 608         1391 my $excess =
1592             $str_len + $leading_space_count - $group_maximum_line_length;
1593 608 100       2049 if ( $excess > $max_excess ) {
1594 38         117 $max_excess = $excess;
1595             }
1596             }
1597              
1598             # zero leading space count if any lines are too long
1599 531 100       1843 if ( $max_excess > 0 ) {
1600 36         102 $leading_space_count -= $max_excess;
1601 36 50       132 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
  36         109  
1602 36         89 my $file_writer_object = $self->[_file_writer_object_];
1603 36         186 my $last_outdented_line_at =
1604             $file_writer_object->get_output_line_number();
1605 36         82 my $nlines = @{$rgroup_lines};
  36         67  
1606 36         96 $self->[_last_outdented_line_at_] =
1607             $last_outdented_line_at + $nlines - 1;
1608 36         69 my $outdented_line_count = $self->[_outdented_line_count_];
1609 36 100       142 if ( !$outdented_line_count ) {
1610 18         43 $self->[_first_outdented_line_at_] = $last_outdented_line_at;
1611             }
1612 36         64 $outdented_line_count += $nlines;
1613 36         76 $self->[_outdented_line_count_] = $outdented_line_count;
1614             }
1615              
1616             # write the lines
1617 531         1142 my $outdent_long_lines = 0;
1618              
1619 531         1029 foreach my $item ( @{$rgroup_lines} ) {
  531         1200  
1620 608         1058 my ( $str, $str_len, $Kend ) = @{$item};
  608         1551  
1621 608         6969 $self->valign_output_step_B(
1622             {
1623             leading_space_count => $leading_space_count,
1624             line => $str,
1625             line_length => $str_len,
1626             side_comment_length => 0,
1627             outdent_long_lines => $outdent_long_lines,
1628             rvertical_tightness_flags => undef,
1629             level => $group_level,
1630             level_end => $group_level,
1631             Kend => $Kend,
1632             maximum_line_length => $group_maximum_line_length,
1633             }
1634             );
1635             }
1636              
1637 531         2624 $self->initialize_for_new_group();
1638 531         1076 return;
1639             } ## end sub _flush_comment_lines
1640              
1641             ######################################################
1642             # CODE SECTION 5: Code to process groups of code lines
1643             ######################################################
1644              
1645             sub _flush_group_lines {
1646              
1647             # This is the vertical aligner internal flush, which leaves the cache
1648             # intact
1649 5582     5582   11317 my ( $self, $level_jump ) = @_;
1650              
1651             # $level_jump = $next_level-$group_level, if known
1652             # = undef if not known
1653             # Note: only the sign of the jump is needed
1654              
1655 5582         10005 my $rgroup_lines = $self->[_rgroup_lines_];
1656 5582 100       7924 return if ( !@{$rgroup_lines} );
  5582         14562  
1657 2236         4883 my $group_type = $self->[_group_type_];
1658 2236         3949 my $group_level = $self->[_group_level_];
1659              
1660             # Debug
1661 2236         3311 0 && do {
1662             my ( $a, $b, $c ) = caller();
1663             my $nlines = @{$rgroup_lines};
1664             print {*STDOUT}
1665             "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
1666             };
1667              
1668             #-------------------------------------------
1669             # Section 1: Handle a group of COMMENT lines
1670             #-------------------------------------------
1671 2236 100       5971 if ( $group_type eq 'COMMENT' ) {
1672 531         2330 $self->_flush_comment_lines();
1673 531         1599 return;
1674             }
1675              
1676             #------------------------------------------------------------------------
1677             # Section 2: Handle line(s) of CODE. Most of the actual work of vertical
1678             # aligning happens here in the following steps:
1679             #------------------------------------------------------------------------
1680              
1681             # STEP 1: Remove most unmatched tokens. They block good alignments.
1682 1705         6253 my ( $max_lev_diff, $saw_side_comment ) =
1683             delete_unmatched_tokens( $rgroup_lines, $group_level );
1684              
1685             # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
1686             # matching common alignments. The indexes of these subgroups are in the
1687             # return variable.
1688 1705         7150 my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
1689              
1690             # STEP 3: Sweep left to right through the lines, looking for leading
1691             # alignment tokens shared by groups.
1692             sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
1693 1705 100       2821 if ( @{$rgroups} > 1 );
  1705         5703  
1694              
1695             # STEP 4: Move side comments to a common column if possible.
1696 1705 100       4394 if ($saw_side_comment) {
1697 199         975 $self->align_side_comments( $rgroup_lines, $rgroups );
1698             }
1699              
1700             # STEP 5: For the -lp option, increase the indentation of lists
1701             # to the desired amount, but do not exceed the line length limit.
1702              
1703             # We are allowed to shift a group of lines to the right if:
1704             # (1) its level is greater than the level of the previous group, and
1705             # (2) its level is greater than the level of the next line to be written.
1706              
1707 1705         2820 my $extra_indent_ok;
1708 1705 100       4816 if ( $group_level > $self->[_last_level_written_] ) {
1709              
1710             # Use the level jump to next line to come, if given
1711 853 100       2507 if ( defined($level_jump) ) {
1712 571         1528 $extra_indent_ok = $level_jump < 0;
1713             }
1714              
1715             # Otherwise, assume the next line has the level of the end of last line.
1716             # This fixes case c008.
1717             else {
1718 282         790 my $level_end = $rgroup_lines->[-1]->{'level_end'};
1719 282         770 $extra_indent_ok = $group_level > $level_end;
1720             }
1721             }
1722              
1723 1705 100       5178 my $extra_leading_spaces =
1724             $extra_indent_ok
1725             ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
1726             : 0;
1727              
1728             # STEP 6: Output the lines.
1729             # All lines in this group have the same leading spacing and maximum line
1730             # length
1731 1705         3345 my $group_leader_length = $rgroup_lines->[0]->{'leading_space_count'};
1732 1705         3505 my $group_maximum_line_length = $rgroup_lines->[0]->{'maximum_line_length'};
1733              
1734 1705         2732 foreach my $line ( @{$rgroup_lines} ) {
  1705         3580  
1735 3065         18621 $self->valign_output_step_A(
1736             {
1737             line => $line,
1738             min_ci_gap => 0,
1739             do_not_align => 0,
1740             group_leader_length => $group_leader_length,
1741             extra_leading_spaces => $extra_leading_spaces,
1742             level => $group_level,
1743             maximum_line_length => $group_maximum_line_length,
1744             }
1745             );
1746             }
1747              
1748             # Let the formatter know that this object has been processed and any
1749             # recoverable spaces have been handled. This is needed for setting the
1750             # closing paren location in -lp mode.
1751 1705         4493 my $object = $rgroup_lines->[0]->{'indentation'};
1752 1705 100       4683 if ( ref($object) ) { $object->set_recoverable_spaces(0) }
  92         397  
1753              
1754 1705         6333 $self->initialize_for_new_group();
1755 1705         3771 return;
1756             } ## end sub _flush_group_lines
1757              
1758             { ## closure for sub sweep_top_down
1759              
1760             my $rall_lines; # all of the lines
1761             my $grp_level; # level of all lines
1762             my $rgroups; # describes the partition of lines we will make here
1763             my $group_line_count; # number of lines in current partition
1764              
1765 39     39   81117 BEGIN { $rgroups = [] }
1766              
1767             sub initialize_for_new_rgroup {
1768 3789     3789 0 5856 $group_line_count = 0;
1769 3789         5678 return;
1770             }
1771              
1772             sub add_to_rgroup {
1773              
1774 3065     3065 0 5800 my ($jend) = @_;
1775 3065         5511 my $rline = $rall_lines->[$jend];
1776              
1777 3065         4476 my $jbeg = $jend;
1778 3065 100       6631 if ( $group_line_count == 0 ) {
1779 2084         5705 install_new_alignments($rline);
1780             }
1781             else {
1782 981         1558 my $rvals = pop @{$rgroups};
  981         1863  
1783 981         1899 $jbeg = $rvals->[0];
1784 981         2608 copy_old_alignments( $rline, $rall_lines->[$jbeg] );
1785             }
1786 3065         5114 push @{$rgroups}, [ $jbeg, $jend, undef ];
  3065         7351  
1787 3065         5091 $group_line_count++;
1788 3065         4774 return;
1789             } ## end sub add_to_rgroup
1790              
1791             sub get_rgroup_jrange {
1792              
1793 1288 50   1288 0 1923 return if ( !@{$rgroups} );
  1288         3208  
1794 1288 50       3099 return if ( $group_line_count <= 0 );
1795 1288         1976 my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
  1288         2898  
1796 1288         2806 return ( $jbeg, $jend );
1797             } ## end sub get_rgroup_jrange
1798              
1799             sub end_rgroup {
1800              
1801 2103     2103 0 4243 my ($imax_align) = @_;
1802 2103 50       3229 return if ( !@{$rgroups} );
  2103         5257  
1803 2103 100       5120 return if ( $group_line_count <= 0 );
1804              
1805 2084         3267 my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
  2084         2917  
  2084         4869  
1806 2084         3992 push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
  2084         5246  
1807              
1808             # Undo some alignments of poor two-line combinations.
1809             # We had to wait until now to know the line count.
1810 2084 100       5871 if ( $jend - $jbeg == 1 ) {
1811 256         867 my $line_0 = $rall_lines->[$jbeg];
1812 256         627 my $line_1 = $rall_lines->[$jend];
1813              
1814 256         625 my $imax_pair = $line_1->{'imax_pair'};
1815 256 50       868 if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
  0         0  
1816              
1817             ## flag for possible future use:
1818             ## my $is_isolated_pair = $imax_pair < 0
1819             ## && ( $jbeg == 0
1820             ## || $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} < 0 );
1821              
1822             my $imax_prev =
1823 256 100       987 $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} : -1;
1824              
1825 256         1205 my ( $is_marginal, $imax_align_fix ) =
1826             is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
1827             $imax_prev );
1828 256 100       889 if ($is_marginal) {
1829 14         71 combine_fields( $line_0, $line_1, $imax_align_fix );
1830             }
1831             }
1832              
1833 2084         5246 initialize_for_new_rgroup();
1834 2084         3656 return;
1835             } ## end sub end_rgroup
1836              
1837             sub block_penultimate_match {
1838              
1839             # emergency reset to prevent sweep_left_to_right from trying to match a
1840             # failed terminal else match
1841 1 50   1 0 2 return if ( @{$rgroups} <= 1 );
  1         4  
1842 1         3 $rgroups->[-2]->[2] = -1;
1843 1         2 return;
1844             } ## end sub block_penultimate_match
1845              
1846             sub sweep_top_down {
1847 1705     1705 0 3903 my ( $self, $rlines, $group_level ) = @_;
1848              
1849             # Partition the set of lines into final alignment subgroups
1850             # and store the alignments with the lines.
1851              
1852             # The alignment subgroups we are making here are groups of consecutive
1853             # lines which have (1) identical alignment tokens and (2) do not
1854             # exceed the allowable maximum line length. A later sweep from
1855             # left-to-right ('sweep_lr') will handle additional alignments.
1856              
1857             # transfer args to closure variables
1858 1705         20552 $rall_lines = $rlines;
1859 1705         4087 $grp_level = $group_level;
1860 1705         5424 $rgroups = [];
1861 1705         5748 initialize_for_new_rgroup();
1862 1705 50       2557 return unless @{$rlines}; # shouldn't happen
  1705         4904  
1863              
1864             # Unset the _end_group flag for the last line if it it set because it
1865             # is not needed and can causes problems for -lp formatting
1866 1705         4101 $rall_lines->[-1]->{'end_group'} = 0;
1867              
1868             # Loop over all lines ...
1869 1705         3176 my $jline = -1;
1870 1705         2940 foreach my $new_line ( @{$rall_lines} ) {
  1705         4040  
1871 3065         4553 $jline++;
1872              
1873             # Start a new subgroup if necessary
1874 3065 100       6939 if ( !$group_line_count ) {
1875 1777         5413 add_to_rgroup($jline);
1876 1777 100       4991 if ( $new_line->{'end_group'} ) {
1877 22         101 end_rgroup(-1);
1878             }
1879 1777         3812 next;
1880             }
1881              
1882 1288         2819 my $j_terminal_match = $new_line->{'j_terminal_match'};
1883 1288         3441 my ( $jbeg, $jend ) = get_rgroup_jrange();
1884 1288 50       3317 if ( !defined($jbeg) ) {
1885              
1886             # safety check, shouldn't happen
1887 0         0 warning(<<EOM);
1888             Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
1889             undefined index for group line count $group_line_count
1890             EOM
1891 0         0 $jbeg = $jline;
1892             }
1893 1288         2295 my $base_line = $rall_lines->[$jbeg];
1894              
1895             # Initialize a global flag saying if the last line of the group
1896             # should match end of group and also terminate the group. There
1897             # should be no returns between here and where the flag is handled
1898             # at the bottom.
1899 1288         1986 my $col_matching_terminal = 0;
1900 1288 100       2911 if ( defined($j_terminal_match) ) {
1901              
1902             # remember the column of the terminal ? or { to match with
1903 19         127 $col_matching_terminal =
1904             $base_line->get_column($j_terminal_match);
1905              
1906             # Ignore an undefined value as a defensive step; shouldn't
1907             # normally happen.
1908 19 50       103 $col_matching_terminal = 0
1909             unless defined($col_matching_terminal);
1910             }
1911              
1912             # -------------------------------------------------------------
1913             # Allow hanging side comment to join current group, if any. The
1914             # only advantage is to keep the other tokens in the same group. For
1915             # example, this would make the '=' align here:
1916             # $ax = 1; # side comment
1917             # # hanging side comment
1918             # $boondoggle = 5; # side comment
1919             # $beetle = 5; # side comment
1920              
1921             # here is another example..
1922              
1923             # _rtoc_name_count => {}, # hash to track ..
1924             # _rpackage_stack => [], # stack to check ..
1925             # # name changes
1926             # _rlast_level => \$last_level, # brace indentation
1927             #
1928             #
1929             # If this were not desired, the next step could be skipped.
1930             # -------------------------------------------------------------
1931 1288 100       4121 if ( $new_line->{'is_hanging_side_comment'} ) {
    100          
1932 38         159 join_hanging_comment( $new_line, $base_line );
1933             }
1934              
1935             # If this line has no matching tokens, then flush out the lines
1936             # BEFORE this line unless both it and the previous line have side
1937             # comments. This prevents this line from pushing side comments out
1938             # to the right.
1939             elsif ( $new_line->{'jmax'} == 1 ) {
1940              
1941             # There are no matching tokens, so now check side comments.
1942             # Programming note: accessing arrays with index -1 is
1943             # risky in Perl, but we have verified there is at least one
1944             # line in the group and that there is at least one field.
1945             my $prev_comment =
1946 194         713 $rall_lines->[ $jline - 1 ]->{'rfields'}->[-1];
1947 194         464 my $side_comment = $new_line->{'rfields'}->[-1];
1948 194 100 100     998 end_rgroup(-1) if ( !$side_comment || !$prev_comment );
1949             }
1950             else {
1951             ##ok: continue
1952             }
1953              
1954             # See if the new line matches and fits the current group,
1955             # if it still exists. Flush the current group if not.
1956 1288         2115 my $match_code;
1957 1288 100       2879 if ($group_line_count) {
1958 1139         3984 ( $match_code, my $imax_align ) =
1959             $self->check_match( $new_line, $base_line,
1960             $rall_lines->[ $jline - 1 ],
1961             $group_line_count );
1962 1139 100       3009 if ( $match_code != 2 ) { end_rgroup($imax_align) }
  158         547  
1963             }
1964              
1965             # Store the new line
1966 1288         3478 add_to_rgroup($jline);
1967              
1968 1288 100       5646 if ( defined($j_terminal_match) ) {
    100          
1969              
1970             # Decide if we should fix a terminal match. We can either:
1971             # 1. fix it and prevent the sweep_lr from changing it, or
1972             # 2. leave it alone and let sweep_lr try to fix it.
1973              
1974             # The current logic is to fix it if:
1975             # -it has not joined to previous lines,
1976             # -and either the previous subgroup has just 1 line, or
1977             # -this line matched but did not fit (so sweep won't work)
1978 19         86 my $fixit;
1979 19 100       95 if ( $group_line_count == 1 ) {
1980 3   66     21 $fixit ||= $match_code;
1981 3 100       12 if ( !$fixit ) {
1982 2 50       4 if ( @{$rgroups} > 1 ) {
  2         9  
1983 2         4 my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
  2         6  
1984 2         7 my $nlines = $jendx - $jbegx + 1;
1985 2   66     13 $fixit ||= $nlines <= 1;
1986             }
1987             }
1988             }
1989              
1990 19 100       81 if ($fixit) {
1991 2         6 $base_line = $new_line;
1992 2         8 my $col_now = $base_line->get_column($j_terminal_match);
1993              
1994             # Ignore an undefined value as a defensive step; shouldn't
1995             # normally happen.
1996 2 50       7 $col_now = 0 unless defined($col_now);
1997              
1998 2         6 my $pad = $col_matching_terminal - $col_now;
1999 2         11 my $padding_available =
2000             $base_line->get_available_space_on_right();
2001 2 100 33     18 if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
      66        
2002 1         6 $base_line->increase_field_width( $j_terminal_match,
2003             $pad );
2004             }
2005              
2006             # do not let sweep_left_to_right change an isolated 'else'
2007 2 100       15 if ( !$new_line->{'is_terminal_ternary'} ) {
2008 1         5 block_penultimate_match();
2009             }
2010             }
2011 19         91 end_rgroup(-1);
2012             }
2013              
2014             # end the group if we know we cannot match next line.
2015             elsif ( $new_line->{'end_group'} ) {
2016 50         254 end_rgroup(-1);
2017             }
2018              
2019             else {
2020             ##ok: continue
2021             }
2022             } ## end loop over lines
2023              
2024 1705         6418 end_rgroup(-1);
2025 1705         3779 return ($rgroups);
2026             } ## end sub sweep_top_down
2027             }
2028              
2029             sub two_line_pad {
2030              
2031 18     18 0 89 my ( $line_m, $line, $imax_min ) = @_;
2032              
2033             # Given:
2034             # two isolated (list) lines
2035             # imax_min = number of common alignment tokens
2036             # Return:
2037             # $pad_max = maximum suggested pad distance
2038             # = 0 if alignment not recommended
2039             # Note that this is only for two lines which do not have alignment tokens
2040             # in common with any other lines. It is intended for lists, but it might
2041             # also be used for two non-list lines with a common leading '='.
2042              
2043             # Allow alignment if the difference in the two unpadded line lengths
2044             # is not more than either line length. The idea is to avoid
2045             # aligning lines with very different field lengths, like these two:
2046              
2047             # [
2048             # 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
2049             # 1, 0, 0, 0, undef, 0, 0
2050             # ];
2051 18         59 my $rfield_lengths = $line->{'rfield_lengths'};
2052 18         47 my $rfield_lengths_m = $line_m->{'rfield_lengths'};
2053              
2054             # Safety check - shouldn't happen
2055             return 0
2056 18         121 if ( $imax_min >= @{$rfield_lengths}
2057 18 50 33     46 || $imax_min >= @{$rfield_lengths_m} );
  18         77  
2058              
2059 18         56 my $lensum_m = 0;
2060 18         47 my $lensum = 0;
2061 18         67 foreach my $i ( 0 .. $imax_min ) {
2062 49         90 $lensum_m += $rfield_lengths_m->[$i];
2063 49         104 $lensum += $rfield_lengths->[$i];
2064             }
2065              
2066 18 100       140 my ( $lenmin, $lenmax ) =
2067             $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
2068              
2069 18         39 my $patterns_match;
2070 18 50 66     129 if ( $line_m->{'list_type'} && $line->{'list_type'} ) {
2071 16         92 $patterns_match = 1;
2072 16         52 my $rpatterns_m = $line_m->{'rpatterns'};
2073 16         42 my $rpatterns = $line->{'rpatterns'};
2074 16         51 foreach my $i ( 0 .. $imax_min ) {
2075 46         94 my $pat = $rpatterns->[$i];
2076 46         82 my $pat_m = $rpatterns_m->[$i];
2077 46 100       149 if ( $pat ne $pat_m ) { $patterns_match = 0; last }
  2         5  
  2         6  
2078             }
2079             }
2080              
2081 18         49 my $pad_max = $lenmax;
2082 18 50 66     99 if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
  0         0  
2083              
2084 18         59 return $pad_max;
2085             } ## end sub two_line_pad
2086              
2087             sub sweep_left_to_right {
2088              
2089 255     255 0 911 my ( $rlines, $rgroups, $group_level ) = @_;
2090              
2091             # So far we have divided the lines into groups having an equal number of
2092             # identical alignments. Here we are going to look for common leading
2093             # alignments between the different groups and align them when possible.
2094             # For example, the three lines below are in three groups because each line
2095             # has a different number of commas. In this routine we will sweep from
2096             # left to right, aligning the leading commas as we go, but stopping if we
2097             # hit the line length limit.
2098              
2099             # my ( $num, $numi, $numj, $xyza, $ka, $xyzb, $kb, $aff, $error );
2100             # my ( $i, $j, $error, $aff, $asum, $avec );
2101             # my ( $km, $area, $varea );
2102              
2103             # nothing to do if just one group
2104 255         479 my $ng_max = @{$rgroups} - 1;
  255         587  
2105 255 50       754 return if ( $ng_max <= 0 );
2106              
2107             #---------------------------------------------------------------------
2108             # Step 1: Loop over groups to find all common leading alignment tokens
2109             #---------------------------------------------------------------------
2110              
2111 255         3669 my $line;
2112             my $rtokens;
2113 255         0 my $imax; # index of maximum non-side-comment alignment token
2114 255         0 my $istop; # an optional stopping index
2115 255         0 my $jbeg; # starting line index
2116 255         0 my $jend; # ending line index
2117              
2118 255         0 my $line_m;
2119 255         0 my $rtokens_m;
2120 255         0 my $imax_m;
2121 255         0 my $istop_m;
2122 255         0 my $jbeg_m;
2123 255         0 my $jend_m;
2124              
2125 255         0 my $istop_mm;
2126              
2127             # Look at neighboring pairs of groups and form a simple list
2128             # of all common leading alignment tokens. Foreach such match we
2129             # store [$i, $ng], where
2130             # $i = index of the token in the line (0,1,...)
2131             # $ng is the second of the two groups with this common token
2132 255         0 my @icommon;
2133              
2134             # Hash to hold the maximum alignment change for any group
2135 255         0 my %max_move;
2136              
2137             # a small number of columns
2138 255         498 my $short_pad = 4;
2139              
2140 255         536 my $ng = -1;
2141 255         507 foreach my $item ( @{$rgroups} ) {
  255         655  
2142 634         928 $ng++;
2143              
2144 634         957 $istop_mm = $istop_m;
2145              
2146             # save _m values of previous group
2147 634         950 $line_m = $line;
2148 634         923 $rtokens_m = $rtokens;
2149 634         892 $imax_m = $imax;
2150 634         984 $istop_m = $istop;
2151 634         1010 $jbeg_m = $jbeg;
2152 634         970 $jend_m = $jend;
2153              
2154             # Get values for this group. Note that we just have to use values for
2155             # one of the lines of the group since all members have the same
2156             # alignments.
2157 634         936 ( $jbeg, $jend, $istop ) = @{$item};
  634         1266  
2158              
2159 634         1238 $line = $rlines->[$jbeg];
2160 634         1155 $rtokens = $line->{'rtokens'};
2161 634         1232 $imax = $line->{'jmax'} - 2;
2162 634 50       1482 $istop = -1 if ( !defined($istop) );
2163 634 50       1413 $istop = $imax if ( $istop > $imax );
2164              
2165             # Initialize on first group
2166 634 100       1563 next if ( $ng == 0 );
2167              
2168             # Use the minimum index limit of the two groups
2169 379 100       1424 my $imax_min = $imax > $imax_m ? $imax_m : $imax;
2170              
2171             # Also impose a limit if given.
2172 379 100       1167 if ( $istop_m < $imax_min ) {
2173 51         153 $imax_min = $istop_m;
2174             }
2175              
2176             # Special treatment of two one-line groups isolated from other lines,
2177             # unless they form a simple list or a terminal match. Otherwise the
2178             # alignment can look strange in some cases.
2179 379         1006 my $list_type = $rlines->[$jbeg]->{'list_type'};
2180 379 100 100     4713 if (
      100        
      100        
      100        
      100        
      100        
      100        
      100        
2181             $jend == $jbeg
2182             && $jend_m == $jbeg_m
2183             && ( $ng == 1 || $istop_mm < 0 )
2184             && ( $ng == $ng_max || $istop < 0 )
2185             && !$line->{'j_terminal_match'}
2186              
2187             # Only do this for imperfect matches. This is normally true except
2188             # when two perfect matches cannot form a group because the line
2189             # length limit would be exceeded. In that case we can still try
2190             # to match as many alignments as possible.
2191             && ( $imax != $imax_m || $istop_m != $imax_m )
2192             )
2193             {
2194              
2195             # We will just align assignments and simple lists
2196 73 100       330 next if ( $imax_min < 0 );
2197             next
2198 21 100 100     197 if ( $rtokens->[0] !~ /^=\d/
2199             && !$list_type );
2200              
2201             # In this case we will limit padding to a short distance. This
2202             # is a compromise to keep some vertical alignment but prevent large
2203             # gaps, which do not look good for just two lines.
2204 18         275 my $pad_max =
2205             two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
2206 18 50       66 next if ( !$pad_max );
2207 18         51 my $ng_m = $ng - 1;
2208 18         71 $max_move{"$ng_m"} = $pad_max;
2209 18         65 $max_move{"$ng"} = $pad_max;
2210             }
2211              
2212             # Loop to find all common leading tokens.
2213 324 100       1203 if ( $imax_min >= 0 ) {
2214 78         261 foreach my $i ( 0 .. $imax_min ) {
2215 144         270 my $tok = $rtokens->[$i];
2216 144         266 my $tok_m = $rtokens_m->[$i];
2217 144 50       376 last if ( $tok ne $tok_m );
2218 144         537 push @icommon, [ $i, $ng, $tok ];
2219             }
2220             }
2221             }
2222 255 100       1169 return unless @icommon;
2223              
2224             #----------------------------------------------------------
2225             # Step 2: Reorder and consolidate the list into a task list
2226             #----------------------------------------------------------
2227              
2228             # We have to work first from lowest token index to highest, then by group,
2229             # sort our list first on token index then group number
2230 64 50       409 @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
  160         443  
2231              
2232             # Make a task list of the form
2233             # [$i, ng_beg, $ng_end, $tok], ..
2234             # where
2235             # $i is the index of the token to be aligned
2236             # $ng_beg..$ng_end is the group range for this action
2237 64         131 my @todo;
2238 64         157 my ( $i, $ng_end, $tok );
2239 64         185 foreach my $item (@icommon) {
2240 144         266 my $ng_last = $ng_end;
2241 144         230 my $i_last = $i;
2242 144         256 ( $i, $ng_end, $tok ) = @{$item};
  144         325  
2243 144         279 my $ng_beg = $ng_end - 1;
2244 144 100 100     658 if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
      66        
2245 29         87 my $var = pop(@todo);
2246 29         74 $ng_beg = $var->[1];
2247             }
2248 144         373 my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
2249 144         582 push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
2250             }
2251              
2252             #------------------------------
2253             # Step 3: Execute the task list
2254             #------------------------------
2255 64         736 do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
2256             $group_level );
2257 64         296 return;
2258             } ## end sub sweep_left_to_right
2259              
2260             { ## closure for sub do_left_to_right_sweep
2261              
2262             my %is_good_alignment_token;
2263              
2264             BEGIN {
2265              
2266             # One of the most difficult aspects of vertical alignment is knowing
2267             # when not to align. Alignment can go from looking very nice to very
2268             # bad when overdone. In the sweep algorithm there are two special
2269             # cases where we may need to limit padding to a '$short_pad' distance
2270             # to avoid some very ugly formatting:
2271              
2272             # 1. Two isolated lines with partial alignment
2273             # 2. A 'tail-wag-dog' situation, in which a single terminal
2274             # line with partial alignment could cause a significant pad
2275             # increase in many previous lines if allowed to join the alignment.
2276              
2277             # For most alignment tokens, we will allow only a small pad to be
2278             # introduced (the hardwired $short_pad variable) . But for some 'good'
2279             # alignments we can be less restrictive.
2280              
2281             # These are 'good' alignments, which are allowed more padding:
2282 39     39   272 my @q = qw(
2283             => = ? if unless or || {
2284             );
2285 39         132 push @q, ',';
2286 39         319 @is_good_alignment_token{@q} = (0) x scalar(@q);
2287              
2288             # Promote a few of these to 'best', with essentially no pad limit:
2289 39         165 $is_good_alignment_token{'='} = 1;
2290 39         123 $is_good_alignment_token{'if'} = 1;
2291 39         115 $is_good_alignment_token{'unless'} = 1;
2292 39         34258 $is_good_alignment_token{'=>'} = 1;
2293              
2294             # Note the hash values are set so that:
2295             # if ($is_good_alignment_token{$raw_tok}) => best
2296             # if defined ($is_good_alignment_token{$raw_tok}) => good or best
2297              
2298             } ## end BEGIN
2299              
2300             sub move_to_common_column {
2301              
2302             # This is a sub called by sub do_left_to_right_sweep to
2303             # move the alignment column of token $itok to $col_want for a
2304             # sequence of groups.
2305 118     118 0 410 my ( $rlines, $rgroups, $rmax_move, $ngb, $nge, $itok, $col_want,
2306             $raw_tok )
2307             = @_;
2308 118 100 66     551 return if ( !defined($ngb) || $nge <= $ngb );
2309 108         309 foreach my $ng ( $ngb .. $nge ) {
2310              
2311 242         387 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
  242         556  
2312 242         456 my $line = $rlines->[$jbeg];
2313 242         604 my $col = $line->get_column($itok);
2314 242         509 my $move = $col_want - $col;
2315 242 100       880 if ( $move > 0 ) {
    50          
2316              
2317             # limit padding increase in isolated two lines
2318             next
2319             if ( defined( $rmax_move->{$ng} )
2320             && $move > $rmax_move->{$ng}
2321 77 50 66     517 && !$is_good_alignment_token{$raw_tok} );
      33        
2322              
2323 77         346 $line->increase_field_width( $itok, $move );
2324             }
2325             elsif ( $move < 0 ) {
2326              
2327             # spot to take special action on failure to move
2328             }
2329             else {
2330             ##ok: (move==0)
2331             }
2332             }
2333 108         302 return;
2334             } ## end sub move_to_common_column
2335              
2336             sub do_left_to_right_sweep {
2337 64     64 0 262 my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
2338             = @_;
2339              
2340             # $blocking_level[$nj is the level at a match failure between groups
2341             # $ng-1 and $ng
2342 64         139 my @blocking_level;
2343 64         196 my $group_list_type = $rlines->[0]->{'list_type'};
2344              
2345 64         1116 foreach my $task ( @{$rtodo} ) {
  64         201  
2346 115         230 my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
  115         340  
2347              
2348             # Nothing to do for a single group
2349 115 50       337 next if ( $ng_end <= $ng_beg );
2350              
2351 115         321 my $ng_first; # index of the first group of a continuous sequence
2352             my $col_want; # the common alignment column of a sequence of groups
2353 115         0 my $col_limit; # maximum column before bumping into max line length
2354 115         206 my $line_count_ng_m = 0;
2355 115         236 my $jmax_m;
2356             my $it_stop_m;
2357              
2358             # Loop over the groups
2359             # 'ix_' = index in the array of lines
2360             # 'ng_' = index in the array of groups
2361             # 'it_' = index in the array of tokens
2362 115         238 my $ix_min = $rgroups->[$ng_beg]->[0];
2363 115         216 my $ix_max = $rgroups->[$ng_end]->[1];
2364 115         300 my $lines_total = $ix_max - $ix_min + 1;
2365 115         304 foreach my $ng ( $ng_beg .. $ng_end ) {
2366 259         380 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
  259         607  
2367 259         453 my $line_count_ng = $ix_end - $ix_beg + 1;
2368              
2369             # Important: note that since all lines in a group have a common
2370             # alignments object, we just have to work on one of the lines
2371             # (the first line). All of the rest will be changed
2372             # automatically.
2373 259         455 my $line = $rlines->[$ix_beg];
2374 259         443 my $jmax = $line->{'jmax'};
2375              
2376             # the maximum space without exceeding the line length:
2377 259         727 my $avail = $line->get_available_space_on_right();
2378 259         654 my $col = $line->get_column($itok);
2379 259         529 my $col_max = $col + $avail;
2380              
2381             # Initialize on first group
2382 259 100       719 if ( !defined($col_want) ) {
2383 115         194 $ng_first = $ng;
2384 115         195 $col_want = $col;
2385 115         201 $col_limit = $col_max;
2386 115         196 $line_count_ng_m = $line_count_ng;
2387 115         204 $jmax_m = $jmax;
2388 115         201 $it_stop_m = $it_stop;
2389 115         255 next;
2390             }
2391              
2392             # RULE: Throw a blocking flag upon encountering a token level
2393             # different from the level of the first blocking token. For
2394             # example, in the following example, if the = matches get
2395             # blocked between two groups as shown, then we want to start
2396             # blocking matches at the commas, which are at deeper level, so
2397             # that we do not get the big gaps shown here:
2398              
2399             # my $unknown3 = pack( "v", -2 );
2400             # my $unknown4 = pack( "v", 0x09 );
2401             # my $unknown5 = pack( "VVV", 0x06, 0x00, 0x00 );
2402             # my $num_bbd_blocks = pack( "V", $num_lists );
2403             # my $root_startblock = pack( "V", $root_start );
2404             # my $unknown6 = pack( "VV", 0x00, 0x1000 );
2405              
2406             # On the other hand, it is okay to keep matching at the same
2407             # level such as in a simple list of commas and/or fat commas.
2408              
2409 144   66     679 my $is_blocked = defined( $blocking_level[$ng] )
2410             && $lev > $blocking_level[$ng];
2411              
2412             # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
2413             # Do not let one or two lines with a **different number of
2414             # alignments** open up a big gap in a large block. For
2415             # example, we will prevent something like this, where the first
2416             # line pries open the rest:
2417              
2418             # $worksheet->write( "B7", "http://www.perl.com", undef, $format );
2419             # $worksheet->write( "C7", "", $format );
2420             # $worksheet->write( "D7", "", $format );
2421             # $worksheet->write( "D8", "", $format );
2422             # $worksheet->write( "D8", "", $format );
2423              
2424             # We should exclude from consideration two groups which are
2425             # effectively the same but separated because one does not
2426             # fit in the maximum allowed line length.
2427 144   100     1191 my $is_same_group =
2428             $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
2429              
2430 144         286 my $lines_above = $ix_beg - $ix_min;
2431 144         274 my $lines_below = $lines_total - $lines_above;
2432              
2433             # Increase the tolerable gap for certain favorable factors
2434 144         235 my $factor = 1;
2435 144         285 my $top_level = $lev == $group_level;
2436              
2437             # Align best top level alignment tokens like '=', 'if', ...
2438             # A factor of 10 allows a gap of up to 40 spaces
2439 144 100 100     706 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
2440 31         75 $factor = 10;
2441             }
2442              
2443             # Otherwise allow some minimal padding of good alignments
2444             else {
2445              
2446 113 100 100     780 if (
      100        
2447              
2448             defined( $is_good_alignment_token{$raw_tok} )
2449              
2450             # We have to be careful if there are just 2 lines.
2451             # This two-line factor allows large gaps only for 2
2452             # lines which are simple lists with fewer items on the
2453             # second line. It gives results similar to previous
2454             # versions of perltidy.
2455             && (
2456             $lines_total > 2
2457             || ( $group_list_type
2458             && $jmax < $jmax_m
2459             && $top_level )
2460             )
2461             )
2462             {
2463 102         187 $factor += 1;
2464 102 100       259 if ($top_level) {
2465 66         107 $factor += 1;
2466             }
2467             }
2468             }
2469              
2470 144         258 my $is_big_gap;
2471 144 100       369 if ( !$is_same_group ) {
2472 118   66     1037 $is_big_gap ||=
      33        
2473             ( $lines_above == 1
2474             || $lines_above == 2 && $lines_below >= 4 )
2475             && $col_want > $col + $short_pad * $factor;
2476 118   66     859 $is_big_gap ||=
      33        
2477             ( $lines_below == 1
2478             || $lines_below == 2 && $lines_above >= 4 )
2479             && $col > $col_want + $short_pad * $factor;
2480             }
2481              
2482             # if match is limited by gap size, stop aligning at this level
2483 144 50       383 if ($is_big_gap) {
2484 0         0 $blocking_level[$ng] = $lev - 1;
2485             }
2486              
2487             # quit and restart if it cannot join this batch
2488 144 50 100     1026 if ( $col_want > $col_max
      66        
      66        
2489             || $col > $col_limit
2490             || $is_big_gap
2491             || $is_blocked )
2492             {
2493              
2494             # remember the level of the first blocking token
2495 10 100       33 if ( !defined( $blocking_level[$ng] ) ) {
2496 9         23 $blocking_level[$ng] = $lev;
2497             }
2498              
2499             move_to_common_column(
2500 10         34 $rlines, $rgroups, $rmax_move, $ng_first,
2501             $ng - 1, $itok, $col_want, $raw_tok
2502             );
2503 10         21 $ng_first = $ng;
2504 10         21 $col_want = $col;
2505 10         14 $col_limit = $col_max;
2506 10         18 $line_count_ng_m = $line_count_ng;
2507 10         18 $jmax_m = $jmax;
2508 10         16 $it_stop_m = $it_stop;
2509 10         21 next;
2510             }
2511              
2512 134         253 $line_count_ng_m += $line_count_ng;
2513              
2514             # update the common column and limit
2515 134 100       358 if ( $col > $col_want ) { $col_want = $col }
  42         89  
2516 134 100       408 if ( $col_max < $col_limit ) { $col_limit = $col_max }
  35         90  
2517              
2518             } ## end loop over groups
2519              
2520 115 100       357 if ( $ng_end > $ng_first ) {
2521 108         419 move_to_common_column(
2522             $rlines, $rgroups, $rmax_move, $ng_first,
2523             $ng_end, $itok, $col_want, $raw_tok
2524             );
2525             } ## end loop over groups for one task
2526             } ## end loop over tasks
2527              
2528 64         169 return;
2529             } ## end sub do_left_to_right_sweep
2530             }
2531              
2532             sub delete_selected_tokens {
2533              
2534 469     469 0 1108 my ( $line_obj, $ridel ) = @_;
2535              
2536             # $line_obj is the line to be modified
2537             # $ridel is a ref to list of indexes to be deleted
2538              
2539             # remove an unused alignment token(s) to improve alignment chances
2540              
2541 469 50 33     2182 return if ( !defined($line_obj) || !defined($ridel) || !@{$ridel} );
  469   33     1535  
2542              
2543 469         1084 my $jmax_old = $line_obj->{'jmax'};
2544 469         1025 my $rfields_old = $line_obj->{'rfields'};
2545 469         893 my $rfield_lengths_old = $line_obj->{'rfield_lengths'};
2546 469         957 my $rpatterns_old = $line_obj->{'rpatterns'};
2547 469         901 my $rtokens_old = $line_obj->{'rtokens'};
2548 469         920 my $j_terminal_match = $line_obj->{'j_terminal_match'};
2549              
2550 39     39   385 use constant EXPLAIN_DELETE_SELECTED => 0;
  39         136  
  39         35072  
2551              
2552 469         1214 local $LIST_SEPARATOR = '> <';
2553 469         707 EXPLAIN_DELETE_SELECTED && print <<EOM;
2554             delete indexes: <@{$ridel}>
2555             old jmax: $jmax_old
2556             old tokens: <@{$rtokens_old}>
2557             old patterns: <@{$rpatterns_old}>
2558             old fields: <@{$rfields_old}>
2559             old field_lengths: <@{$rfield_lengths_old}>
2560             EOM
2561              
2562 469         951 my $rfields_new = [];
2563 469         942 my $rpatterns_new = [];
2564 469         965 my $rtokens_new = [];
2565 469         1062 my $rfield_lengths_new = [];
2566              
2567             # Convert deletion list to a hash to allow any order, multiple entries,
2568             # and avoid problems with index values out of range
2569 469         811 my %delete_me;
2570 469         809 @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
  469         1653  
  469         1068  
2571              
2572 469         1153 my $pattern_0 = $rpatterns_old->[0];
2573 469         976 my $field_0 = $rfields_old->[0];
2574 469         918 my $field_length_0 = $rfield_lengths_old->[0];
2575 469         807 push @{$rfields_new}, $field_0;
  469         1074  
2576 469         787 push @{$rfield_lengths_new}, $field_length_0;
  469         984  
2577 469         791 push @{$rpatterns_new}, $pattern_0;
  469         1058  
2578              
2579             # Loop to either copy items or concatenate fields and patterns
2580 469         813 my $jmin_del;
2581 469         1374 foreach my $j ( 0 .. $jmax_old - 1 ) {
2582 1515         2540 my $token = $rtokens_old->[$j];
2583 1515         2788 my $field = $rfields_old->[ $j + 1 ];
2584 1515         2298 my $field_length = $rfield_lengths_old->[ $j + 1 ];
2585 1515         2606 my $pattern = $rpatterns_old->[ $j + 1 ];
2586 1515 100       3231 if ( !$delete_me{$j} ) {
2587 743         1258 push @{$rtokens_new}, $token;
  743         1590  
2588 743         1221 push @{$rfields_new}, $field;
  743         1361  
2589 743         1238 push @{$rpatterns_new}, $pattern;
  743         1325  
2590 743         1130 push @{$rfield_lengths_new}, $field_length;
  743         1745  
2591             }
2592             else {
2593 772 100       1904 if ( !defined($jmin_del) ) { $jmin_del = $j }
  469         892  
2594 772         1780 $rfields_new->[-1] .= $field;
2595 772         1265 $rfield_lengths_new->[-1] += $field_length;
2596 772         1699 $rpatterns_new->[-1] .= $pattern;
2597             }
2598             }
2599              
2600             # ----- x ------ x ------ x ------
2601             #t 0 1 2 <- token indexing
2602             #f 0 1 2 3 <- field and pattern
2603              
2604 469         948 my $jmax_new = @{$rfields_new} - 1;
  469         1107  
2605 469         982 $line_obj->{'rtokens'} = $rtokens_new;
2606 469         854 $line_obj->{'rpatterns'} = $rpatterns_new;
2607 469         874 $line_obj->{'rfields'} = $rfields_new;
2608 469         844 $line_obj->{'rfield_lengths'} = $rfield_lengths_new;
2609 469         836 $line_obj->{'jmax'} = $jmax_new;
2610              
2611             # The value of j_terminal_match will be incorrect if we delete tokens prior
2612             # to it. We will have to give up on aligning the terminal tokens if this
2613             # happens.
2614 469 100 100     1429 if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
2615 1         5 $line_obj->{'j_terminal_match'} = undef;
2616             }
2617              
2618             # update list type -
2619 469 100       1203 if ( $line_obj->{'list_seqno'} ) {
2620              
2621             ## This works, but for efficiency see if we need to make a change:
2622             ## decide_if_list($line_obj);
2623              
2624             # An existing list will still be a list but with possibly different
2625             # leading token
2626 76         201 my $old_list_type = $line_obj->{'list_type'};
2627 76         152 my $new_list_type = EMPTY_STRING;
2628 76 100       505 if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
2629 49         126 $new_list_type = $rtokens_new->[0];
2630             }
2631 76 100 100     447 if ( !$old_list_type || $old_list_type ne $new_list_type ) {
2632 44         156 decide_if_list($line_obj);
2633             }
2634             }
2635              
2636 469         805 EXPLAIN_DELETE_SELECTED && print <<EOM;
2637              
2638             new jmax: $jmax_new
2639             new tokens: <@{$rtokens_new}>
2640             new patterns: <@{$rpatterns_new}>
2641             new fields: <@{$rfields_new}>
2642             EOM
2643 469         3499 return;
2644             } ## end sub delete_selected_tokens
2645              
2646             { ## closure for sub decode_alignment_token
2647              
2648             # This routine is called repeatedly for each token, so it needs to be
2649             # efficient. We can speed things up by remembering the inputs and outputs
2650             # in a hash.
2651             my %decoded_token;
2652              
2653             sub initialize_decode {
2654              
2655             # We will re-initialize the hash for each file. Otherwise, there is
2656             # a danger that the hash can become arbitrarily large if a very large
2657             # number of files is processed at once.
2658 560     560 0 4182 %decoded_token = ();
2659 560         1106 return;
2660             } ## end sub initialize_decode
2661              
2662             sub decode_alignment_token {
2663              
2664             # Unpack the values packed in an alignment token
2665             #
2666             # Usage:
2667             # my ( $raw_tok, $lev, $tag, $tok_count ) =
2668             # decode_alignment_token($token);
2669              
2670             # Alignment tokens have a trailing decimal level and optional tag (for
2671             # commas):
2672             # For example, the first comma in the following line
2673             # sub banner { crlf; report( shift, '/', shift ); crlf }
2674             # is decorated as follows:
2675             # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6)
2676              
2677             # An optional token count may be appended with a leading dot.
2678             # Currently this is only done for '=' tokens but this could change.
2679             # For example, consider the following line:
2680             # $nport = $port = shift || $name;
2681             # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
2682             # The second '=' will be '=0.2' [level 0, second equals]
2683 9364     9364 0 16074 my ($tok) = @_;
2684              
2685 9364 100       19276 if ( defined( $decoded_token{$tok} ) ) {
2686 7923         11100 return @{ $decoded_token{$tok} };
  7923         30036  
2687             }
2688              
2689 1441         4084 my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, EMPTY_STRING, 1 );
2690 1441 100       9341 if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
2691 1135         3299 $raw_tok = $1;
2692 1135         2287 $lev = $2;
2693 1135 100       4248 $tag = $3 if ($3);
2694 1135 100       3211 $tok_count = $5 if ($5);
2695             }
2696 1441         4862 my @vals = ( $raw_tok, $lev, $tag, $tok_count );
2697 1441         4187 $decoded_token{$tok} = \@vals;
2698 1441         6189 return @vals;
2699             } ## end sub decode_alignment_token
2700             }
2701              
2702             { ## closure for sub delete_unmatched_tokens
2703              
2704             my %is_assignment;
2705             my %keep_after_deleted_assignment;
2706              
2707             BEGIN {
2708 39     39   260 my @q;
2709              
2710 39         403 @q = qw(
2711             = **= += *= &= <<= &&=
2712             -= /= |= >>= ||= //=
2713             .= %= ^=
2714             x=
2715             );
2716 39         706 @is_assignment{@q} = (1) x scalar(@q);
2717              
2718             # These tokens may be kept following an = deletion
2719 39         169 @q = qw(
2720             if unless or ||
2721             );
2722 39         89328 @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
2723              
2724             } ## end BEGIN
2725              
2726             sub delete_unmatched_tokens {
2727 1705     1705 0 4012 my ( $rlines, $group_level ) = @_;
2728              
2729             # This is a important first step in vertical alignment in which
2730             # we remove as many obviously un-needed alignment tokens as possible.
2731             # This will prevent them from interfering with the final alignment.
2732              
2733             # Returns:
2734 1705         2840 my $max_lev_diff = 0; # used to avoid a call to prune_tree
2735 1705         2858 my $saw_side_comment = 0; # used to avoid a call for side comments
2736              
2737             # Handle no lines -- shouldn't happen
2738 1705 50       2816 return unless @{$rlines};
  1705         4341  
2739              
2740             # Handle a single line
2741 1705 100       2792 if ( @{$rlines} == 1 ) {
  1705         4720  
2742 1121         2456 my $line = $rlines->[0];
2743 1121         2476 my $jmax = $line->{'jmax'};
2744 1121         2451 my $length = $line->{'rfield_lengths'}->[$jmax];
2745 1121         2391 $saw_side_comment = $length > 0;
2746 1121         4730 return ( $max_lev_diff, $saw_side_comment );
2747             }
2748              
2749             # ignore hanging side comments in these operations
2750 584         1410 my @filtered = grep { !$_->{'is_hanging_side_comment'} } @{$rlines};
  1944         6057  
  584         1726  
2751 584         1639 my $rnew_lines = \@filtered;
2752              
2753 584         1223 $saw_side_comment = @filtered != @{$rlines};
  584         1386  
2754 584         1175 $max_lev_diff = 0;
2755              
2756             # nothing to do if all lines were hanging side comments
2757 584         970 my $jmax = @{$rnew_lines} - 1;
  584         1274  
2758 584 100       1751 return ( $max_lev_diff, $saw_side_comment ) if ( $jmax < 0 );
2759              
2760             #----------------------------------------------------
2761             # Create a hash of alignment token info for each line
2762             #----------------------------------------------------
2763 583         2161 ( my $rline_hashes, my $requals_info, $saw_side_comment, $max_lev_diff )
2764             = make_alignment_info( $group_level, $rnew_lines, $saw_side_comment );
2765              
2766             #------------------------------------------------------------
2767             # Find independent subgroups of lines. Neighboring subgroups
2768             # do not have a common alignment token.
2769             #------------------------------------------------------------
2770 583         1267 my @subgroups;
2771 583         1659 push @subgroups, [ 0, $jmax ];
2772 583         1965 foreach my $jl ( 0 .. $jmax - 1 ) {
2773 1315 100       3548 if ( $rnew_lines->[$jl]->{'end_group'} ) {
2774 72         207 $subgroups[-1]->[1] = $jl;
2775 72         276 push @subgroups, [ $jl + 1, $jmax ];
2776             }
2777             }
2778              
2779             #-----------------------------------------------------------
2780             # PASS 1 over subgroups to remove unmatched alignment tokens
2781             #-----------------------------------------------------------
2782             delete_unmatched_tokens_main_loop(
2783 583         2831 $group_level, $rnew_lines, \@subgroups,
2784             $rline_hashes, $requals_info
2785             );
2786              
2787             #----------------------------------------------------------------
2788             # PASS 2: Construct a tree of matched lines and delete some small
2789             # deeper levels of tokens. They also block good alignments.
2790             #----------------------------------------------------------------
2791 583 100       2695 prune_alignment_tree($rnew_lines) if ($max_lev_diff);
2792              
2793             #--------------------------------------------
2794             # PASS 3: compare all lines for common tokens
2795             #--------------------------------------------
2796 583         2871 match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
2797              
2798 583         6178 return ( $max_lev_diff, $saw_side_comment );
2799             } ## end sub delete_unmatched_tokens
2800              
2801             sub make_alignment_info {
2802              
2803 583     583 0 1754 my ( $group_level, $rnew_lines, $saw_side_comment ) = @_;
2804              
2805             #------------------------------------------------------------
2806             # Loop to create a hash of alignment token info for each line
2807             #------------------------------------------------------------
2808 583         1274 my $rline_hashes = [];
2809 583         1241 my @equals_info;
2810             my @line_info; # no longer used
2811 583         987 my $jmax = @{$rnew_lines} - 1;
  583         1347  
2812 583         1138 my $max_lev_diff = 0;
2813 583         1111 foreach my $line ( @{$rnew_lines} ) {
  583         1625  
2814 1898         3304 my $rhash = {};
2815 1898         3741 my $rtokens = $line->{'rtokens'};
2816 1898         3118 my $rpatterns = $line->{'rpatterns'};
2817 1898         2859 my $i = 0;
2818 1898         4552 my ( $i_eq, $tok_eq, $pat_eq );
2819 1898         0 my ( $lev_min, $lev_max );
2820 1898         2692 foreach my $tok ( @{$rtokens} ) {
  1898         3586  
2821 5174         9692 my ( $raw_tok, $lev, $tag, $tok_count ) =
2822             decode_alignment_token($tok);
2823              
2824 5174 100       11163 if ( $tok ne '#' ) {
2825 3276 100       6107 if ( !defined($lev_min) ) {
2826 1779         2901 $lev_min = $lev;
2827 1779         2844 $lev_max = $lev;
2828             }
2829             else {
2830 1497 100       3825 if ( $lev < $lev_min ) { $lev_min = $lev }
  75         144  
2831 1497 100       3173 if ( $lev > $lev_max ) { $lev_max = $lev }
  260         528  
2832             }
2833             }
2834             else {
2835 1898 100       4626 if ( !$saw_side_comment ) {
2836 1709         3830 my $length = $line->{'rfield_lengths'}->[ $i + 1 ];
2837 1709   66     5214 $saw_side_comment ||= $length;
2838             }
2839             }
2840              
2841             # Possible future upgrade: for multiple matches,
2842             # record [$i1, $i2, ..] instead of $i
2843 5174         16210 $rhash->{$tok} =
2844             [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
2845              
2846             # remember the first equals at line level
2847 5174 100 100     16470 if ( !defined($i_eq) && $raw_tok eq '=' ) {
2848              
2849 520 100       1414 if ( $lev eq $group_level ) {
2850 405         721 $i_eq = $i;
2851 405         717 $tok_eq = $tok;
2852 405         838 $pat_eq = $rpatterns->[$i];
2853             }
2854             }
2855 5174         8993 $i++;
2856             }
2857 1898         3187 push @{$rline_hashes}, $rhash;
  1898         3717  
2858 1898         5743 push @equals_info, [ $i_eq, $tok_eq, $pat_eq ];
2859 1898         5638 push @line_info, [ $lev_min, $lev_max ];
2860 1898 100       4099 if ( defined($lev_min) ) {
2861 1779         3160 my $lev_diff = $lev_max - $lev_min;
2862 1779 100       4885 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
  162         504  
2863             }
2864             }
2865              
2866             #----------------------------------------------------
2867             # Loop to compare each line pair and remember matches
2868             #----------------------------------------------------
2869 583         1880 my $rtok_hash = {};
2870 583         1256 my $nr = 0;
2871 583         1980 foreach my $jl ( 0 .. $jmax - 1 ) {
2872 1315         2151 my $nl = $nr;
2873 1315         1972 $nr = 0;
2874 1315         2207 my $jr = $jl + 1;
2875 1315         2193 my $rhash_l = $rline_hashes->[$jl];
2876 1315         2074 my $rhash_r = $rline_hashes->[$jr];
2877 1315         1985 foreach my $tok ( keys %{$rhash_l} ) {
  1315         4514  
2878 3154 100       6644 if ( defined( $rhash_r->{$tok} ) ) {
2879 2670         4114 my $il = $rhash_l->{$tok}->[0];
2880 2670         4021 my $ir = $rhash_r->{$tok}->[0];
2881 2670         3966 $rhash_l->{$tok}->[2] = $ir;
2882 2670         3796 $rhash_r->{$tok}->[1] = $il;
2883 2670 100       5759 if ( $tok ne '#' ) {
2884 1355         2016 push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
  1355         3879  
2885 1355         2693 $nr++;
2886             }
2887             }
2888             }
2889              
2890             # Set a line break if no matching tokens between these lines
2891             # (this is not strictly necessary now but does not hurt)
2892 1315 100 100     5168 if ( $nr == 0 && $nl > 0 ) {
2893 36         213 $rnew_lines->[$jl]->{'end_group'} = 1;
2894             }
2895              
2896             # Also set a line break if both lines have simple equals but with
2897             # different leading characters in patterns. This check is similar
2898             # to one in sub check_match, and will prevent sub
2899             # prune_alignment_tree from removing alignments which otherwise
2900             # should be kept. This fix is rarely needed, but it can
2901             # occasionally improve formatting.
2902             # For example:
2903             # my $name = $this->{Name};
2904             # $type = $this->ctype($genlooptype) if defined $genlooptype;
2905             # my $declini = ( $asgnonly ? "" : "\t$type *" );
2906             # my $cast = ( $type ? "($type *)" : "" );
2907             # The last two lines start with 'my' and will not match the
2908             # previous line starting with $type, so we do not want
2909             # prune_alignment tree to delete their ? : alignments at a deeper
2910             # level.
2911 1315         2121 my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
  1315         3273  
2912 1315         2095 my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
  1315         2625  
2913 1315 100 100     4538 if ( defined($i_eq_l) && defined($i_eq_r) ) {
2914              
2915             # Also, do not align equals across a change in ci level
2916             my $ci_jump = $rnew_lines->[$jl]->{'ci_level'} !=
2917 199         596 $rnew_lines->[$jr]->{'ci_level'};
2918              
2919 199 100 66     2193 if (
      66        
      100        
      100        
2920             $tok_eq_l eq $tok_eq_r
2921             && $i_eq_l == 0
2922             && $i_eq_r == 0
2923             && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
2924             || $ci_jump )
2925             )
2926             {
2927 12         47 $rnew_lines->[$jl]->{'end_group'} = 1;
2928             }
2929             }
2930             }
2931 583         4118 return ( $rline_hashes, \@equals_info, $saw_side_comment,
2932             $max_lev_diff );
2933             } ## end sub make_alignment_info
2934              
2935             sub delete_unmatched_tokens_main_loop {
2936              
2937             my (
2938 583     583 0 1872 $group_level, $rnew_lines, $rsubgroups,
2939             $rline_hashes, $requals_info
2940             ) = @_;
2941              
2942             #--------------------------------------------------------------
2943             # Main loop over subgroups to remove unmatched alignment tokens
2944             #--------------------------------------------------------------
2945              
2946             # flag to allow skipping pass 2 - not currently used
2947 583         1093 my $saw_large_group;
2948              
2949 583         1468 my $has_terminal_match = $rnew_lines->[-1]->{'j_terminal_match'};
2950              
2951 583         1102 foreach my $item ( @{$rsubgroups} ) {
  583         1531  
2952 655         1200 my ( $jbeg, $jend ) = @{$item};
  655         1614  
2953              
2954 655         1510 my $nlines = $jend - $jbeg + 1;
2955              
2956             #---------------------------------------------------
2957             # Look for complete if/elsif/else and ternary blocks
2958             #---------------------------------------------------
2959              
2960             # We are looking for a common '$dividing_token' like these:
2961              
2962             # if ( $b and $s ) { $p->{'type'} = 'a'; }
2963             # elsif ($b) { $p->{'type'} = 'b'; }
2964             # elsif ($s) { $p->{'type'} = 's'; }
2965             # else { $p->{'type'} = ''; }
2966             # ^----------- dividing_token
2967              
2968             # my $severity =
2969             # !$routine ? '[PFX]'
2970             # : $routine =~ /warn.*_d\z/ ? '[DS]'
2971             # : $routine =~ /ck_warn/ ? 'W'
2972             # : $routine =~ /ckWARN\d*reg_d/ ? 'S'
2973             # : $routine =~ /ckWARN\d*reg/ ? 'W'
2974             # : $routine =~ /vWARN\d/ ? '[WDS]'
2975             # : '[PFX]';
2976             # ^----------- dividing_token
2977              
2978             # Only look for groups which are more than 2 lines long. Two lines
2979             # can get messed up doing this, probably due to the various
2980             # two-line rules.
2981              
2982 655         1413 my $dividing_token;
2983             my %token_line_count;
2984 655 100       2080 if ( $nlines > 2 ) {
2985              
2986 301         968 foreach my $jj ( $jbeg .. $jend ) {
2987 1281         1843 my %seen;
2988 1281         2027 my $line = $rnew_lines->[$jj];
2989 1281         2023 my $rtokens = $line->{'rtokens'};
2990 1281         1715 foreach my $tok ( @{$rtokens} ) {
  1281         2278  
2991 3581 100       6636 if ( !$seen{$tok} ) {
2992 3065         4630 $seen{$tok}++;
2993 3065         6020 $token_line_count{$tok}++;
2994             }
2995             }
2996             }
2997              
2998 301         1697 foreach my $tok ( keys %token_line_count ) {
2999 927 100       2541 if ( $token_line_count{$tok} == $nlines ) {
3000 562 100 100     3246 if ( substr( $tok, 0, 1 ) eq '?'
      100        
3001             || substr( $tok, 0, 1 ) eq '{'
3002             && $tok =~ /^\{\d+if/ )
3003             {
3004 21         59 $dividing_token = $tok;
3005 21         54 last;
3006             }
3007             }
3008             }
3009             }
3010              
3011             #-------------------------------------------------------------
3012             # Loop over subgroup lines to remove unwanted alignment tokens
3013             #-------------------------------------------------------------
3014 655         2397 foreach my $jj ( $jbeg .. $jend ) {
3015 1898         3361 my $line = $rnew_lines->[$jj];
3016 1898         3094 my $rtokens = $line->{'rtokens'};
3017 1898         2936 my $rhash = $rline_hashes->[$jj];
3018 1898         2985 my $i_eq = $requals_info->[$jj]->[0];
3019 1898         2848 my @idel;
3020 1898         2748 my $imax = @{$rtokens} - 2;
  1898         3392  
3021 1898         2918 my $delete_above_level;
3022             my $deleted_assignment_token;
3023              
3024 1898         2951 my $saw_dividing_token = EMPTY_STRING;
3025 1898   100     8551 $saw_large_group ||= $nlines > 2 && $imax > 1;
      100        
3026              
3027             # Loop over all alignment tokens
3028 1898         3766 foreach my $i ( 0 .. $imax ) {
3029 3276         5223 my $tok = $rtokens->[$i];
3030 3276 50       6463 next if ( $tok eq '#' ); # shouldn't happen
3031             my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
3032 3276         4618 @{ $rhash->{$tok} };
  3276         8108  
3033              
3034             #------------------------------------------------------
3035             # Here is the basic RULE: remove an unmatched alignment
3036             # which does not occur in the surrounding lines.
3037             #------------------------------------------------------
3038 3276   100     8840 my $delete_me = !defined($il) && !defined($ir);
3039              
3040             # Apply any user controls. Note that not all lines pass
3041             # this way so they have to be applied elsewhere too.
3042 3276         4613 my $align_ok = 1;
3043 3276 100       6169 if (%valign_control_hash) {
3044 31         78 $align_ok = $valign_control_hash{$raw_tok};
3045 31 100       77 $align_ok = $valign_control_default
3046             unless defined($align_ok);
3047 31   100     96 $delete_me ||= !$align_ok;
3048             }
3049              
3050             # But now we modify this with exceptions...
3051              
3052             # EXCEPTION 1: If we are in a complete ternary or
3053             # if/elsif/else group, and this token is not on every line
3054             # of the group, should we delete it to preserve overall
3055             # alignment?
3056 3276 100       6453 if ($dividing_token) {
3057 147 100       314 if ( $token_line_count{$tok} >= $nlines ) {
3058 120   100     413 $saw_dividing_token ||= $tok eq $dividing_token;
3059             }
3060             else {
3061              
3062             # For shorter runs, delete toks to save alignment.
3063             # For longer runs, keep toks after the '{' or '?'
3064             # to allow sub-alignments within braces. The
3065             # number 5 lines is arbitrary but seems to work ok.
3066 27   66     109 $delete_me ||=
      100        
3067             ( $nlines < 5 || !$saw_dividing_token );
3068             }
3069             }
3070              
3071             # EXCEPTION 2: Remove all tokens above a certain level
3072             # following a previous deletion. For example, we have to
3073             # remove tagged higher level alignment tokens following a
3074             # '=>' deletion because the tags of higher level tokens
3075             # will now be incorrect. For example, this will prevent
3076             # aligning commas as follows after deleting the second '=>'
3077             # $w->insert(
3078             # ListBox => origin => [ 270, 160 ],
3079             # size => [ 200, 55 ],
3080             # );
3081 3276 100       6012 if ( defined($delete_above_level) ) {
3082 280 100       1068 if ( $lev > $delete_above_level ) {
3083 132   100     424 $delete_me ||= 1;
3084             }
3085 148         386 else { $delete_above_level = undef }
3086             }
3087              
3088             # EXCEPTION 3: Remove all but certain tokens after an
3089             # assignment deletion.
3090 3276 100 100     6045 if (
      100        
3091             $deleted_assignment_token
3092             && ( $lev > $group_level
3093             || !$keep_after_deleted_assignment{$raw_tok} )
3094             )
3095             {
3096 41   100     137 $delete_me ||= 1;
3097             }
3098              
3099             # EXCEPTION 4: Do not touch the first line of a 2 line
3100             # terminal match, such as below, because j_terminal has
3101             # already been set.
3102             # if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
3103             # else { $tago = $tagc = ''; }
3104             # But see snippets 'else1.t' and 'else2.t'
3105 3276 100 100     8475 $delete_me = 0
      100        
3106             if ( $jj == $jbeg
3107             && $has_terminal_match
3108             && $nlines == 2 );
3109              
3110             # EXCEPTION 5: misc additional rules for commas and equals
3111 3276 100 100     7963 if ( $delete_me && $tok_count == 1 ) {
3112              
3113             # okay to delete second and higher copies of a token
3114              
3115             # for a comma...
3116 721 100       2000 if ( $raw_tok eq ',' ) {
3117              
3118             # Do not delete commas before an equals
3119 262 100 100     979 $delete_me = 0
3120             if ( defined($i_eq) && $i < $i_eq );
3121              
3122             # Do not delete line-level commas
3123 262 100       733 $delete_me = 0 if ( $lev <= $group_level );
3124             }
3125              
3126             # For an assignment at group level..
3127 721 100 100     2803 if ( $is_assignment{$raw_tok}
3128             && $lev == $group_level )
3129             {
3130              
3131             # Do not delete if it is the last alignment of
3132             # multiple tokens; this will prevent some
3133             # undesirable alignments
3134 106 100 100     604 if ( $imax > 0 && $i == $imax ) {
3135 12         31 $delete_me = 0;
3136             }
3137              
3138             # Otherwise, set a flag to delete most
3139             # remaining tokens
3140 94         220 else { $deleted_assignment_token = $raw_tok }
3141             }
3142             }
3143              
3144             # Do not let a user exclusion be reactivated by above rules
3145 3276   66     10185 $delete_me ||= !$align_ok;
3146              
3147             #------------------------------------
3148             # Add this token to the deletion list
3149             #------------------------------------
3150 3276 100       6961 if ($delete_me) {
3151 661         1211 push @idel, $i;
3152              
3153             # update deletion propagation flags
3154 661 100 66     2097 if ( !defined($delete_above_level)
3155             || $lev < $delete_above_level )
3156             {
3157              
3158             # delete all following higher level alignments
3159 529         915 $delete_above_level = $lev;
3160              
3161             # but keep deleting after => to next lower level
3162             # to avoid some bizarre alignments
3163 529 100       1648 if ( $raw_tok eq '=>' ) {
3164 53         151 $delete_above_level = $lev - 1;
3165             }
3166             }
3167             }
3168             } # End loop over alignment tokens
3169              
3170             # Process all deletion requests for this line
3171 1898 100       5910 if (@idel) {
3172 413         1736 delete_selected_tokens( $line, \@idel );
3173             }
3174             } # End loop over lines
3175             } ## end main loop over subgroups
3176              
3177 583         1692 return;
3178             } ## end sub delete_unmatched_tokens_main_loop
3179             }
3180              
3181             sub match_line_pairs {
3182 583     583 0 1866 my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
3183              
3184             # Compare each pair of lines and save information about common matches
3185             # $rlines = list of lines including hanging side comments
3186             # $rnew_lines = list of lines without any hanging side comments
3187             # $rsubgroups = list of subgroups of the new lines
3188              
3189             # TODO:
3190             # Maybe change: imax_pair => pair_match_info = ref to array
3191             # = [$imax_align, $rMsg, ... ]
3192             # This may eventually have multi-level match info
3193              
3194             # Previous line vars
3195 583         2318 my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
3196             $list_type_m, $ci_level_m );
3197              
3198             # Current line vars
3199 583         0 my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
3200             $ci_level );
3201              
3202             # loop over subgroups
3203 583         1076 foreach my $item ( @{$rsubgroups} ) {
  583         1400  
3204 655         1150 my ( $jbeg, $jend ) = @{$item};
  655         1501  
3205 655         1589 my $nlines = $jend - $jbeg + 1;
3206 655 100       1926 next if ( $nlines <= 1 );
3207              
3208             # loop over lines in a subgroup
3209 564         1565 foreach my $jj ( $jbeg .. $jend ) {
3210              
3211 1807         2663 $line_m = $line;
3212 1807         3537 $rtokens_m = $rtokens;
3213 1807         2989 $rpatterns_m = $rpatterns;
3214 1807         2518 $rfield_lengths_m = $rfield_lengths;
3215 1807         2394 $imax_m = $imax;
3216 1807         2691 $list_type_m = $list_type;
3217 1807         2610 $ci_level_m = $ci_level;
3218              
3219 1807         2993 $line = $rnew_lines->[$jj];
3220 1807         2975 $rtokens = $line->{'rtokens'};
3221 1807         2915 $rpatterns = $line->{'rpatterns'};
3222 1807         2742 $rfield_lengths = $line->{'rfield_lengths'};
3223 1807         2527 $imax = @{$rtokens} - 2;
  1807         2796  
3224 1807         3098 $list_type = $line->{'list_type'};
3225 1807         2843 $ci_level = $line->{'ci_level'};
3226              
3227             # nothing to do for first line
3228 1807 100       4393 next if ( $jj == $jbeg );
3229              
3230 1243         2848 my $ci_jump = $ci_level - $ci_level_m;
3231              
3232 1243 100       3117 my $imax_min = $imax_m < $imax ? $imax_m : $imax;
3233              
3234 1243         2005 my $imax_align = -1;
3235              
3236             # find number of leading common tokens
3237              
3238             #---------------------------------
3239             # No match to hanging side comment
3240             #---------------------------------
3241 1243 50 100     4722 if ( $line->{'is_hanging_side_comment'} ) {
    100          
3242              
3243             # Should not get here; HSC's have been filtered out
3244 0         0 $imax_align = -1;
3245             }
3246              
3247             #-----------------------------
3248             # Handle comma-separated lists
3249             #-----------------------------
3250             elsif ( $list_type && $list_type eq $list_type_m ) {
3251              
3252             # do not align lists across a ci jump with new list method
3253 488 50       1136 if ($ci_jump) { $imax_min = -1 }
  0         0  
3254              
3255 488         861 my $i_nomatch = $imax_min + 1;
3256 488         1014 foreach my $i ( 0 .. $imax_min ) {
3257 883         1507 my $tok = $rtokens->[$i];
3258 883         1384 my $tok_m = $rtokens_m->[$i];
3259 883 50       2062 if ( $tok ne $tok_m ) {
3260 0         0 $i_nomatch = $i;
3261 0         0 last;
3262             }
3263             }
3264              
3265 488         893 $imax_align = $i_nomatch - 1;
3266             }
3267              
3268             #-----------------
3269             # Handle non-lists
3270             #-----------------
3271             else {
3272 755         1444 my $i_nomatch = $imax_min + 1;
3273 755         1553 foreach my $i ( 0 .. $imax_min ) {
3274 745         1392 my $tok = $rtokens->[$i];
3275 745         1263 my $tok_m = $rtokens_m->[$i];
3276 745 100       1669 if ( $tok ne $tok_m ) {
3277 19         54 $i_nomatch = $i;
3278 19         53 last;
3279             }
3280              
3281 726         1294 my $pat = $rpatterns->[$i];
3282 726         1183 my $pat_m = $rpatterns_m->[$i];
3283              
3284             # If patterns don't match, we have to be careful...
3285 726 100       1779 if ( $pat_m ne $pat ) {
3286 166         431 my $pad =
3287             $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
3288 166         506 my ( $match_code, $rmsg ) =
3289             compare_patterns( $group_level,
3290             $tok, $tok_m, $pat, $pat_m, $pad );
3291 166 100       563 if ($match_code) {
3292 8 100       37 if ( $match_code == 1 ) { $i_nomatch = $i }
  7 50       17  
3293 1         2 elsif ( $match_code == 2 ) { $i_nomatch = 0 }
3294             else { } ##ok
3295 8         21 last;
3296             }
3297             }
3298             }
3299 755         1279 $imax_align = $i_nomatch - 1;
3300             }
3301              
3302 1243         2705 $line_m->{'imax_pair'} = $imax_align;
3303              
3304             } ## end loop over lines
3305              
3306             # Put fence at end of subgroup
3307 564         1868 $line->{'imax_pair'} = -1;
3308              
3309             } ## end loop over subgroups
3310              
3311             # if there are hanging side comments, propagate the pair info down to them
3312             # so that lines can just look back one line for their pair info.
3313 583 100       1069 if ( @{$rlines} > @{$rnew_lines} ) {
  583         1132  
  583         1869  
3314 24         59 my $last_pair_info = -1;
3315 24         54 foreach my $line ( @{$rlines} ) {
  24         77  
3316 95 100       217 if ( $line->{'is_hanging_side_comment'} ) {
3317 39         91 $line->{'imax_pair'} = $last_pair_info;
3318             }
3319             else {
3320 56         113 $last_pair_info = $line->{'imax_pair'};
3321             }
3322             }
3323             }
3324 583         1534 return;
3325             } ## end sub match_line_pairs
3326              
3327             sub compare_patterns {
3328              
3329 166     166 0 516 my ( $group_level, $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
3330              
3331             # helper routine for sub match_line_pairs to decide if patterns in two
3332             # lines match well enough..Given
3333             # $tok_m, $pat_m = token and pattern of first line
3334             # $tok, $pat = token and pattern of second line
3335             # $pad = 0 if no padding is needed, !=0 otherwise
3336             # return code:
3337             # 0 = patterns match, continue
3338             # 1 = no match
3339             # 2 = no match, and lines do not match at all
3340              
3341 166         311 my $GoToMsg = EMPTY_STRING;
3342 166         253 my $return_code = 0;
3343              
3344 39     39   409 use constant EXPLAIN_COMPARE_PATTERNS => 0;
  39         102  
  39         50633  
3345              
3346 166         606 my ( $alignment_token, $lev, $tag, $tok_count ) =
3347             decode_alignment_token($tok);
3348              
3349             # We have to be very careful about aligning commas
3350             # when the pattern's don't match, because it can be
3351             # worse to create an alignment where none is needed
3352             # than to omit one. Here's an example where the ','s
3353             # are not in named containers. The first line below
3354             # should not match the next two:
3355             # ( $a, $b ) = ( $b, $r );
3356             # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
3357             # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
3358 166 100       839 if ( $alignment_token eq ',' ) {
    100          
    100          
3359              
3360             # do not align commas unless they are in named
3361             # containers
3362 26 100       140 if ( $tok !~ /[A-Za-z]/ ) {
3363 3         5 $return_code = 1;
3364 3         8 $GoToMsg = "do not align commas in unnamed containers";
3365             }
3366             else {
3367 23         44 $return_code = 0;
3368             }
3369             }
3370              
3371             # do not align parens unless patterns match;
3372             # large ugly spaces can occur in math expressions.
3373             elsif ( $alignment_token eq '(' ) {
3374              
3375             # But we can allow a match if the parens don't
3376             # require any padding.
3377 4 50       17 if ( $pad != 0 ) {
3378 4         10 $return_code = 1;
3379 4         10 $GoToMsg = "do not align '(' unless patterns match or pad=0";
3380             }
3381             else {
3382 0         0 $return_code = 0;
3383             }
3384             }
3385              
3386             # Handle an '=' alignment with different patterns to
3387             # the left.
3388             elsif ( $alignment_token eq '=' ) {
3389              
3390             # It is best to be a little restrictive when
3391             # aligning '=' tokens. Here is an example of
3392             # two lines that we will not align:
3393             # my $variable=6;
3394             # $bb=4;
3395             # The problem is that one is a 'my' declaration,
3396             # and the other isn't, so they're not very similar.
3397             # We will filter these out by comparing the first
3398             # letter of the pattern. This is crude, but works
3399             # well enough.
3400 16 50       141 if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
    100          
3401 0         0 $GoToMsg = "first character before equals differ";
3402 0         0 $return_code = 1;
3403             }
3404              
3405             # The introduction of sub 'prune_alignment_tree'
3406             # enabled alignment of lists left of the equals with
3407             # other scalar variables. For example:
3408             # my ( $D, $s, $e ) = @_;
3409             # my $d = length $D;
3410             # my $c = $e - $s - $d;
3411              
3412             # But this would change formatting of a lot of scripts,
3413             # so for now we prevent alignment of comma lists on the
3414             # left with scalars on the left. We will also prevent
3415             # any partial alignments.
3416              
3417             # set return code 2 if the = is at line level, but
3418             # set return code 1 if the = is below line level, i.e.
3419             # sub new { my ( $p, $v ) = @_; bless \$v, $p }
3420             # sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
3421              
3422             elsif ( ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) ) {
3423 1         5 $GoToMsg = "mixed commas/no-commas before equals";
3424 1         3 $return_code = 1;
3425 1 50       5 if ( $lev eq $group_level ) {
3426 1         3 $return_code = 2;
3427             }
3428             }
3429             else {
3430 15         44 $return_code = 0;
3431             }
3432             }
3433             else {
3434 120         186 $return_code = 0;
3435             }
3436              
3437             EXPLAIN_COMPARE_PATTERNS
3438             && $return_code
3439 166         231 && print {*STDOUT} "no match because $GoToMsg\n";
3440              
3441 166         431 return ( $return_code, \$GoToMsg );
3442              
3443             } ## end sub compare_patterns
3444              
3445             sub fat_comma_to_comma {
3446 765     765 0 1427 my ($str) = @_;
3447              
3448             # We are changing '=>' to ',' and removing any trailing decimal count
3449             # because currently fat commas have a count and commas do not.
3450             # For example, we will change '=>2+{-3.2' into ',2+{-3'
3451 765 100       2199 if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
  181         543  
3452 765         2155 return $str;
3453             } ## end sub fat_comma_to_comma
3454              
3455             sub get_line_token_info {
3456              
3457             # scan lines of tokens and return summary information about the range of
3458             # levels and patterns.
3459 154     154 0 401 my ($rlines) = @_;
3460              
3461             # First scan to check monotonicity. Here is an example of several
3462             # lines which are monotonic. The = is the lowest level, and
3463             # the commas are all one level deeper. So this is not nonmonotonic.
3464             # $$d{"weeks"} = [ "w", "wk", "wks", "week", "weeks" ];
3465             # $$d{"days"} = [ "d", "day", "days" ];
3466             # $$d{"hours"} = [ "h", "hr", "hrs", "hour", "hours" ];
3467 154         316 my @all_token_info;
3468 154         332 my $all_monotonic = 1;
3469 154         743 foreach my $jj ( 0 .. @{$rlines} - 1 ) {
  154         509  
3470 627         1167 my ($line) = $rlines->[$jj];
3471 627         1106 my $rtokens = $line->{'rtokens'};
3472 627         841 my $last_lev;
3473 627         976 my $is_monotonic = 1;
3474 627         937 my $i = -1;
3475 627         927 foreach my $tok ( @{$rtokens} ) {
  627         1160  
3476 1649         2119 $i++;
3477 1649         2945 my ( $raw_tok, $lev, $tag, $tok_count ) =
3478             decode_alignment_token($tok);
3479 1649         2561 push @{ $all_token_info[$jj] },
  1649         4806  
3480             [ $raw_tok, $lev, $tag, $tok_count ];
3481 1649 100       4206 last if ( $tok eq '#' );
3482 1022 100 100     3169 if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
  81         194  
3483 1022         1794 $last_lev = $lev;
3484             }
3485 627 100       1814 if ( !$is_monotonic ) { $all_monotonic = 0 }
  78         179  
3486             }
3487              
3488 154         659 my $rline_values = [];
3489 154         1126 foreach my $jj ( 0 .. @{$rlines} - 1 ) {
  154         595  
3490 627         1212 my ($line) = $rlines->[$jj];
3491              
3492 627         1094 my $rtokens = $line->{'rtokens'};
3493 627         941 my $i = -1;
3494 627         946 my ( $lev_min, $lev_max );
3495 627         988 my $token_pattern_max = EMPTY_STRING;
3496 627         876 my %saw_level;
3497 627         879 my $is_monotonic = 1;
3498              
3499             # find the index of the last token before the side comment
3500 627         888 my $imax = @{$rtokens} - 2;
  627         1079  
3501 627         949 my $imax_true = $imax;
3502              
3503             # If the entire group is monotonic, and the line ends in a comma list,
3504             # walk it back to the first such comma. this will have the effect of
3505             # making all trailing ragged comma lists match in the prune tree
3506             # routine. these trailing comma lists can better be handled by later
3507             # alignment rules.
3508              
3509             # Treat fat commas the same as commas here by converting them to
3510             # commas. This will improve the chance of aligning the leading parts
3511             # of ragged lists.
3512              
3513 627         1590 my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
3514 627 100 100     2473 if ( $all_monotonic && $tok_end =~ /^,/ ) {
3515 142         347 my $ii = $imax - 1;
3516 142   100     550 while ( $ii >= 0
3517             && fat_comma_to_comma( $rtokens->[$ii] ) eq $tok_end )
3518             {
3519 93         158 $imax = $ii;
3520 93         234 $ii--;
3521             }
3522             }
3523              
3524             # make a first pass to find level range
3525 627         1010 my $last_lev;
3526 627         934 foreach my $tok ( @{$rtokens} ) {
  627         1160  
3527 1556         2042 $i++;
3528 1556 100       2890 last if ( $i > $imax );
3529 929 50       1759 last if ( $tok eq '#' );
3530             my ( $raw_tok, $lev, $tag, $tok_count ) =
3531 929         1227 @{ $all_token_info[$jj]->[$i] };
  929         2434  
3532              
3533 929 50       1831 last if ( $tok eq '#' );
3534 929         1469 $token_pattern_max .= $tok;
3535 929         1699 $saw_level{$lev}++;
3536 929 100       1759 if ( !defined($lev_min) ) {
3537 527         1192 $lev_min = $lev;
3538 527         771 $lev_max = $lev;
3539             }
3540             else {
3541 402 100       1007 if ( $lev < $lev_min ) { $lev_min = $lev; }
  51         115  
3542 402 100       813 if ( $lev > $lev_max ) { $lev_max = $lev; }
  122         960  
3543 402 100       841 if ( $lev < $last_lev ) { $is_monotonic = 0 }
  81         139  
3544             }
3545 929         1532 $last_lev = $lev;
3546             }
3547              
3548             # handle no levels
3549 627         1266 my $rtoken_patterns = {};
3550 627         1119 my $rtoken_indexes = {};
3551 627         2363 my @levs = sort keys %saw_level;
3552 627 100       1929 if ( !defined($lev_min) ) {
    100          
3553 100         191 $lev_min = -1;
3554 100         186 $lev_max = -1;
3555 100         224 $levs[0] = -1;
3556 100         323 $rtoken_patterns->{$lev_min} = EMPTY_STRING;
3557 100         253 $rtoken_indexes->{$lev_min} = [];
3558             }
3559              
3560             # handle one level
3561             elsif ( $lev_max == $lev_min ) {
3562 359         861 $rtoken_patterns->{$lev_max} = $token_pattern_max;
3563 359         1043 $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
3564             }
3565              
3566             # handle multiple levels
3567             else {
3568 168         438 $rtoken_patterns->{$lev_max} = $token_pattern_max;
3569 168         681 $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
3570              
3571 168         376 my $lev_top = pop @levs; # already did max level
3572 168         330 my $itok = -1;
3573 168         264 foreach my $tok ( @{$rtokens} ) {
  168         395  
3574 704         965 $itok++;
3575 704 100       1402 last if ( $itok > $imax );
3576             my ( $raw_tok, $lev, $tag, $tok_count ) =
3577 536         753 @{ $all_token_info[$jj]->[$itok] };
  536         1209  
3578 536 50       1130 last if ( $raw_tok eq '#' );
3579 536         836 foreach my $lev_test (@levs) {
3580 564 100       1882 next if ( $lev > $lev_test );
3581 280         638 $rtoken_patterns->{$lev_test} .= $tok;
3582 280         422 push @{ $rtoken_indexes->{$lev_test} }, $itok;
  280         816  
3583             }
3584             }
3585 168         497 push @levs, $lev_top;
3586             }
3587              
3588 627         1053 push @{$rline_values},
  627         2448  
3589             [
3590             $lev_min, $lev_max, $rtoken_patterns, \@levs,
3591             $rtoken_indexes, $is_monotonic, $imax_true, $imax,
3592             ];
3593              
3594             # debug
3595 627         1836 0 && do {
3596             local $LIST_SEPARATOR = ')(';
3597             print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
3598             foreach my $key ( sort keys %{$rtoken_patterns} ) {
3599             print "$key => $rtoken_patterns->{$key}\n";
3600             print "$key => @{$rtoken_indexes->{$key}}\n";
3601             }
3602             };
3603             } ## end loop over lines
3604 154         1246 return ( $rline_values, $all_monotonic );
3605             } ## end sub get_line_token_info
3606              
3607             sub prune_alignment_tree {
3608 154     154 0 493 my ($rlines) = @_;
3609 154         290 my $jmax = @{$rlines} - 1;
  154         447  
3610 154 50       600 return if ( $jmax <= 0 );
3611              
3612             # Vertical alignment in perltidy is done as an iterative process. The
3613             # starting point is to mark all possible alignment tokens ('=', ',', '=>',
3614             # etc) for vertical alignment. Then we have to delete all alignments
3615             # which, if actually made, would detract from overall alignment. This
3616             # is done in several phases of which this is one.
3617              
3618             # In this routine we look at the alignments of a group of lines as a
3619             # hierarchical tree. We will 'prune' the tree to limited depths if that
3620             # will improve overall alignment at the lower depths.
3621             # For each line we will be looking at its alignment patterns down to
3622             # different fixed depths. For each depth, we include all lower depths and
3623             # ignore all higher depths. We want to see if we can get alignment of a
3624             # larger group of lines if we ignore alignments at some lower depth.
3625             # Here is an # example:
3626              
3627             # for (
3628             # [ '$var', sub { join $_, "bar" }, 0, "bar" ],
3629             # [ 'CONSTANT', sub { join "foo", "bar" }, 0, "bar" ],
3630             # [ 'CONSTANT', sub { join "foo", "bar", 3 }, 1, "barfoo3" ],
3631             # [ '$myvar', sub { my $var; join $var, "bar" }, 0, "bar" ],
3632             # );
3633              
3634             # In the above example, all lines have three commas at the lowest depth
3635             # (zero), so if there were no other alignments, these lines would all
3636             # align considering only the zero depth alignment token. But some lines
3637             # have additional comma alignments at the next depth, so we need to decide
3638             # if we should drop those to keep the top level alignments, or keep those
3639             # for some additional low level alignments at the expense losing some top
3640             # level alignments. In this case we will drop the deeper level commas to
3641             # keep the entire collection aligned. But in some cases the decision could
3642             # go the other way.
3643              
3644             # The tree for this example at the zero depth has one node containing
3645             # all four lines, since they are identical at zero level (three commas).
3646             # At depth one, there are three 'children' nodes, namely:
3647             # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
3648             # - line 3, which has 2 commas at depth 1
3649             # - line4, which has a ';' and a ',' at depth 1
3650             # There are no deeper alignments in this example.
3651             # so the tree structure for this example is:
3652             #
3653             # depth 0 depth 1 depth 2
3654             # [lines 1-4] -- [line 1-2] - (empty)
3655             # | [line 3] - (empty)
3656             # | [line 4] - (empty)
3657              
3658             # We can carry this to any depth, but it is not really useful to go below
3659             # depth 2. To cleanly stop there, we will consider depth 2 to contain all
3660             # alignments at depth >=2.
3661              
3662 39     39   365 use constant EXPLAIN_PRUNE => 0;
  39         121  
  39         54189  
3663              
3664             #-------------------------------------------------------------------
3665             # Prune Tree Step 1. Start by scanning the lines and collecting info
3666             #-------------------------------------------------------------------
3667              
3668             # Note that the caller had this info but we have to redo this now because
3669             # alignment tokens may have been deleted.
3670 154         699 my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
3671              
3672             # If all the lines have levels which increase monotonically from left to
3673             # right, then the sweep-left-to-right pass can do a better job of alignment
3674             # than pruning, and without deleting alignments.
3675 154 100       1026 return if ($all_monotonic);
3676              
3677             # Contents of $rline_values
3678             # [
3679             # $lev_min, $lev_max, $rtoken_patterns, \@levs,
3680             # $rtoken_indexes, $is_monotonic, $imax_true, $imax,
3681             # ];
3682              
3683             # We can work to any depth, but there is little advantage to working
3684             # to a a depth greater than 2
3685 31         95 my $MAX_DEPTH = 2;
3686              
3687             # This arrays will hold the tree of alignment tokens at different depths
3688             # for these lines.
3689 31         77 my @match_tree;
3690              
3691             # Tree nodes contain these values:
3692             # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
3693             # $nc_beg_p, $nc_end_p, $rindexes];
3694             # where
3695             # $depth = 0,1,2 = index of depth of the match
3696              
3697             # $jbeg beginning index j of the range of lines in this match
3698             # $jend ending index j of the range of lines in this match
3699             # $n_parent = index of the containing group at $depth-1, if it exists
3700             # $level = actual level of code being matched in this group
3701             # $pattern = alignment pattern being matched
3702             # $nc_beg_p = first child
3703             # $nc_end_p = last child
3704             # $rindexes = ref to token indexes
3705              
3706             # the patterns and levels of the current group being formed at each depth
3707 31         159 my ( @token_patterns_current, @levels_current, @token_indexes_current );
3708              
3709             # the patterns and levels of the next line being tested at each depth
3710 31         0 my ( @token_patterns_next, @levels_next, @token_indexes_next );
3711              
3712             #-----------------------------------------------------------
3713             # define a recursive worker subroutine for tree construction
3714             #-----------------------------------------------------------
3715              
3716             # This is a recursive routine which is called if a match condition changes
3717             # at any depth when a new line is encountered. It ends the match node
3718             # which changed plus all deeper nodes attached to it.
3719 31         0 my $end_node;
3720             $end_node = sub {
3721 321     321   558 my ( $depth, $jl, $n_parent ) = @_;
3722              
3723             # $depth is the tree depth
3724             # $jl is the index of the line
3725             # $n_parent is index of the parent node of this node
3726              
3727 321 100       636 return if ( $depth > $MAX_DEPTH );
3728              
3729             # end any current group at this depth
3730 234 100 100     785 if ( $jl >= 0
      66        
      100        
3731             && defined( $match_tree[$depth] )
3732 75         366 && @{ $match_tree[$depth] }
3733             && defined( $levels_current[$depth] ) )
3734             {
3735 69         134 $match_tree[$depth]->[-1]->[1] = $jl;
3736             }
3737              
3738             # Define the index of the node we will create below
3739 234         402 my $ng_self = 0;
3740 234 100       468 if ( defined( $match_tree[$depth] ) ) {
3741 75         123 $ng_self = @{ $match_tree[$depth] };
  75         147  
3742             }
3743              
3744             # end any next deeper child node(s)
3745 234         773 $end_node->( $depth + 1, $jl, $ng_self );
3746              
3747             # update the levels being matched
3748 234         453 $token_patterns_current[$depth] = $token_patterns_next[$depth];
3749 234         422 $token_indexes_current[$depth] = $token_indexes_next[$depth];
3750 234         372 $levels_current[$depth] = $levels_next[$depth];
3751              
3752             # Do not start a new group at this level if it is not being used
3753 234 100 66     970 if ( !defined( $levels_next[$depth] )
      66        
3754             || $depth > 0
3755             && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
3756             {
3757 120         213 return;
3758             }
3759              
3760             # Create a node for the next group at this depth. We initially assume
3761             # that it will continue to $jmax, and correct that later if the node
3762             # ends earlier.
3763 114         195 push @{ $match_tree[$depth] },
  114         549  
3764             [
3765             $jl + 1, $jmax, $n_parent, $levels_current[$depth],
3766             $token_patterns_current[$depth],
3767             undef, undef, $token_indexes_current[$depth],
3768             ];
3769              
3770 114         279 return;
3771 31         324 }; ## end sub end_node
3772              
3773             #-----------------------------------------------------
3774             # Prune Tree Step 2. Loop to form the tree of matches.
3775             #-----------------------------------------------------
3776 31         132 foreach my $jp ( 0 .. $jmax ) {
3777              
3778             # working with two adjacent line indexes, 'm'=minus, 'p'=plus
3779 236         364 my $jm = $jp - 1;
3780              
3781             # Pull out needed values for the next line
3782             my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
3783             $is_monotonic, $imax_true, $imax )
3784 236         305 = @{ $rline_values->[$jp] };
  236         591  
3785              
3786             # Transfer levels and patterns for this line to the working arrays.
3787             # If the number of levels differs from our chosen MAX_DEPTH ...
3788             # if fewer than MAX_DEPTH: leave levels at missing depths undefined
3789             # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
3790 236         419 @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
  236         522  
3791 236 100       354 if ( @{$rlevs} > $MAX_DEPTH ) {
  236         467  
3792 5         7 $levels_next[$MAX_DEPTH] = $rlevs->[-1];
3793             }
3794 236         328 my $depth = 0;
3795 236         361 foreach my $item (@levels_next) {
3796             $token_patterns_next[$depth] =
3797 708 100       1280 defined($item) ? $rtoken_patterns->{$item} : undef;
3798             $token_indexes_next[$depth] =
3799 708 100       1075 defined($item) ? $rtoken_indexes->{$item} : undef;
3800 708         972 $depth++;
3801             }
3802              
3803             # Look for a change in match groups...
3804              
3805             # Initialize on the first line
3806 236 100       834 if ( $jp == 0 ) {
    100          
    50          
3807 31         71 my $n_parent;
3808 31         134 $end_node->( 0, $jm, $n_parent );
3809             }
3810              
3811             # End groups if a hard flag has been set
3812             elsif ( $rlines->[$jm]->{'end_group'} ) {
3813 10         34 my $n_parent;
3814 10         34 $end_node->( 0, $jm, $n_parent );
3815             }
3816              
3817             # Continue at hanging side comment
3818             elsif ( $rlines->[$jp]->{'is_hanging_side_comment'} ) {
3819 0         0 next;
3820             }
3821              
3822             # Otherwise see if anything changed and update the tree if so
3823             else {
3824 195         359 foreach my $depth ( 0 .. $MAX_DEPTH ) {
3825              
3826 401         566 my $def_current = defined( $token_patterns_current[$depth] );
3827 401         501 my $def_next = defined( $token_patterns_next[$depth] );
3828 401 100 100     935 last if ( !$def_current && !$def_next );
3829 253 100 100     1031 if ( !$def_current
      100        
3830             || !$def_next
3831             || $token_patterns_current[$depth] ne
3832             $token_patterns_next[$depth] )
3833             {
3834 46         85 my $n_parent;
3835 46 100 66     276 if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
3836 23         40 $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
  23         55  
3837             }
3838 46         171 $end_node->( $depth, $jm, $n_parent );
3839 46         126 last;
3840             }
3841             }
3842             }
3843             } ## end loop to form tree of matches
3844              
3845             #---------------------------------------------------------
3846             # Prune Tree Step 3. Make links from parent to child nodes
3847             #---------------------------------------------------------
3848              
3849             # It seemed cleaner to do this as a separate step rather than during tree
3850             # construction. The children nodes have links up to the parent node which
3851             # created them. Now make links in the opposite direction, so the parents
3852             # can find the children. We store the range of children nodes ($nc_beg,
3853             # $nc_end) of each parent with two additional indexes in the original array.
3854             # These will be undef if no children.
3855 31         179 foreach my $depth ( reverse( 1 .. $MAX_DEPTH ) ) {
3856 62 100       205 next unless defined( $match_tree[$depth] );
3857 32         59 my $nc_max = @{ $match_tree[$depth] } - 1;
  32         95  
3858 32         90 my $np_now;
3859 32         104 foreach my $nc ( 0 .. $nc_max ) {
3860 50         109 my $np = $match_tree[$depth]->[$nc]->[2];
3861 50 50       149 if ( !defined($np) ) {
3862              
3863             # shouldn't happen
3864             #print STDERR "lost child $np at depth $depth\n";
3865 0         0 next;
3866             }
3867 50 100 100     201 if ( !defined($np_now) || $np != $np_now ) {
3868 35         71 $np_now = $np;
3869 35         114 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
3870             }
3871 50         158 $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
3872             }
3873             } ## end loop to make links down to the child nodes
3874              
3875 31         70 EXPLAIN_PRUNE > 0 && do {
3876             print "Tree complete. Found these groups:\n";
3877             foreach my $depth ( 0 .. $MAX_DEPTH ) {
3878             Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
3879             }
3880             };
3881              
3882             #------------------------------------------------------
3883             # Prune Tree Step 4. Make a list of nodes to be deleted
3884             #------------------------------------------------------
3885              
3886             # list of lines with tokens to be deleted:
3887             # [$jbeg, $jend, $level_keep]
3888             # $jbeg..$jend is the range of line indexes,
3889             # $level_keep is the minimum level to keep
3890 31         105 my @delete_list;
3891              
3892             # Not currently used:
3893             # Groups with ending comma lists and their range of sizes:
3894             # $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
3895             ## my %ragged_comma_group;
3896              
3897             # We work with a list of nodes to visit at the next deeper depth.
3898             my @todo_list;
3899 31 50       142 if ( defined( $match_tree[0] ) ) {
3900 31         89 @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
  31         115  
3901             }
3902              
3903 31         122 foreach my $depth ( 0 .. $MAX_DEPTH ) {
3904 86 100       264 last if ( !@todo_list );
3905 55         107 my @todo_next;
3906 55         125 foreach my $np (@todo_list) {
3907             my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
3908             $rindexes_p )
3909 92         165 = @{ $match_tree[$depth]->[$np] };
  92         344  
3910 92         182 my $nlines_p = $jend_p - $jbeg_p + 1;
3911              
3912             # nothing to do if no children
3913 92 100       267 next unless defined($nc_beg_p);
3914              
3915             # Define the number of lines to either keep or delete a child node.
3916             # This is the key decision we have to make. We want to delete
3917             # short runs of matched lines, and keep long runs. It seems easier
3918             # for the eye to follow breaks in monotonic level changes than
3919             # non-monotonic level changes. For example, the following looks
3920             # best if we delete the lower level alignments:
3921              
3922             # [1] ~~ [];
3923             # [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
3924             # [ qr/o/, qr/a/ ] ~~ [ ["foo"], ["bar"] ];
3925             # [ "foo", "bar" ] ~~ [ qr/o/, qr/a/ ];
3926             # [ qr/o/, qr/a/ ] ~~ [ "foo", "bar" ];
3927             # $deep1 ~~ $deep1;
3928              
3929             # So we will use two thresholds.
3930 35         97 my $nmin_mono = $depth + 2;
3931 35         77 my $nmin_non_mono = $depth + 6;
3932 35 100       153 if ( $nmin_mono > $nlines_p - 1 ) {
3933 21         47 $nmin_mono = $nlines_p - 1;
3934             }
3935 35 100       142 if ( $nmin_non_mono > $nlines_p - 1 ) {
3936 31         82 $nmin_non_mono = $nlines_p - 1;
3937             }
3938              
3939             # loop to keep or delete each child node
3940 35         114 foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
3941             my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
3942             $nc_end_c )
3943 50         89 = @{ $match_tree[ $depth + 1 ]->[$nc] };
  50         174  
3944 50         110 my $nlines_c = $jend_c - $jbeg_c + 1;
3945 50         93 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
3946 50 100       127 my $nmin = $is_monotonic ? $nmin_mono : $nmin_non_mono;
3947 50 100       151 if ( $nlines_c < $nmin ) {
3948             ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
3949 22         122 push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
3950             }
3951             else {
3952             ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
3953 28         96 push @todo_next, $nc;
3954             }
3955             }
3956             }
3957 55         513 @todo_list = @todo_next;
3958             } ## end loop to mark nodes to delete
3959              
3960             #------------------------------------------------------------
3961             # Prune Tree Step 5. Loop to delete selected alignment tokens
3962             #------------------------------------------------------------
3963 31         140 foreach my $item (@delete_list) {
3964 22         38 my ( $jbeg, $jend, $level_keep ) = @{$item};
  22         55  
3965 22         50 foreach my $jj ( $jbeg .. $jend ) {
3966 28         49 my $line = $rlines->[$jj];
3967 28         45 my @idel;
3968 28         48 my $rtokens = $line->{'rtokens'};
3969 28         37 my $imax = @{$rtokens} - 2;
  28         57  
3970 28         56 foreach my $i ( 0 .. $imax ) {
3971 152         218 my $tok = $rtokens->[$i];
3972 152         249 my ( $raw_tok, $lev, $tag, $tok_count ) =
3973             decode_alignment_token($tok);
3974 152 100       361 if ( $lev > $level_keep ) {
3975 83         153 push @idel, $i;
3976             }
3977             }
3978 28 50       80 if (@idel) {
3979 28         104 delete_selected_tokens( $line, \@idel );
3980             }
3981             }
3982             } ## end loop to delete selected alignment tokens
3983              
3984 31         390 return;
3985             } ## end sub prune_alignment_tree
3986              
3987             sub Dump_tree_groups {
3988 0     0 0 0 my ( $rgroup, $msg ) = @_;
3989              
3990             # Debug routine
3991 0         0 print "$msg\n";
3992 0         0 local $LIST_SEPARATOR = ')(';
3993 0         0 foreach my $item ( @{$rgroup} ) {
  0         0  
3994 0         0 my @fix = @{$item};
  0         0  
3995 0 0       0 foreach my $val (@fix) { $val = "undef" unless defined $val; }
  0         0  
3996 0         0 $fix[4] = "...";
3997 0         0 print "(@fix)\n";
3998             }
3999 0         0 return;
4000             } ## end sub Dump_tree_groups
4001              
4002             { ## closure for sub is_marginal_match
4003              
4004             my %is_if_or;
4005             my %is_assignment;
4006             my %is_good_alignment;
4007              
4008             # This test did not give sufficiently better results to use as an update,
4009             # but the flag is worth keeping as a starting point for future testing.
4010 39     39   363 use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
  39         108  
  39         6292  
4011              
4012             BEGIN {
4013              
4014 39     39   237 my @q = qw(
4015             if unless or ||
4016             );
4017 39         210 @is_if_or{@q} = (1) x scalar(@q);
4018              
4019 39         213 @q = qw(
4020             = **= += *= &= <<= &&=
4021             -= /= |= >>= ||= //=
4022             .= %= ^=
4023             x=
4024             );
4025 39         587 @is_assignment{@q} = (1) x scalar(@q);
4026              
4027             # Vertically aligning on certain "good" tokens is usually okay
4028             # so we can be less restrictive in marginal cases.
4029 39         235 @q = qw( { ? => = );
4030 39         107 push @q, (',');
4031 39         227253 @is_good_alignment{@q} = (1) x scalar(@q);
4032             } ## end BEGIN
4033              
4034             sub is_marginal_match {
4035              
4036 256     256 0 852 my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
4037              
4038             # Decide if we should undo some or all of the common alignments of a
4039             # group of just two lines.
4040              
4041             # Given:
4042             # $line_0 and $line_1 - the two lines
4043             # $group_level = the indentation level of the group being processed
4044             # $imax_align = the maximum index of the common alignment tokens
4045             # of the two lines
4046             # $imax_prev = the maximum index of the common alignment tokens
4047             # with the line before $line_0 (=-1 of does not exist)
4048              
4049             # Return:
4050             # $is_marginal = true if the two lines should NOT be fully aligned
4051             # = false if the two lines can remain fully aligned
4052             # $imax_align = the index of the highest alignment token shared by
4053             # these two lines to keep if the match is marginal.
4054              
4055             # When we have an alignment group of just two lines like this, we are
4056             # working in the twilight zone of what looks good and what looks bad.
4057             # This routine is a collection of rules which work have been found to
4058             # work fairly well, but it will need to be updated from time to time.
4059              
4060 256         515 my $is_marginal = 0;
4061              
4062             #---------------------------------------
4063             # Always align certain special cases ...
4064             #---------------------------------------
4065 256 100 100     2269 if (
      100        
4066              
4067             # always keep alignments of a terminal else or ternary
4068             defined( $line_1->{'j_terminal_match'} )
4069              
4070             # always align lists
4071             || $line_0->{'list_type'}
4072              
4073             # always align hanging side comments
4074             || $line_1->{'is_hanging_side_comment'}
4075              
4076             )
4077             {
4078 127         435 return ( $is_marginal, $imax_align );
4079             }
4080              
4081 129         366 my $jmax_0 = $line_0->{'jmax'};
4082 129         1008 my $jmax_1 = $line_1->{'jmax'};
4083 129         333 my $rtokens_1 = $line_1->{'rtokens'};
4084 129         282 my $rtokens_0 = $line_0->{'rtokens'};
4085 129         275 my $rfield_lengths_0 = $line_0->{'rfield_lengths'};
4086 129         290 my $rfield_lengths_1 = $line_1->{'rfield_lengths'};
4087 129         283 my $rpatterns_0 = $line_0->{'rpatterns'};
4088 129         285 my $rpatterns_1 = $line_1->{'rpatterns'};
4089 129         305 my $imax_next = $line_1->{'imax_pair'};
4090              
4091             # We will scan the alignment tokens and set a flag '$is_marginal' if
4092             # it seems that the an alignment would look bad.
4093 129         314 my $max_pad = 0;
4094 129         262 my $saw_good_alignment = 0;
4095 129         261 my $saw_if_or; # if we saw an 'if' or 'or' at group level
4096 129         308 my $raw_tokb = EMPTY_STRING; # first token seen at group level
4097 129         444 my $jfirst_bad;
4098             my $line_ending_fat_comma; # is last token just a '=>' ?
4099 129         0 my $j0_eq_pad;
4100 129         296 my $j0_max_pad = 0;
4101              
4102 129         489 foreach my $j ( 0 .. $jmax_1 - 2 ) {
4103 162         535 my ( $raw_tok, $lev, $tag, $tok_count ) =
4104             decode_alignment_token( $rtokens_1->[$j] );
4105 162 100 66     1069 if ( $raw_tok && $lev == $group_level ) {
4106 140 100       509 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
  119         295  
4107 140   100     692 $saw_if_or ||= $is_if_or{$raw_tok};
4108             }
4109              
4110             # When the first of the two lines ends in a bare '=>' this will
4111             # probably be marginal match. (For a bare =>, the next field length
4112             # will be 2 or 3, depending on side comment)
4113             $line_ending_fat_comma =
4114 162   100     1082 $j == $jmax_1 - 2
4115             && $raw_tok eq '=>'
4116             && $rfield_lengths_0->[ $j + 1 ] <= 3;
4117              
4118 162         465 my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
4119 162 100       516 if ( $j == 0 ) {
4120             $pad += $line_1->{'leading_space_count'} -
4121 124         386 $line_0->{'leading_space_count'};
4122              
4123             # Remember the pad at a leading equals
4124 124 100 66     679 if ( $raw_tok eq '=' && $lev == $group_level ) {
4125 73         163 $j0_eq_pad = $pad;
4126 73         276 $j0_max_pad =
4127             0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
4128 73 100       299 $j0_max_pad = 4 if ( $j0_max_pad < 4 );
4129             }
4130             }
4131              
4132 162 100       518 if ( $pad < 0 ) { $pad = -$pad }
  36         117  
4133 162 100       496 if ( $pad > $max_pad ) { $max_pad = $pad }
  89         199  
4134 162 100 100     918 if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
4135 128         301 $saw_good_alignment = 1;
4136             }
4137             else {
4138 34 100       124 $jfirst_bad = $j unless defined($jfirst_bad);
4139             }
4140 162 100       687 if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
4141              
4142             # Flag this as a marginal match since patterns differ.
4143             # Normally, we will not allow just two lines to match if
4144             # marginal. But we can allow matching in some specific cases.
4145              
4146 33 100       133 $jfirst_bad = $j if ( !defined($jfirst_bad) );
4147 33 50       125 $is_marginal = 1 if ( $is_marginal == 0 );
4148 33 100       139 if ( $raw_tok eq '=' ) {
4149              
4150             # Here is an example of a marginal match:
4151             # $done{$$op} = 1;
4152             # $op = compile_bblock($op);
4153             # The left tokens are both identifiers, but
4154             # one accesses a hash and the other doesn't.
4155             # We'll let this be a tentative match and undo
4156             # it later if we don't find more than 2 lines
4157             # in the group.
4158 12         37 $is_marginal = 2;
4159             }
4160             }
4161             }
4162              
4163 129 50 66     733 $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
4164              
4165             # Turn off the "marginal match" flag in some cases...
4166             # A "marginal match" occurs when the alignment tokens agree
4167             # but there are differences in the other tokens (patterns).
4168             # If we leave the marginal match flag set, then the rule is that we
4169             # will align only if there are more than two lines in the group.
4170             # We will turn of the flag if we almost have a match
4171             # and either we have seen a good alignment token or we
4172             # just need a small pad (2 spaces) to fit. These rules are
4173             # the result of experimentation. Tokens which misaligned by just
4174             # one or two characters are annoying. On the other hand,
4175             # large gaps to less important alignment tokens are also annoying.
4176 129 100 100     541 if ( $is_marginal == 1
      100        
4177             && ( $saw_good_alignment || $max_pad < 3 ) )
4178             {
4179 17         47 $is_marginal = 0;
4180             }
4181              
4182             # We will use the line endings to help decide on alignments...
4183             # See if the lines end with semicolons...
4184 129         351 my $sc_term0;
4185             my $sc_term1;
4186 129 50 33     739 if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
4187              
4188             # shouldn't happen
4189             }
4190             else {
4191 129         412 my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
4192 129         387 my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
4193 129         838 $sc_term0 = $pat0 =~ /;b?$/;
4194 129         580 $sc_term1 = $pat1 =~ /;b?$/;
4195             }
4196              
4197 129 100 100     820 if ( !$is_marginal && !$sc_term0 ) {
4198              
4199             # First line of assignment should be semicolon terminated.
4200             # For example, do not align here:
4201             # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4202             # $$href{-NUM_DIRS} = 0;
4203 30 100       177 if ( $is_assignment{$raw_tokb} ) {
4204 1         4 $is_marginal = 1;
4205             }
4206             }
4207              
4208             # Try to avoid some undesirable alignments of opening tokens
4209             # for example, the space between grep and { here:
4210             # return map { ( $_ => $_ ) }
4211             # grep { /$handles/ } $self->_get_delegate_method_list;
4212             $is_marginal ||=
4213 129   100     1382 ( $raw_tokb eq '(' || $raw_tokb eq '{' )
      100        
4214             && $jmax_1 == 2
4215             && $sc_term0 ne $sc_term1;
4216              
4217             #---------------------------------------
4218             # return if this is not a marginal match
4219             #---------------------------------------
4220 129 100       442 if ( !$is_marginal ) {
4221 111         534 return ( $is_marginal, $imax_align );
4222             }
4223              
4224             # Undo the marginal match flag in certain cases,
4225              
4226             # Two lines with a leading equals-like operator are allowed to
4227             # align if the patterns to the left of the equals are the same.
4228             # For example the following two lines are a marginal match but have
4229             # the same left side patterns, so we will align the equals.
4230             # my $orig = my $format = "^<<<<< ~~\n";
4231             # my $abc = "abc";
4232             # But these have a different left pattern so they will not be
4233             # aligned
4234             # $xmldoc .= $`;
4235             # $self->{'leftovers'} .= "<bx-seq:seq" . $';
4236              
4237             # First line semicolon terminated but second not, usually ok:
4238             # my $want = "'ab', 'a', 'b'";
4239             # my $got = join( ", ",
4240             # map { defined($_) ? "'$_'" : "undef" }
4241             # @got );
4242             # First line not semicolon terminated, Not OK to match:
4243             # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4244             # $$href{-NUM_DIRS} = 0;
4245 18         54 my $pat0 = $rpatterns_0->[0];
4246 18         46 my $pat1 = $rpatterns_1->[0];
4247              
4248             #---------------------------------------------------------
4249             # Turn off the marginal flag for some types of assignments
4250             #---------------------------------------------------------
4251 18 100       98 if ( $is_assignment{$raw_tokb} ) {
    50          
    50          
4252              
4253             # undo marginal flag if first line is semicolon terminated
4254             # and leading patters match
4255 13 100       43 if ($sc_term0) { # && $sc_term1) {
4256 12         27 $is_marginal = $pat0 ne $pat1;
4257             }
4258             }
4259             elsif ( $raw_tokb eq '=>' ) {
4260              
4261             # undo marginal flag if patterns match
4262 0   0     0 $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
4263             }
4264             elsif ( $raw_tokb eq '=~' ) {
4265              
4266             # undo marginal flag if both lines are semicolon terminated
4267             # and leading patters match
4268 0 0 0     0 if ( $sc_term1 && $sc_term0 ) {
4269 0         0 $is_marginal = $pat0 ne $pat1;
4270             }
4271             }
4272             else {
4273             ##ok: (none of the above)
4274             }
4275              
4276             #-----------------------------------------------------
4277             # Turn off the marginal flag if we saw an 'if' or 'or'
4278             #-----------------------------------------------------
4279              
4280             # A trailing 'if' and 'or' often gives a good alignment
4281             # For example, we can align these:
4282             # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/;
4283             # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
4284              
4285             # or
4286             # $d_in_m[2] = 29 if ( &Date_LeapYear($y) );
4287             # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] );
4288              
4289 18 100       70 if ($saw_if_or) {
4290              
4291             # undo marginal flag if both lines are semicolon terminated
4292 4 50 33     26 if ( $sc_term0 && $sc_term1 ) {
4293 4         11 $is_marginal = 0;
4294             }
4295             }
4296              
4297             # For a marginal match, only keep matches before the first 'bad' match
4298 18 50 100     146 if ( $is_marginal
      66        
4299             && defined($jfirst_bad)
4300             && $imax_align > $jfirst_bad - 1 )
4301             {
4302 0         0 $imax_align = $jfirst_bad - 1;
4303             }
4304              
4305             #----------------------------------------------------------
4306             # Allow sweep to match lines with leading '=' in some cases
4307             #----------------------------------------------------------
4308 18 100 66     136 if ( $imax_align < 0 && defined($j0_eq_pad) ) {
4309              
4310 13 0 50     127 if (
      33        
      33        
4311              
4312             # If there is a following line with leading equals, or
4313             # preceding line with leading equals, then let the sweep align
4314             # them without restriction. For example, the first two lines
4315             # here are a marginal match, but they are followed by a line
4316             # with leading equals, so the sweep-lr logic can align all of
4317             # the lines:
4318              
4319             # $date[1] = $month_to_num{ $date[1] }; # <--line_0
4320             # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4321             # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4322             # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4323              
4324             # Likewise, if we reverse the two pairs we want the same result
4325              
4326             # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4327             # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4328             # $date[1] = $month_to_num{ $date[1] }; # <--line_0
4329             # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4330              
4331             (
4332             $imax_next >= 0
4333             || $imax_prev >= 0
4334             || TEST_MARGINAL_EQ_ALIGNMENT
4335             )
4336             && $j0_eq_pad >= -$j0_max_pad
4337             && $j0_eq_pad <= $j0_max_pad
4338             )
4339             {
4340              
4341             # But do not do this if there is a comma before the '='.
4342             # For example, the first two lines below have commas and
4343             # therefore are not allowed to align with lines 3 & 4:
4344              
4345             # my ( $x, $y ) = $self->Size(); #<--line_0
4346             # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
4347             # my $vx = $right - $left;
4348             # my $vy = $bottom - $top;
4349              
4350 0 0 0     0 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
4351 0         0 $imax_align = 0;
4352             }
4353             }
4354             }
4355              
4356 18         88 return ( $is_marginal, $imax_align );
4357             } ## end sub is_marginal_match
4358             } ## end closure for sub is_marginal_match
4359              
4360             sub get_extra_leading_spaces {
4361              
4362 376     376 0 1231 my ( $rlines, $rgroups ) = @_;
4363              
4364             #----------------------------------------------------------
4365             # Define any extra indentation space (for the -lp option).
4366             # Here is why:
4367             # If a list has side comments, sub scan_list must dump the
4368             # list before it sees everything. When this happens, it sets
4369             # the indentation to the standard scheme, but notes how
4370             # many spaces it would have liked to use. We may be able
4371             # to recover that space here in the event that all of the
4372             # lines of a list are back together again.
4373             #----------------------------------------------------------
4374              
4375 376 50 33     692 return 0 if ( !@{$rlines} || !@{$rgroups} );
  376         2550  
  376         1406  
4376              
4377 376         1065 my $object = $rlines->[0]->{'indentation'};
4378 376 100       1242 return 0 if ( !ref($object) );
4379 58         120 my $extra_leading_spaces = 0;
4380 58         205 my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
4381 58 100       241 return ($extra_leading_spaces) if ( !$extra_indentation_spaces_wanted );
4382              
4383 13         42 my $min_spaces = $extra_indentation_spaces_wanted;
4384 13 50       64 if ( $min_spaces > 0 ) { $min_spaces = 0 }
  13         28  
4385              
4386             # loop over all groups
4387 13         31 my $ng = -1;
4388 13         24 my $ngroups = @{$rgroups};
  13         31  
4389 13         35 foreach my $item ( @{$rgroups} ) {
  13         41  
4390 33         63 $ng++;
4391 33         95 my ( $jbeg, $jend ) = @{$item};
  33         81  
4392 33         94 foreach my $j ( $jbeg .. $jend ) {
4393 44 100       113 next if ( $j == 0 );
4394              
4395             # all indentation objects must be the same
4396 31 100       132 if ( $object != $rlines->[$j]->{'indentation'} ) {
4397 1         3 return 0;
4398             }
4399             }
4400              
4401             # find the maximum space without exceeding the line length for this group
4402 32         116 my $avail = $rlines->[$jbeg]->get_available_space_on_right();
4403 32 100       127 my $spaces =
4404             ( $avail > $extra_indentation_spaces_wanted )
4405             ? $extra_indentation_spaces_wanted
4406             : $avail;
4407              
4408             #--------------------------------------------------------
4409             # Note: min spaces can be negative; for example with -gnu
4410             # f(
4411             # do { 1; !!(my $x = bless []); }
4412             # );
4413             #--------------------------------------------------------
4414             # The following rule is needed to match older formatting:
4415             # For multiple groups, we will keep spaces non-negative.
4416             # For a single group, we will allow a negative space.
4417 32 50 66     141 if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
  0         0  
4418              
4419             # update the minimum spacing
4420 32 100 66     150 if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
4421 13         35 $extra_leading_spaces = $spaces;
4422             }
4423             }
4424              
4425             # update the indentation object because with -icp the terminal
4426             # ');' will use the same adjustment.
4427 12         176 $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
4428 12         34 return $extra_leading_spaces;
4429             } ## end sub get_extra_leading_spaces
4430              
4431             sub forget_side_comment {
4432 111     111 0 368 my ($self) = @_;
4433 111         283 $self->[_last_side_comment_column_] = 0;
4434 111         228 return;
4435             }
4436              
4437             sub is_good_side_comment_column {
4438 199     199 0 609 my ( $self, $line, $line_number, $level, $num5 ) = @_;
4439              
4440             # Upon encountering the first side comment of a group, decide if
4441             # a previous side comment should be forgotten. This involves
4442             # checking several rules.
4443              
4444             # Return true to KEEP old comment location
4445             # Return false to FORGET old comment location
4446 199         383 my $KEEP = 1;
4447 199         373 my $FORGET = 0;
4448              
4449 199         412 my $rfields = $line->{'rfields'};
4450 199         425 my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4451              
4452             # RULE1: Never forget comment before a hanging side comment
4453 199 100       556 return $KEEP if ($is_hanging_side_comment);
4454              
4455             # RULE2: Forget a side comment after a short line difference,
4456             # where 'short line difference' is computed from a formula.
4457             # Using a smooth formula helps minimize sudden large changes.
4458 189         470 my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
4459 189         473 my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
4460              
4461             # '$num5' is the number of comments in the first 5 lines after the first
4462             # comment. It is needed to keep a compact group of side comments from
4463             # being influenced by a more distant side comment.
4464 189 50       521 $num5 = 1 if ( !$num5 );
4465              
4466             # Some values:
4467              
4468             # $adiff $num5 $short_diff
4469             # 0 * 12
4470             # 1 1 6
4471             # 1 2 4
4472             # 1 3 3
4473             # 1 4 2
4474             # 2 1 4
4475             # 2 2 2
4476             # 2 3 1
4477             # 3 1 3
4478             # 3 2 1
4479              
4480 189         549 my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
4481              
4482 189 100 100     1057 return $FORGET
4483             if ( $line_diff > $short_diff
4484             || !$self->[_rOpts_valign_side_comments_] );
4485              
4486             # RULE3: Forget a side comment if this line is at lower level and
4487             # ends a block
4488 122         297 my $last_sc_level = $self->[_last_side_comment_level_];
4489             return $FORGET
4490             if ( $level < $last_sc_level
4491 122 100 100     688 && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
4492              
4493             # RULE 4: Forget the last side comment if this comment might join a cached
4494             # line ...
4495 104 100       503 if ( my $cached_line_type = get_cached_line_type() ) {
4496              
4497             # ... otherwise side comment alignment will get messed up.
4498             # For example, in the following test script
4499             # with using 'perltidy -sct -act=2', the last comment would try to
4500             # align with the previous and then be in the wrong column when
4501             # the lines are combined:
4502              
4503             # foreach $line (
4504             # [0, 1, 2], [3, 4, 5], [6, 7, 8], # rows
4505             # [0, 3, 6], [1, 4, 7], [2, 5, 8], # columns
4506             # [0, 4, 8], [2, 4, 6]
4507             # ) # diagonals
4508 4 50 33     26 return $FORGET
4509             if ( $cached_line_type == 2 || $cached_line_type == 4 );
4510             }
4511              
4512             # Otherwise, keep it alive
4513 104         276 return $KEEP;
4514             } ## end sub is_good_side_comment_column
4515              
4516             sub align_side_comments {
4517              
4518 199     199 0 541 my ( $self, $rlines, $rgroups ) = @_;
4519              
4520             # Align any side comments in this batch of lines
4521              
4522             # Given:
4523             # $rlines - the lines
4524             # $rgroups - the partition of the lines into groups
4525             #
4526             # We will be working group-by-group because all side comments
4527             # (real or fake) in each group are already aligned. So we just have
4528             # to make alignments between groups wherever possible.
4529              
4530             # An unusual aspect is that within each group we have aligned both real
4531             # and fake side comments. This has the consequence that the lengths of
4532             # long lines without real side comments can cause 'push' all side comments
4533             # to the right. This seems unusual, but testing with and without this
4534             # feature shows that it is usually better this way. Otherwise, side
4535             # comments can be hidden between long lines without side comments and
4536             # thus be harder to read.
4537              
4538 199         476 my $group_level = $self->[_group_level_];
4539 199   100     842 my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
4540             && $group_level == $self->[_last_level_written_];
4541              
4542             # Find groups with side comments, and remember the first nonblank comment
4543 199         437 my $j_sc_beg;
4544             my @todo;
4545 199         384 my $ng = -1;
4546 199         412 foreach my $item ( @{$rgroups} ) {
  199         509  
4547 312         502 $ng++;
4548 312         521 my ( $jbeg, $jend ) = @{$item};
  312         685  
4549 312         742 foreach my $j ( $jbeg .. $jend ) {
4550 346         684 my $line = $rlines->[$j];
4551 346         622 my $jmax = $line->{'jmax'};
4552 346 100       995 if ( $line->{'rfield_lengths'}->[$jmax] ) {
4553              
4554             # this group has a line with a side comment
4555 228         551 push @todo, $ng;
4556 228 100       936 if ( !defined($j_sc_beg) ) {
4557 199         377 $j_sc_beg = $j;
4558             }
4559 228         558 last;
4560             }
4561             }
4562             }
4563              
4564             # done if no groups with side comments
4565 199 50       680 return unless @todo;
4566              
4567             # Count $num5 = number of comments in the 5 lines after the first comment
4568             # This is an important factor in a decision formula
4569 199         421 my $num5 = 1;
4570 199         492 foreach my $jj ( $j_sc_beg + 1 .. @{$rlines} - 1 ) {
  199         652  
4571 194         357 my $ldiff = $jj - $j_sc_beg;
4572 194 100       520 last if ( $ldiff > 5 );
4573 190         334 my $line = $rlines->[$jj];
4574 190         321 my $jmax = $line->{'jmax'};
4575 190         375 my $sc_len = $line->{'rfield_lengths'}->[$jmax];
4576 190 100       451 next if ( !$sc_len );
4577 121         272 $num5++;
4578             }
4579              
4580             # Forget the old side comment location if necessary
4581 199         572 my $line_0 = $rlines->[$j_sc_beg];
4582 199         1237 my $lnum =
4583             $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
4584 199         1821 my $keep_it =
4585             $self->is_good_side_comment_column( $line_0, $lnum, $group_level, $num5 );
4586 199 100       617 my $last_side_comment_column =
4587             $keep_it ? $self->[_last_side_comment_column_] : 0;
4588              
4589             # If there are multiple groups we will do two passes
4590             # so that we can find a common alignment for all groups.
4591 199 100       601 my $MAX_PASS = @todo > 1 ? 2 : 1;
4592              
4593             # Loop over passes
4594 199         386 my $max_comment_column = $last_side_comment_column;
4595 199         512 foreach my $PASS ( 1 .. $MAX_PASS ) {
4596              
4597             # If there are two passes, then on the last pass make the old column
4598             # equal to the largest of the group. This will result in the comments
4599             # being aligned if possible.
4600 223 100       664 if ( $PASS == $MAX_PASS ) {
4601 199         384 $last_side_comment_column = $max_comment_column;
4602             }
4603              
4604             # Loop over the groups with side comments
4605 223         365 my $column_limit;
4606 223         485 foreach my $ng (@todo) {
4607 281         504 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
  281         654  
4608              
4609             # Note that since all lines in a group have common alignments, we
4610             # just have to work on one of the lines (the first line).
4611 281         570 my $line = $rlines->[$jbeg];
4612 281         558 my $jmax = $line->{'jmax'};
4613 281         551 my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4614             last
4615 281 100 100     942 if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
4616              
4617             # the maximum space without exceeding the line length:
4618 277         1046 my $avail = $line->get_available_space_on_right();
4619              
4620             # try to use the previous comment column
4621 277         1029 my $side_comment_column = $line->get_column( $jmax - 1 );
4622 277         752 my $move = $last_side_comment_column - $side_comment_column;
4623              
4624             # Remember the maximum possible column of the first line with
4625             # side comment
4626 277 100       834 if ( !defined($column_limit) ) {
4627 223         440 $column_limit = $side_comment_column + $avail;
4628             }
4629              
4630 277 50       857 next if ( $jmax <= 0 );
4631              
4632             # but if this doesn't work, give up and use the minimum space
4633 277         561 my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
4634 277 100       731 if ( $move > $avail ) {
4635 13         27 $move = $min_move;
4636             }
4637              
4638             # but we want some minimum space to the comment
4639 277 100 100     1293 if ( $move >= 0
      100        
4640             && $j_sc_beg == 0
4641             && $continuing_sc_flow )
4642             {
4643 3         10 $min_move = 0;
4644             }
4645              
4646             # remove constraints on hanging side comments
4647 277 100       664 if ($is_hanging_side_comment) { $min_move = 0 }
  14         35  
4648              
4649 277 100       767 if ( $move < $min_move ) {
4650 194         319 $move = $min_move;
4651             }
4652              
4653             # don't exceed the available space
4654 277 100       680 if ( $move > $avail ) { $move = $avail }
  11         26  
4655              
4656             # We can only increase space, never decrease.
4657 277 100       730 if ( $move < 0 ) { $move = 0 }
  8         17  
4658              
4659             # Discover the largest column on the preliminary pass
4660 277 100       644 if ( $PASS < $MAX_PASS ) {
4661 49         145 my $col = $line->get_column( $jmax - 1 ) + $move;
4662              
4663             # but ignore columns too large for the starting line
4664 49 100 66     304 if ( $col > $max_comment_column && $col < $column_limit ) {
4665 23         64 $max_comment_column = $col;
4666             }
4667             }
4668              
4669             # Make the changes on the final pass
4670             else {
4671 228         932 $line->increase_field_width( $jmax - 1, $move );
4672              
4673             # remember this column for the next group
4674 228         791 $last_side_comment_column = $line->get_column( $jmax - 1 );
4675             }
4676             } ## end loop over groups
4677             } ## end loop over passes
4678              
4679             # Find the last side comment
4680 199         414 my $j_sc_last;
4681 199         443 my $ng_last = $todo[-1];
4682 199         351 my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
  199         509  
4683 199         652 foreach my $jj ( reverse( $jbeg .. $jend ) ) {
4684 201         450 my $line = $rlines->[$jj];
4685 201         384 my $jmax = $line->{'jmax'};
4686 201 100       614 if ( $line->{'rfield_lengths'}->[$jmax] ) {
4687 199         396 $j_sc_last = $jj;
4688 199         438 last;
4689             }
4690             }
4691              
4692             # Save final side comment info for possible use by the next batch
4693 199 50       588 if ( defined($j_sc_last) ) {
4694 199         674 my $line_number =
4695             $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
4696 199         457 $self->[_last_side_comment_column_] = $last_side_comment_column;
4697 199         381 $self->[_last_side_comment_line_number_] = $line_number;
4698 199         411 $self->[_last_side_comment_level_] = $group_level;
4699             }
4700 199         488 return;
4701             } ## end sub align_side_comments
4702              
4703             ###############################
4704             # CODE SECTION 6: Output Step A
4705             ###############################
4706              
4707             sub valign_output_step_A {
4708              
4709             #------------------------------------------------------------
4710             # This is Step A in writing vertically aligned lines.
4711             # The line is prepared according to the alignments which have
4712             # been found. Then it is shipped to the next step.
4713             #------------------------------------------------------------
4714              
4715 3065     3065 0 6637 my ( $self, $rinput_hash ) = @_;
4716              
4717 3065         5751 my $line = $rinput_hash->{line};
4718 3065         5072 my $min_ci_gap = $rinput_hash->{min_ci_gap};
4719 3065         4914 my $do_not_align = $rinput_hash->{do_not_align};
4720 3065         4811 my $group_leader_length = $rinput_hash->{group_leader_length};
4721 3065         4825 my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
4722 3065         4943 my $level = $rinput_hash->{level};
4723 3065         4921 my $maximum_line_length = $rinput_hash->{maximum_line_length};
4724              
4725 3065         5244 my $rfields = $line->{'rfields'};
4726 3065         4972 my $rfield_lengths = $line->{'rfield_lengths'};
4727 3065         4831 my $leading_space_count = $line->{'leading_space_count'};
4728 3065         5373 my $outdent_long_lines = $line->{'outdent_long_lines'};
4729 3065         5044 my $maximum_field_index = $line->{'jmax'};
4730 3065         4967 my $rvertical_tightness_flags = $line->{'rvertical_tightness_flags'};
4731 3065         4993 my $Kend = $line->{'Kend'};
4732 3065         5290 my $level_end = $line->{'level_end'};
4733              
4734             # Check for valid hash keys at end of lifetime of $line during development
4735 3065         4302 DEVEL_MODE
4736             && check_keys( $line, \%valid_LINE_keys,
4737             "Checking line keys at valign_output_step_A", 1 );
4738              
4739             # add any extra spaces
4740 3065 100       6706 if ( $leading_space_count > $group_leader_length ) {
4741 47         148 $leading_space_count += $min_ci_gap;
4742             }
4743              
4744 3065         6040 my $str = $rfields->[0];
4745 3065         4917 my $str_len = $rfield_lengths->[0];
4746              
4747 3065         4769 my @alignments = @{ $line->{'ralignments'} };
  3065         7293  
4748 3065 50       8241 if ( @alignments != $maximum_field_index + 1 ) {
4749              
4750             # Shouldn't happen: sub install_new_alignments makes jmax alignments
4751 0         0 my $jmax_alignments = @alignments - 1;
4752 0         0 if (DEVEL_MODE) {
4753             Fault(
4754             "alignment jmax=$jmax_alignments should equal $maximum_field_index\n"
4755             );
4756             }
4757 0         0 $do_not_align = 1;
4758             }
4759              
4760             # loop to concatenate all fields of this line and needed padding
4761 3065         5104 my $total_pad_count = 0;
4762 3065         6796 for my $j ( 1 .. $maximum_field_index ) {
4763              
4764             # skip zero-length side comments
4765             last
4766             if (
4767 7282 100 66     24262 ( $j == $maximum_field_index )
      100        
4768             && ( !defined( $rfields->[$j] )
4769             || ( $rfield_lengths->[$j] == 0 ) )
4770             );
4771              
4772             # compute spaces of padding before this field
4773 4542         9020 my $col = $alignments[ $j - 1 ]->{'column'};
4774 4542         7275 my $pad = $col - ( $str_len + $leading_space_count );
4775              
4776 4542 50       8574 if ($do_not_align) {
4777 0 0       0 $pad =
4778             ( $j < $maximum_field_index )
4779             ? 0
4780             : $self->[_rOpts_minimum_space_to_comment_] - 1;
4781             }
4782              
4783             # if the -fpsc flag is set, move the side comment to the selected
4784             # column if and only if it is possible, ignoring constraints on
4785             # line length and minimum space to comment
4786 4542 100 100     10291 if ( $self->[_rOpts_fixed_position_side_comment_]
4787             && $j == $maximum_field_index )
4788             {
4789 9         20 my $newpad =
4790             $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
4791 9 50       22 if ( $newpad >= 0 ) { $pad = $newpad; }
  9         12  
4792             }
4793              
4794             # accumulate the padding
4795 4542 100       9169 if ( $pad > 0 ) { $total_pad_count += $pad; }
  1321         2151  
4796              
4797             # only add padding when we have a finite field;
4798             # this avoids extra terminal spaces if we have empty fields
4799 4542 100       8731 if ( $rfield_lengths->[$j] > 0 ) {
4800 4531         9024 $str .= SPACE x $total_pad_count;
4801 4531         6305 $str_len += $total_pad_count;
4802 4531         6300 $total_pad_count = 0;
4803 4531         7702 $str .= $rfields->[$j];
4804 4531         7700 $str_len += $rfield_lengths->[$j];
4805             }
4806             else {
4807 11         27 $total_pad_count = 0;
4808             }
4809             }
4810              
4811 3065         5650 my $side_comment_length = $rfield_lengths->[$maximum_field_index];
4812              
4813             # ship this line off
4814 3065         25622 $self->valign_output_step_B(
4815             {
4816             leading_space_count => $leading_space_count + $extra_leading_spaces,
4817             line => $str,
4818             line_length => $str_len,
4819             side_comment_length => $side_comment_length,
4820             outdent_long_lines => $outdent_long_lines,
4821             rvertical_tightness_flags => $rvertical_tightness_flags,
4822             level => $level,
4823             level_end => $level_end,
4824             Kend => $Kend,
4825             maximum_line_length => $maximum_line_length,
4826             }
4827             );
4828 3065         14980 return;
4829             } ## end sub valign_output_step_A
4830              
4831             sub combine_fields {
4832              
4833             # We have a group of two lines for which we do not want to align tokens
4834             # between index $imax_align and the side comment. So we will delete fields
4835             # between $imax_align and the side comment. Alignments have already
4836             # been set so we have to adjust them.
4837              
4838 14     14 0 46 my ( $line_0, $line_1, $imax_align ) = @_;
4839              
4840 14 50       58 if ( !defined($imax_align) ) { $imax_align = -1 }
  0         0  
4841              
4842             # First delete the unwanted tokens
4843 14         45 my $jmax_old = $line_0->{'jmax'};
4844 14         56 my @idel = ( $imax_align + 1 .. $jmax_old - 2 );
4845 14 50       52 return if ( !@idel );
4846              
4847             # Get old alignments before any changes are made
4848 14         31 my @old_alignments = @{ $line_0->{'ralignments'} };
  14         45  
4849              
4850 14         53 foreach my $line ( $line_0, $line_1 ) {
4851 28         104 delete_selected_tokens( $line, \@idel );
4852             }
4853              
4854             # Now adjust the alignments. Note that the side comment alignment
4855             # is always at jmax-1, and there is an ending alignment at jmax.
4856 14         43 my @new_alignments;
4857 14 50       76 if ( $imax_align >= 0 ) {
4858 0         0 @new_alignments[ 0 .. $imax_align ] =
4859             @old_alignments[ 0 .. $imax_align ];
4860             }
4861              
4862 14         47 my $jmax_new = $line_0->{'jmax'};
4863              
4864 14         56 $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
4865 14         33 $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
4866 14         53 $line_0->{'ralignments'} = \@new_alignments;
4867 14         44 $line_1->{'ralignments'} = \@new_alignments;
4868 14         56 return;
4869             } ## end sub combine_fields
4870              
4871             sub get_output_line_number {
4872              
4873             # The output line number reported to a caller =
4874             # the number of items still in the buffer +
4875             # the number of items written.
4876 49     49 0 157 return $_[0]->group_line_count() +
4877             $_[0]->[_file_writer_object_]->get_output_line_number();
4878             } ## end sub get_output_line_number
4879              
4880             ###############################
4881             # CODE SECTION 7: Output Step B
4882             ###############################
4883              
4884             { ## closure for sub valign_output_step_B
4885              
4886             # These are values for a cache used by valign_output_step_B.
4887             my $cached_line_text;
4888             my $cached_line_text_length;
4889             my $cached_line_type;
4890             my $cached_line_opening_flag;
4891             my $cached_line_closing_flag;
4892             my $cached_seqno;
4893             my $cached_line_valid;
4894             my $cached_line_leading_space_count;
4895             my $cached_seqno_string;
4896             my $cached_line_Kend;
4897             my $cached_line_maximum_length;
4898              
4899             # These are passed to step_C:
4900             my $seqno_string;
4901             my $last_nonblank_seqno_string;
4902              
4903             sub set_last_nonblank_seqno_string {
4904 394     394 0 854 my ($val) = @_;
4905 394         648 $last_nonblank_seqno_string = $val;
4906 394         643 return;
4907             }
4908              
4909             sub get_cached_line_opening_flag {
4910 224     224 0 485 return $cached_line_opening_flag;
4911             }
4912              
4913             sub get_cached_line_type {
4914 7481     7481 0 13708 return $cached_line_type;
4915             }
4916              
4917             sub set_cached_line_valid {
4918 3     3 0 14 my ($val) = @_;
4919 3         7 $cached_line_valid = $val;
4920 3         9 return;
4921             }
4922              
4923             sub get_cached_seqno {
4924 224     224 0 512 return $cached_seqno;
4925             }
4926              
4927             sub initialize_step_B_cache {
4928              
4929             # valign_output_step_B cache:
4930 560     560 0 1887 $cached_line_text = EMPTY_STRING;
4931 560         1269 $cached_line_text_length = 0;
4932 560         1179 $cached_line_type = 0;
4933 560         1245 $cached_line_opening_flag = 0;
4934 560         1262 $cached_line_closing_flag = 0;
4935 560         1256 $cached_seqno = 0;
4936 560         1125 $cached_line_valid = 0;
4937 560         1121 $cached_line_leading_space_count = 0;
4938 560         1162 $cached_seqno_string = EMPTY_STRING;
4939 560         1199 $cached_line_Kend = undef;
4940 560         1099 $cached_line_maximum_length = undef;
4941              
4942             # These vars hold a string of sequence numbers joined together used by
4943             # the cache
4944 560         1405 $seqno_string = EMPTY_STRING;
4945 560         1316 $last_nonblank_seqno_string = EMPTY_STRING;
4946 560         1169 return;
4947             } ## end sub initialize_step_B_cache
4948              
4949             sub _flush_step_B_cache {
4950 1817     1817   3952 my ($self) = @_;
4951              
4952             # Send any text in the step_B cache on to step_C
4953 1817 100       4470 if ($cached_line_type) {
4954 1         4 $seqno_string = $cached_seqno_string;
4955 1         7 $self->valign_output_step_C(
4956             $seqno_string,
4957             $last_nonblank_seqno_string,
4958              
4959             $cached_line_text,
4960             $cached_line_leading_space_count,
4961             $self->[_last_level_written_],
4962             $cached_line_Kend,
4963             );
4964 1         2 $cached_line_type = 0;
4965 1         2 $cached_line_text = EMPTY_STRING;
4966 1         3 $cached_line_text_length = 0;
4967 1         3 $cached_seqno_string = EMPTY_STRING;
4968 1         2 $cached_line_Kend = undef;
4969 1         2 $cached_line_maximum_length = undef;
4970             }
4971 1817         3083 return;
4972             } ## end sub _flush_step_B_cache
4973              
4974             sub handle_cached_line {
4975              
4976 158     158 0 515 my ( $self, $rinput, $leading_string, $leading_string_length ) = @_;
4977              
4978             # The cached line will either be:
4979             # - passed along to step_C, or
4980             # - or combined with the current line
4981              
4982 158         327 my $last_level_written = $self->[_last_level_written_];
4983              
4984 158         302 my $leading_space_count = $rinput->{leading_space_count};
4985 158         347 my $str = $rinput->{line};
4986 158         282 my $str_length = $rinput->{line_length};
4987 158         293 my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
4988 158         300 my $level = $rinput->{level};
4989 158         302 my $level_end = $rinput->{level_end};
4990 158         292 my $maximum_line_length = $rinput->{maximum_line_length};
4991              
4992 158         345 my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
4993             $seqno_beg, $seqno_end );
4994 158 50       391 if ($rvertical_tightness_flags) {
4995              
4996 158         268 $open_or_close = $rvertical_tightness_flags->{_vt_type};
4997 158         332 $seqno_beg = $rvertical_tightness_flags->{_vt_seqno_beg};
4998             }
4999              
5000             # Dump an invalid cached line
5001 158 100 100     714 if ( !$cached_line_valid ) {
    100          
5002 91         280 $self->valign_output_step_C(
5003             $seqno_string,
5004             $last_nonblank_seqno_string,
5005              
5006             $cached_line_text,
5007             $cached_line_leading_space_count,
5008             $last_level_written,
5009             $cached_line_Kend,
5010             );
5011             }
5012              
5013             # Handle cached line ending in OPENING tokens
5014             elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
5015              
5016 30         94 my $gap = $leading_space_count - $cached_line_text_length;
5017              
5018             # handle option of just one tight opening per line:
5019 30 100       96 if ( $cached_line_opening_flag == 1 ) {
5020 14 50 33     61 if ( defined($open_or_close) && $open_or_close == 1 ) {
5021 0         0 $gap = -1;
5022             }
5023             }
5024              
5025             # Do not join the lines if this might produce a one-line
5026             # container which exceeds the maximum line length. This is
5027             # necessary prevent blinking, particularly with the combination
5028             # -xci -pvt=2. In that case a one-line block alternately forms
5029             # and breaks, causing -xci to alternately turn on and off (case
5030             # b765).
5031             # Patched to fix cases b656 b862 b971 b972: always do the check
5032             # if the maximum line length changes (due to -vmll).
5033 30 50 33     252 if (
      66        
5034             $gap >= 0
5035             && ( $maximum_line_length != $cached_line_maximum_length
5036             || ( defined($level_end) && $level > $level_end ) )
5037             )
5038             {
5039 0         0 my $test_line_length =
5040             $cached_line_text_length + $gap + $str_length;
5041              
5042             # Add a small tolerance in the length test (fixes case b862)
5043 0 0       0 if ( $test_line_length > $cached_line_maximum_length - 2 ) {
5044 0         0 $gap = -1;
5045             }
5046             }
5047              
5048 30 100 66     148 if ( $gap >= 0 && defined($seqno_beg) ) {
5049 18         48 $maximum_line_length = $cached_line_maximum_length;
5050 18         62 $leading_string = $cached_line_text . SPACE x $gap;
5051 18         33 $leading_string_length = $cached_line_text_length + $gap;
5052 18         34 $leading_space_count = $cached_line_leading_space_count;
5053 18         66 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5054 18         49 $level = $last_level_written;
5055             }
5056             else {
5057 12         44 $self->valign_output_step_C(
5058             $seqno_string,
5059             $last_nonblank_seqno_string,
5060              
5061             $cached_line_text,
5062             $cached_line_leading_space_count,
5063             $last_level_written,
5064             $cached_line_Kend,
5065             );
5066             }
5067             }
5068              
5069             # Handle cached line ending in CLOSING tokens
5070             else {
5071 37         196 my $test_line =
5072             $cached_line_text . SPACE x $cached_line_closing_flag . $str;
5073 37         94 my $test_line_length =
5074             $cached_line_text_length +
5075             $cached_line_closing_flag +
5076             $str_length;
5077 37 100 66     480 if (
      66        
      100        
5078              
5079             # The new line must start with container
5080             $seqno_beg
5081              
5082             # The container combination must be okay..
5083             && (
5084              
5085             # okay to combine like types
5086             ( $open_or_close == $cached_line_type )
5087              
5088             # closing block brace may append to non-block
5089             || ( $cached_line_type == 2 && $open_or_close == 4 )
5090              
5091             # something like ');'
5092             || ( !$open_or_close && $cached_line_type == 2 )
5093              
5094             )
5095              
5096             # The combined line must fit
5097             && ( $test_line_length <= $cached_line_maximum_length )
5098             )
5099             {
5100              
5101 33         91 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5102              
5103             # Patch to outdent closing tokens ending # in ');' If we
5104             # are joining a line like ');' to a previous stacked set of
5105             # closing tokens, then decide if we may outdent the
5106             # combined stack to the indentation of the ');'. Since we
5107             # should not normally outdent any of the other tokens more
5108             # than the indentation of the lines that contained them, we
5109             # will only do this if all of the corresponding opening
5110             # tokens were on the same line. This can happen with -sot
5111             # and -sct.
5112              
5113             # For example, it is ok here:
5114             # __PACKAGE__->load_components( qw(
5115             # PK::Auto
5116             # Core
5117             # ));
5118             #
5119             # But, for example, we do not outdent in this example
5120             # because that would put the closing sub brace out farther
5121             # than the opening sub brace:
5122             #
5123             # perltidy -sot -sct
5124             # $c->Tk::bind(
5125             # '<Control-f>' => sub {
5126             # my ($c) = @_;
5127             # my $e = $c->XEvent;
5128             # itemsUnderArea $c;
5129             # } );
5130             #
5131 33 100 100     310 if ( $str =~ /^\);/
5132             && $cached_line_text =~ /^[\)\}\]\s]*$/ )
5133             {
5134              
5135             # The way to tell this is if the stacked sequence
5136             # numbers of this output line are the reverse of the
5137             # stacked sequence numbers of the previous non-blank
5138             # line of sequence numbers. So we can join if the
5139             # previous nonblank string of tokens is the mirror
5140             # image. For example if stack )}] is 13:8:6 then we
5141             # are looking for a leading stack like [{( which
5142             # is 6:8:13. We only need to check the two ends,
5143             # because the intermediate tokens must fall in order.
5144             # Note on speed: having to split on colons and
5145             # eliminate multiple colons might appear to be slow,
5146             # but it's not an issue because we almost never come
5147             # through here. In a typical file we don't.
5148              
5149 4         19 $seqno_string =~ s/^:+//;
5150 4         11 $last_nonblank_seqno_string =~ s/^:+//;
5151 4         18 $seqno_string =~ s/:+/:/g;
5152 4         18 $last_nonblank_seqno_string =~ s/:+/:/g;
5153              
5154             # how many spaces can we outdent?
5155 4         9 my $diff =
5156             $cached_line_leading_space_count - $leading_space_count;
5157 4 100 33     35 if ( $diff > 0
      66        
5158             && length($seqno_string)
5159             && length($last_nonblank_seqno_string) ==
5160             length($seqno_string) )
5161             {
5162 3         21 my @seqno_last =
5163             ( split /:/, $last_nonblank_seqno_string );
5164 3         11 my @seqno_now = ( split /:/, $seqno_string );
5165 3 50 33     49 if ( @seqno_now
      33        
      33        
5166             && @seqno_last
5167             && $seqno_now[-1] == $seqno_last[0]
5168             && $seqno_now[0] == $seqno_last[-1] )
5169             {
5170              
5171             # OK to outdent ..
5172             # for absolute safety, be sure we only remove
5173             # whitespace
5174 3         10 my $ws = substr( $test_line, 0, $diff );
5175 3 50 33     33 if ( ( length($ws) == $diff )
5176             && $ws =~ /^\s+$/ )
5177             {
5178              
5179 3         8 $test_line = substr( $test_line, $diff );
5180 3         9 $cached_line_leading_space_count -= $diff;
5181 3         16 $last_level_written =
5182             $self->level_change(
5183             $cached_line_leading_space_count,
5184             $diff, $last_level_written );
5185 3         20 $self->reduce_valign_buffer_indentation($diff);
5186             }
5187              
5188             # shouldn't happen, but not critical:
5189             ##else {
5190             ## ERROR transferring indentation here
5191             ##}
5192             }
5193             }
5194             }
5195              
5196             # Change the args to look like we received the combined line
5197 33         79 $str = $test_line;
5198 33         67 $str_length = $test_line_length;
5199 33         70 $leading_string = EMPTY_STRING;
5200 33         63 $leading_string_length = 0;
5201 33         66 $leading_space_count = $cached_line_leading_space_count;
5202 33         64 $level = $last_level_written;
5203 33         75 $maximum_line_length = $cached_line_maximum_length;
5204             }
5205             else {
5206 4         26 $self->valign_output_step_C(
5207             $seqno_string,
5208             $last_nonblank_seqno_string,
5209              
5210             $cached_line_text,
5211             $cached_line_leading_space_count,
5212             $last_level_written,
5213             $cached_line_Kend,
5214             );
5215             }
5216             }
5217 158         927 return ( $str, $str_length, $leading_string, $leading_string_length,
5218             $leading_space_count, $level, $maximum_line_length, );
5219              
5220             } ## end sub handle_cached_line
5221              
5222             sub valign_output_step_B {
5223              
5224             #---------------------------------------------------------
5225             # This is Step B in writing vertically aligned lines.
5226             # Vertical tightness is applied according to preset flags.
5227             # In particular this routine handles stacking of opening
5228             # and closing tokens.
5229             #---------------------------------------------------------
5230              
5231 7376     7376 0 14702 my ( $self, $rinput ) = @_;
5232              
5233 7376         13611 my $leading_space_count = $rinput->{leading_space_count};
5234 7376         12755 my $str = $rinput->{line};
5235 7376         11428 my $str_length = $rinput->{line_length};
5236 7376         10866 my $side_comment_length = $rinput->{side_comment_length};
5237 7376         11982 my $outdent_long_lines = $rinput->{outdent_long_lines};
5238 7376         11096 my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
5239 7376         11671 my $level = $rinput->{level};
5240 7376         11365 my $level_end = $rinput->{level_end};
5241 7376         11114 my $Kend = $rinput->{Kend};
5242 7376         10914 my $maximum_line_length = $rinput->{maximum_line_length};
5243              
5244             # Useful -gcs test cases for wide characters are
5245             # perl527/(method.t.2, reg_mesg.t, mime-header.t)
5246              
5247             # handle outdenting of long lines:
5248 7376         10882 my $is_outdented_line;
5249 7376 100       14985 if ($outdent_long_lines) {
5250 276         776 my $excess =
5251             $str_length -
5252             $side_comment_length +
5253             $leading_space_count -
5254             $maximum_line_length;
5255 276 100       941 if ( $excess > 0 ) {
5256 10         22 $leading_space_count = 0;
5257 10         34 my $file_writer_object = $self->[_file_writer_object_];
5258 10         45 my $last_outdented_line_at =
5259             $file_writer_object->get_output_line_number();
5260 10         25 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
5261              
5262 10         23 my $outdented_line_count = $self->[_outdented_line_count_];
5263 10 100       34 if ( !$outdented_line_count ) {
5264 3         10 $self->[_first_outdented_line_at_] =
5265             $last_outdented_line_at;
5266             }
5267 10         20 $outdented_line_count++;
5268 10         16 $self->[_outdented_line_count_] = $outdented_line_count;
5269 10         20 $is_outdented_line = 1;
5270             }
5271             }
5272              
5273             # Make preliminary leading whitespace. It could get changed
5274             # later by entabbing, so we have to keep track of any changes
5275             # to the leading_space_count from here on.
5276 7376 100       18705 my $leading_string =
5277             $leading_space_count > 0
5278             ? ( SPACE x $leading_space_count )
5279             : EMPTY_STRING;
5280 7376         11777 my $leading_string_length = length($leading_string);
5281              
5282             # Unpack any recombination data; it was packed by
5283             # sub 'Formatter::set_vertical_tightness_flags'
5284              
5285             # old hash Meaning
5286             # index key
5287             #
5288             # 0 _vt_type: 1=opening non-block 2=closing non-block
5289             # 3=opening block brace 4=closing block brace
5290             #
5291             # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
5292             # 1b _vt_closing_flag: spaces of padding to use if closing
5293             # 2 _vt_seqno: sequence number of container
5294             # 3 _vt_valid flag: do not append if this flag is false. Will be
5295             # true if appropriate -vt flag is set. Otherwise, Will be
5296             # made true only for 2 line container in parens with -lp
5297             # 4 _vt_seqno_beg: sequence number of first token of line
5298             # 5 _vt_seqno_end: sequence number of last token of line
5299             # 6 _vt_min_lines: min number of lines for joining opening cache,
5300             # 0=no constraint
5301             # 7 _vt_max_lines: max number of lines for joining opening cache,
5302             # 0=no constraint
5303              
5304 7376         12521 my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
5305             $seqno_beg, $seqno_end );
5306 7376 100       14675 if ($rvertical_tightness_flags) {
5307              
5308 1308         2268 $open_or_close = $rvertical_tightness_flags->{_vt_type};
5309 1308         2071 $opening_flag = $rvertical_tightness_flags->{_vt_opening_flag};
5310 1308         2001 $closing_flag = $rvertical_tightness_flags->{_vt_closing_flag};
5311 1308         2112 $seqno = $rvertical_tightness_flags->{_vt_seqno};
5312 1308         2085 $valid = $rvertical_tightness_flags->{_vt_valid_flag};
5313 1308         2151 $seqno_beg = $rvertical_tightness_flags->{_vt_seqno_beg};
5314 1308         2087 $seqno_end = $rvertical_tightness_flags->{_vt_seqno_end};
5315             }
5316              
5317 7376         11590 $seqno_string = $seqno_end;
5318              
5319             # handle any cached line ..
5320             # either append this line to it or write it out
5321             # Note: the function length() is used in this next test out of caution.
5322             # All testing has shown that the variable $cached_line_text_length is
5323             # correct, but its calculation is complex and a loss of cached text
5324             # would be a disaster.
5325 7376 100       15286 if ( length($cached_line_text) ) {
5326              
5327             (
5328 158         843 $str,
5329             $str_length,
5330             $leading_string,
5331             $leading_string_length,
5332             $leading_space_count,
5333             $level,
5334             $maximum_line_length
5335              
5336             ) = $self->handle_cached_line( $rinput, $leading_string,
5337             $leading_string_length );
5338              
5339 158         343 $cached_line_type = 0;
5340 158         319 $cached_line_text = EMPTY_STRING;
5341 158         280 $cached_line_text_length = 0;
5342 158         269 $cached_line_Kend = undef;
5343 158         249 $cached_line_maximum_length = undef;
5344              
5345             }
5346              
5347             # make the line to be written
5348 7376         15627 my $line = $leading_string . $str;
5349 7376         11841 my $line_length = $leading_string_length + $str_length;
5350              
5351             # Safety check: be sure that a line to be cached as a stacked block
5352             # brace line ends in the appropriate opening or closing block brace.
5353             # This should always be the case if the caller set flags correctly.
5354             # Code '3' is for -sobb, code '4' is for -scbb.
5355 7376 100       14109 if ($open_or_close) {
5356 159 50 66     1205 if ( $open_or_close == 3 && $line !~ /\{\s*$/
      66        
      33        
5357             || $open_or_close == 4 && $line !~ /\}\s*$/ )
5358             {
5359 0         0 $open_or_close = 0;
5360             }
5361             }
5362              
5363             # write or cache this line ...
5364             # fix for case b999: do not cache an outdented line
5365             # fix for b1378: do not cache an empty line
5366 7376 100 66     21155 if ( !$open_or_close
      66        
      33        
5367             || $side_comment_length > 0
5368             || $is_outdented_line
5369             || !$line_length )
5370             {
5371 7217         18101 $self->valign_output_step_C(
5372             $seqno_string,
5373             $last_nonblank_seqno_string,
5374              
5375             $line,
5376             $leading_space_count,
5377             $level,
5378             $Kend,
5379             );
5380             }
5381             else {
5382 159         332 $cached_line_text = $line;
5383 159         311 $cached_line_text_length = $line_length;
5384 159         290 $cached_line_type = $open_or_close;
5385 159         352 $cached_line_opening_flag = $opening_flag;
5386 159         286 $cached_line_closing_flag = $closing_flag;
5387 159         284 $cached_seqno = $seqno;
5388 159         279 $cached_line_valid = $valid;
5389 159         256 $cached_line_leading_space_count = $leading_space_count;
5390 159         283 $cached_seqno_string = $seqno_string;
5391 159         246 $cached_line_Kend = $Kend;
5392 159         258 $cached_line_maximum_length = $maximum_line_length;
5393             }
5394              
5395 7376         12590 $self->[_last_level_written_] = $level;
5396 7376         11411 $self->[_last_side_comment_length_] = $side_comment_length;
5397 7376         16221 return;
5398             } ## end sub valign_output_step_B
5399             }
5400              
5401             ###############################
5402             # CODE SECTION 8: Output Step C
5403             ###############################
5404              
5405             { ## closure for sub valign_output_step_C
5406              
5407             # Vertical alignment buffer used by valign_output_step_C
5408             my $valign_buffer_filling;
5409             my @valign_buffer;
5410              
5411             sub initialize_valign_buffer {
5412 560     560 0 1621 @valign_buffer = ();
5413 560         1373 $valign_buffer_filling = EMPTY_STRING;
5414 560         1091 return;
5415             }
5416              
5417             sub dump_valign_buffer {
5418 1819     1819 0 3593 my ($self) = @_;
5419              
5420             # Send all lines in the current buffer on to step_D
5421 1819 100       4682 if (@valign_buffer) {
5422 2         7 foreach (@valign_buffer) {
5423 7         12 $self->valign_output_step_D( @{$_} );
  7         21  
5424             }
5425 2         11 @valign_buffer = ();
5426             }
5427 1819         3509 $valign_buffer_filling = EMPTY_STRING;
5428 1819         3021 return;
5429             } ## end sub dump_valign_buffer
5430              
5431             sub reduce_valign_buffer_indentation {
5432              
5433 3     3 0 12 my ( $self, $diff ) = @_;
5434              
5435             # Reduce the leading indentation of lines in the current
5436             # buffer by $diff spaces
5437 3 100 66     17 if ( $valign_buffer_filling && $diff ) {
5438 2         7 my $max_valign_buffer = @valign_buffer;
5439 2         10 foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
5440             my ( $line, $leading_space_count, $level, $Kend ) =
5441 7         11 @{ $valign_buffer[$i] };
  7         18  
5442 7         19 my $ws = substr( $line, 0, $diff );
5443 7 50 33     42 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
5444 7         18 $line = substr( $line, $diff );
5445             }
5446 7 50       16 if ( $leading_space_count >= $diff ) {
5447 7         13 $leading_space_count -= $diff;
5448 7         27 $level =
5449             $self->level_change( $leading_space_count, $diff,
5450             $level );
5451             }
5452 7         49 $valign_buffer[$i] =
5453             [ $line, $leading_space_count, $level, $Kend ];
5454             }
5455             }
5456 3         26 return;
5457             } ## end sub reduce_valign_buffer_indentation
5458              
5459             sub valign_output_step_C {
5460              
5461             #-----------------------------------------------------------------------
5462             # This is Step C in writing vertically aligned lines.
5463             # Lines are either stored in a buffer or passed along to the next step.
5464             # The reason for storing lines is that we may later want to reduce their
5465             # indentation when -sot and -sct are both used.
5466             #-----------------------------------------------------------------------
5467             my (
5468 7325     7325 0 20880 $self,
5469             $seqno_string,
5470             $last_nonblank_seqno_string,
5471              
5472             @args_to_D,
5473             ) = @_;
5474              
5475             # Dump any saved lines if we see a line with an unbalanced opening or
5476             # closing token.
5477 7325 100 100     17363 $self->dump_valign_buffer()
5478             if ( $seqno_string && $valign_buffer_filling );
5479              
5480             # Either store or write this line
5481 7325 100       13341 if ($valign_buffer_filling) {
5482 7         40 push @valign_buffer, [@args_to_D];
5483             }
5484             else {
5485 7318         17040 $self->valign_output_step_D(@args_to_D);
5486             }
5487              
5488             # For lines starting or ending with opening or closing tokens..
5489 7325 100       14586 if ($seqno_string) {
5490 394         711 $last_nonblank_seqno_string = $seqno_string;
5491 394         1294 set_last_nonblank_seqno_string($seqno_string);
5492              
5493             # Start storing lines when we see a line with multiple stacked
5494             # opening tokens.
5495             # patch for RT #94354, requested by Colin Williams
5496 394 100 100     1886 if ( index( $seqno_string, ':' ) >= 0
      100        
5497             && $seqno_string =~ /^\d+(\:+\d+)+$/
5498             && $args_to_D[0] !~ /^[\}\)\]\:\?]/ )
5499             {
5500              
5501             # This test is efficient but a little subtle: The first test
5502             # says that we have multiple sequence numbers and hence
5503             # multiple opening or closing tokens in this line. The second
5504             # part of the test rejects stacked closing and ternary tokens.
5505             # So if we get here then we should have stacked unbalanced
5506             # opening tokens.
5507              
5508             # Here is a complex example:
5509              
5510             # Foo($Bar[0], { # (side comment)
5511             # baz => 1,
5512             # });
5513              
5514             # The first line has sequence 6::4. It does not begin with
5515             # a closing token or ternary, so it passes the test and must be
5516             # stacked opening tokens.
5517              
5518             # The last line has sequence 4:6 but is a stack of closing
5519             # tokens, so it gets rejected.
5520              
5521             # Note that the sequence number of an opening token for a qw
5522             # quote is a negative number and will be rejected. For
5523             # example, for the following line: skip_symbols([qw(
5524             # $seqno_string='10:5:-1'. It would be okay to accept it but I
5525             # decided not to do this after testing.
5526              
5527 8         25 $valign_buffer_filling = $seqno_string;
5528              
5529             }
5530             }
5531 7325         13755 return;
5532             } ## end sub valign_output_step_C
5533             }
5534              
5535             ###############################
5536             # CODE SECTION 9: Output Step D
5537             ###############################
5538              
5539             sub valign_output_step_D {
5540              
5541             #----------------------------------------------------------------
5542             # This is Step D in writing vertically aligned lines.
5543             # It is the end of the vertical alignment pipeline.
5544             # Write one vertically aligned line of code to the output object.
5545             #----------------------------------------------------------------
5546              
5547 7325     7325 0 16312 my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
5548              
5549             # The line is currently correct if there is no tabbing (recommended!)
5550             # We may have to lop off some leading spaces and replace with tabs.
5551 7325 100       15794 if ( $leading_space_count > 0 ) {
5552              
5553 4320         8318 my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5554 4320         6942 my $rOpts_tabs = $self->[_rOpts_tabs_];
5555 4320         7074 my $rOpts_entab_leading_whitespace =
5556             $self->[_rOpts_entab_leading_whitespace_];
5557              
5558             # Nothing to do if no tabs
5559 4320 100 66     18514 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
    50 66        
5560             || $rOpts_indent_columns <= 0 )
5561             {
5562              
5563             # nothing to do
5564             }
5565              
5566             # Handle entab option
5567             elsif ($rOpts_entab_leading_whitespace) {
5568              
5569             # Patch 12-nov-2018 based on report from Glenn. Extra padding was
5570             # not correctly entabbed, nor were side comments: Increase leading
5571             # space count for a padded line to get correct tabbing
5572 45 50       213 if ( $line =~ /^(\s+)(.*)$/ ) {
5573 45         99 my $spaces = length($1);
5574 45 50       86 if ( $spaces > $leading_space_count ) {
5575 0         0 $leading_space_count = $spaces;
5576             }
5577             }
5578              
5579 45         71 my $space_count =
5580             $leading_space_count % $rOpts_entab_leading_whitespace;
5581 45         86 my $tab_count =
5582             int( $leading_space_count / $rOpts_entab_leading_whitespace );
5583 45         91 my $leading_string = "\t" x $tab_count . SPACE x $space_count;
5584 45 50       464 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5585 45         127 substr( $line, 0, $leading_space_count ) = $leading_string;
5586             }
5587             else {
5588              
5589             # shouldn't happen - program error counting whitespace
5590             # - skip entabbing
5591 0         0 DEBUG_TABS
5592             && warning(
5593             "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5594             );
5595             }
5596             }
5597              
5598             # Handle option of one tab per level
5599             else {
5600 0         0 my $leading_string = ( "\t" x $level );
5601 0         0 my $space_count =
5602             $leading_space_count - $level * $rOpts_indent_columns;
5603              
5604             # shouldn't happen:
5605 0 0       0 if ( $space_count < 0 ) {
5606              
5607             # But it could be an outdented comment
5608 0 0       0 if ( $line !~ /^\s*#/ ) {
5609 0         0 DEBUG_TABS
5610             && warning(
5611             "Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
5612             );
5613             }
5614 0         0 $leading_string = ( SPACE x $leading_space_count );
5615             }
5616             else {
5617 0         0 $leading_string .= ( SPACE x $space_count );
5618             }
5619 0 0       0 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5620 0         0 substr( $line, 0, $leading_space_count ) = $leading_string;
5621             }
5622             else {
5623              
5624             # shouldn't happen - program error counting whitespace
5625             # we'll skip entabbing
5626 0         0 DEBUG_TABS
5627             && warning(
5628             "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5629             );
5630             }
5631             }
5632             }
5633 7325         13191 my $file_writer_object = $self->[_file_writer_object_];
5634 7325         33529 $file_writer_object->write_code_line( $line . "\n", $Kend );
5635              
5636 7325         14517 return;
5637             } ## end sub valign_output_step_D
5638              
5639             ##########################
5640             # CODE SECTION 10: Summary
5641             ##########################
5642              
5643             sub report_anything_unusual {
5644 560     560 0 1387 my $self = shift;
5645              
5646 560         1697 my $outdented_line_count = $self->[_outdented_line_count_];
5647 560 100       2144 if ( $outdented_line_count > 0 ) {
5648 21         159 write_logfile_entry(
5649             "$outdented_line_count long lines were outdented:\n");
5650 21         107 my $first_outdented_line_at = $self->[_first_outdented_line_at_];
5651 21         155 write_logfile_entry(
5652             " First at output line $first_outdented_line_at\n");
5653              
5654 21 100       198 if ( $outdented_line_count > 1 ) {
5655 7         35 my $last_outdented_line_at = $self->[_last_outdented_line_at_];
5656 7         39 write_logfile_entry(
5657             " Last at output line $last_outdented_line_at\n");
5658             }
5659             write_logfile_entry(
5660 21         160 " use -noll to prevent outdenting, -l=n to increase line length\n"
5661             );
5662 21         154 write_logfile_entry("\n");
5663             }
5664 560         1396 return;
5665             } ## end sub report_anything_unusual
5666             1;