File Coverage

blib/lib/Perl/Tidy/VerticalAligner.pm
Criterion Covered Total %
statement 2384 2612 91.2
branch 874 1106 79.0
condition 522 692 75.4
subroutine 108 118 91.5
pod 0 78 0.0
total 3888 4606 84.4


line stmt bran cond sub pod time code
1             package Perl::Tidy::VerticalAligner;
2 44     44   280 use strict;
  44         85  
  44         1778  
3 44     44   188 use warnings;
  44         71  
  44         2575  
4 44     44   203 use Carp;
  44         65  
  44         4120  
5              
6             { #<<< A non-indenting brace to contain all lexical variables
7              
8             our $VERSION = '20260204';
9 44     44   244 use English qw( -no_match_vars );
  44         98  
  44         335  
10 44     44   13760 use Scalar::Util 'refaddr'; # perl 5.8.1 and later
  44         79  
  44         1818  
11 44     44   15876 use Perl::Tidy::VerticalAligner::Alignment;
  44         111  
  44         1397  
12 44     44   15685 use Perl::Tidy::VerticalAligner::Line;
  44         108  
  44         1420  
13              
14 44     44   279 use constant DEVEL_MODE => 0;
  44         72  
  44         2908  
15 44     44   205 use constant EMPTY_STRING => q{};
  44         69  
  44         1662  
16 44     44   169 use constant SPACE => q{ };
  44         70  
  44         1352  
17 44     44   150 use constant COMMA => q{,};
  44         69  
  44         18228  
18              
19             # The Perl::Tidy::VerticalAligner package collects output lines and
20             # attempts to line up certain common tokens, such as => and #, which are
21             # identified by the calling routine.
22             #
23             # Usage:
24             # - Initiate an object with a call to new().
25             # - Write lines one-by-one with calls to valign_input().
26             # - Make a final call to flush() to empty the pipeline.
27             #
28             # The sub valign_input collects lines into groups. When a group reaches
29             # the maximum possible size it is processed for alignment and output.
30             # The maximum group size is reached whenever there is a change in indentation
31             # level, a blank line, a block comment, or an external flush call. The calling
32             # routine may also force a break in alignment at any time.
33             #
34             # If the calling routine needs to interrupt the output and send other text to
35             # the output, it must first call flush() to empty the output pipeline. This
36             # might occur for example if a block of pod text needs to be sent to the output
37             # between blocks of code.
38              
39             # It is essential that a final call to flush() be made. Otherwise some
40             # final lines of text will be lost.
41              
42             # Index...
43             # CODE SECTION 1: Preliminary code, global definitions and sub new
44             # sub new
45             # CODE SECTION 2: Some Basic Utilities
46             # CODE SECTION 3: Code to accept input and form groups
47             # sub valign_input
48             # CODE SECTION 4: Code to process comment lines
49             # sub _flush_comment_lines
50             # CODE SECTION 5: Code to process groups of code lines
51             # sub _flush_group_lines
52             # CODE SECTION 6: Pad Signed Number Columns
53             # sub pad_signed_number_columns
54             # CODE SECTION 7: Pad Wide Equals Columns
55             # sub pad_wide_equals_columns
56             # CODE SECTION 8: Output Step A
57             # sub valign_output_step_A
58             # CODE SECTION 9: Output Step B
59             # sub valign_output_step_B
60             # CODE SECTION 10: Output Step C
61             # sub valign_output_step_C
62             # CODE SECTION 11: Output Step D
63             # sub valign_output_step_D
64             # CODE SECTION 12: Summary
65             # sub report_anything_unusual
66              
67             ##################################################################
68             # CODE SECTION 1: Preliminary code, global definitions and sub new
69             ##################################################################
70              
71             sub AUTOLOAD {
72              
73             # Catch any undefined sub calls so that we are sure to get
74             # some diagnostic information. This sub should never be called
75             # except for a programming error.
76 0     0   0 our $AUTOLOAD;
77 0 0       0 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
78 0         0 my ( $pkg, $fname, $lno ) = caller();
79 0         0 my $my_package = __PACKAGE__;
80 0         0 print {*STDERR} <<EOM;
  0         0  
81             ======================================================================
82             Error detected in package '$my_package', version $VERSION
83             Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
84             Called from package: '$pkg'
85             Called from File '$fname' at line '$lno'
86             This error is probably due to a recent programming change
87             ======================================================================
88             EOM
89 0         0 exit 1;
90             } ## end sub AUTOLOAD
91              
92       0     sub DESTROY {
93              
94             # required to avoid call to AUTOLOAD in some versions of perl
95             }
96              
97             sub Die {
98 0     0 0 0 my ($msg) = @_;
99 0         0 Perl::Tidy::Die($msg);
100 0         0 croak "unexpected return from Perl::Tidy::Die";
101             }
102              
103             sub Warn {
104 0     0 0 0 my ($msg) = @_;
105 0         0 Perl::Tidy::Warn($msg);
106 0         0 return;
107             }
108              
109             sub Fault {
110              
111 0     0 0 0 my ($msg) = @_;
112              
113             # This routine is called for errors that really should not occur
114             # except if there has been a bug introduced by a recent program change.
115             # Please add comments at calls to Fault to explain why the call
116             # should not occur, and where to look to fix it.
117 0         0 my ( $package0_uu, $filename0_uu, $line0, $subroutine0_uu ) = caller(0);
118 0         0 my ( $package1_uu, $filename1, $line1, $subroutine1 ) = caller(1);
119 0         0 my ( $package2_uu, $filename2_uu, $line2_uu, $subroutine2 ) = caller(2);
120 0         0 my $pkg = __PACKAGE__;
121 0         0 my $input_stream_name = Perl::Tidy::get_input_stream_name();
122              
123 0         0 Die(<<EOM);
124             ==============================================================================
125             While operating on input stream with name: '$input_stream_name'
126             A fault was detected at line $line0 of sub '$subroutine1'
127             in file '$filename1'
128             which was called from line $line1 of sub '$subroutine2'
129             Message: '$msg'
130             This is probably an error introduced by a recent programming change.
131             $pkg reports VERSION='$VERSION'.
132             ==============================================================================
133             EOM
134 0         0 croak "unexpected return from sub Die";
135             } ## end sub Fault
136              
137             my %valid_LINE_keys;
138              
139             BEGIN {
140              
141             # define valid keys in a line object
142 44     44   275 my @q = qw(
143             jmax
144             rtokens
145             rfields
146             rfield_lengths
147             rpatterns
148             indentation
149             leading_space_count
150             outdent_long_lines
151             list_type
152             list_seqno
153             is_hanging_side_comment
154             maximum_line_length
155             rvertical_tightness_flags
156             is_terminal_ternary
157             j_terminal_match
158             end_group
159             Kend
160             ci_level
161             level
162             level_end
163             imax_pair
164              
165             ralignments
166             );
167              
168 44         4758 $valid_LINE_keys{$_} = 1 for @q;
169              
170             } ## end BEGIN
171              
172             BEGIN {
173              
174             # Define the fixed indexes for variables in $self, which is an array
175             # reference. Note the convention of leading and trailing underscores to
176             # keep them unique.
177             # Do not combine with other BEGIN blocks (c101).
178 44     44   132 my $i = 0;
179             use constant {
180 44         7449 _file_writer_object_ => $i++,
181             _logger_object_ => $i++,
182             _diagnostics_object_ => $i++,
183              
184             _rOpts_ => $i++,
185              
186             _last_level_written_ => $i++,
187             _last_side_comment_column_ => $i++,
188             _last_side_comment_line_number_ => $i++,
189             _last_side_comment_length_ => $i++,
190             _last_side_comment_level_ => $i++,
191             _outdented_line_count_ => $i++,
192             _first_outdented_line_at_ => $i++,
193             _last_outdented_line_at_ => $i++,
194             _consecutive_block_comments_ => $i++,
195              
196             _rgroup_lines_ => $i++,
197             _group_level_ => $i++,
198             _group_type_ => $i++,
199             _group_maximum_line_length_ => $i++,
200             _zero_count_ => $i++,
201             _last_leading_space_count_ => $i++,
202             _comment_leading_space_count_ => $i++,
203 44     44   256 };
  44         69  
204              
205             # Debug flag. This is a relic from the original program development
206             # looking for problems with tab characters. Caution: this debug flag can
207             # produce a lot of output It should be 0 except when debugging small
208             # scripts.
209              
210 44     44   225 use constant DEBUG_TABS => 0;
  44         83  
  44         4349  
211              
212             my $debug_warning = sub {
213 0         0 my $msg = shift;
214 0         0 print {*STDOUT} "VALIGN_DEBUGGING with key $msg\n";
  0         0  
215 0         0 return;
216 44         177 };
217              
218 44         55456 DEBUG_TABS && $debug_warning->('TABS');
219             } ## end BEGIN
220              
221             # GLOBAL variables
222             my (
223              
224             %valign_control_hash,
225             $valign_control_default,
226              
227             $rOpts_indent_columns,
228             $rOpts_tabs,
229             $rOpts_entab_leading_whitespace,
230             $rOpts_fixed_position_side_comment,
231             $rOpts_maximum_line_length,
232             $rOpts_minimum_space_to_comment,
233             $rOpts_valign_code,
234             $rOpts_valign_block_comments,
235             $rOpts_valign_side_comments,
236             $rOpts_valign_signed_numbers,
237             $rOpts_valign_signed_numbers_limit,
238             $rOpts_valign_wide_equals,
239              
240             $require_tabs,
241              
242             );
243              
244             sub check_valign_list_items {
245 4     4 0 10 my ( $rlist, ( $option_name, $die_on_error ) ) = @_;
246              
247             # Warn if obviously invalid token types or keywords occur in one of the
248             # --valign lists. This is a crude check for valid token types and valid
249             # keywords.
250              
251             # Given:
252             # $rlist = ref to list of input items
253             # $option_name = (optional) name of option for a warning message
254             # Return:
255             # nothing if no errors, or
256             # ref to list of unknown token types
257 4 50       10 return if ( !defined($rlist) );
258              
259 4         7 my @unknown_items;
260 4         6 foreach my $item ( @{$rlist} ) {
  4         9  
261 4 50       15 next if ( Perl::Tidy::Tokenizer::is_valid_token_type($item) );
262 0 0       0 next if ( Perl::Tidy::Tokenizer::is_keyword($item) );
263 0         0 push @unknown_items, $item;
264             }
265 4 50       11 return if ( !@unknown_items );
266              
267 0 0       0 if ($option_name) {
268 0         0 my $num = @unknown_items;
269 0         0 local $LIST_SEPARATOR = SPACE;
270 0         0 my $msg = <<EOM;
271             $num unrecognized items input with $option_name :
272             @unknown_items
273             EOM
274 0 0       0 Die($msg) if ($die_on_error);
275 0         0 Warn($msg);
276             }
277 0         0 return \@unknown_items;
278             } ## end sub check_valign_list_items
279              
280             sub check_options {
281              
282 647     647 0 1640 my ($rOpts) = @_;
283              
284             # This routine is called to check the user-supplied run parameters
285             # in $rOpts and to configure the control hashes to them.
286              
287             # All alignments are done by default
288 647         1498 %valign_control_hash = ();
289 647         1252 $valign_control_default = 1;
290              
291             # If -vil=s is entered without -vxl, assume -vxl='*'
292 647 50 66     4576 if ( !$rOpts->{'valign-exclusion-list'}
293             && $rOpts->{'valign-inclusion-list'} )
294             {
295 0         0 $rOpts->{'valign-exclusion-list'} = '*';
296             }
297              
298             # See if the user wants to exclude any alignment types ...
299 647 100       2225 if ( $rOpts->{'valign-exclusion-list'} ) {
300              
301             # The inclusion list is only relevant if there is an exclusion list
302 3 100       12 if ( $rOpts->{'valign-inclusion-list'} ) {
303 1         4 my @vil = split /\s+/, $rOpts->{'valign-inclusion-list'};
304 1         4 check_valign_list_items( \@vil, 'valign-inclusion-list', 1 );
305 1         3 $valign_control_hash{$_} = 1 for @vil;
306             }
307              
308             # Note that the -vxl list is done after -vil, so -vxl has priority
309             # in the event of duplicate entries.
310 3         10 my @vxl = split /\s+/, $rOpts->{'valign-exclusion-list'};
311 3         11 check_valign_list_items( \@vxl, 'valign-exclusion-list', 1 );
312 3         9 $valign_control_hash{$_} = 0 for @vxl;
313              
314             # Optimization: revert to defaults if no exclusions.
315             # This could happen with -vxl=' ' and any -vil list
316 3 50       6 if ( !@vxl ) {
317 0         0 %valign_control_hash = ();
318             }
319              
320             # '$valign_control_default' applies to types not in the hash:
321             # - If a '*' was entered then set it to be that default type
322             # - Otherwise, leave it set it to 1
323 3 100       10 if ( defined( $valign_control_hash{'*'} ) ) {
324 1         2 $valign_control_default = $valign_control_hash{'*'};
325             }
326              
327             # Side comments are controlled separately and must be removed
328             # if given in a list.
329 3 50       11 if (%valign_control_hash) {
330 3         7 $valign_control_hash{'#'} = 1;
331             }
332             }
333              
334             # Initialize some global options
335 647         1548 $rOpts_indent_columns = $rOpts->{'indent-columns'};
336 647         1362 $rOpts_tabs = $rOpts->{'tabs'};
337 647         1366 $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
338 647   66     3182 $require_tabs = ( $rOpts_tabs || $rOpts_entab_leading_whitespace )
339             && $rOpts_indent_columns > 0;
340              
341             $rOpts_fixed_position_side_comment =
342 647         1434 $rOpts->{'fixed-position-side-comment'};
343              
344 647         1221 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
345 647         1420 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
346 647         1295 $rOpts_valign_code = $rOpts->{'valign-code'};
347 647         1350 $rOpts_valign_block_comments = $rOpts->{'valign-block-comments'};
348 647         1150 $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
349 647         1309 $rOpts_valign_signed_numbers = $rOpts->{'valign-signed-numbers'};
350             $rOpts_valign_signed_numbers_limit =
351 647         1332 $rOpts->{'valign-signed-numbers-limit'};
352 647         1128 $rOpts_valign_wide_equals = $rOpts->{'valign-wide-equals'};
353              
354 647         1286 return;
355             } ## end sub check_options
356              
357             sub check_keys {
358              
359 0     0 0 0 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
360              
361             # Check the keys of a hash:
362             # $rtest = ref to hash to test
363             # $rvalid = ref to hash with valid keys
364              
365             # $msg = a message to write in case of error
366             # $exact_match defines the type of check:
367             # = false: test hash must not have unknown key
368             # = true: test hash must have exactly same keys as known hash
369             my @unknown_keys =
370 0         0 grep { !exists $rvalid->{$_} } keys %{$rtest};
  0         0  
  0         0  
371             my @missing_keys =
372 0         0 grep { !exists $rtest->{$_} } keys %{$rvalid};
  0         0  
  0         0  
373 0         0 my $error = @unknown_keys;
374 0 0 0     0 if ($exact_match) { $error ||= @missing_keys }
  0         0  
375 0 0       0 if ($error) {
376 0         0 local $LIST_SEPARATOR = ')(';
377 0         0 my @expected_keys = sort keys %{$rvalid};
  0         0  
378 0         0 @unknown_keys = sort @unknown_keys;
379 0         0 Fault(<<EOM);
380             ------------------------------------------------------------------------
381             Program error detected checking hash keys
382             Message is: '$msg'
383             Expected keys: (@expected_keys)
384             Unknown key(s): (@unknown_keys)
385             Missing key(s): (@missing_keys)
386             ------------------------------------------------------------------------
387             EOM
388             }
389 0         0 return;
390             } ## end sub check_keys
391              
392             sub new {
393              
394 648     648 0 2543 my ( $class, @arglist ) = @_;
395              
396             # Create a VerticalAligner object
397             # Given:
398             # @arglist is the hash of values shown below in %defaults
399              
400 648 50       1821 if ( @arglist % 2 ) { croak "Odd number of items in arg hash list\n" }
  0         0  
401              
402 648         3573 my %defaults = (
403             rOpts => undef,
404             file_writer_object => undef,
405             logger_object => undef,
406             diagnostics_object => undef,
407             );
408 648         3021 my %args = ( %defaults, @arglist );
409              
410             # Initialize other caches and buffers
411 648         3589 initialize_step_B_cache();
412 648         2708 initialize_valign_buffer();
413 648         2861 initialize_decode();
414              
415             # Initialize all variables in $self.
416             # To add an item to $self, first define a new constant index in the BEGIN
417             # section.
418 648         1028 my $self = [];
419              
420             # objects
421 648         1611 $self->[_file_writer_object_] = $args{file_writer_object};
422 648         1370 $self->[_logger_object_] = $args{logger_object};
423 648         1328 $self->[_diagnostics_object_] = $args{diagnostics_object};
424              
425             # shortcut to user options
426 648         1182 my $rOpts = $args{rOpts};
427 648         1151 $self->[_rOpts_] = $rOpts;
428              
429             # Batch of lines being collected
430 648         1455 $self->[_rgroup_lines_] = [];
431 648         1517 $self->[_group_level_] = 0;
432 648         1200 $self->[_group_type_] = EMPTY_STRING;
433 648         1280 $self->[_group_maximum_line_length_] = undef;
434 648         1482 $self->[_zero_count_] = 0;
435 648         1131 $self->[_comment_leading_space_count_] = 0;
436 648         1127 $self->[_last_leading_space_count_] = 0;
437              
438             # Memory of what has been processed
439 648         1076 $self->[_last_level_written_] = -1;
440 648         1065 $self->[_last_side_comment_column_] = 0;
441 648         1101 $self->[_last_side_comment_line_number_] = 0;
442 648         1090 $self->[_last_side_comment_length_] = 0;
443 648         1070 $self->[_last_side_comment_level_] = -1;
444 648         1022 $self->[_outdented_line_count_] = 0;
445 648         1003 $self->[_first_outdented_line_at_] = 0;
446 648         1016 $self->[_last_outdented_line_at_] = 0;
447 648         948 $self->[_consecutive_block_comments_] = 0;
448              
449 648         1220 bless $self, $class;
450 648         2582 return $self;
451             } ## end sub new
452              
453             #################################
454             # CODE SECTION 2: Basic Utilities
455             #################################
456              
457             sub flush {
458              
459 2217     2217 0 3653 my ($self) = @_;
460              
461             # flush() is the external call to completely empty the pipeline.
462              
463             # push out any current group lines
464             $self->_flush_group_lines()
465 2217 100       2869 if ( @{ $self->[_rgroup_lines_] } );
  2217         6650  
466              
467             # then anything left in the cache of step_B
468 2217         7031 $self->_flush_step_B_cache();
469              
470             # then anything left in the buffer of step_C
471 2217         6271 $self->dump_valign_buffer();
472              
473 2217         3551 return;
474             } ## end sub flush
475              
476             sub initialize_for_new_group {
477              
478 2611     2611 0 4624 my ($self) = @_;
479              
480             # initialize for a new group of lines to be aligned vertically
481              
482 2611         4621 $self->[_rgroup_lines_] = [];
483 2611         4416 $self->[_group_type_] = EMPTY_STRING;
484 2611         3683 $self->[_zero_count_] = 0;
485 2611         3595 $self->[_comment_leading_space_count_] = 0;
486 2611         5785 $self->[_last_leading_space_count_] = 0;
487 2611         3925 $self->[_group_maximum_line_length_] = undef;
488              
489             # Note that the value for _group_level_ is
490             # handled separately in sub valign_input
491 2611         3794 return;
492             } ## end sub initialize_for_new_group
493              
494             sub group_line_count {
495 94     94 0 137 my $self = shift;
496 94         119 return +@{ $self->[_rgroup_lines_] };
  94         435  
497             }
498              
499             sub write_diagnostics {
500              
501 0     0 0 0 my ( $self, $msg ) = @_;
502              
503             # Interface to Perl::Tidy::Diagnostics routines
504             # For debugging; not currently used
505 0         0 my $diagnostics_object = $self->[_diagnostics_object_];
506 0 0       0 if ($diagnostics_object) {
507 0         0 $diagnostics_object->write_diagnostics($msg);
508             }
509 0         0 return;
510             } ## end sub write_diagnostics
511              
512             sub warning {
513 0     0 0 0 my ( $self, $msg ) = @_;
514 0         0 my $logger_object = $self->[_logger_object_];
515 0 0       0 if ($logger_object) {
516 0         0 $logger_object->warning($msg);
517             }
518 0         0 return;
519             } ## end sub warning
520              
521             sub get_cached_line_count {
522 1     1 0 1 my $self = shift;
523 1 50       5 return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
524             }
525              
526             sub get_recoverable_spaces {
527              
528 4583     4583 0 6562 my $indentation = shift;
529              
530             # Return the number of spaces (+ means shift right, - means shift left)
531             # that we would like to shift a group of lines with the same indentation
532             # to get them to line up with their opening parens
533              
534 4583 100       13296 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
535             } ## end sub get_recoverable_spaces
536              
537             ######################################################
538             # CODE SECTION 3: Code to accept input and form groups
539             ######################################################
540              
541 44     44   344 use constant DEBUG_VALIGN => 0;
  44         73  
  44         2661  
542 44     44   232 use constant SC_LONG_LINE_DIFF => 12;
  44         83  
  44         8206  
543              
544             my %is_opening_token;
545             my %is_closing_token;
546             my %is_digit_char;
547             my %is_plus_or_minus;
548             my %is_if_or;
549             my %is_comma_token;
550             my %is_assignment;
551             my %is_good_marginal_alignment;
552              
553             BEGIN {
554              
555 44     44   319 $is_opening_token{$_} = 1 for qw# { ( [ #;
556 44         166 $is_closing_token{$_} = 1 for qw# } ) ] #;
557 44         497 $is_digit_char{$_} = 1 for qw# 0 1 2 3 4 5 6 7 8 9 #;
558 44         136 $is_plus_or_minus{$_} = 1 for qw# + - #;
559 44         181 $is_if_or{$_} = 1 for qw# if unless or || #;
560 44         130 $is_comma_token{$_} = 1 for ( '=>', COMMA );
561             $is_assignment{$_} = 1
562 44         469 for qw# = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= ^^= #;
563              
564             # We can be less restrictive in marginal cases at certain "good" alignments
565 44         74908 $is_good_marginal_alignment{$_} = 1 for ( COMMA, qw# { ? => = # );
566             }
567              
568             #--------------------------------------------
569             # VTFLAGS: Vertical tightness types and flags
570             #--------------------------------------------
571             # Vertical tightness is controlled by a 'type' and associated 'flags' for each
572             # line. These values are set by sub Formatter::set_vertical_tightness_flags.
573             # These are defined as follows:
574              
575             # Vertical Tightness Line Type Codes:
576             # Type 0, no vertical tightness condition
577             # Type 1, last token of this line is a non-block opening token
578             # Type 2, first token of next line is a non-block closing
579             # Type 3, isolated opening block brace
580             # type 4, isolated closing block brace
581              
582             # Opening token flag values are the vertical tightness flags
583             # 0 do not join with next line
584             # 1 just one join per line
585             # 2 any number of joins
586              
587             # Closing token flag values indicate spacing:
588             # 0 = no space added before closing token
589             # 1 = single space added before closing token
590              
591             sub valign_input {
592              
593 8399     8399 0 14342 my ( $self, $rcall_hash ) = @_;
594              
595             #---------------------------------------------------------------------
596             # This is the front door of the vertical aligner. On each call
597             # we receive one line of specially marked text for vertical alignment.
598             # We compare the line with the current group, and either:
599             # - the line joins the current group if alignments match, or
600             # - the current group is flushed and a new group is started
601             #---------------------------------------------------------------------
602             #
603             # The key input parameters describing each line are:
604             # $level = indentation level of this line
605             # $rfields = ref to array of fields
606             # $rpatterns = ref to array of patterns, one per field
607             # $rtokens = ref to array of tokens starting fields 1,2,..
608             # $rfield_lengths = ref to array of field display widths
609             #
610             # Here is an example of what this package does. In this example,
611             # we are trying to line up both the '=>' and the '#'.
612             #
613             # '18' => 'grave', # \`
614             # '19' => 'acute', # `'
615             # '20' => 'caron', # \v
616             # <-tabs-><f1-><--field 2 ---><-f3->
617             # | | | |
618             # | | | |
619             # col1 col2 col3 col4
620             #
621             # The calling routine has already broken the entire line into 3 fields as
622             # indicated. (So the work of identifying promising common tokens has
623             # already been done).
624             #
625             # In this example, there will be 2 tokens being matched: '=>' and '#'.
626             # They are the leading parts of fields 2 and 3, but we do need to know
627             # what they are so that we can dump a group of lines when these tokens
628             # change.
629             #
630             # The fields contain the actual characters of each field. The patterns
631             # are like the fields, but they contain mainly token types instead
632             # of tokens, so they have fewer characters. They are used to be
633             # sure we are matching fields of similar type.
634             #
635             # In this example, there will be 4 column indexes being adjusted. The
636             # first one is always at zero. The interior columns are at the start of
637             # the matching tokens, and the last one tracks the maximum line length.
638             #
639             # Each time a new line comes in, it joins the current vertical
640             # group if possible. Otherwise it causes the current group to be flushed
641             # and a new group is started.
642             #
643             # For each new group member, the column locations are increased, as
644             # necessary, to make room for the new fields. When the group is finally
645             # output, these column numbers are used to compute the amount of spaces of
646             # padding needed for each field.
647             #
648             # Programming note: the fields are assumed not to have any tab characters.
649             # Tabs have been previously removed except for tabs in quoted strings and
650             # side comments. Tabs in these fields can mess up the column counting.
651             # The log file warns the user if there are any such tabs.
652              
653             # Unpack the call args. This form is significantly faster than getting them
654             # one-by-one.
655             my (
656              
657             $Kend,
658             $break_alignment_after,
659             $break_alignment_before,
660             $ci_level,
661             $forget_side_comment,
662             $indentation,
663             $is_terminal_ternary,
664             $level,
665             $level_end,
666             $list_seqno,
667             $maximum_line_length,
668             $outdent_long_lines,
669             $rline_alignment,
670             $rvertical_tightness_flags,
671              
672             ) =
673              
674 8399         31472 @{$rcall_hash}{
675 8399         13520 qw(
676             Kend
677             break_alignment_after
678             break_alignment_before
679             ci_level
680             forget_side_comment
681             indentation
682             is_terminal_ternary
683             level
684             level_end
685             list_seqno
686             maximum_line_length
687             outdent_long_lines
688             rline_alignment
689             rvertical_tightness_flags
690             )
691             };
692              
693             my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
694 8399         10734 @{$rline_alignment};
  8399         15220  
695              
696             # The index '$Kend' is a value which passed along with the line text to sub
697             # 'write_code_line' for a convergence check.
698              
699             # number of fields is $jmax
700             # number of tokens between fields is $jmax-1
701 8399         9889 my $jmax = @{$rfields} - 1;
  8399         11588  
702              
703 8399 100       14306 my $leading_space_count =
704             ref($indentation) ? $indentation->get_spaces() : $indentation;
705              
706             # set outdented flag to be sure we either align within statements or
707             # across statement boundaries, but not both.
708 8399         13408 my $is_outdented =
709             $self->[_last_leading_space_count_] > $leading_space_count;
710 8399         11267 $self->[_last_leading_space_count_] = $leading_space_count;
711              
712             # Identify a hanging side comment. Hanging side comments have an empty
713             # initial field.
714 8399   100     21727 my $is_hanging_side_comment =
715             ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
716              
717             # Undo outdented flag for a hanging side comment
718 8399 100       14143 $is_outdented = 0 if ($is_hanging_side_comment);
719              
720             # Identify a block comment.
721 8399   100     22029 my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
722              
723             # Block comment .. update count
724 8399 100       13195 if ($is_block_comment) {
725 711         1225 $self->[_consecutive_block_comments_]++;
726             }
727              
728             # Not a block comment ..
729             # Forget side comment column if we saw 2 or more block comments,
730             # and reset the count
731             else {
732              
733 7688 100       13795 if ( $self->[_consecutive_block_comments_] > 1 ) {
734 78         406 $self->forget_side_comment();
735             }
736 7688         10881 $self->[_consecutive_block_comments_] = 0;
737             }
738              
739             # Reset side comment location if we are entering a new block from level 0.
740             # This is intended to keep them from drifting too far to the right.
741 8399 100       13767 if ($forget_side_comment) {
742 46         179 $self->forget_side_comment();
743             }
744              
745 8399         10951 my $is_balanced_line = $level_end == $level;
746              
747 8399         11476 my $group_level = $self->[_group_level_];
748 8399         10606 my $group_maximum_line_length = $self->[_group_maximum_line_length_];
749              
750 8399         9424 DEBUG_VALIGN && do {
751             my $nlines = $self->group_line_count();
752             print {*STDOUT}
753             "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level=$level, group_level=$group_level, level_end=$level_end\n";
754             };
755              
756             # Validate cached line if necessary: If we can produce a container
757             # with just 2 lines total by combining an existing cached opening
758             # token with the closing token to follow, then we will mark both
759             # cached flags as valid.
760 8399         18897 my $cached_line_type = get_cached_line_type();
761 8399 100       14581 if ($cached_line_type) {
762 227         462 my $cached_line_opening_flag = get_cached_line_opening_flag();
763 227 100       467 if ($rvertical_tightness_flags) {
764 157         359 my $cached_seqno = get_cached_seqno();
765 157 100 100     703 if ( $cached_seqno
      100        
766             && $rvertical_tightness_flags->{_vt_seqno}
767             && $rvertical_tightness_flags->{_vt_seqno} == $cached_seqno )
768             {
769              
770             # Fix for b1187 and b1188: Normally this step is only done
771             # if the number of existing lines is 0 or 1. But to prevent
772             # blinking, this range can be controlled by the caller.
773             # If zero values are given we fall back on the range 0 to 1.
774 4         15 my $line_count = $self->group_line_count();
775 4         7 my $min_lines = $rvertical_tightness_flags->{_vt_min_lines};
776 4         7 my $max_lines = $rvertical_tightness_flags->{_vt_max_lines};
777 4 50       12 $min_lines = 0 if ( !$min_lines );
778 4 50       9 $max_lines = 1 if ( !$max_lines );
779 4 100 66     20 if ( ( $line_count >= $min_lines )
780             && ( $line_count <= $max_lines ) )
781             {
782 3   50     13 $rvertical_tightness_flags->{_vt_valid_flag} ||= 1;
783 3         10 set_cached_line_valid(1);
784             }
785             }
786             }
787              
788             # do not join an opening block brace (type 3, see VTFLAGS)
789             # with an unbalanced line unless requested with a flag value of 2
790 227 50 100     587 if ( $cached_line_type == 3
      66        
      66        
791             && !$self->group_line_count()
792             && $cached_line_opening_flag < 2
793             && !$is_balanced_line )
794             {
795 0         0 set_cached_line_valid(0);
796             }
797             }
798              
799             # shouldn't happen:
800 8399 50       14633 if ( $level < 0 ) { $level = 0 }
  0         0  
801              
802             # do not align code across indentation level changes
803             # or changes in the maximum line length
804             # or if vertical alignment is turned off
805 8399 100 66     52811 if (
      66        
      66        
      100        
      100        
      100        
      100        
806             $level != $group_level
807             || ( $group_maximum_line_length
808             && $maximum_line_length != $group_maximum_line_length )
809             || $is_outdented
810             || ( $is_block_comment && !$rOpts_valign_block_comments )
811             || ( !$is_block_comment
812             && !$rOpts_valign_side_comments
813             && !$rOpts_valign_code )
814             )
815             {
816              
817             $self->_flush_group_lines( $level - $group_level )
818 3215 100       4084 if ( @{ $self->[_rgroup_lines_] } );
  3215         9807  
819              
820 3215         4621 $group_level = $level;
821 3215         4600 $self->[_group_level_] = $group_level;
822 3215         4413 $self->[_group_maximum_line_length_] = $maximum_line_length;
823              
824             # Update leading spaces after the above flush because the leading space
825             # count may have been changed if the -icp flag is in effect
826 3215 100       6011 $leading_space_count =
827             ref($indentation) ? $indentation->get_spaces() : $indentation;
828             }
829              
830             # --------------------------------------------------------------------
831             # Collect outdentable block COMMENTS
832             # --------------------------------------------------------------------
833 8399 100       17071 if ( $self->[_group_type_] eq 'COMMENT' ) {
834 599 100 66     2589 if ( $is_block_comment
      66        
835             && $outdent_long_lines
836             && $leading_space_count == $self->[_comment_leading_space_count_] )
837             {
838              
839             # Note that for a comment group we are not storing a line
840             # but rather just the text and its length.
841 90         144 push @{ $self->[_rgroup_lines_] },
  90         330  
842             [ $rfields->[0], $rfield_lengths->[0], $Kend ];
843 90         281 return;
844             }
845             else {
846             $self->_flush_group_lines()
847 509 50       759 if ( @{ $self->[_rgroup_lines_] } );
  509         3011  
848             }
849             }
850              
851 8309         11397 my $rgroup_lines = $self->[_rgroup_lines_];
852 8309 100 100     15733 if ( $break_alignment_before && @{$rgroup_lines} ) {
  129         390  
853 30         78 $rgroup_lines->[-1]->{'end_group'} = 1;
854             }
855              
856             # --------------------------------------------------------------------
857             # add dummy fields for terminal ternary
858             # --------------------------------------------------------------------
859 8309         9771 my $j_terminal_match;
860              
861 8309 100 100     15140 if ( $is_terminal_ternary && @{$rgroup_lines} ) {
  17         62  
862 14         123 $j_terminal_match = fix_terminal_ternary(
863             {
864             old_line => $rgroup_lines->[-1],
865             rfields => $rfields,
866             rtokens => $rtokens,
867             rpatterns => $rpatterns,
868             rfield_lengths => $rfield_lengths,
869             group_level => $group_level,
870             }
871             );
872 14         42 $jmax = @{$rfields} - 1;
  14         42  
873             }
874              
875             # --------------------------------------------------------------------
876             # add dummy fields for else statement
877             # --------------------------------------------------------------------
878              
879             # Note the trailing space after 'else' here. If there were no space between
880             # the else and the next '{' then we would not be able to do vertical
881             # alignment of the '{'.
882 8309 100 100     18063 if ( $rfields->[0] eq 'else '
      66        
883 12         90 && @{$rgroup_lines}
884             && $is_balanced_line )
885             {
886 9         98 $j_terminal_match = fix_terminal_else(
887             {
888             old_line => $rgroup_lines->[-1],
889             rfields => $rfields,
890             rtokens => $rtokens,
891             rpatterns => $rpatterns,
892             rfield_lengths => $rfield_lengths,
893             }
894             );
895 9         27 $jmax = @{$rfields} - 1;
  9         14  
896             }
897              
898             # --------------------------------------------------------------------
899             # Handle simple line of code with no fields to match.
900             # --------------------------------------------------------------------
901 8309 100       13450 if ( $jmax <= 0 ) {
902 4811         6807 $self->[_zero_count_]++;
903              
904             # VSN PATCH for a single number, part 1.
905 4811   100     14492 my $is_numeric =
906             $rOpts_valign_signed_numbers && $rpatterns->[0] eq 'n,';
907              
908 4811 100 100     12053 if ( !$is_numeric
      100        
909 4770         13215 && @{$rgroup_lines}
910             && !get_recoverable_spaces( $rgroup_lines->[0]->{'indentation'} ) )
911             {
912              
913             # flush the current group if it has some aligned columns..
914             # or we haven't seen a comment lately
915 384 100 100     1604 if ( $rgroup_lines->[0]->{'jmax'} > 1
916             || $self->[_zero_count_] > 3 )
917             {
918             $self->_flush_group_lines()
919 348 50       589 if ( @{ $self->[_rgroup_lines_] } );
  348         1728  
920              
921             # Update '$rgroup_lines' - it will become a ref to empty array.
922             # This allows avoiding a call to get_group_line_count below.
923 348         774 $rgroup_lines = $self->[_rgroup_lines_];
924             }
925             }
926              
927             # start new COMMENT group if this comment may be outdented
928 4811 100 100     11468 if ( $is_block_comment
      66        
929             && $outdent_long_lines
930 594         1540 && !@{$rgroup_lines} )
931             {
932 594         1253 $self->[_group_type_] = 'COMMENT';
933 594         927 $self->[_comment_leading_space_count_] = $leading_space_count;
934 594         981 $self->[_group_maximum_line_length_] = $maximum_line_length;
935 594         831 push @{$rgroup_lines},
  594         1892  
936             [ $rfields->[0], $rfield_lengths->[0], $Kend ];
937 594         2230 return;
938             }
939              
940             # just write this line directly if no current group, no side comment,
941             # and no space recovery is needed,
942             # and not numeric - VSN PATCH for a single number, part 4.
943 4217 100 100     5321 if ( !@{$rgroup_lines}
  4217   100     16697  
944             && !$is_numeric
945             && !get_recoverable_spaces($indentation) )
946             {
947              
948 4126         32561 $self->valign_output_step_B(
949             {
950             leading_space_count => $leading_space_count,
951             line => $rfields->[0],
952             line_length => $rfield_lengths->[0],
953             side_comment_length => 0,
954             outdent_long_lines => $outdent_long_lines,
955             rvertical_tightness_flags => $rvertical_tightness_flags,
956             level => $level,
957             level_end => $level_end,
958             Kend => $Kend,
959             maximum_line_length => $maximum_line_length,
960             }
961             );
962 4126         18821 return;
963             }
964             }
965             else {
966 3498         5378 $self->[_zero_count_] = 0;
967             }
968              
969             # --------------------------------------------------------------------
970             # It simplifies things to create a zero length side comment
971             # if none exists.
972             # --------------------------------------------------------------------
973 3589 100 100     12609 if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
974 3241         4498 $jmax += 1;
975 3241         6080 $rtokens->[ $jmax - 1 ] = '#';
976 3241         5729 $rfields->[$jmax] = EMPTY_STRING;
977 3241         4556 $rfield_lengths->[$jmax] = 0;
978 3241         5175 $rpatterns->[$jmax] = '#';
979             }
980              
981             # --------------------------------------------------------------------
982             # create an object to hold this line
983             # --------------------------------------------------------------------
984              
985             # The hash keys below must match the list of keys in %valid_LINE_keys.
986             # Values in this hash are accessed directly, except for 'ralignments',
987             # rather than with get/set calls for efficiency.
988 3589         56762 my $new_line = Perl::Tidy::VerticalAligner::Line->new(
989             {
990             jmax => $jmax,
991             rtokens => $rtokens,
992             rfields => $rfields,
993             rpatterns => $rpatterns,
994             rfield_lengths => $rfield_lengths,
995             indentation => $indentation,
996             leading_space_count => $leading_space_count,
997             outdent_long_lines => $outdent_long_lines,
998             list_seqno => $list_seqno,
999             list_type => EMPTY_STRING,
1000             is_hanging_side_comment => $is_hanging_side_comment,
1001             rvertical_tightness_flags => $rvertical_tightness_flags,
1002             is_terminal_ternary => $is_terminal_ternary,
1003             j_terminal_match => $j_terminal_match,
1004             end_group => $break_alignment_after,
1005             Kend => $Kend,
1006             ci_level => $ci_level,
1007             level => $level,
1008             level_end => $level_end,
1009             imax_pair => -1,
1010             maximum_line_length => $maximum_line_length,
1011              
1012             ralignments => [],
1013             }
1014             );
1015              
1016 3589         4987 DEVEL_MODE
1017             && check_keys( $new_line, \%valid_LINE_keys,
1018             "Checking line keys at line definition", 1 );
1019              
1020             # --------------------------------------------------------------------
1021             # Decide if this is a simple list of items.
1022             # We use this to be less restrictive in deciding what to align.
1023             # --------------------------------------------------------------------
1024 3589 100       8371 decide_if_list($new_line) if ($list_seqno);
1025              
1026             # --------------------------------------------------------------------
1027             # Append this line to the current group (or start new group)
1028             # --------------------------------------------------------------------
1029              
1030 3589         4701 push @{ $self->[_rgroup_lines_] }, $new_line;
  3589         7121  
1031 3589         5726 $self->[_group_maximum_line_length_] = $maximum_line_length;
1032              
1033             # output this group if it ends in a terminal else or ternary line
1034 3589 100 100     17525 if ( defined($j_terminal_match) ) {
    100          
1035             $self->_flush_group_lines()
1036 21 50       39 if ( @{ $self->[_rgroup_lines_] } );
  21         161  
1037             }
1038              
1039             # Force break after jump to lower level
1040             elsif ($level_end < $level
1041             || $is_closing_token{ substr( $rfields->[0], 0, 1 ) } )
1042             {
1043             $self->_flush_group_lines(-1)
1044 143 50       221 if ( @{ $self->[_rgroup_lines_] } );
  143         637  
1045             }
1046              
1047             else {
1048             ##ok: no output needed
1049             }
1050              
1051             # --------------------------------------------------------------------
1052             # Some old debugging stuff
1053             # --------------------------------------------------------------------
1054 3589         4271 DEBUG_VALIGN && do {
1055             print {*STDOUT} "exiting valign_input fields:";
1056             dump_array( @{$rfields} );
1057             print {*STDOUT} "exiting valign_input tokens:";
1058             dump_array( @{$rtokens} );
1059             print {*STDOUT} "exiting valign_input patterns:";
1060             dump_array( @{$rpatterns} );
1061             };
1062              
1063 3589         10516 return;
1064             } ## end sub valign_input
1065              
1066             sub join_hanging_comment {
1067              
1068 38     38 0 69 my ( $new_line, $old_line ) = @_;
1069              
1070             # Add dummy fields to a hanging side comment to make it look
1071             # like the first line in its potential group. This simplifies
1072             # the coding.
1073              
1074             # Given:
1075             # $new_line = ref to hash of the line to be possibly changed
1076             # $old_line = ref to hash of the previous reference line
1077             # Return:
1078             # true if new line modified
1079             # false otherwise
1080              
1081 38         68 my $jmax = $new_line->{'jmax'};
1082              
1083             # must be 2 fields
1084 38 50       91 return 0 unless ( $jmax == 1 );
1085 38         63 my $rtokens = $new_line->{'rtokens'};
1086              
1087             # the second field must be a comment
1088 38 50       104 return 0 unless ( $rtokens->[0] eq '#' );
1089 38         67 my $rfields = $new_line->{'rfields'};
1090              
1091             # the first field must be empty
1092 38 50       167 return 0 if ( $rfields->[0] !~ /^\s*$/ );
1093              
1094             # the current line must have fewer fields
1095 38         80 my $maximum_field_index = $old_line->{'jmax'};
1096 38 100       109 return 0
1097             if ( $maximum_field_index <= $jmax );
1098              
1099             # looks ok..
1100 3         8 my $rpatterns = $new_line->{'rpatterns'};
1101 3         6 my $rfield_lengths = $new_line->{'rfield_lengths'};
1102              
1103 3         4 $new_line->{'is_hanging_side_comment'} = 1;
1104              
1105 3         4 $jmax = $maximum_field_index;
1106 3         6 $new_line->{'jmax'} = $jmax;
1107 3         7 $rfields->[$jmax] = $rfields->[1];
1108 3         6 $rfield_lengths->[$jmax] = $rfield_lengths->[1];
1109 3         8 $rtokens->[ $jmax - 1 ] = $rtokens->[0];
1110 3         6 $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
1111              
1112 3         7 foreach my $j ( 1 .. $jmax - 1 ) {
1113 3         5 $rfields->[$j] = EMPTY_STRING;
1114 3         5 $rfield_lengths->[$j] = 0;
1115 3         6 $rtokens->[ $j - 1 ] = EMPTY_STRING;
1116 3         6 $rpatterns->[ $j - 1 ] = EMPTY_STRING;
1117             }
1118 3         4 return 1;
1119             } ## end sub join_hanging_comment
1120              
1121             sub decide_if_list {
1122              
1123 1193     1193 0 1660 my $line = shift;
1124              
1125             # Given:
1126             # $line = ref to hash of values for a line
1127             # Task:
1128             # Set 'list_type' property
1129              
1130             # A list will be taken to be a line with a forced break in which all
1131             # of the field separators are commas or comma-arrows (except for the
1132             # trailing #)
1133              
1134 1193         1902 my $rtokens = $line->{'rtokens'};
1135 1193         1957 my $test_token = $rtokens->[0];
1136 1193         2856 my ( $raw_tok, $lev, $tag, $tok_count ) =
1137             decode_alignment_token($test_token);
1138 1193 100       3079 if ( $is_comma_token{$raw_tok} ) {
1139 1028         1472 my $list_type = $test_token;
1140 1028         1465 my $jmax = $line->{'jmax'};
1141              
1142 1028         2478 foreach ( 1 .. $jmax - 2 ) {
1143 1077         1626 ( $raw_tok, $lev, $tag, $tok_count ) =
1144             decode_alignment_token( $rtokens->[$_] );
1145 1077 100       2170 if ( !$is_comma_token{$raw_tok} ) {
1146 26         41 $list_type = EMPTY_STRING;
1147 26         76 last;
1148             }
1149             }
1150 1028         1883 $line->{'list_type'} = $list_type;
1151             }
1152 1193         1836 return;
1153             } ## end sub decide_if_list
1154              
1155             sub fix_terminal_ternary {
1156              
1157             # Add empty fields as necessary to align a ternary term
1158             # like this:
1159             #
1160             # my $leapyear =
1161             # $year % 4 ? 0
1162             # : $year % 100 ? 1
1163             # : $year % 400 ? 0
1164             # : 1;
1165             #
1166             # returns the index of the terminal question token, if any
1167              
1168 14     14 0 35 my ($rcall_hash) = @_;
1169              
1170 14         36 my $old_line = $rcall_hash->{old_line};
1171 14         38 my $rfields = $rcall_hash->{rfields};
1172 14         28 my $rtokens = $rcall_hash->{rtokens};
1173 14         27 my $rpatterns = $rcall_hash->{rpatterns};
1174 14         25 my $rfield_lengths = $rcall_hash->{rfield_lengths};
1175 14         29 my $group_level = $rcall_hash->{group_level};
1176              
1177 14 50       44 return if ( !$old_line );
1178 44     44   348 use constant EXPLAIN_TERNARY => 0;
  44         103  
  44         57326  
1179              
1180 14 50       46 if (%valign_control_hash) {
1181 0         0 my $align_ok = $valign_control_hash{'?'};
1182 0 0       0 $align_ok = $valign_control_default unless ( defined($align_ok) );
1183 0 0       0 return if ( !$align_ok );
1184             }
1185              
1186 14         40 my $jmax = @{$rfields} - 1;
  14         32  
1187 14         33 my $rfields_old = $old_line->{'rfields'};
1188              
1189 14         29 my $rpatterns_old = $old_line->{'rpatterns'};
1190 14         75 my $rtokens_old = $old_line->{'rtokens'};
1191 14         49 my $maximum_field_index = $old_line->{'jmax'};
1192              
1193             # look for the question mark after the :
1194 14         30 my ($jquestion);
1195             my $depth_question;
1196 14         30 my $pad = EMPTY_STRING;
1197 14         44 my $pad_length = 0;
1198 14         49 foreach my $j ( 0 .. $maximum_field_index - 1 ) {
1199 15         47 my $tok = $rtokens_old->[$j];
1200 15         60 my ( $raw_tok, $lev, $tag_uu, $tok_count_uu ) =
1201             decode_alignment_token($tok);
1202 15 100       54 if ( $raw_tok eq '?' ) {
1203 14         26 $depth_question = $lev;
1204              
1205             # depth must be correct
1206 14 50       54 next if ( $depth_question ne $group_level );
1207              
1208 14         25 $jquestion = $j;
1209 14 50       88 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
1210 14         37 $pad_length = length($1);
1211 14         43 $pad = SPACE x $pad_length;
1212             }
1213             else {
1214 0         0 return; # shouldn't happen
1215             }
1216 14         34 last;
1217             }
1218             }
1219 14 50       42 return if ( !defined($jquestion) ); # shouldn't happen
1220              
1221             # Now splice the tokens and patterns of the previous line
1222             # into the else line to insure a match. Add empty fields
1223             # as necessary.
1224 14         28 my $jadd = $jquestion;
1225              
1226             # Work on copies of the actual arrays in case we have
1227             # to return due to an error
1228 14         52 my @fields = @{$rfields};
  14         36  
1229 14         25 my @patterns = @{$rpatterns};
  14         31  
1230 14         26 my @tokens = @{$rtokens};
  14         40  
1231 14         26 my @field_lengths = @{$rfield_lengths};
  14         32  
1232              
1233 14         50 EXPLAIN_TERNARY && do {
1234             local $LIST_SEPARATOR = '><';
1235             print {*STDOUT} "CURRENT FIELDS=<@{$rfields_old}>\n";
1236             print {*STDOUT} "CURRENT TOKENS=<@{$rtokens_old}>\n";
1237             print {*STDOUT} "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
1238             print {*STDOUT} "UNMODIFIED FIELDS=<@{$rfields}>\n";
1239             print {*STDOUT} "UNMODIFIED TOKENS=<@{$rtokens}>\n";
1240             print {*STDOUT} "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
1241             };
1242              
1243             # handle cases of leading colon on this line
1244 14 50       85 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
1245              
1246 14         50 my ( $colon, $therest ) = ( $1, $2 );
1247              
1248             # Handle sub-case of first field with leading colon plus additional code
1249             # This is the usual situation as at the '1' below:
1250             # ...
1251             # : $year % 400 ? 0
1252             # : 1;
1253 14 50       36 if ($therest) {
1254              
1255             # Split the first field after the leading colon and insert padding.
1256             # Note that this padding will remain even if the terminal value goes
1257             # out on a separate line. This does not seem to look to bad, so no
1258             # mechanism has been included to undo it.
1259 14         29 my $field1_uu = shift @fields;
1260 14         24 my $field_length1 = shift @field_lengths;
1261 14         29 my $len_colon = length($colon);
1262 14         60 unshift @fields, ( $colon, $pad . $therest );
1263 14         37 unshift @field_lengths,
1264             ( $len_colon, $pad_length + $field_length1 - $len_colon );
1265              
1266             # change the leading pattern from : to ?
1267 14 50       78 return if ( $patterns[0] !~ s/^\:/?/ );
1268              
1269             # install leading tokens and patterns of existing line
1270 14         36 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
  14         38  
1271 14         60 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
  14         32  
1272              
1273             # insert appropriate number of empty fields
1274 14 100       40 splice( @fields, 1, 0, (EMPTY_STRING) x $jadd ) if ($jadd);
1275 14 100       41 splice( @field_lengths, 1, 0, (0) x $jadd ) if ($jadd);
1276             }
1277              
1278             # handle sub-case of first field just equal to leading colon.
1279             # This can happen for example in the example below where
1280             # the leading '(' would create a new alignment token
1281             # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
1282             # : ( $mname = $name . '->' );
1283             else {
1284              
1285 0 0 0     0 return if ( $jmax <= 0 || $tokens[0] eq '#' ); # shouldn't happen
1286              
1287             # prepend a leading ? onto the second pattern
1288 0         0 $patterns[1] = "?b" . $patterns[1];
1289              
1290             # pad the second field
1291 0         0 $fields[1] = $pad . $fields[1];
1292 0         0 $field_lengths[1] = $pad_length + $field_lengths[1];
1293              
1294             # install leading tokens and patterns of existing line, replacing
1295             # leading token and inserting appropriate number of empty fields
1296 0         0 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
  0         0  
1297 0         0 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
  0         0  
1298 0 0       0 splice( @fields, 1, 0, (EMPTY_STRING) x $jadd ) if ($jadd);
1299 0 0       0 splice( @field_lengths, 1, 0, (0) x $jadd ) if ($jadd);
1300             }
1301             }
1302              
1303             # Handle case of no leading colon on this line. This will
1304             # be the case when -wba=':' is used. For example,
1305             # $year % 400 ? 0 :
1306             # 1;
1307             else {
1308              
1309             # install leading tokens and patterns of existing line
1310 0         0 $patterns[0] = '?' . 'b' . $patterns[0];
1311 0         0 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
  0         0  
1312 0         0 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
  0         0  
1313              
1314             # insert appropriate number of empty fields
1315 0         0 $jadd = $jquestion + 1;
1316 0         0 $fields[0] = $pad . $fields[0];
1317 0         0 $field_lengths[0] = $pad_length + $field_lengths[0];
1318 0 0       0 splice( @fields, 0, 0, (EMPTY_STRING) x $jadd ) if ($jadd);
1319 0 0       0 splice( @field_lengths, 0, 0, (0) x $jadd ) if ($jadd);
1320             }
1321              
1322 14         28 EXPLAIN_TERNARY && do {
1323             local $LIST_SEPARATOR = '><';
1324             print {*STDOUT} "MODIFIED TOKENS=<@tokens>\n";
1325             print {*STDOUT} "MODIFIED PATTERNS=<@patterns>\n";
1326             print {*STDOUT} "MODIFIED FIELDS=<@fields>\n";
1327             };
1328              
1329             # all ok .. update the arrays
1330 14         25 @{$rfields} = @fields;
  14         47  
1331 14         24 @{$rtokens} = @tokens;
  14         33  
1332 14         30 @{$rpatterns} = @patterns;
  14         35  
1333 14         20 @{$rfield_lengths} = @field_lengths;
  14         29  
1334              
1335             # force a flush after this line
1336 14         51 return $jquestion;
1337             } ## end sub fix_terminal_ternary
1338              
1339             sub fix_terminal_else {
1340              
1341 9     9 0 25 my ($rcall_hash) = @_;
1342              
1343             # Add empty fields as necessary to align a balanced terminal
1344             # else block to a previous if/elsif/unless block,
1345             # like this:
1346             #
1347             # if ( 1 || $x ) { print "ok 13\n"; }
1348             # else { print "not ok 13\n"; }
1349             #
1350             # returns a positive value if the else block should be indented
1351             #
1352              
1353 9         25 my $old_line = $rcall_hash->{old_line};
1354 9         20 my $rfields = $rcall_hash->{rfields};
1355 9         16 my $rtokens = $rcall_hash->{rtokens};
1356 9         19 my $rpatterns = $rcall_hash->{rpatterns};
1357 9         17 my $rfield_lengths = $rcall_hash->{rfield_lengths};
1358              
1359 9 50       43 return if ( !$old_line );
1360 9         14 my $jmax = @{$rfields} - 1;
  9         25  
1361 9 50       30 return if ( $jmax <= 0 );
1362              
1363 9 50       26 if (%valign_control_hash) {
1364 0         0 my $align_ok = $valign_control_hash{'{'};
1365 0 0       0 $align_ok = $valign_control_default unless ( defined($align_ok) );
1366 0 0       0 return if ( !$align_ok );
1367             }
1368              
1369             # check for balanced else block following if/elsif/unless
1370 9         17 my $rfields_old = $old_line->{'rfields'};
1371              
1372             # TBD: add handling for 'case'
1373 9 100       85 return if ( $rfields_old->[0] !~ /^(?:if|elsif|unless)\s*$/ );
1374              
1375             # look for the opening brace after the else, and extract the depth
1376 7         40 my $tok_brace = $rtokens->[0];
1377 7         15 my $depth_brace;
1378 7 50       168 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
  7         32  
1379              
1380             # probably: "else # side_comment"
1381 0         0 else { return }
1382              
1383 7         15 my $rpatterns_old = $old_line->{'rpatterns'};
1384 7         16 my $rtokens_old = $old_line->{'rtokens'};
1385 7         13 my $maximum_field_index = $old_line->{'jmax'};
1386              
1387             # be sure the previous if/elsif is followed by an opening paren
1388 7         17 my $jparen = 0;
1389 7         17 my $tok_paren = '(' . $depth_brace;
1390 7         23 my $tok_test = $rtokens_old->[$jparen];
1391 7 50       28 if ( $tok_test ne $tok_paren ) {
1392             ## no opening paren - possible syntax error - give up.
1393 0         0 return;
1394             }
1395              
1396             # Now find the opening block brace
1397 7         14 my ($jbrace);
1398 7         22 foreach my $j ( 1 .. $maximum_field_index - 1 ) {
1399 8         17 my $tok = $rtokens_old->[$j];
1400 8 100       26 if ( $tok eq $tok_brace ) {
1401 7         13 $jbrace = $j;
1402 7         19 last;
1403             }
1404             }
1405 7 50       24 if ( !defined($jbrace) ) {
1406             ## no opening brace - possible syntax error - give up.
1407 0         0 return;
1408             }
1409              
1410             # Now splice the tokens and patterns of the previous line
1411             # into the else line to insure a match. Add empty fields
1412             # as necessary.
1413 7         13 my $jadd = $jbrace - $jparen;
1414 7         15 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
  7         36  
  7         39  
1415 7         14 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
  7         20  
  7         19  
1416 7         12 splice( @{$rfields}, 1, 0, (EMPTY_STRING) x $jadd );
  7         22  
1417 7         23 splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
  7         18  
1418              
1419             # force a flush after this line if it does not follow a case
1420 7 50       24 if ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
  0         0  
1421 7         22 else { return $jbrace }
1422             } ## end sub fix_terminal_else
1423              
1424             my %is_closing_block_type;
1425              
1426             BEGIN {
1427 44     44   1278 $is_closing_block_type{$_} = 1 for qw# } ] #;
1428             }
1429              
1430             # This is a flag for testing alignment by sub sweep_left_to_right only.
1431             # This test can help find problems with the alignment logic.
1432             # This flag should normally be zero.
1433 44     44   268 use constant TEST_SWEEP_ONLY => 0;
  44         82  
  44         2529  
1434              
1435 44     44   220 use constant EXPLAIN_CHECK_MATCH => 0;
  44         95  
  44         3108  
1436              
1437             sub check_match {
1438              
1439 1321     1321 0 2423 my ( $self, $new_line, $base_line, $prev_line, $group_line_count ) = @_;
1440              
1441             # See if the current line matches the current vertical alignment group.
1442              
1443             # Given:
1444             # $new_line = the line being considered for group inclusion
1445             # $base_line = the first line of the current group
1446             # $prev_line = the line just before $new_line
1447             # $group_line_count = number of lines in the current group
1448              
1449             # Returns: a flag and a value as follows:
1450             # return (0, $imax_align) if the line does not match
1451             # return (1, $imax_align) if the line matches but does not fit
1452             # return (2, $imax_align) if the line matches and fits
1453              
1454 44     44   220 use constant NO_MATCH => 0;
  44         77  
  44         1873  
1455 44     44   184 use constant MATCH_NO_FIT => 1;
  44         76  
  44         1559  
1456 44     44   187 use constant MATCH_AND_FIT => 2;
  44         78  
  44         64865  
1457              
1458             # Return value '$return_value' describes the match with 3 possible values
1459 1321         1667 my $return_value;
1460              
1461             # Return value '$imax_align' is the index of the maximum matching token.
1462             # It will be used in the subsequent left-to-right sweep to align as many
1463             # tokens as possible for lines which partially match.
1464 1321         1800 my $imax_align = -1;
1465              
1466             # variable $GoToMsg explains reason for no match, for debugging
1467 1321         1822 my $GoToMsg = EMPTY_STRING;
1468              
1469 1321         1896 my $jmax = $new_line->{'jmax'};
1470 1321         1892 my $maximum_field_index = $base_line->{'jmax'};
1471              
1472 1321         1821 my $jlimit = $jmax - 2;
1473 1321 100       2554 if ( $jmax > $maximum_field_index ) {
1474 101         197 $jlimit = $maximum_field_index - 2;
1475             }
1476              
1477 1321 100       2329 if ( $new_line->{'is_hanging_side_comment'} ) {
1478              
1479             # HSC's can join the group if they fit
1480             }
1481              
1482             # Everything else
1483             else {
1484              
1485             # A group with hanging side comments ends with the first non hanging
1486             # side comment.
1487 1283 50       2360 if ( $base_line->{'is_hanging_side_comment'} ) {
1488 0         0 $GoToMsg = "end of hanging side comments";
1489 0         0 $return_value = NO_MATCH;
1490             }
1491             else {
1492              
1493             # The number of tokens that this line shares with the previous
1494             # line has been stored with the previous line. This value was
1495             # calculated and stored by sub 'match_line_pair'.
1496 1283         4324 $imax_align = $prev_line->{'imax_pair'};
1497              
1498             # Only the following ci sequences are accepted (issue c225):
1499             # 0 0 0 ... OK
1500             # 0 1 1 ... OK but marginal*
1501             # 1 1 1 ... OK
1502             # This check is rarely activated, but for example we want
1503             # to avoid something like this 'tail wag dog' situation:
1504             # $tag =~ s/\b([a-z]+)/\L\u$1/gio;
1505             # $tag =~ s/\b([b-df-hj-np-tv-z]+)\b/\U$1/gio
1506             # if $tag =~ /-/;
1507             # *Note: we could set a flag for the 0 1 marginal case and
1508             # use it to prevent alignment of selected token types.
1509 1283         1760 my $ci_prev = $prev_line->{'ci_level'};
1510 1283         1778 my $ci_new = $new_line->{'ci_level'};
1511 1283 50 100     4781 if ( $ci_prev != $ci_new
    100 33        
      66        
1512             && $imax_align >= 0
1513             && ( $ci_new == 0 || $group_line_count > 1 ) )
1514             {
1515 0         0 $imax_align = -1;
1516 0         0 $GoToMsg =
1517             "Rejected ci: ci_prev=$ci_prev ci_new=$ci_new num=$group_line_count\n";
1518 0         0 $return_value = NO_MATCH;
1519             }
1520             elsif ( $imax_align != $jlimit ) {
1521 39         93 $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
1522 39         69 $return_value = NO_MATCH;
1523             }
1524             else {
1525             ##ok: continue
1526             }
1527             }
1528             }
1529              
1530 1321 100       2512 if ( !defined($return_value) ) {
1531              
1532             # The tokens match, but the lines must have identical number of
1533             # tokens to join the group.
1534 1282 100 100     3557 if ( $maximum_field_index != $jmax ) {
    100          
1535 141         238 $GoToMsg = "token count differs";
1536 141         216 $return_value = NO_MATCH;
1537             }
1538              
1539             # The tokens match. Now See if there is space for this line in the
1540             # current group.
1541             elsif ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY )
1542             {
1543              
1544 1127         2161 $GoToMsg = "match and fit, imax_align=$imax_align, jmax=$jmax\n";
1545 1127         1527 $return_value = MATCH_AND_FIT;
1546 1127         1599 $imax_align = $jlimit;
1547             }
1548             else {
1549 14         32 $GoToMsg = "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
1550 14         25 $return_value = MATCH_NO_FIT;
1551 14         23 $imax_align = $jlimit;
1552             }
1553             }
1554              
1555             EXPLAIN_CHECK_MATCH
1556 1321         1632 && print
1557             "returning $return_value because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
1558              
1559 1321         2889 return ( $return_value, $imax_align );
1560             } ## end sub check_match
1561              
1562             sub check_fit {
1563              
1564 1141     1141 0 1919 my ( $self, $new_line, $old_line ) = @_;
1565              
1566             # The new line has alignments identical to the current group. Now we have
1567             # to fit the new line into the group without causing a field to exceed the
1568             # line length limit.
1569              
1570             # Given:
1571             # $new_line = ref to hash of the new line values
1572             # $old_line = ref to hash of the previous line values
1573             # Returns:
1574             # true if the new line alignments fit the old line
1575             # false otherwise
1576              
1577 1141         1688 my $jmax = $new_line->{'jmax'};
1578 1141         1640 my $leading_space_count = $new_line->{'leading_space_count'};
1579 1141         1633 my $rfield_lengths = $new_line->{'rfield_lengths'};
1580 1141         3578 my $padding_available = $old_line->get_available_space_on_right();
1581 1141         1845 my $jmax_old = $old_line->{'jmax'};
1582              
1583             # Safety check ... only lines with equal array sizes should arrive here
1584             # from sub check_match. So if this error occurs, look at recent changes in
1585             # sub check_match. It is only supposed to check the fit of lines with
1586             # identical numbers of alignment tokens.
1587 1141 50       2584 if ( $jmax_old ne $jmax ) {
1588              
1589 0         0 $self->warning(<<EOM);
1590             Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
1591             unexpected difference in array lengths: $jmax != $jmax_old
1592             EOM
1593 0         0 return;
1594             }
1595              
1596             # Save current columns in case this line does not fit.
1597 1141         1463 my @alignments = @{ $old_line->{'ralignments'} };
  1141         2221  
1598 1141         1857 foreach my $alignment (@alignments) {
1599 4033         6802 $alignment->save_column();
1600             }
1601              
1602             # Loop over all alignments ...
1603 1141         2080 for my $j ( 0 .. $jmax ) {
1604              
1605 4014         7539 my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
1606              
1607 4014 100       6304 if ( $j == 0 ) {
1608 1141         1624 $pad += $leading_space_count;
1609             }
1610              
1611             # Keep going if this field does not need any space.
1612 4014 100       6340 next if ( $pad < 0 );
1613              
1614             # Revert to the starting state if does not fit
1615 2697 100       4115 if ( $pad > $padding_available ) {
1616              
1617             #----------------------------------------------
1618             # Line does not fit -- revert to starting state
1619             #----------------------------------------------
1620 14         29 foreach my $alignment (@alignments) {
1621 43         98 $alignment->restore_column();
1622             }
1623 14         72 return;
1624             }
1625              
1626             # make room for this field
1627 2683         5731 $old_line->increase_field_width( $j, $pad );
1628 2683         3501 $padding_available -= $pad;
1629             }
1630              
1631             #-------------------------------------
1632             # The line fits, the match is accepted
1633             #-------------------------------------
1634 1127         4281 return 1;
1635              
1636             } ## end sub check_fit
1637              
1638             sub install_new_alignments {
1639              
1640 2462     2462 0 3997 my ($new_line) = @_;
1641              
1642             # Given:
1643             # $new_line = ref to hash of a line starting a new group
1644             # Task:
1645             # setup alignment fields for this line
1646              
1647 2462         4163 my $jmax = $new_line->{'jmax'};
1648 2462         3805 my $rfield_lengths = $new_line->{'rfield_lengths'};
1649 2462         3655 my $col = $new_line->{'leading_space_count'};
1650              
1651 2462         3299 my @alignments;
1652 2462         4778 for my $j ( 0 .. $jmax ) {
1653 8274         10203 $col += $rfield_lengths->[$j];
1654              
1655             # create initial alignments for the new group
1656 8274         23762 my $alignment =
1657             Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
1658 8274         13712 push @alignments, $alignment;
1659             }
1660 2462         4965 $new_line->{'ralignments'} = \@alignments;
1661 2462         4414 return;
1662             } ## end sub install_new_alignments
1663              
1664             sub copy_old_alignments {
1665 1127     1127 0 1802 my ( $new_line, $old_line ) = @_;
1666 1127         1401 my @new_alignments = @{ $old_line->{'ralignments'} };
  1127         2587  
1667 1127         2251 $new_line->{'ralignments'} = \@new_alignments;
1668 1127         2093 return;
1669             } ## end sub copy_old_alignments
1670              
1671             sub dump_array {
1672              
1673             # debug routine to dump array contents
1674 0     0 0 0 local $LIST_SEPARATOR = ')(';
1675 0         0 print {*STDOUT} "(@_)\n";
  0         0  
1676 0         0 return;
1677             } ## end sub dump_array
1678              
1679             sub level_change {
1680              
1681 10     10 0 25 my ( $self, $leading_space_count, $diff, $level ) = @_;
1682              
1683             # compute decrease in level when we remove $diff spaces from the
1684             # leading spaces
1685              
1686             # Given:
1687             # $leading_space_count = current leading line spaces
1688             # $diff = number of spaces to remove
1689             # $level = current indentation level
1690             # Return:
1691             # $level = updated level accounting for the loss of spaces
1692              
1693 10 50       22 if ($rOpts_indent_columns) {
1694 10         21 my $olev =
1695             int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
1696 10         16 my $nlev = int( $leading_space_count / $rOpts_indent_columns );
1697 10         14 $level -= ( $olev - $nlev );
1698 10 50       23 if ( $level < 0 ) { $level = 0 }
  0         0  
1699             }
1700 10         19 return $level;
1701             } ## end sub level_change
1702              
1703             ###############################################
1704             # CODE SECTION 4: Code to process comment lines
1705             ###############################################
1706              
1707             sub _flush_comment_lines {
1708              
1709 594     594   1205 my ($self) = @_;
1710              
1711             # Output a group consisting of COMMENT lines
1712              
1713 594         1043 my $rgroup_lines = $self->[_rgroup_lines_];
1714 594 50       868 return if ( !@{$rgroup_lines} );
  594         1411  
1715 594         1016 my $group_level = $self->[_group_level_];
1716 594         947 my $group_maximum_line_length = $self->[_group_maximum_line_length_];
1717 594         1002 my $leading_space_count = $self->[_comment_leading_space_count_];
1718              
1719             # look for excessively long lines
1720 594         915 my $max_excess = 0;
1721 594         958 foreach my $item ( @{$rgroup_lines} ) {
  594         1414  
1722 684         1037 my ( $str_uu, $str_len ) = @{$item};
  684         1919  
1723 684         1292 my $excess =
1724             $str_len + $leading_space_count - $group_maximum_line_length;
1725 684 100       1971 if ( $excess > $max_excess ) {
1726 39         80 $max_excess = $excess;
1727             }
1728             }
1729              
1730             # zero leading space count if any lines are too long
1731 594 100       1565 if ( $max_excess > 0 ) {
1732 37         60 $leading_space_count -= $max_excess;
1733 37 50       102 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
  37         62  
1734 37         71 my $file_writer_object = $self->[_file_writer_object_];
1735 37         156 my $last_outdented_line_at =
1736             $file_writer_object->get_output_line_number();
1737 37         71 my $nlines = @{$rgroup_lines};
  37         60  
1738 37         72 $self->[_last_outdented_line_at_] =
1739             $last_outdented_line_at + $nlines - 1;
1740 37         61 my $outdented_line_count = $self->[_outdented_line_count_];
1741 37 100       87 if ( !$outdented_line_count ) {
1742 19         35 $self->[_first_outdented_line_at_] = $last_outdented_line_at;
1743             }
1744 37         59 $outdented_line_count += $nlines;
1745 37         61 $self->[_outdented_line_count_] = $outdented_line_count;
1746             }
1747              
1748             # write the lines
1749 594         940 my $outdent_long_lines = 0;
1750              
1751 594         912 foreach my $item ( @{$rgroup_lines} ) {
  594         1033  
1752 684         994 my ( $str, $str_len, $Kend ) = @{$item};
  684         1413  
1753 684         8010 $self->valign_output_step_B(
1754             {
1755             leading_space_count => $leading_space_count,
1756             line => $str,
1757             line_length => $str_len,
1758             side_comment_length => 0,
1759             outdent_long_lines => $outdent_long_lines,
1760             rvertical_tightness_flags => undef,
1761             level => $group_level,
1762             level_end => $group_level,
1763             Kend => $Kend,
1764             maximum_line_length => $group_maximum_line_length,
1765             }
1766             );
1767             }
1768              
1769 594         2485 $self->initialize_for_new_group();
1770 594         1054 return;
1771             } ## end sub _flush_comment_lines
1772              
1773             ######################################################
1774             # CODE SECTION 5: Code to process groups of code lines
1775             ######################################################
1776              
1777             sub _flush_group_lines {
1778              
1779 2611     2611   5117 my ( $self, ($level_jump) ) = @_;
1780              
1781             # This is the vertical aligner internal flush, which leaves the cache
1782             # intact
1783              
1784             # $level_jump = $next_level-$group_level, if known
1785             # = undef if not known
1786             # Note: only the sign of the jump is needed
1787              
1788 2611         4100 my $rgroup_lines = $self->[_rgroup_lines_];
1789 2611 50       3493 return if ( !@{$rgroup_lines} );
  2611         5367  
1790 2611         4512 my $group_type = $self->[_group_type_];
1791 2611         3855 my $group_level = $self->[_group_level_];
1792              
1793             # Debug
1794 2611         3117 0 && do {
1795             my ( $a, $b, $c ) = caller();
1796             my $nlines = @{$rgroup_lines};
1797             print {*STDOUT}
1798             "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
1799             };
1800              
1801             #-------------------------------------------
1802             # Section 1: Handle a group of COMMENT lines
1803             #-------------------------------------------
1804 2611 100       5703 if ( $group_type eq 'COMMENT' ) {
1805 594         2167 $self->_flush_comment_lines();
1806 594         1152 return;
1807             }
1808              
1809             #------------------------------------------------------------------------
1810             # Section 2: Handle line(s) of CODE. Most of the actual work of vertical
1811             # aligning happens here in the following steps:
1812             #------------------------------------------------------------------------
1813              
1814             # STEP 1: Remove most unmatched tokens. They block good alignments.
1815 2017         6740 my ( $max_lev_diff_uu, $saw_side_comment, $saw_signed_number ) =
1816             delete_unmatched_tokens( $rgroup_lines, $group_level );
1817              
1818             # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
1819             # matching common alignments. The indexes of these subgroups are in the
1820             # return variable.
1821 2017         7925 my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
1822              
1823             # STEP 3: Sweep left to right through the lines, looking for leading
1824             # alignment tokens shared by groups.
1825             sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
1826 2017 100       2817 if ( @{$rgroups} > 1 );
  2017         5363  
1827              
1828             # STEP 4: Move side comments to a common column if possible.
1829 2017 100       4192 if ($saw_side_comment) {
1830 222         962 $self->align_side_comments( $rgroup_lines, $rgroups );
1831             }
1832              
1833             # STEP 5: For the -lp option, increase the indentation of lists
1834             # to the desired amount, but do not exceed the line length limit.
1835              
1836             # We are allowed to shift a group of lines to the right if:
1837             # (1) its level is greater than the level of the previous group, and
1838             # (2) its level is greater than the level of the next line to be written.
1839              
1840 2017         2865 my $extra_indent_ok;
1841 2017 100       4776 if ( $group_level > $self->[_last_level_written_] ) {
1842              
1843             # Use the level jump to next line to come, if given
1844 1017 100       2260 if ( defined($level_jump) ) {
1845 672         1288 $extra_indent_ok = $level_jump < 0;
1846             }
1847              
1848             # Otherwise, assume the next line has the level of the end of last line.
1849             # This fixes case c008.
1850             else {
1851 345         715 my $level_end = $rgroup_lines->[-1]->{'level_end'};
1852 345         684 $extra_indent_ok = $group_level > $level_end;
1853             }
1854             }
1855              
1856 2017 100       5086 my $extra_leading_spaces =
1857             $extra_indent_ok
1858             ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
1859             : 0;
1860              
1861             # STEP 6: add sign padding to columns numbers if needed
1862 2017 100 100     5147 pad_signed_number_columns($rgroup_lines)
1863             if ( $saw_signed_number && $rOpts_valign_signed_numbers );
1864              
1865             # STEP 7: pad wide equals
1866 2017 100       4158 pad_wide_equals_columns($rgroup_lines)
1867             if ($rOpts_valign_wide_equals);
1868              
1869             # STEP 8: Output the lines.
1870             # All lines in this group have the same leading spacing and maximum line
1871             # length
1872 2017         3308 my $group_leader_length = $rgroup_lines->[0]->{'leading_space_count'};
1873 2017         3126 my $group_maximum_line_length = $rgroup_lines->[0]->{'maximum_line_length'};
1874              
1875 2017         2766 foreach my $line ( @{$rgroup_lines} ) {
  2017         3432  
1876 3589         23808 $self->valign_output_step_A(
1877             {
1878             line => $line,
1879             min_ci_gap => 0,
1880             do_not_align => 0,
1881             group_leader_length => $group_leader_length,
1882             extra_leading_spaces => $extra_leading_spaces,
1883             level => $group_level,
1884             maximum_line_length => $group_maximum_line_length,
1885             }
1886             );
1887             }
1888              
1889             # Let the formatter know that this object has been processed and any
1890             # recoverable spaces have been handled. This is needed for setting the
1891             # closing paren location in -lp mode.
1892 2017         3905 my $object = $rgroup_lines->[0]->{'indentation'};
1893 2017 100       4185 if ( ref($object) ) { $object->set_recoverable_spaces(0) }
  94         330  
1894              
1895 2017         6937 $self->initialize_for_new_group();
1896 2017         3855 return;
1897             } ## end sub _flush_group_lines
1898              
1899             { ## closure for sub sweep_top_down
1900              
1901             my $rall_lines; # all of the lines
1902             my $grp_level; # level of all lines
1903             my $rgroups; # describes the partition of lines we will make here
1904             my $group_line_count; # number of lines in current partition
1905              
1906 44     44   76178 BEGIN { $rgroups = [] }
1907              
1908             sub initialize_for_new_rgroup {
1909 4479     4479 0 5639 $group_line_count = 0;
1910 4479         5691 return;
1911             }
1912              
1913             sub add_to_rgroup {
1914              
1915 3589     3589 0 5577 my ($jend) = @_;
1916              
1917             # Include the line at index $jend in the current alignment group
1918              
1919 3589         4857 my $rline = $rall_lines->[$jend];
1920              
1921 3589         4758 my $jbeg = $jend;
1922 3589 100       6223 if ( $group_line_count == 0 ) {
1923 2462         6305 install_new_alignments($rline);
1924             }
1925             else {
1926 1127         1423 my $rvals = pop @{$rgroups};
  1127         1929  
1927 1127         1667 $jbeg = $rvals->[0];
1928 1127         2319 copy_old_alignments( $rline, $rall_lines->[$jbeg] );
1929             }
1930 3589         4728 push @{$rgroups}, [ $jbeg, $jend, undef ];
  3589         7145  
1931 3589         4671 $group_line_count++;
1932 3589         4889 return;
1933             } ## end sub add_to_rgroup
1934              
1935             sub get_rgroup_jrange {
1936              
1937 1493 50   1493 0 1845 return if ( !@{$rgroups} );
  1493         3153  
1938 1493 50       3007 return if ( $group_line_count <= 0 );
1939 1493         1904 my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
  1493         2751  
1940 1493         2782 return ( $jbeg, $jend );
1941             } ## end sub get_rgroup_jrange
1942              
1943             sub end_rgroup {
1944              
1945 2482     2482 0 4132 my ($imax_align) = @_;
1946              
1947             # End the current alignment group and set its maximum alignment field
1948             # Given:
1949             # $imax_align = maximum field to be vertically aligned
1950              
1951 2482 50       2988 return if ( !@{$rgroups} );
  2482         4975  
1952 2482 100       5149 return if ( $group_line_count <= 0 );
1953              
1954 2462         3245 my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
  2462         3045  
  2462         5360  
1955 2462         3831 push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
  2462         4994  
1956              
1957             # Undo some alignments of poor two-line combinations.
1958             # We had to wait until now to know the line count.
1959 2462 100       5685 if ( $jend - $jbeg == 1 ) {
1960 290         503 my $line_0 = $rall_lines->[$jbeg];
1961 290         489 my $line_1 = $rall_lines->[$jend];
1962              
1963 290         532 my $imax_pair = $line_1->{'imax_pair'};
1964 290 50       712 if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
  0         0  
1965              
1966             ## flag for possible future use:
1967             ## my $is_isolated_pair = $imax_pair < 0
1968             ## && ( $jbeg == 0
1969             ## || $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} < 0 );
1970              
1971             my $imax_prev =
1972 290 100       807 $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} : -1;
1973              
1974 290         3530 my ( $is_marginal, $imax_align_fix ) =
1975             is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
1976             $imax_prev );
1977 290 100       1125 if ($is_marginal) {
1978 16         85 combine_fields( $line_0, $line_1, $imax_align_fix );
1979             }
1980             }
1981              
1982 2462         5195 initialize_for_new_rgroup();
1983 2462         3323 return;
1984             } ## end sub end_rgroup
1985              
1986             sub block_penultimate_match {
1987              
1988             # emergency reset to prevent sweep_left_to_right from trying to match a
1989             # failed terminal else match
1990 1 50   1 0 1 return if ( @{$rgroups} <= 1 );
  1         4  
1991 1         1 $rgroups->[-2]->[2] = -1;
1992 1         2 return;
1993             } ## end sub block_penultimate_match
1994              
1995             sub sweep_top_down {
1996              
1997 2017     2017 0 6303 my ( $self, $rlines, $group_level ) = @_;
1998              
1999             # This is the first of two major sweeps to find alignments.
2000             # The other is sweep_left_to_right.
2001              
2002             # Given:
2003             # $rlines = ref to hash of lines in this main alignment group
2004             # $group_level = common indentation level of these lines
2005             # Return:
2006             # $rgroups = ref to hash of subgroups created
2007              
2008             # Partition the set of lines into final alignment subgroups
2009             # and store the alignments with the lines.
2010              
2011             # The alignment subgroups we are making here are groups of consecutive
2012             # lines which have (1) identical alignment tokens and (2) do not
2013             # exceed the allowable maximum line length. A later sweep from
2014             # left-to-right ('sweep_lr') will handle additional alignments.
2015              
2016             # transfer args to closure variables
2017 2017         36340 $rall_lines = $rlines;
2018 2017         3276 $grp_level = $group_level;
2019 2017         3495 $rgroups = [];
2020 2017         7398 initialize_for_new_rgroup();
2021 2017 50       2644 if ( !@{$rlines} ) {
  2017         4607  
2022 0         0 DEVEL_MODE && Fault("Unexpected empty alignment group\n");
2023 0         0 return;
2024             }
2025              
2026             # Unset the _end_group flag for the last line if it set because it
2027             # is not needed and can causes problems for -lp formatting
2028 2017         4063 $rall_lines->[-1]->{'end_group'} = 0;
2029              
2030             # Loop over all lines ...
2031 2017         3099 my $jline = -1;
2032 2017         3127 foreach my $new_line ( @{$rall_lines} ) {
  2017         4201  
2033 3589         4606 $jline++;
2034              
2035             # Start a new subgroup if necessary
2036 3589 100       6689 if ( !$group_line_count ) {
2037 2096         5513 add_to_rgroup($jline);
2038 2096 100       4881 if ( $new_line->{'end_group'} ) {
2039 26         108 end_rgroup(-1);
2040             }
2041 2096         4051 next;
2042             }
2043              
2044 1493         2665 my $j_terminal_match = $new_line->{'j_terminal_match'};
2045 1493         2991 my ( $jbeg, $jend_uu ) = get_rgroup_jrange();
2046 1493 50       3069 if ( !defined($jbeg) ) {
2047              
2048             # safety check, shouldn't happen
2049 0         0 $self->warning(<<EOM);
2050             Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
2051             undefined index for group line count $group_line_count
2052             EOM
2053 0         0 $jbeg = $jline;
2054             }
2055 1493         2153 my $base_line = $rall_lines->[$jbeg];
2056              
2057             # Initialize a global flag saying if the last line of the group
2058             # should match end of group and also terminate the group. There
2059             # should be no returns between here and where the flag is handled
2060             # at the bottom.
2061 1493         1892 my $col_matching_terminal = 0;
2062 1493 100       2966 if ( defined($j_terminal_match) ) {
2063              
2064             # remember the column of the terminal ? or { to match with
2065 20         104 $col_matching_terminal =
2066             $base_line->get_column($j_terminal_match);
2067              
2068             # Ignore an undefined value as a defensive step; shouldn't
2069             # normally happen.
2070 20 50       79 $col_matching_terminal = 0
2071             unless ( defined($col_matching_terminal) );
2072             }
2073              
2074             # -------------------------------------------------------------
2075             # Allow hanging side comment to join current group, if any. The
2076             # only advantage is to keep the other tokens in the same group. For
2077             # example, this would make the '=' align here:
2078             # $ax = 1; # side comment
2079             # # hanging side comment
2080             # $boondoggle = 5; # side comment
2081             # $beetle = 5; # side comment
2082              
2083             # here is another example..
2084              
2085             # _rtoc_name_count => {}, # hash to track ..
2086             # _rpackage_stack => [], # stack to check ..
2087             # # name changes
2088             # _rlast_level => \$last_level, # brace indentation
2089             #
2090             #
2091             # If this were not desired, the next step could be skipped.
2092             # -------------------------------------------------------------
2093 1493 100       4208 if ( $new_line->{'is_hanging_side_comment'} ) {
    100          
2094 38         99 join_hanging_comment( $new_line, $base_line );
2095             }
2096              
2097             # If this line has no matching tokens, then flush out the lines
2098             # BEFORE this line unless both it and the previous line have side
2099             # comments. This prevents this line from pushing side comments out
2100             # to the right.
2101             elsif ( $new_line->{'jmax'} == 1 ) {
2102              
2103             # There are no matching tokens, so now check side comments.
2104             # Programming note: accessing arrays with index -1 is
2105             # risky in Perl, but we have verified there is at least one
2106             # line in the group and that there is at least one field,
2107             my $prev_comment =
2108 244         693 $rall_lines->[ $jline - 1 ]->{'rfields'}->[-1];
2109 244         482 my $side_comment = $new_line->{'rfields'}->[-1];
2110              
2111             # do not end group if both lines have side comments
2112 244 100 100     854 if ( !$side_comment || !$prev_comment ) {
2113              
2114             # Otherwise - VSN PATCH for a single number:
2115             # - do not end group if numeric and no side comment, or
2116             # - end if !numeric or side comment
2117 199         390 my $pat = $new_line->{'rpatterns'}->[0];
2118 199   66     1040 my $is_numeric = $rOpts_valign_signed_numbers
2119             && ( $pat eq 'n,'
2120             || $pat eq 'n,b' );
2121 199 100 100     827 end_rgroup(-1) if ( !$is_numeric || $side_comment );
2122             }
2123             }
2124             else {
2125             ##ok: continue
2126             }
2127              
2128             # See if the new line matches and fits the current group,
2129             # if it still exists. Flush the current group if not.
2130 1493         1976 my $match_code;
2131 1493 100       2744 if ($group_line_count) {
2132 1321         3705 ( $match_code, my $imax_align ) =
2133             $self->check_match( $new_line, $base_line,
2134             $rall_lines->[ $jline - 1 ],
2135             $group_line_count );
2136 1321 100       2825 if ( $match_code != 2 ) { end_rgroup($imax_align) }
  194         472  
2137             }
2138              
2139             # Store the new line
2140 1493         3163 add_to_rgroup($jline);
2141              
2142 1493 100       4504 if ( defined($j_terminal_match) ) {
    100          
2143              
2144             # Decide if we should fix a terminal match. We can either:
2145             # 1. fix it and prevent the sweep_lr from changing it, or
2146             # 2. leave it alone and let sweep_lr try to fix it.
2147              
2148             # The current logic is to fix it if:
2149             # -it has not joined to previous lines,
2150             # -and either the previous subgroup has just 1 line, or
2151             # -this line matched but did not fit (so sweep won't work)
2152 20         38 my $fixit;
2153 20 100       68 if ( $group_line_count == 1 ) {
2154 3   66     30 $fixit ||= $match_code;
2155 3 100       9 if ( !$fixit ) {
2156 2 50       5 if ( @{$rgroups} > 1 ) {
  2         7  
2157 2         3 my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
  2         6  
2158 2         5 my $nlines = $jendx - $jbegx + 1;
2159 2   66     14 $fixit ||= $nlines <= 1;
2160             }
2161             }
2162             }
2163              
2164 20 100       59 if ($fixit) {
2165 2         5 $base_line = $new_line;
2166 2         8 my $col_now = $base_line->get_column($j_terminal_match);
2167              
2168             # Ignore an undefined value as a defensive step; shouldn't
2169             # normally happen.
2170 2 50       7 $col_now = 0 unless ( defined($col_now) );
2171              
2172 2         4 my $pad = $col_matching_terminal - $col_now;
2173 2         8 my $padding_available =
2174             $base_line->get_available_space_on_right();
2175 2 100 33     20 if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
      66        
2176 1         7 $base_line->increase_field_width( $j_terminal_match,
2177             $pad );
2178             }
2179              
2180             # do not let sweep_left_to_right change an isolated 'else'
2181 2 100       6 if ( !$new_line->{'is_terminal_ternary'} ) {
2182 1         3 block_penultimate_match();
2183             }
2184             }
2185 20         59 end_rgroup(-1);
2186             }
2187              
2188             # end the group if we know we cannot match next line.
2189             elsif ( $new_line->{'end_group'} ) {
2190 53         167 end_rgroup(-1);
2191             }
2192              
2193             else {
2194             ##ok: continue
2195             }
2196             } ## end loop over lines
2197              
2198 2017         5784 end_rgroup(-1);
2199 2017         3610 return ($rgroups);
2200             } ## end sub sweep_top_down
2201             }
2202              
2203             sub two_line_pad {
2204              
2205 21     21 0 50 my ( $line_m, $line, $imax_min ) = @_;
2206              
2207             # Decide if two adjacent, isolated lines should be aligned
2208              
2209             # Given:
2210             # $line_m, $line = two isolated (list) lines
2211             # imax_min = number of common alignment tokens
2212             # Return:
2213             # $pad_max = maximum suggested pad distance
2214             # = 0 if alignment not recommended
2215              
2216             # Allow alignment if the difference in the two unpadded line lengths
2217             # is not more than either line length. The idea is to avoid
2218             # aligning lines with very different field lengths, like these two:
2219              
2220             # [
2221             # 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
2222             # 1, 0, 0, 0, undef, 0, 0
2223             # ];
2224              
2225             # Note that this is only for two lines which do not have alignment tokens
2226             # in common with any other lines. It is intended for lists, but it might
2227             # also be used for two non-list lines with a common leading '='.
2228              
2229 21         61 my $rfield_lengths = $line->{'rfield_lengths'};
2230 21         53 my $rfield_lengths_m = $line_m->{'rfield_lengths'};
2231              
2232             # Safety check - shouldn't happen
2233             return 0
2234 21         74 if ( $imax_min >= @{$rfield_lengths}
2235 21 50 33     43 || $imax_min >= @{$rfield_lengths_m} );
  21         90  
2236              
2237 21         57 my $lensum_m = 0;
2238 21         40 my $lensum = 0;
2239 21         56 foreach my $i ( 0 .. $imax_min ) {
2240 54         65 $lensum_m += $rfield_lengths_m->[$i];
2241 54         114 $lensum += $rfield_lengths->[$i];
2242             }
2243              
2244 21 100       91 my ( $lenmin, $lenmax ) =
2245             $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
2246              
2247 21         52 my $patterns_match;
2248 21 50 66     124 if ( $line_m->{'list_type'} && $line->{'list_type'} ) {
2249 17         33 $patterns_match = 1;
2250 17         40 my $rpatterns_m = $line_m->{'rpatterns'};
2251 17         32 my $rpatterns = $line->{'rpatterns'};
2252 17         44 foreach my $i ( 0 .. $imax_min ) {
2253 47         67 my $pat = $rpatterns->[$i];
2254 47         82 my $pat_m = $rpatterns_m->[$i];
2255              
2256             # VSN PATCH: allow numbers to match quotes
2257 47 50 66     105 if ( $pat_m ne $pat && length($pat_m) eq length($pat) ) {
2258 0         0 $pat =~ tr/n/Q/;
2259 0         0 $pat_m =~ tr/n/Q/;
2260             }
2261              
2262 47 100       100 if ( $pat ne $pat_m ) { $patterns_match = 0; last; }
  2         4  
  2         6  
2263             }
2264             }
2265              
2266 21         43 my $pad_max = $lenmax;
2267 21 50 66     108 if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
  0         0  
2268              
2269 21         46 return $pad_max;
2270             } ## end sub two_line_pad
2271              
2272             sub sweep_left_to_right {
2273              
2274 299     299 0 707 my ( $rlines, $rgroups, $group_level ) = @_;
2275              
2276             # This is the second of two major sweeps to find alignments.
2277             # The other is sweep_top_down.
2278              
2279             # Given:
2280             # $rlines = ref to hash of lines in this main alignment group
2281             # $rgroups = ref to hash of subgroups
2282             # $group_level = common indentation level of these lines
2283             # Task:
2284             # add leading alignments where possible
2285              
2286             # So far we have divided the lines into groups having an equal number of
2287             # identical alignments. Here we are going to look for common leading
2288             # alignments between the different groups and align them when possible.
2289              
2290             # For example, the three lines below are in three groups because each line
2291             # has a different number of commas. In this routine we will sweep from
2292             # left to right, aligning the leading commas as we go, but stopping if we
2293             # hit the line length limit.
2294              
2295             # my ( $num, $numi, $numj, $xyza, $ka, $xyzb, $kb, $aff, $error );
2296             # my ( $i, $j, $error, $aff, $asum, $avec );
2297             # my ( $km, $area, $varea );
2298              
2299             # nothing to do if just one group
2300 299         423 my $ng_max = @{$rgroups} - 1;
  299         552  
2301 299 50       745 return if ( $ng_max <= 0 );
2302              
2303             #---------------------------------------------------------------------
2304             # Step 1: Loop over groups to find all common leading alignment tokens
2305             #---------------------------------------------------------------------
2306              
2307 299         4671 my $line;
2308             my $rtokens;
2309 299         0 my $imax; # index of maximum non-side-comment alignment token
2310 299         0 my $istop; # an optional stopping index
2311 299         0 my $jbeg; # starting line index
2312 299         0 my $jend; # ending line index
2313              
2314 299         0 my $line_m;
2315 299         0 my $rtokens_m;
2316 299         0 my $imax_m;
2317 299         0 my $istop_m;
2318 299         0 my $jbeg_m;
2319 299         0 my $jend_m;
2320              
2321 299         0 my $istop_mm;
2322              
2323             # Look at neighboring pairs of groups and form a simple list
2324             # of all common leading alignment tokens. Foreach such match we
2325             # store [$i, $ng], where
2326             # $i = index of the token in the line (0,1,...)
2327             # $ng is the second of the two groups with this common token
2328 299         0 my @icommon;
2329              
2330             # Hash to hold the maximum alignment change for any group
2331 299         0 my %max_move;
2332              
2333             # a small number of columns
2334 299         390 my $short_pad = 4;
2335              
2336 299         513 my $ng = -1;
2337 299         435 foreach my $item ( @{$rgroups} ) {
  299         623  
2338 744         911 $ng++;
2339              
2340 744         915 $istop_mm = $istop_m;
2341              
2342             # save _m values of previous group
2343 744         844 $line_m = $line;
2344 744         931 $rtokens_m = $rtokens;
2345 744         880 $imax_m = $imax;
2346 744         870 $istop_m = $istop;
2347 744         893 $jbeg_m = $jbeg;
2348 744         885 $jend_m = $jend;
2349              
2350             # Get values for this group. Note that we just have to use values for
2351             # one of the lines of the group since all members have the same
2352             # alignments.
2353 744         890 ( $jbeg, $jend, $istop ) = @{$item};
  744         1224  
2354              
2355 744         1045 $line = $rlines->[$jbeg];
2356 744         1037 $rtokens = $line->{'rtokens'};
2357 744         1066 $imax = $line->{'jmax'} - 2;
2358 744 50       1347 $istop = -1 if ( !defined($istop) );
2359 744 50       1188 $istop = $imax if ( $istop > $imax );
2360              
2361             # Initialize on first group
2362 744 100       1444 next if ( $ng == 0 );
2363              
2364             # Use the minimum index limit of the two groups
2365 445 100       849 my $imax_min = $imax > $imax_m ? $imax_m : $imax;
2366              
2367             # Also impose a limit if given.
2368 445 100       946 if ( $istop_m < $imax_min ) {
2369 65         109 $imax_min = $istop_m;
2370             }
2371              
2372             # Special treatment of two one-line groups isolated from other lines,
2373             # unless they form a simple list or a terminal match. Otherwise the
2374             # alignment can look strange in some cases.
2375 445         888 my $list_type = $rlines->[$jbeg]->{'list_type'};
2376 445 100 100     4076 if (
      100        
      100        
      100        
      100        
      100        
      100        
      100        
2377             $jend == $jbeg
2378             && $jend_m == $jbeg_m
2379             && ( $ng == 1 || $istop_mm < 0 )
2380             && ( $ng == $ng_max || $istop < 0 )
2381             && !$line->{'j_terminal_match'}
2382              
2383             # Only do this for imperfect matches. This is normally true except
2384             # when two perfect matches cannot form a group because the line
2385             # length limit would be exceeded. In that case we can still try
2386             # to match as many alignments as possible.
2387             && ( $imax != $imax_m || $istop_m != $imax_m )
2388             )
2389             {
2390              
2391             # We will just align assignments and simple lists
2392 79 100       268 next if ( $imax_min < 0 );
2393             next
2394 26 100 100     208 if ( $rtokens->[0] !~ /^=\d/
2395             && !$list_type );
2396              
2397             # In this case we will limit padding to a short distance. This
2398             # is a compromise to keep some vertical alignment but prevent large
2399             # gaps, which do not look good for just two lines.
2400 21         85 my $pad_max =
2401             two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
2402 21 50       53 next if ( !$pad_max );
2403 21         37 my $ng_m = $ng - 1;
2404 21         1214 $max_move{"$ng_m"} = $pad_max;
2405 21         74 $max_move{"$ng"} = $pad_max;
2406             }
2407              
2408             # Loop to find all common leading tokens.
2409 387 100       980 if ( $imax_min >= 0 ) {
2410 97         194 foreach my $i ( 0 .. $imax_min ) {
2411 174         257 my $tok = $rtokens->[$i];
2412 174         255 my $tok_m = $rtokens_m->[$i];
2413 174 50       322 last if ( $tok ne $tok_m );
2414 174         449 push @icommon, [ $i, $ng, $tok ];
2415             }
2416             }
2417             }
2418 299 100       989 return unless (@icommon);
2419              
2420             #----------------------------------------------------------
2421             # Step 2: Reorder and consolidate the list into a task list
2422             #----------------------------------------------------------
2423              
2424             # We have to work first from lowest token index to highest, then by group,
2425             # sort our list first on token index then group number
2426 74 50       317 @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
  199         433  
2427              
2428             # Make a task list of the form
2429             # [$i, ng_beg, $ng_end, $tok], ..
2430             # where
2431             # $i is the index of the token to be aligned
2432             # $ng_beg..$ng_end is the group range for this action
2433 74         120 my @todo;
2434 74         143 my ( $i, $ng_end, $tok );
2435 74         149 foreach my $item (@icommon) {
2436 174         225 my $ng_last = $ng_end;
2437 174         227 my $i_last = $i;
2438 174         212 ( $i, $ng_end, $tok ) = @{$item};
  174         292  
2439 174         227 my $ng_beg = $ng_end - 1;
2440 174 100 100     574 if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
      66        
2441 38         48 my $var = pop @todo;
2442 38         64 $ng_beg = $var->[1];
2443             }
2444 174         332 my ( $raw_tok, $lev, $tag_uu, $tok_count_uu ) =
2445             decode_alignment_token($tok);
2446 174         445 push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
2447             }
2448              
2449             #------------------------------
2450             # Step 3: Execute the task list
2451             #------------------------------
2452             tap_dancer(
2453             {
2454 74         737 rlines => $rlines,
2455             rgroups => $rgroups,
2456             rtodo => \@todo,
2457             rmax_move => \%max_move,
2458             short_pad => $short_pad,
2459             group_level => $group_level,
2460             }
2461             );
2462 74         371 return;
2463             } ## end sub sweep_left_to_right
2464              
2465             { ## closure for sub tap_dancer
2466              
2467             my %is_good_alignment_token;
2468              
2469             BEGIN {
2470              
2471             # One of the most difficult aspects of vertical alignment is knowing
2472             # when not to align. Alignment can go from looking very nice to very
2473             # bad when overdone. In the sweep algorithm there are two special
2474             # cases where we may need to limit padding to a '$short_pad' distance
2475             # to avoid some very ugly formatting:
2476              
2477             # 1. Two isolated lines with partial alignment
2478             # 2. A 'tail-wag-dog' situation, in which a single terminal
2479             # line with partial alignment could cause a significant pad
2480             # increase in many previous lines if allowed to join the alignment.
2481              
2482             # For most alignment tokens, we will allow only a small pad to be
2483             # introduced (the hardwired $short_pad variable) . But for some 'good'
2484             # alignments we can be less restrictive.
2485              
2486             # The hash values are set so that:
2487             # if ($is_good_alignment_token{$raw_tok}) => best
2488             # if defined ($is_good_alignment_token{$raw_tok}) => good or best
2489              
2490             # Start by defining these 'good' alignments, which are allowed more
2491             # padding (so note the '0' hash value here):
2492             $is_good_alignment_token{$_} = 0
2493 44     44   528 for ( COMMA, qw# => = ? if unless or || { # );
2494              
2495             # Promote a few of these to 'best', with essentially no pad limit:
2496 44         98 $is_good_alignment_token{'='} = 1;
2497 44         95 $is_good_alignment_token{'if'} = 1;
2498 44         106 $is_good_alignment_token{'unless'} = 1;
2499 44         36174 $is_good_alignment_token{'=>'} = 1;
2500              
2501             } ## end BEGIN
2502              
2503             sub move_to_common_column {
2504              
2505 139     139 0 249 my ($rcall_hash) = @_;
2506              
2507             # This is a sub called by sub tap_dancer to
2508             # move the alignment column of token $itok to $col_want for a
2509             # sequence of groups.
2510              
2511 139         220 my $rlines = $rcall_hash->{rlines};
2512 139         198 my $rgroups = $rcall_hash->{rgroups};
2513 139         178 my $rmax_move = $rcall_hash->{rmax_move};
2514 139         185 my $ngb = $rcall_hash->{ngb};
2515 139         213 my $nge = $rcall_hash->{nge};
2516 139         193 my $itok = $rcall_hash->{itok};
2517 139         188 my $col_want = $rcall_hash->{col_want};
2518 139         201 my $raw_tok = $rcall_hash->{raw_tok};
2519              
2520 139 100 66     496 return if ( !defined($ngb) || $nge <= $ngb );
2521 127         252 foreach my $ng ( $ngb .. $nge ) {
2522              
2523 289         332 my ( $jbeg, $jend_uu ) = @{ $rgroups->[$ng] };
  289         510  
2524 289         352 my $line = $rlines->[$jbeg];
2525 289         495 my $col = $line->get_column($itok);
2526 289         350 my $move = $col_want - $col;
2527 289 100       662 if ( $move > 0 ) {
    50          
2528              
2529             # limit padding increase in isolated two lines
2530             next
2531             if ( defined( $rmax_move->{$ng} )
2532             && $move > $rmax_move->{$ng}
2533 92 50 66     419 && !$is_good_alignment_token{$raw_tok} );
      33        
2534              
2535 92         208 $line->increase_field_width( $itok, $move );
2536             }
2537             elsif ( $move < 0 ) {
2538              
2539             # spot to take special action on failure to move
2540             }
2541             else {
2542             ##ok: (move==0)
2543             }
2544             }
2545 127         538 return;
2546             } ## end sub move_to_common_column
2547              
2548             sub tap_dancer {
2549              
2550 74     74 0 173 my ($rcall_hash) = @_;
2551              
2552             # This is the worker routine for sub 'sweep_left_to_right'. It makes
2553             # vertical alignments as it sweeps from left to right over groups
2554             # of lines which have been located and prepared by the caller.
2555              
2556 74         158 my $rlines = $rcall_hash->{rlines};
2557 74         117 my $rgroups = $rcall_hash->{rgroups};
2558 74         143 my $rtodo = $rcall_hash->{rtodo};
2559 74         133 my $rmax_move = $rcall_hash->{rmax_move};
2560 74         125 my $short_pad = $rcall_hash->{short_pad};
2561 74         144 my $group_level = $rcall_hash->{group_level};
2562              
2563             # $blocking_level[$nj is the level at a match failure between groups
2564             # $ng-1 and $ng
2565 74         111 my @blocking_level;
2566 74         161 my $group_list_type = $rlines->[0]->{'list_type'};
2567              
2568 74         134 foreach my $task ( @{$rtodo} ) {
  74         151  
2569 136         208 my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
  136         320  
2570              
2571             # Nothing to do for a single group
2572 136 50       340 next if ( $ng_end <= $ng_beg );
2573              
2574 136         280 my $ng_first; # index of the first group of a continuous sequence
2575             my $col_want; # the common alignment column of a sequence of groups
2576 136         0 my $col_limit; # maximum column before bumping into max line length
2577 136         197 my $line_count_ng_m = 0;
2578 136         174 my $jmax_m;
2579             my $it_stop_m;
2580              
2581             # Loop over the groups
2582             # 'ix_' = index in the array of lines
2583             # 'ng_' = index in the array of groups
2584             # 'it_' = index in the array of tokens
2585 136         212 my $ix_min = $rgroups->[$ng_beg]->[0];
2586 136         190 my $ix_max = $rgroups->[$ng_end]->[1];
2587 136         221 my $lines_total = $ix_max - $ix_min + 1;
2588 136         259 foreach my $ng ( $ng_beg .. $ng_end ) {
2589 310         350 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
  310         472  
2590 310         397 my $line_count_ng = $ix_end - $ix_beg + 1;
2591              
2592             # Important: note that since all lines in a group have a common
2593             # alignments object, we just have to work on one of the lines
2594             # (the first line). All of the rest will be changed
2595             # automatically.
2596 310         358 my $line = $rlines->[$ix_beg];
2597 310         410 my $jmax = $line->{'jmax'};
2598              
2599             # the maximum space without exceeding the line length:
2600 310         699 my $avail = $line->get_available_space_on_right();
2601 310         567 my $col = $line->get_column($itok);
2602 310         387 my $col_max = $col + $avail;
2603              
2604             # Initialize on first group
2605 310 100       532 if ( !defined($col_want) ) {
2606 136         175 $ng_first = $ng;
2607 136         171 $col_want = $col;
2608 136         229 $col_limit = $col_max;
2609 136         178 $line_count_ng_m = $line_count_ng;
2610 136         158 $jmax_m = $jmax;
2611 136         181 $it_stop_m = $it_stop;
2612 136         231 next;
2613             }
2614              
2615             # RULE: Throw a blocking flag upon encountering a token level
2616             # different from the level of the first blocking token. For
2617             # example, in the following example, if the = matches get
2618             # blocked between two groups as shown, then we want to start
2619             # blocking matches at the commas, which are at deeper level, so
2620             # that we do not get the big gaps shown here:
2621              
2622             # my $unknown3 = pack( "v", -2 );
2623             # my $unknown4 = pack( "v", 0x09 );
2624             # my $unknown5 = pack( "VVV", 0x06, 0x00, 0x00 );
2625             # my $num_bbd_blocks = pack( "V", $num_lists );
2626             # my $root_startblock = pack( "V", $root_start );
2627             # my $unknown6 = pack( "VV", 0x00, 0x1000 );
2628              
2629             # On the other hand, it is okay to keep matching at the same
2630             # level such as in a simple list of commas and/or fat commas.
2631              
2632 174   66     380 my $is_blocked = defined( $blocking_level[$ng] )
2633             && $lev > $blocking_level[$ng];
2634              
2635             # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrome, meaning:
2636             # Do not let one or two lines with a **different number of
2637             # alignments** open up a big gap in a large block. For
2638             # example, we will prevent something like this, where the first
2639             # line pries open the rest:
2640              
2641             # $worksheet->write( "B7", "http://www.perl.com", undef, $format );
2642             # $worksheet->write( "C7", "", $format );
2643             # $worksheet->write( "D7", "", $format );
2644             # $worksheet->write( "D8", "", $format );
2645             # $worksheet->write( "D8", "", $format );
2646              
2647             # We should exclude from consideration two groups which are
2648             # effectively the same but separated because one does not
2649             # fit in the maximum allowed line length.
2650 174   100     410 my $is_same_group =
2651             $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
2652              
2653 174         225 my $lines_above = $ix_beg - $ix_min;
2654 174         210 my $lines_below = $lines_total - $lines_above;
2655              
2656             # Increase the tolerable gap for certain favorable factors
2657 174         204 my $factor = 1;
2658 174         253 my $top_level = $lev == $group_level;
2659              
2660             # Align best top level alignment tokens like '=', 'if', ...
2661             # A factor of 10 allows a gap of up to 40 spaces
2662 174 100 100     564 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
2663 56         75 $factor = 10;
2664             }
2665              
2666             # Otherwise allow some minimal padding of good alignments
2667             else {
2668              
2669 118 100 100     600 if (
      100        
2670              
2671             defined( $is_good_alignment_token{$raw_tok} )
2672              
2673             # We have to be careful if there are just 2 lines.
2674             # This two-line factor allows large gaps only for 2
2675             # lines which are simple lists with fewer items on the
2676             # second line. It gives results similar to previous
2677             # versions of perltidy.
2678             && (
2679             $lines_total > 2
2680             || ( $group_list_type
2681             && $jmax < $jmax_m
2682             && $top_level )
2683             )
2684             )
2685             {
2686 106         128 $factor += 1;
2687 106 100       198 if ($top_level) {
2688 70         99 $factor += 1;
2689             }
2690             }
2691             }
2692              
2693 174         213 my $is_big_gap;
2694 174 100       325 if ( !$is_same_group ) {
2695 145   66     860 $is_big_gap ||=
      33        
2696             ( $lines_above == 1
2697             || $lines_above == 2 && $lines_below >= 4 )
2698             && $col_want > $col + $short_pad * $factor;
2699 145   66     749 $is_big_gap ||=
      33        
2700             ( $lines_below == 1
2701             || $lines_below == 2 && $lines_above >= 4 )
2702             && $col > $col_want + $short_pad * $factor;
2703             }
2704              
2705             # if match is limited by gap size, stop aligning at this level
2706 174 50       370 if ($is_big_gap) {
2707 0         0 $blocking_level[$ng] = $lev - 1;
2708             }
2709              
2710             # quit and restart if it cannot join this batch
2711 174 50 100     914 if ( $col_want > $col_max
      66        
      66        
2712             || $col > $col_limit
2713             || $is_big_gap
2714             || $is_blocked )
2715             {
2716              
2717             # remember the level of the first blocking token
2718 12 100       56 if ( !defined( $blocking_level[$ng] ) ) {
2719 10         27 $blocking_level[$ng] = $lev;
2720             }
2721              
2722             move_to_common_column(
2723             {
2724 12         89 rlines => $rlines,
2725             rgroups => $rgroups,
2726             rmax_move => $rmax_move,
2727             ngb => $ng_first,
2728             nge => $ng - 1,
2729             itok => $itok,
2730             col_want => $col_want,
2731             raw_tok => $raw_tok,
2732             }
2733             );
2734 12         36 $ng_first = $ng;
2735 12         18 $col_want = $col;
2736 12         16 $col_limit = $col_max;
2737 12         22 $line_count_ng_m = $line_count_ng;
2738 12         32 $jmax_m = $jmax;
2739 12         15 $it_stop_m = $it_stop;
2740 12         32 next;
2741             }
2742              
2743 162         207 $line_count_ng_m += $line_count_ng;
2744              
2745             # update the common column and limit
2746 162 100       294 if ( $col > $col_want ) { $col_want = $col }
  49         67  
2747 162 100       359 if ( $col_max < $col_limit ) { $col_limit = $col_max }
  47         131  
2748              
2749             } ## end loop over groups
2750              
2751 136 100       315 if ( $ng_end > $ng_first ) {
2752 127         828 move_to_common_column(
2753             {
2754             rlines => $rlines,
2755             rgroups => $rgroups,
2756             rmax_move => $rmax_move,
2757             ngb => $ng_first,
2758             nge => $ng_end,
2759             itok => $itok,
2760             col_want => $col_want,
2761             raw_tok => $raw_tok,
2762             }
2763             );
2764             }
2765             } ## end loop over tasks
2766              
2767 74         171 return;
2768             } ## end sub tap_dancer
2769             }
2770              
2771             sub delete_selected_tokens {
2772              
2773 539     539 0 1068 my ( $line_obj, $ridel ) = @_;
2774              
2775             # Given:
2776             # $line_obj = the line to be modified
2777             # $ridel = a ref to list of indexes to be deleted
2778              
2779             # remove unused alignment token(s) to improve alignment chances
2780              
2781 539 50 33     2118 return if ( !defined($line_obj) || !defined($ridel) || !@{$ridel} );
  539   33     1343  
2782              
2783 539         978 my $jmax_old = $line_obj->{'jmax'};
2784 539         894 my $rfields_old = $line_obj->{'rfields'};
2785 539         964 my $rfield_lengths_old = $line_obj->{'rfield_lengths'};
2786 539         829 my $rpatterns_old = $line_obj->{'rpatterns'};
2787 539         937 my $rtokens_old = $line_obj->{'rtokens'};
2788 539         859 my $j_terminal_match = $line_obj->{'j_terminal_match'};
2789              
2790 44     44   338 use constant EXPLAIN_DELETE_SELECTED => 0;
  44         93  
  44         118488  
2791              
2792 539         1166 local $LIST_SEPARATOR = '> <';
2793 539         647 EXPLAIN_DELETE_SELECTED && print <<EOM;
2794             delete indexes: <@{$ridel}>
2795             old jmax: $jmax_old
2796             old tokens: <@{$rtokens_old}>
2797             old patterns: <@{$rpatterns_old}>
2798             old fields: <@{$rfields_old}>
2799             old field_lengths: <@{$rfield_lengths_old}>
2800             EOM
2801              
2802 539         820 my $rfields_new = [];
2803 539         745 my $rpatterns_new = [];
2804 539         779 my $rtokens_new = [];
2805 539         1863 my $rfield_lengths_new = [];
2806              
2807             # Convert deletion list to a hash to allow any order, multiple entries,
2808             # and avoid problems with index values out of range
2809 539         748 my %delete_me = map { $_ => 1 } @{$ridel};
  881         2546  
  539         1027  
2810              
2811 539         1147 my $pattern_0 = $rpatterns_old->[0];
2812 539         928 my $field_0 = $rfields_old->[0];
2813 539         884 my $field_length_0 = $rfield_lengths_old->[0];
2814 539         714 push @{$rfields_new}, $field_0;
  539         1030  
2815 539         775 push @{$rfield_lengths_new}, $field_length_0;
  539         931  
2816 539         781 push @{$rpatterns_new}, $pattern_0;
  539         981  
2817              
2818             # Loop to either copy items or concatenate fields and patterns
2819 539         781 my $jmin_del;
2820 539         1173 foreach my $j ( 0 .. $jmax_old - 1 ) {
2821 1727         2279 my $token = $rtokens_old->[$j];
2822 1727         2542 my $field = $rfields_old->[ $j + 1 ];
2823 1727         2073 my $field_length = $rfield_lengths_old->[ $j + 1 ];
2824 1727         2323 my $pattern = $rpatterns_old->[ $j + 1 ];
2825 1727 100       2991 if ( !$delete_me{$j} ) {
2826 846         1011 push @{$rtokens_new}, $token;
  846         1419  
2827 846         1050 push @{$rfields_new}, $field;
  846         1322  
2828 846         1073 push @{$rpatterns_new}, $pattern;
  846         1285  
2829 846         1107 push @{$rfield_lengths_new}, $field_length;
  846         1510  
2830             }
2831             else {
2832 881 100       1851 if ( !defined($jmin_del) ) { $jmin_del = $j }
  539         808  
2833 881         1699 $rfields_new->[-1] .= $field;
2834 881         1148 $rfield_lengths_new->[-1] += $field_length;
2835 881         1524 $rpatterns_new->[-1] .= $pattern;
2836             }
2837             }
2838              
2839             # ----- x ------ x ------ x ------
2840             #t 0 1 2 <- token indexing
2841             #f 0 1 2 3 <- field and pattern
2842              
2843 539         785 my $jmax_new = @{$rfields_new} - 1;
  539         884  
2844 539         949 $line_obj->{'rtokens'} = $rtokens_new;
2845 539         867 $line_obj->{'rpatterns'} = $rpatterns_new;
2846 539         803 $line_obj->{'rfields'} = $rfields_new;
2847 539         794 $line_obj->{'rfield_lengths'} = $rfield_lengths_new;
2848 539         762 $line_obj->{'jmax'} = $jmax_new;
2849              
2850             # The value of j_terminal_match will be incorrect if we delete tokens prior
2851             # to it. We will have to give up on aligning the terminal tokens if this
2852             # happens.
2853 539 100 100     1378 if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
2854 1         2 $line_obj->{'j_terminal_match'} = undef;
2855             }
2856              
2857             # update list type -
2858 539 100       1294 if ( $line_obj->{'list_seqno'} ) {
2859              
2860             ## This works, but for efficiency see if we need to make a change:
2861             ## decide_if_list($line_obj);
2862              
2863             # An existing list will still be a list but with possibly different
2864             # leading token
2865 76         176 my $old_list_type = $line_obj->{'list_type'};
2866 76         122 my $new_list_type = EMPTY_STRING;
2867 76 100       412 if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
2868 49         89 $new_list_type = $rtokens_new->[0];
2869             }
2870 76 100 100     332 if ( !$old_list_type || $old_list_type ne $new_list_type ) {
2871 44         148 decide_if_list($line_obj);
2872             }
2873             }
2874              
2875 539         705 EXPLAIN_DELETE_SELECTED && print <<EOM;
2876              
2877             new jmax: $jmax_new
2878             new tokens: <@{$rtokens_new}>
2879             new patterns: <@{$rpatterns_new}>
2880             new fields: <@{$rfields_new}>
2881             EOM
2882 539         2939 return;
2883             } ## end sub delete_selected_tokens
2884              
2885             { ## closure for sub decode_alignment_token
2886              
2887             # This routine is called repeatedly for each token, so it needs to be
2888             # efficient. We can speed things up by remembering the inputs and outputs
2889             # in a hash.
2890             my %decoded_token;
2891              
2892             sub initialize_decode {
2893              
2894             # We will re-initialize the hash for each file. Otherwise, there is
2895             # a danger that the hash can become arbitrarily large if a very large
2896             # number of files is processed at once.
2897 648     648 0 4555 %decoded_token = ();
2898 648         1016 return;
2899             } ## end sub initialize_decode
2900              
2901             sub decode_alignment_token {
2902              
2903 10871     10871 0 15128 my ($tok) = @_;
2904              
2905             # Unpack the values packed in an alignment token
2906              
2907             # Given:
2908             # $tok = an alignment token
2909             # Returns:
2910             # ( $raw_tok, $lev, $tag, $tok_count )
2911             #
2912             # Usage:
2913             # my ( $raw_tok, $lev, $tag, $tok_count ) =
2914             # decode_alignment_token($token);
2915              
2916             # Alignment tokens have a trailing decimal level and optional tag (for
2917             # commas):
2918             # For example, the first comma in the following line
2919             # sub banner { crlf; report( shift, '/', shift ); crlf }
2920             # is decorated as follows:
2921             # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6)
2922              
2923             # An optional token count may be appended with a leading dot.
2924             # Currently this is only done for '=' tokens but this could change.
2925             # For example, consider the following line:
2926             # $nport = $port = shift || $name;
2927             # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
2928             # The second '=' will be '=0.2' [level 0, second equals]
2929              
2930 10871 100       17891 if ( defined( $decoded_token{$tok} ) ) {
2931 9205         9525 return @{ $decoded_token{$tok} };
  9205         27360  
2932             }
2933              
2934 1666         3337 my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, EMPTY_STRING, 1 );
2935 1666 100       9928 if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
2936 1308         3650 $raw_tok = $1;
2937 1308         2290 $lev = $2;
2938 1308 100       3512 $tag = $3 if ($3);
2939 1308 100       3463 $tok_count = $5 if ($5);
2940             }
2941 1666         5035 my @vals = ( $raw_tok, $lev, $tag, $tok_count );
2942 1666         3685 $decoded_token{$tok} = \@vals;
2943 1666         6065 return @vals;
2944             } ## end sub decode_alignment_token
2945             }
2946              
2947             sub delete_unmatched_tokens {
2948              
2949 2017     2017 0 3919 my ( $rlines, $group_level ) = @_;
2950              
2951             # Remove as many obviously un-needed alignment tokens as possible.
2952             # This will prevent them from interfering with the final alignment.
2953              
2954             # Given:
2955             # $rlines = ref to hash of all lines in this alignment group
2956             # $group_level = their comment indentation level
2957              
2958             # Return:
2959 2017         2774 my $max_lev_diff = 0; # used to avoid a call to prune_tree
2960 2017         2767 my $saw_side_comment = 0; # used to avoid a call for side comments
2961 2017         2686 my $saw_signed_number = 0; # used to avoid a call for -vsn
2962              
2963             # Handle no lines -- shouldn't happen
2964 2017 50       2551 return unless ( @{$rlines} );
  2017         4153  
2965              
2966             # Handle a single line
2967 2017 100       2852 if ( @{$rlines} == 1 ) {
  2017         4534  
2968 1343         2216 my $line = $rlines->[0];
2969 1343         2823 my $jmax = $line->{'jmax'};
2970 1343         2389 my $length = $line->{'rfield_lengths'}->[$jmax];
2971 1343         2201 $saw_side_comment = $length > 0;
2972 1343         4659 return ( $max_lev_diff, $saw_side_comment, $saw_signed_number );
2973             }
2974              
2975             # ignore hanging side comments in these operations
2976 674         1218 my @filtered = grep { !$_->{'is_hanging_side_comment'} } @{$rlines};
  2246         5480  
  674         1796  
2977 674         1385 my $rnew_lines = \@filtered;
2978              
2979 674         1141 $saw_side_comment = @filtered != @{$rlines};
  674         1306  
2980 674         3384 $max_lev_diff = 0;
2981              
2982             # nothing to do if all lines were hanging side comments
2983 674         993 my $jmax = @{$rnew_lines} - 1;
  674         1279  
2984 674 100       1689 return ( $max_lev_diff, $saw_side_comment, $saw_signed_number )
2985             if ( $jmax < 0 );
2986              
2987             #----------------------------------------------------
2988             # Create a hash of alignment token info for each line
2989             #----------------------------------------------------
2990 673         1978 ( my $rline_hashes, my $requals_info, $saw_side_comment, $max_lev_diff ) =
2991             make_alignment_info( $group_level, $rnew_lines, $saw_side_comment );
2992              
2993             #------------------------------------------------------------
2994             # Find independent subgroups of lines. Neighboring subgroups
2995             # do not have a common alignment token.
2996             #------------------------------------------------------------
2997 673         1097 my @subgroups;
2998 673         1536 push @subgroups, [ 0, $jmax ];
2999 673         1527 foreach my $jl ( 0 .. $jmax - 1 ) {
3000 1525 100       3215 if ( $rnew_lines->[$jl]->{'end_group'} ) {
3001 79         168 $subgroups[-1]->[1] = $jl;
3002 79         229 push @subgroups, [ $jl + 1, $jmax ];
3003             }
3004             }
3005              
3006             #-----------------------------------------------------------
3007             # PASS 1 over subgroups to remove unmatched alignment tokens
3008             #-----------------------------------------------------------
3009             delete_unmatched_tokens_main_loop(
3010 673         2521 $group_level, $rnew_lines, \@subgroups,
3011             $rline_hashes, $requals_info
3012             );
3013              
3014             #----------------------------------------------------------------
3015             # PASS 2: Construct a tree of matched lines and delete some small
3016             # deeper levels of tokens. They also block good alignments.
3017             #----------------------------------------------------------------
3018 673 100       2207 prune_alignment_tree($rnew_lines) if ($max_lev_diff);
3019              
3020             #--------------------------------------------
3021             # PASS 3: compare all lines for common tokens
3022             #--------------------------------------------
3023 673         2389 $saw_signed_number =
3024             match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
3025              
3026 673         7334 return ( $max_lev_diff, $saw_side_comment, $saw_signed_number );
3027             } ## end sub delete_unmatched_tokens
3028              
3029             sub make_alignment_info {
3030              
3031 673     673 0 1549 my ( $group_level, $rnew_lines, $saw_side_comment ) = @_;
3032              
3033             # Create a hash of alignment token info for each line
3034             # This info will be used to find common alignments
3035              
3036             # Given:
3037             # $group_level = common indentation level
3038             # $rnew_lines = ref to hash of line info
3039             # $saw_side_comment = true if there is a side comment
3040             # Return:
3041             # $rline_hashes = ref to hash with new line vars
3042             # \@equals_info = ref to array with info on any '=' tokens
3043             # $saw_side_comment = updated side comment flag
3044             # $max_lev_diff = maximum level change seen
3045              
3046             #----------------
3047             # Loop over lines
3048             #----------------
3049 673         1058 my $rline_hashes = [];
3050 673         1037 my @equals_info;
3051 673         920 my $jmax = @{$rnew_lines} - 1;
  673         1152  
3052 673         1194 my $max_lev_diff = 0;
3053 673         969 foreach my $line ( @{$rnew_lines} ) {
  673         1435  
3054 2198         2876 my $rhash = {};
3055 2198         3486 my $rtokens = $line->{'rtokens'};
3056 2198         3042 my $rpatterns = $line->{'rpatterns'};
3057 2198         2559 my $i = 0;
3058 2198         4177 my ( $i_eq, $tok_eq, $pat_eq );
3059 2198         0 my ( $lev_min, $lev_max );
3060 2198         2564 foreach my $tok ( @{$rtokens} ) {
  2198         3308  
3061 6054         8579 my ( $raw_tok, $lev, $tag, $tok_count ) =
3062             decode_alignment_token($tok);
3063              
3064 6054 100       9408 if ( $tok ne '#' ) {
3065 3856 100       5444 if ( !defined($lev_min) ) {
3066 2030         2451 $lev_min = $lev;
3067 2030         2698 $lev_max = $lev;
3068             }
3069             else {
3070 1826 100       3098 if ( $lev < $lev_min ) { $lev_min = $lev }
  84         170  
3071 1826 100       2887 if ( $lev > $lev_max ) { $lev_max = $lev }
  294         436  
3072             }
3073             }
3074             else {
3075 2198 100       3750 if ( !$saw_side_comment ) {
3076 1991         3462 my $length = $line->{'rfield_lengths'}->[ $i + 1 ];
3077 1991   66     4831 $saw_side_comment ||= $length;
3078             }
3079             }
3080              
3081             # Possible future upgrade: for multiple matches,
3082             # record [$i1, $i2, ..] instead of $i
3083 6054         15749 $rhash->{$tok} =
3084             [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
3085              
3086             # remember the first equals at line level
3087 6054 100 100     13778 if ( !defined($i_eq) && $raw_tok eq '=' ) {
3088              
3089 629 100       1276 if ( $lev eq $group_level ) {
3090 512         666 $i_eq = $i;
3091 512         619 $tok_eq = $tok;
3092 512         883 $pat_eq = $rpatterns->[$i];
3093             }
3094             }
3095 6054         7990 $i++;
3096             }
3097 2198         2690 push @{$rline_hashes}, $rhash;
  2198         3388  
3098 2198         5302 push @equals_info, [ $i_eq, $tok_eq, $pat_eq ];
3099 2198 100       3630 if ( defined($lev_min) ) {
3100 2030         2938 my $lev_diff = $lev_max - $lev_min;
3101 2030 100       4183 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
  179         423  
3102             }
3103             }
3104              
3105             #----------------------------------------------------
3106             # Loop to compare each line pair and remember matches
3107             #----------------------------------------------------
3108 673         1121 my $rtok_hash = {};
3109 673         1028 my $nr = 0;
3110 673         1780 foreach my $jl ( 0 .. $jmax - 1 ) {
3111 1525         1969 my $nl = $nr;
3112 1525         1856 $nr = 0;
3113 1525         1919 my $jr = $jl + 1;
3114 1525         2052 my $rhash_l = $rline_hashes->[$jl];
3115 1525         2005 my $rhash_r = $rline_hashes->[$jr];
3116 1525         1799 foreach my $tok ( keys %{$rhash_l} ) {
  1525         4044  
3117 3646 100       5845 if ( defined( $rhash_r->{$tok} ) ) {
3118 3087         3709 my $il = $rhash_l->{$tok}->[0];
3119 3087         3603 my $ir = $rhash_r->{$tok}->[0];
3120 3087         3700 $rhash_l->{$tok}->[2] = $ir;
3121 3087         3546 $rhash_r->{$tok}->[1] = $il;
3122 3087 100       5110 if ( $tok ne '#' ) {
3123 1562         1784 push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
  1562         3363  
3124 1562         2248 $nr++;
3125             }
3126             }
3127             }
3128              
3129             # Set a line break if no matching tokens between these lines
3130             # (this is not strictly necessary now but does not hurt)
3131 1525 100 100     4027 if ( $nr == 0 && $nl > 0 ) {
3132 39         111 $rnew_lines->[$jl]->{'end_group'} = 1;
3133             }
3134              
3135             # Also set a line break if both lines have simple equals but with
3136             # different leading characters in patterns. This check is similar
3137             # to one in sub check_match, and will prevent sub
3138             # prune_alignment_tree from removing alignments which otherwise
3139             # should be kept. This fix is rarely needed, but it can
3140             # occasionally improve formatting.
3141             # For example:
3142             # my $name = $this->{Name};
3143             # $type = $this->ctype($genlooptype) if defined $genlooptype;
3144             # my $declini = ( $asgnonly ? "" : "\t$type *" );
3145             # my $cast = ( $type ? "($type *)" : "" );
3146             # The last two lines start with 'my' and will not match the
3147             # previous line starting with $type, so we do not want
3148             # prune_alignment tree to delete their ? : alignments at a deeper
3149             # level.
3150 1525         1799 my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
  1525         2831  
3151 1525         1875 my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
  1525         2490  
3152 1525 100 100     4242 if ( defined($i_eq_l) && defined($i_eq_r) ) {
3153              
3154             # Also, do not align equals across a change in ci level
3155             my $ci_jump = $rnew_lines->[$jl]->{'ci_level'} !=
3156 264         532 $rnew_lines->[$jr]->{'ci_level'};
3157              
3158 264 100 66     2087 if (
      66        
      100        
      100        
3159             $tok_eq_l eq $tok_eq_r
3160             && $i_eq_l == 0
3161             && $i_eq_r == 0
3162             && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
3163             || $ci_jump )
3164             )
3165             {
3166 13         42 $rnew_lines->[$jl]->{'end_group'} = 1;
3167             }
3168             }
3169             }
3170 673         3142 return ( $rline_hashes, \@equals_info, $saw_side_comment, $max_lev_diff );
3171             } ## end sub make_alignment_info
3172              
3173             sub delete_unmatched_tokens_main_loop {
3174              
3175 673     673 0 1601 my ( $group_level, $rnew_lines, $rsubgroups, $rline_hashes, $requals_info )
3176             = @_;
3177              
3178             #--------------------------------------------------------------
3179             # Main loop over subgroups to remove unmatched alignment tokens
3180             #--------------------------------------------------------------
3181              
3182             # flag to allow skipping pass 2 - not currently used
3183 673         1015 my $saw_large_group;
3184              
3185 673         1434 my $has_terminal_match = $rnew_lines->[-1]->{'j_terminal_match'};
3186              
3187 673         1050 foreach my $item ( @{$rsubgroups} ) {
  673         1223  
3188 752         1058 my ( $jbeg, $jend ) = @{$item};
  752         1470  
3189              
3190 752         1333 my $nlines = $jend - $jbeg + 1;
3191              
3192             #---------------------------------------------------
3193             # Look for complete if/elsif/else and ternary blocks
3194             #---------------------------------------------------
3195              
3196             # We are looking for a common '$dividing_token' like these:
3197              
3198             # if ( $b and $s ) { $p->{'type'} = 'a'; }
3199             # elsif ($b) { $p->{'type'} = 'b'; }
3200             # elsif ($s) { $p->{'type'} = 's'; }
3201             # else { $p->{'type'} = ''; }
3202             # ^----------- dividing_token
3203              
3204             # my $severity =
3205             # !$routine ? '[PFX]'
3206             # : $routine =~ /warn.*_d\z/ ? '[DS]'
3207             # : $routine =~ /ck_warn/ ? 'W'
3208             # : $routine =~ /ckWARN\d*reg_d/ ? 'S'
3209             # : $routine =~ /ckWARN\d*reg/ ? 'W'
3210             # : $routine =~ /vWARN\d/ ? '[WDS]'
3211             # : '[PFX]';
3212             # ^----------- dividing_token
3213              
3214             # Only look for groups which are more than 2 lines long. Two lines
3215             # can get messed up doing this, probably due to the various
3216             # two-line rules.
3217              
3218 752         1345 my $dividing_token;
3219             my %token_line_count;
3220 752 100       2041 if ( $nlines > 2 ) {
3221              
3222 354         802 foreach my $jj ( $jbeg .. $jend ) {
3223 1503         1718 my %seen;
3224 1503         1839 my $line = $rnew_lines->[$jj];
3225 1503         1888 my $rtokens = $line->{'rtokens'};
3226 1503         1658 foreach my $tok ( @{$rtokens} ) {
  1503         1951  
3227 4255 100       6176 if ( !$seen{$tok} ) {
3228 3582         4236 $seen{$tok}++;
3229 3582         5172 $token_line_count{$tok}++;
3230             }
3231             }
3232             }
3233              
3234 354         1005 foreach my $tok ( keys %token_line_count ) {
3235 1096 100       2023 if ( $token_line_count{$tok} == $nlines ) {
3236 657 100 100     2736 if ( substr( $tok, 0, 1 ) eq '?'
      100        
3237             || substr( $tok, 0, 1 ) eq '{' && $tok =~ /^\{\d+if/ )
3238             {
3239 23         48 $dividing_token = $tok;
3240 23         58 last;
3241             }
3242             }
3243             }
3244             }
3245              
3246             #-------------------------------------------------------------
3247             # Loop over subgroup lines to remove unwanted alignment tokens
3248             #-------------------------------------------------------------
3249 752         1855 foreach my $jj ( $jbeg .. $jend ) {
3250 2198         2990 my $line = $rnew_lines->[$jj];
3251 2198         2903 my $rtokens = $line->{'rtokens'};
3252 2198         2841 my $rhash = $rline_hashes->[$jj];
3253 2198         2946 my $i_eq = $requals_info->[$jj]->[0];
3254 2198         2547 my @idel;
3255 2198         2501 my $imax = @{$rtokens} - 2;
  2198         3022  
3256 2198         2727 my $delete_above_level;
3257             my $deleted_assignment_token;
3258              
3259 2198         2778 my $saw_dividing_token = EMPTY_STRING;
3260 2198   100     8069 $saw_large_group ||= $nlines > 2 && $imax > 1;
      100        
3261              
3262             # Loop over all alignment tokens
3263 2198         3369 foreach my $i ( 0 .. $imax ) {
3264 3856         4898 my $tok = $rtokens->[$i];
3265 3856 50       5797 next if ( $tok eq '#' ); # shouldn't happen
3266             my ( $iii_uu, $il, $ir, $raw_tok, $lev, $tag_uu, $tok_count ) =
3267 3856         4078 @{ $rhash->{$tok} };
  3856         8055  
3268              
3269             #------------------------------------------------------
3270             # Here is the basic RULE: remove an unmatched alignment
3271             # which does not occur in the surrounding lines.
3272             #------------------------------------------------------
3273 3856   100     7680 my $delete_me = !defined($il) && !defined($ir);
3274              
3275             # Apply any user controls. Note that not all lines pass
3276             # this way so they have to be applied elsewhere too.
3277 3856         4203 my $align_ok = 1;
3278 3856 100       5775 if (%valign_control_hash) {
3279 33         42 $align_ok = $valign_control_hash{$raw_tok};
3280 33 100       70 $align_ok = $valign_control_default
3281             unless ( defined($align_ok) );
3282 33   100     68 $delete_me ||= !$align_ok;
3283             }
3284              
3285             # But now we modify this with exceptions...
3286              
3287             # EXCEPTION 1: If we are in a complete ternary or
3288             # if/elsif/else group, and this token is not on every line
3289             # of the group, should we delete it to preserve overall
3290             # alignment?
3291 3856 100       5783 if ($dividing_token) {
3292 163 100       293 if ( $token_line_count{$tok} >= $nlines ) {
3293 132   100     351 $saw_dividing_token ||= $tok eq $dividing_token;
3294             }
3295             else {
3296              
3297             # For shorter runs, delete toks to save alignment.
3298             # For longer runs, keep toks after the '{' or '?'
3299             # to allow sub-alignments within braces. The
3300             # number 5 lines is arbitrary but seems to work ok.
3301 31   66     80 $delete_me ||= ( $nlines < 5 || !$saw_dividing_token );
      100        
3302             }
3303             }
3304              
3305             # EXCEPTION 2: Remove all tokens above a certain level
3306             # following a previous deletion. For example, we have to
3307             # remove tagged higher level alignment tokens following a
3308             # '=>' deletion because the tags of higher level tokens
3309             # will now be incorrect. For example, this will prevent
3310             # aligning commas as follows after deleting the second '=>'
3311             # $w->insert(
3312             # ListBox => origin => [ 270, 160 ],
3313             # size => [ 200, 55 ],
3314             # );
3315 3856 100       5665 if ( defined($delete_above_level) ) {
3316 326 100       673 if ( $lev > $delete_above_level ) {
3317 153   100     324 $delete_me ||= 1;
3318             }
3319 173         310 else { $delete_above_level = undef }
3320             }
3321              
3322             # EXCEPTION 3: Remove all but certain tokens after an
3323             # assignment deletion.
3324 3856 100 100     5843 if (
      100        
3325             $deleted_assignment_token
3326             && ( $lev > $group_level
3327             || !$is_if_or{$raw_tok} )
3328             )
3329             {
3330 62   100     139 $delete_me ||= 1;
3331             }
3332              
3333             # EXCEPTION 4: Do not touch the first line of a 2 line
3334             # terminal match, such as below, because j_terminal has
3335             # already been set.
3336             # if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
3337             # else { $tago = $tagc = ''; }
3338             # But see snippets 'else1.t' and 'else2.t'
3339 3856 100 100     7794 $delete_me = 0
      100        
3340             if ( $jj == $jbeg
3341             && $has_terminal_match
3342             && $nlines == 2 );
3343              
3344             # EXCEPTION 5: misc additional rules for commas and equals
3345 3856 100 100     7445 if ( $delete_me && $tok_count == 1 ) {
3346              
3347             # okay to delete second and higher copies of a token
3348              
3349             # for a comma...
3350 828 100       1762 if ( $raw_tok eq COMMA ) {
3351              
3352             # Do not delete commas before an equals
3353 305 100 100     796 $delete_me = 0
3354             if ( defined($i_eq) && $i < $i_eq );
3355              
3356             # Do not delete line-level commas
3357 305 100       609 $delete_me = 0 if ( $lev <= $group_level );
3358             }
3359              
3360             # For an assignment at group level..
3361 828 100 100     2510 if ( $is_assignment{$raw_tok}
3362             && $lev == $group_level )
3363             {
3364              
3365             # Do not delete if it is the last alignment of
3366             # multiple tokens; this will prevent some
3367             # undesirable alignments
3368 136 100 100     522 if ( $imax > 0 && $i == $imax ) {
3369 13         25 $delete_me = 0;
3370             }
3371              
3372             # Otherwise, set a flag to delete most
3373             # remaining tokens
3374 123         233 else { $deleted_assignment_token = $raw_tok }
3375             }
3376             }
3377              
3378             # Do not let a user exclusion be reactivated by above rules
3379 3856   66     9367 $delete_me ||= !$align_ok;
3380              
3381             #------------------------------------
3382             # Add this token to the deletion list
3383             #------------------------------------
3384 3856 100       6508 if ($delete_me) {
3385 766         1143 push @idel, $i;
3386              
3387             # update deletion propagation flags
3388 766 100 66     2000 if ( !defined($delete_above_level)
3389             || $lev < $delete_above_level )
3390             {
3391              
3392             # delete all following higher level alignments
3393 613         820 $delete_above_level = $lev;
3394              
3395             # but keep deleting after => to next lower level
3396             # to avoid some bizarre alignments
3397 613 100       1606 if ( $raw_tok eq '=>' ) {
3398 55         151 $delete_above_level = $lev - 1;
3399             }
3400             }
3401             }
3402             } ## End loop over alignment tokens
3403              
3404             # Process all deletion requests for this line
3405 2198 100       4795 if (@idel) {
3406 479         1407 delete_selected_tokens( $line, \@idel );
3407             }
3408             } ## End loop over lines
3409             } ## End main loop over subgroups
3410              
3411 673         1451 return;
3412             } ## end sub delete_unmatched_tokens_main_loop
3413              
3414             sub match_line_pairs {
3415              
3416 673     673 0 1611 my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
3417              
3418             # Compare each pair of lines and save information about common matches
3419              
3420             # Given:
3421             # $rlines = list of lines including hanging side comments
3422             # $rnew_lines = list of lines without any hanging side comments
3423             # $rsubgroups = list of subgroups of the new lines
3424             # Return:
3425             # $saw_signed_number = true if a field has a signed number
3426             # (needed for --valign-signed-numbers)
3427              
3428             # NOTE: A possible future generalization would be to change
3429             # imax_pair => $imax_align into a ref with additional information:
3430             # imax_pair => [$imax_align, $rMsg, ... ]
3431             # This could eventually hold multi-level match info
3432              
3433             # Previous line vars
3434 673         2926 my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
3435             $list_type_m, $ci_level_m );
3436              
3437             # Current line vars
3438 673         0 my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
3439             $ci_level );
3440              
3441             # Return parameter to avoid calls to sub pad_signed_number_columns
3442 673         0 my $saw_signed_number;
3443              
3444             # loop over subgroups
3445 673         1115 foreach my $item ( @{$rsubgroups} ) {
  673         1342  
3446 752         1096 my ( $jbeg, $jend ) = @{$item};
  752         1534  
3447 752         3970 my $nlines = $jend - $jbeg + 1;
3448 752 100       1779 next if ( $nlines <= 1 );
3449              
3450             # loop over lines in a subgroup
3451 651         1458 foreach my $jj ( $jbeg .. $jend ) {
3452              
3453 2097         2398 $line_m = $line;
3454 2097         2392 $rtokens_m = $rtokens;
3455 2097         2341 $rpatterns_m = $rpatterns;
3456 2097         2250 $rfield_lengths_m = $rfield_lengths;
3457 2097         2323 $imax_m = $imax;
3458 2097         2486 $list_type_m = $list_type;
3459 2097         2290 $ci_level_m = $ci_level;
3460              
3461 2097         2722 $line = $rnew_lines->[$jj];
3462 2097         2777 $rtokens = $line->{'rtokens'};
3463 2097         2711 $rpatterns = $line->{'rpatterns'};
3464 2097         2738 $rfield_lengths = $line->{'rfield_lengths'};
3465 2097         2255 $imax = @{$rtokens} - 2;
  2097         2632  
3466 2097         3063 $list_type = $line->{'list_type'};
3467 2097         2747 $ci_level = $line->{'ci_level'};
3468              
3469             # Quick approximate check for signed numbers in this line.
3470             # This speeds up large runs by about 0.5%
3471 2097 100       3374 if ( !$saw_signed_number ) {
3472              
3473 1924         2665 my $rfields = $line->{'rfields'};
3474 1924         2995 foreach my $i ( 0 .. $imax + 1 ) {
3475 4600 100       8670 next if ( index( $rpatterns->[$i], 'n' ) < 0 );
3476 952         1399 my $field = $rfields->[$i];
3477 952 100 100     2926 if ( index( $field, '-' ) >= 0
3478             || index( $field, '+' ) >= 0 )
3479             {
3480 72         125 $saw_signed_number = 1;
3481 72         166 last;
3482             }
3483             }
3484             }
3485              
3486             # nothing to do for first line
3487 2097 100       3553 next if ( $jj == $jbeg );
3488              
3489 1446         2010 my $ci_jump = $ci_level - $ci_level_m;
3490              
3491 1446 100       2710 my $imax_min = $imax_m < $imax ? $imax_m : $imax;
3492              
3493 1446         1756 my $imax_align = -1;
3494              
3495             # find number of leading common tokens
3496              
3497             #---------------------------------
3498             # No match to hanging side comment
3499             #---------------------------------
3500 1446 50 100     4591 if ( $line->{'is_hanging_side_comment'} ) {
    100          
3501              
3502             # Should not get here; HSC's have been filtered out
3503 0         0 $imax_align = -1;
3504             }
3505              
3506             #-----------------------------
3507             # Handle comma-separated lists
3508             #-----------------------------
3509             elsif ( $list_type && $list_type eq $list_type_m ) {
3510              
3511             # do not align lists across a ci jump with new list method
3512 526 50       942 if ($ci_jump) { $imax_min = -1 }
  0         0  
3513              
3514 526         737 my $i_nomatch = $imax_min + 1;
3515 526         838 foreach my $i ( 0 .. $imax_min ) {
3516 1049         1319 my $tok = $rtokens->[$i];
3517 1049         1261 my $tok_m = $rtokens_m->[$i];
3518 1049 50       1956 if ( $tok ne $tok_m ) {
3519 0         0 $i_nomatch = $i;
3520 0         0 last;
3521             }
3522             }
3523              
3524 526         731 $imax_align = $i_nomatch - 1;
3525             }
3526              
3527             #-----------------
3528             # Handle non-lists
3529             #-----------------
3530             else {
3531 920         1327 my $i_nomatch = $imax_min + 1;
3532 920         1540 foreach my $i ( 0 .. $imax_min ) {
3533 909         1312 my $tok = $rtokens->[$i];
3534 909         1224 my $tok_m = $rtokens_m->[$i];
3535 909 100       1689 if ( $tok ne $tok_m ) {
3536 31         50 $i_nomatch = $i;
3537 31         79 last;
3538             }
3539              
3540 878         1215 my $pat = $rpatterns->[$i];
3541 878         1133 my $pat_m = $rpatterns_m->[$i];
3542              
3543             # VSN PATCH: allow numbers to match quotes
3544 878 100       1499 if ( $pat_m ne $pat ) {
3545 198         429 $pat =~ tr/n/Q/;
3546 198         309 $pat_m =~ tr/n/Q/;
3547             }
3548              
3549             # If patterns don't match, we have to be careful...
3550 878 100       1623 if ( $pat_m ne $pat ) {
3551 186         329 my $pad =
3552             $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
3553 186         1217 my $match_code = compare_patterns(
3554             {
3555             group_level => $group_level,
3556             tok => $tok,
3557             pat => $pat,
3558             pat_m => $pat_m,
3559             pad => $pad,
3560             ## tok_m => $tok_m,
3561             }
3562             );
3563 186 100       628 if ($match_code) {
3564 8 100       22 if ( $match_code == 1 ) { $i_nomatch = $i }
  7 50       13  
3565 1         2 elsif ( $match_code == 2 ) { $i_nomatch = 0 }
3566             else { } ##ok
3567 8         19 last;
3568             }
3569             }
3570             }
3571 920         1296 $imax_align = $i_nomatch - 1;
3572             }
3573              
3574 1446         2549 $line_m->{'imax_pair'} = $imax_align;
3575              
3576             } ## end loop over lines
3577              
3578             # Put fence at end of subgroup
3579 651         1458 $line->{'imax_pair'} = -1;
3580              
3581             } ## end loop over subgroups
3582              
3583             # if there are hanging side comments, propagate the pair info down to them
3584             # so that lines can just look back one line for their pair info.
3585 673 100       1197 if ( @{$rlines} > @{$rnew_lines} ) {
  673         1116  
  673         1643  
3586 26         37 my $last_pair_info = -1;
3587 26         44 foreach my $line_t ( @{$rlines} ) {
  26         63  
3588 99 100       163 if ( $line_t->{'is_hanging_side_comment'} ) {
3589 41         66 $line_t->{'imax_pair'} = $last_pair_info;
3590             }
3591             else {
3592 58         86 $last_pair_info = $line_t->{'imax_pair'};
3593             }
3594             }
3595             }
3596 673         1607 return $saw_signed_number;
3597             } ## end sub match_line_pairs
3598              
3599             sub compare_patterns {
3600              
3601 186     186 0 355 my ($rcall_hash) = @_;
3602              
3603             # This is a helper routine for sub match_line_pairs to decide if patterns
3604             # in two lines match well enough
3605             # Given these values in $rcall_hash:
3606             # $tok_m, $pat_m = token and pattern of first line
3607             # $tok, $pat = token and pattern of second line
3608             # $pad = 0 if no padding is needed, !=0 otherwise
3609             # Return code:
3610             # 0 = patterns match, continue
3611             # 1 = no match
3612             # 2 = no match, and lines do not match at all
3613              
3614 186         344 my $group_level = $rcall_hash->{group_level};
3615 186         302 my $tok = $rcall_hash->{tok};
3616             ## my $tok_m = $rcall_hash->{tok_m};
3617 186         283 my $pat = $rcall_hash->{pat};
3618 186         285 my $pat_m = $rcall_hash->{pat_m};
3619 186         273 my $pad = $rcall_hash->{pad};
3620              
3621 186         1408 my $GoToMsg = EMPTY_STRING;
3622 186         252 my $return_code = 0;
3623              
3624 44     44   392 use constant EXPLAIN_COMPARE_PATTERNS => 0;
  44         125  
  44         47256  
3625              
3626 186         375 my ( $alignment_token, $lev, $tag_uu, $tok_count_uu ) =
3627             decode_alignment_token($tok);
3628              
3629             # We have to be very careful about aligning commas
3630             # when the pattern's don't match, because it can be
3631             # worse to create an alignment where none is needed
3632             # than to omit one. Here's an example where the ','s
3633             # are not in named containers. The first line below
3634             # should not match the next two:
3635             # ( $a, $b ) = ( $b, $r );
3636             # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
3637             # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
3638 186 100       676 if ( $alignment_token eq COMMA ) {
    100          
    100          
3639              
3640             # do not align commas unless they are in named
3641             # containers
3642 26 100       108 if ( $tok !~ /[A-Za-z]/ ) {
3643 3         5 $return_code = 1;
3644 3         12 $GoToMsg = "do not align commas in unnamed containers";
3645             }
3646             else {
3647 23         50 $return_code = 0;
3648             }
3649             }
3650              
3651             # do not align parens unless patterns match;
3652             # large ugly spaces can occur in math expressions.
3653             elsif ( $alignment_token eq '(' ) {
3654              
3655             # But we can allow a match if the parens don't
3656             # require any padding.
3657 4 50       17 if ( $pad != 0 ) {
3658 4         7 $return_code = 1;
3659 4         10 $GoToMsg = "do not align '(' unless patterns match or pad=0";
3660             }
3661             else {
3662 0         0 $return_code = 0;
3663             }
3664             }
3665              
3666             # Handle an '=' alignment with different patterns to
3667             # the left.
3668             elsif ( $alignment_token eq '=' ) {
3669              
3670             # It is best to be a little restrictive when
3671             # aligning '=' tokens. Here is an example of
3672             # two lines that we will not align:
3673             # my $variable=6;
3674             # $bb=4;
3675             # The problem is that one is a 'my' declaration,
3676             # and the other isn't, so they're not very similar.
3677             # We will filter these out by comparing the first
3678             # letter of the pattern. This is crude, but works
3679             # well enough.
3680 20 50       113 if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
    100          
3681 0         0 $GoToMsg = "first character before equals differ";
3682 0         0 $return_code = 1;
3683             }
3684              
3685             # The introduction of sub 'prune_alignment_tree'
3686             # enabled alignment of lists left of the equals with
3687             # other scalar variables. For example:
3688             # my ( $D, $s, $e ) = @_;
3689             # my $d = length $D;
3690             # my $c = $e - $s - $d;
3691              
3692             # But this would change formatting of a lot of scripts,
3693             # so for now we prevent alignment of comma lists on the
3694             # left with scalars on the left. We will also prevent
3695             # any partial alignments.
3696              
3697             # set return code 2 if the = is at line level, but
3698             # set return code 1 if the = is below line level, i.e.
3699             # sub new { my ( $p, $v ) = @_; bless \$v, $p }
3700             # sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
3701              
3702             elsif (
3703             ( index( $pat_m, COMMA ) >= 0 ) ne ( index( $pat, COMMA ) >= 0 ) )
3704             {
3705 1         2 $GoToMsg = "mixed commas/no-commas before equals";
3706 1         1 $return_code = 1;
3707 1 50       4 if ( $lev eq $group_level ) {
3708 1         2 $return_code = 2;
3709             }
3710             }
3711             else {
3712 19         41 $return_code = 0;
3713             }
3714             }
3715             else {
3716 136         193 $return_code = 0;
3717             }
3718              
3719             EXPLAIN_COMPARE_PATTERNS
3720             && $return_code
3721 186         257 && print {*STDOUT} "no match because $GoToMsg\n";
3722              
3723 186         351 return $return_code;
3724              
3725             } ## end sub compare_patterns
3726              
3727             sub fat_comma_to_comma {
3728              
3729 833     833 0 1208 my ($str) = @_;
3730              
3731             # Given:
3732             # $str = a decorated fat comma alignment token
3733              
3734             # Change '=>' to ','
3735             # and remove any trailing decimal count because currently fat commas have a
3736             # count and commas do not.
3737              
3738             # For example, change '=>2+{-3.2' into ',2+{-3'
3739 833 100       1904 if ( $str =~ /^=>([^\.]*)/ ) { $str = COMMA . $1 }
  181         451  
3740 833         1544 return $str;
3741             } ## end sub fat_comma_to_comma
3742              
3743             sub get_line_token_info {
3744              
3745 171     171 0 385 my ($rlines) = @_;
3746              
3747             # Given:
3748             # $rlines = ref to array of lines in this group
3749              
3750             # Scan lines of tokens and return summary information about the range of
3751             # levels and patterns.
3752              
3753             # First scan to check monotonicity. Here is an example of several
3754             # lines which are monotonic. The = is the lowest level, and
3755             # the commas are all one level deeper. So this is not nonmonotonic.
3756             # $$d{"weeks"} = [ "w", "wk", "wks", "week", "weeks" ];
3757             # $$d{"days"} = [ "d", "day", "days" ];
3758             # $$d{"hours"} = [ "h", "hr", "hrs", "hour", "hours" ];
3759 171         243 my @all_token_info;
3760 171         346 my $all_monotonic = 1;
3761 171         269 foreach my $jj ( 0 .. @{$rlines} - 1 ) {
  171         455  
3762 691         968 my ($line) = $rlines->[$jj];
3763 691         902 my $rtokens = $line->{'rtokens'};
3764 691         796 my $last_lev;
3765 691         867 my $is_monotonic = 1;
3766 691         764 my $i = -1;
3767 691         783 foreach my $tok ( @{$rtokens} ) {
  691         975  
3768 1813         1863 $i++;
3769 1813         2259 my ( $raw_tok, $lev, $tag, $tok_count ) =
3770             decode_alignment_token($tok);
3771 1813         2184 push @{ $all_token_info[$jj] },
  1813         4169  
3772             [ $raw_tok, $lev, $tag, $tok_count ];
3773 1813 100       2942 last if ( $tok eq '#' );
3774 1122 100 100     2301 if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
  90         114  
3775 1122         1456 $last_lev = $lev;
3776             }
3777 691 100       1317 if ( !$is_monotonic ) { $all_monotonic = 0 }
  87         139  
3778             }
3779              
3780 171         342 my $rline_values = [];
3781 171         303 foreach my $jj ( 0 .. @{$rlines} - 1 ) {
  171         402  
3782 691         954 my ($line) = $rlines->[$jj];
3783              
3784 691         997 my $rtokens = $line->{'rtokens'};
3785 691         793 my $i = -1;
3786 691         812 my ( $lev_min, $lev_max );
3787 691         867 my $token_pattern_max = EMPTY_STRING;
3788 691         776 my %saw_level;
3789 691         770 my $is_monotonic = 1;
3790              
3791             # find the index of the last token before the side comment
3792 691         795 my $imax = @{$rtokens} - 2;
  691         966  
3793 691         832 my $imax_true = $imax;
3794              
3795             # If the entire group is monotonic, and the line ends in a comma list,
3796             # walk it back to the first such comma. this will have the effect of
3797             # making all trailing ragged comma lists match in the prune tree
3798             # routine. these trailing comma lists can better be handled by later
3799             # alignment rules.
3800              
3801             # Treat fat commas the same as commas here by converting them to
3802             # commas. This will improve the chance of aligning the leading parts
3803             # of ragged lists.
3804              
3805 691         1265 my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
3806 691 100 100     1964 if ( $all_monotonic && $tok_end =~ /^,/ ) {
3807 146         200 my $ii = $imax - 1;
3808 146   100     458 while ( $ii >= 0
3809             && fat_comma_to_comma( $rtokens->[$ii] ) eq $tok_end )
3810             {
3811 93         129 $imax = $ii;
3812 93         144 $ii--;
3813             } ## end while ( $ii >= 0 && fat_comma_to_comma...)
3814             }
3815              
3816             # make a first pass to find level range
3817 691         1310 my $last_lev;
3818 691         794 foreach my $tok ( @{$rtokens} ) {
  691         950  
3819 1720         1723 $i++;
3820 1720 100       2421 last if ( $i > $imax );
3821 1029 50       1461 last if ( $tok eq '#' );
3822             my ( $raw_tok_uu, $lev, $tag_uu, $tok_count_uu ) =
3823 1029         1125 @{ $all_token_info[$jj]->[$i] };
  1029         1860  
3824              
3825 1029 50       1560 last if ( $tok eq '#' );
3826 1029         1270 $token_pattern_max .= $tok;
3827 1029         1414 $saw_level{$lev}++;
3828 1029 100       1497 if ( !defined($lev_min) ) {
3829 584         702 $lev_min = $lev;
3830 584         1731 $lev_max = $lev;
3831             }
3832             else {
3833 445 100       710 if ( $lev < $lev_min ) { $lev_min = $lev; }
  60         77  
3834 445 100       706 if ( $lev > $lev_max ) { $lev_max = $lev; }
  134         191  
3835 445 100       670 if ( $lev < $last_lev ) { $is_monotonic = 0 }
  90         108  
3836             }
3837 1029         1278 $last_lev = $lev;
3838             }
3839              
3840             # handle no levels
3841 691         926 my $rtoken_patterns = {};
3842 691         791 my $rtoken_indexes = {};
3843 691         2049 my @levs = sort { $a <=> $b } keys %saw_level;
  200         676  
3844 691 100       1588 if ( !defined($lev_min) ) {
    100          
3845 107         150 $lev_min = -1;
3846 107         145 $lev_max = -1;
3847 107         169 $levs[0] = -1;
3848 107         264 $rtoken_patterns->{$lev_min} = EMPTY_STRING;
3849 107         203 $rtoken_indexes->{$lev_min} = [];
3850             }
3851              
3852             # handle one level
3853             elsif ( $lev_max == $lev_min ) {
3854 399         1795 $rtoken_patterns->{$lev_max} = $token_pattern_max;
3855 399         933 $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
3856             }
3857              
3858             # handle multiple levels
3859             else {
3860 185         342 $rtoken_patterns->{$lev_max} = $token_pattern_max;
3861 185         487 $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
3862              
3863 185         298 my $lev_top = pop @levs; # already did max level
3864 185         272 my $itok = -1;
3865 185         222 foreach my $tok ( @{$rtokens} ) {
  185         318  
3866 771         772 $itok++;
3867 771 100       1055 last if ( $itok > $imax );
3868             my ( $raw_tok, $lev, $tag_uu, $tok_count_uu ) =
3869 586         574 @{ $all_token_info[$jj]->[$itok] };
  586         909  
3870 586 50       835 last if ( $raw_tok eq '#' );
3871 586         690 foreach my $lev_test (@levs) {
3872 626 100       1053 next if ( $lev > $lev_test );
3873 309         488 $rtoken_patterns->{$lev_test} .= $tok;
3874 309         337 push @{ $rtoken_indexes->{$lev_test} }, $itok;
  309         602  
3875             }
3876             }
3877 185         310 push @levs, $lev_top;
3878             }
3879              
3880 691         853 push @{$rline_values},
  691         2331  
3881             [
3882             $lev_min, $lev_max, $rtoken_patterns, \@levs,
3883             $rtoken_indexes, $is_monotonic, $imax_true, $imax,
3884             ];
3885              
3886             # debug
3887 691         1597 0 && do {
3888             local $LIST_SEPARATOR = ')(';
3889             print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
3890             foreach my $key ( sort keys %{$rtoken_patterns} ) {
3891             print "$key => $rtoken_patterns->{$key}\n";
3892             print "$key => @{$rtoken_indexes->{$key}}\n";
3893             }
3894             };
3895             } ## end loop over lines
3896 171         1124 return ( $rline_values, $all_monotonic );
3897             } ## end sub get_line_token_info
3898              
3899             sub prune_alignment_tree {
3900              
3901 171     171 0 3079 my ($rlines) = @_;
3902              
3903             # Given:
3904             # $rlines = ref to array of lines in this group
3905              
3906             # Prune the tree of alignments to limit depth of alignments
3907              
3908 171         289 my $jmax = @{$rlines} - 1;
  171         399  
3909 171 50       456 return if ( $jmax <= 0 );
3910              
3911             # Vertical alignment in perltidy is done as an iterative process. The
3912             # starting point is to mark all possible alignment tokens ('=', ',', '=>',
3913             # etc) for vertical alignment. Then we have to delete all alignments
3914             # which, if actually made, would detract from overall alignment. This
3915             # is done in several phases of which this is one.
3916              
3917             # In this routine we look at the alignments of a group of lines as a
3918             # hierarchical tree. We will 'prune' the tree to limited depths if that
3919             # will improve overall alignment at the lower depths.
3920             # For each line we will be looking at its alignment patterns down to
3921             # different fixed depths. For each depth, we include all lower depths and
3922             # ignore all higher depths. We want to see if we can get alignment of a
3923             # larger group of lines if we ignore alignments at some lower depth.
3924             # Here is an # example:
3925              
3926             # for (
3927             # [ '$var', sub { join $_, "bar" }, 0, "bar" ],
3928             # [ 'CONSTANT', sub { join "foo", "bar" }, 0, "bar" ],
3929             # [ 'CONSTANT', sub { join "foo", "bar", 3 }, 1, "barfoo3" ],
3930             # [ '$myvar', sub { my $var; join $var, "bar" }, 0, "bar" ],
3931             # );
3932              
3933             # In the above example, all lines have three commas at the lowest depth
3934             # (zero), so if there were no other alignments, these lines would all
3935             # align considering only the zero depth alignment token. But some lines
3936             # have additional comma alignments at the next depth, so we need to decide
3937             # if we should drop those to keep the top level alignments, or keep those
3938             # for some additional low level alignments at the expense losing some top
3939             # level alignments. In this case we will drop the deeper level commas to
3940             # keep the entire collection aligned. But in some cases the decision could
3941             # go the other way.
3942              
3943             # The tree for this example at the zero depth has one node containing
3944             # all four lines, since they are identical at zero level (three commas).
3945             # At depth one, there are three 'children' nodes, namely:
3946             # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
3947             # - line 3, which has 2 commas at depth 1
3948             # - line4, which has a ';' and a ',' at depth 1
3949             # There are no deeper alignments in this example.
3950             # so the tree structure for this example is:
3951             #
3952             # depth 0 depth 1 depth 2
3953             # [lines 1-4] -- [line 1-2] - (empty)
3954             # | [line 3] - (empty)
3955             # | [line 4] - (empty)
3956              
3957             # We can carry this to any depth, but it is not really useful to go below
3958             # depth 2. To cleanly stop there, we will consider depth 2 to contain all
3959             # alignments at depth >=2.
3960              
3961 44     44   322 use constant EXPLAIN_PRUNE => 0;
  44         87  
  44         52120  
3962              
3963             #-------------------------------------------------------------------
3964             # Prune Tree Step 1. Start by scanning the lines and collecting info
3965             #-------------------------------------------------------------------
3966              
3967             # Note that the caller had this info but we have to redo this now because
3968             # alignment tokens may have been deleted.
3969 171         613 my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
3970              
3971             # If all the lines have levels which increase monotonically from left to
3972             # right, then the sweep-left-to-right pass can do a better job of alignment
3973             # than pruning, and without deleting alignments.
3974 171 100       1066 return if ($all_monotonic);
3975              
3976             # Contents of $rline_values
3977             # [
3978             # $lev_min, $lev_max, $rtoken_patterns, \@levs,
3979             # $rtoken_indexes, $is_monotonic, $imax_true, $imax,
3980             # ];
3981              
3982             # We can work to any depth, but there is little advantage to working
3983             # to a depth greater than 2
3984 36         62 my $MAX_DEPTH = 2;
3985              
3986             # This arrays will hold the tree of alignment tokens at different depths
3987             # for these lines.
3988 36         71 my @match_tree;
3989              
3990             # Tree nodes contain these values:
3991             # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
3992             # $nc_beg_p, $nc_end_p, $rindexes];
3993             # where
3994             # $depth = 0,1,2 = index of depth of the match
3995              
3996             # $jbeg beginning index j of the range of lines in this match
3997             # $jend ending index j of the range of lines in this match
3998             # $n_parent = index of the containing group at $depth-1, if it exists
3999             # $level = actual level of code being matched in this group
4000             # $pattern = alignment pattern being matched
4001             # $nc_beg_p = first child
4002             # $nc_end_p = last child
4003             # $rindexes = ref to token indexes
4004              
4005             # the patterns and levels of the current group being formed at each depth
4006 36         134 my ( @token_patterns_current, @levels_current, @token_indexes_current );
4007              
4008             # the patterns and levels of the next line being tested at each depth
4009 36         0 my ( @token_patterns_next, @levels_next, @token_indexes_next );
4010              
4011             #-----------------------------------------------------------
4012             # define a recursive worker subroutine for tree construction
4013             #-----------------------------------------------------------
4014              
4015             # This is a recursive routine which is called if a match condition changes
4016             # at any depth when a new line is encountered. It ends the match node
4017             # which changed plus all deeper nodes attached to it.
4018 36         0 my $end_node;
4019             $end_node = sub {
4020              
4021 345     345   490 my ( $depth, $jl, $n_parent ) = @_;
4022              
4023             # $depth is the tree depth
4024             # $jl is the index of the line
4025             # $n_parent is index of the parent node of this node
4026              
4027 345 100       891 return if ( $depth > $MAX_DEPTH );
4028              
4029             # end any current group at this depth
4030 252 100 100     625 if ( $jl >= 0
      66        
      100        
4031             && defined( $match_tree[$depth] )
4032 77         270 && @{ $match_tree[$depth] }
4033             && defined( $levels_current[$depth] ) )
4034             {
4035 71         104 $match_tree[$depth]->[-1]->[1] = $jl;
4036             }
4037              
4038             # Define the index of the node we will create below
4039 252         288 my $ng_self = 0;
4040 252 100       390 if ( defined( $match_tree[$depth] ) ) {
4041 77         88 $ng_self = @{ $match_tree[$depth] };
  77         112  
4042             }
4043              
4044             # end any next deeper child node(s)
4045 252         628 $end_node->( $depth + 1, $jl, $ng_self );
4046              
4047             # update the levels being matched
4048 252         352 $token_patterns_current[$depth] = $token_patterns_next[$depth];
4049 252         300 $token_indexes_current[$depth] = $token_indexes_next[$depth];
4050 252         322 $levels_current[$depth] = $levels_next[$depth];
4051              
4052             # Do not start a new group at this level if it is not being used
4053 252 100 66     745 if ( !defined( $levels_next[$depth] )
      66        
4054             || $depth > 0
4055             && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
4056             {
4057 127         1409 return;
4058             }
4059              
4060             # Create a node for the next group at this depth. We initially assume
4061             # that it will continue to $jmax, and correct that later if the node
4062             # ends earlier.
4063 125         159 push @{ $match_tree[$depth] },
  125         438  
4064             [
4065             $jl + 1, $jmax, $n_parent, $levels_current[$depth],
4066             $token_patterns_current[$depth],
4067             undef, undef, $token_indexes_current[$depth],
4068             ];
4069              
4070 125         208 return;
4071 36         360 }; ## end $end_node = sub
4072              
4073             #-----------------------------------------------------
4074             # Prune Tree Step 2. Loop to form the tree of matches.
4075             #-----------------------------------------------------
4076 36         151 foreach my $jp ( 0 .. $jmax ) {
4077              
4078             # working with two adjacent line indexes, 'm'=minus, 'p'=plus
4079 246         336 my $jm = $jp - 1;
4080              
4081             # Pull out needed values for the next line
4082             my ( $lev_min_uu, $lev_max_uu, $rtoken_patterns, $rlevs,
4083             $rtoken_indexes, $is_monotonic_uu, $imax_true_uu, $imax_uu )
4084 246         257 = @{ $rline_values->[$jp] };
  246         524  
4085              
4086             # Transfer levels and patterns for this line to the working arrays.
4087             # If the number of levels differs from our chosen MAX_DEPTH ...
4088             # if fewer than MAX_DEPTH: leave levels at missing depths undefined
4089             # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
4090 246         338 @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
  246         486  
4091 246 100       350 if ( @{$rlevs} > $MAX_DEPTH ) {
  246         405  
4092 5         7 $levels_next[$MAX_DEPTH] = $rlevs->[-1];
4093             }
4094 246         289 my $depth = 0;
4095 246         303 foreach my $item (@levels_next) {
4096             $token_patterns_next[$depth] =
4097 738 100       1138 defined($item) ? $rtoken_patterns->{$item} : undef;
4098             $token_indexes_next[$depth] =
4099 738 100       1044 defined($item) ? $rtoken_indexes->{$item} : undef;
4100 738         827 $depth++;
4101             }
4102              
4103             # Look for a change in match groups...
4104              
4105             # Initialize on the first line
4106 246 100       686 if ( $jp == 0 ) {
    100          
    50          
4107 36         58 my $n_parent;
4108 36         125 $end_node->( 0, $jm, $n_parent );
4109             }
4110              
4111             # End groups if a hard flag has been set
4112             elsif ( $rlines->[$jm]->{'end_group'} ) {
4113 11         21 my $n_parent;
4114 11         29 $end_node->( 0, $jm, $n_parent );
4115             }
4116              
4117             # Continue at hanging side comment
4118             elsif ( $rlines->[$jp]->{'is_hanging_side_comment'} ) {
4119 0         0 next;
4120             }
4121              
4122             # Otherwise see if anything changed and update the tree if so
4123             else {
4124 199         287 foreach my $dep ( 0 .. $MAX_DEPTH ) {
4125              
4126 413         475 my $def_current = defined( $token_patterns_current[$dep] );
4127 413         470 my $def_next = defined( $token_patterns_next[$dep] );
4128 413 100 100     809 last if ( !$def_current && !$def_next );
4129 261 100 100     936 if ( !$def_current
      100        
4130             || !$def_next
4131             || $token_patterns_current[$dep] ne
4132             $token_patterns_next[$dep] )
4133             {
4134 46         63 my $n_parent;
4135 46 100 66     168 if ( $dep > 0 && defined( $match_tree[ $dep - 1 ] ) ) {
4136 23         26 $n_parent = @{ $match_tree[ $dep - 1 ] } - 1;
  23         42  
4137             }
4138 46         112 $end_node->( $dep, $jm, $n_parent );
4139 46         109 last;
4140             }
4141             }
4142             }
4143             } ## end loop to form tree of matches
4144              
4145             #---------------------------------------------------------
4146             # Prune Tree Step 3. Make links from parent to child nodes
4147             #---------------------------------------------------------
4148              
4149             # It seemed cleaner to do this as a separate step rather than during tree
4150             # construction. The children nodes have links up to the parent node which
4151             # created them. Now make links in the opposite direction, so the parents
4152             # can find the children. We store the range of children nodes ($nc_beg,
4153             # $nc_end) of each parent with two additional indexes in the original array.
4154             # These will be undef if no children.
4155 36         97 foreach my $depth ( reverse( 1 .. $MAX_DEPTH ) ) {
4156 72 100       162 next unless ( defined( $match_tree[$depth] ) );
4157 37         57 my $nc_max = @{ $match_tree[$depth] } - 1;
  37         79  
4158 37         71 my $np_now;
4159 37         103 foreach my $nc ( 0 .. $nc_max ) {
4160 55         103 my $np = $match_tree[$depth]->[$nc]->[2];
4161 55 50       132 if ( !defined($np) ) {
4162              
4163             # shouldn't happen
4164             #print STDERR "lost child $np at depth $depth\n";
4165 0         0 next;
4166             }
4167 55 100 100     170 if ( !defined($np_now) || $np != $np_now ) {
4168 40         90 $np_now = $np;
4169 40         100 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
4170             }
4171 55         117 $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
4172             }
4173             } ## end loop to make links down to the child nodes
4174              
4175 36         56 EXPLAIN_PRUNE > 0 && do {
4176             print "Tree complete. Found these groups:\n";
4177             foreach my $depth ( 0 .. $MAX_DEPTH ) {
4178             Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
4179             }
4180             };
4181              
4182             #------------------------------------------------------
4183             # Prune Tree Step 4. Make a list of nodes to be deleted
4184             #------------------------------------------------------
4185              
4186             # list of lines with tokens to be deleted:
4187             # [$jbeg, $jend, $level_keep]
4188             # $jbeg..$jend is the range of line indexes,
4189             # $level_keep is the minimum level to keep
4190 36         70 my @delete_list;
4191              
4192             # We work with a list of nodes to visit at the next deeper depth.
4193             my @todo_list;
4194 36 50       106 if ( defined( $match_tree[0] ) ) {
4195 36         67 @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
  36         89  
4196             }
4197              
4198 36         81 foreach my $depth ( 0 .. $MAX_DEPTH ) {
4199 101 100       205 last if ( !@todo_list );
4200 65         88 my @todo_next;
4201 65         96 foreach my $np (@todo_list) {
4202             my ( $jbeg_p, $jend_p, $np_p_uu, $lev_p, $pat_p_uu, $nc_beg_p,
4203             $nc_end_p, $rindexes_p_uu )
4204 103         138 = @{ $match_tree[$depth]->[$np] };
  103         246  
4205 103         159 my $nlines_p = $jend_p - $jbeg_p + 1;
4206              
4207             # nothing to do if no children
4208 103 100       199 next unless ( defined($nc_beg_p) );
4209              
4210             # Define the number of lines to either keep or delete a child node.
4211             # This is the key decision we have to make. We want to delete
4212             # short runs of matched lines, and keep long runs. It seems easier
4213             # for the eye to follow breaks in monotonic level changes than
4214             # non-monotonic level changes. For example, the following looks
4215             # best if we delete the lower level alignments:
4216              
4217             # [1] ~~ [];
4218             # [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
4219             # [ qr/o/, qr/a/ ] ~~ [ ["foo"], ["bar"] ];
4220             # [ "foo", "bar" ] ~~ [ qr/o/, qr/a/ ];
4221             # [ qr/o/, qr/a/ ] ~~ [ "foo", "bar" ];
4222             # $deep1 ~~ $deep1;
4223              
4224             # So we will use two thresholds.
4225 40         65 my $nmin_mono = $depth + 2;
4226 40         63 my $nmin_non_mono = $depth + 6;
4227 40 100       111 if ( $nmin_mono > $nlines_p - 1 ) {
4228 26         43 $nmin_mono = $nlines_p - 1;
4229             }
4230 40 100       96 if ( $nmin_non_mono > $nlines_p - 1 ) {
4231 36         52 $nmin_non_mono = $nlines_p - 1;
4232             }
4233              
4234             # loop to keep or delete each child node
4235 40         107 foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
4236             my ( $jbeg_c, $jend_c, $np_c_uu, $lev_c_uu, $pat_c_uu,
4237             $nc_beg_c_uu, $nc_end_c_uu )
4238 55         71 = @{ $match_tree[ $depth + 1 ]->[$nc] };
  55         155  
4239 55         87 my $nlines_c = $jend_c - $jbeg_c + 1;
4240 55         84 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
4241 55 100       113 my $nmin = $is_monotonic ? $nmin_mono : $nmin_non_mono;
4242 55 100       111 if ( $nlines_c < $nmin ) {
4243             ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
4244 22         67 push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
4245             }
4246             else {
4247             ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
4248 33         91 push @todo_next, $nc;
4249             }
4250             }
4251             }
4252 65         149 @todo_list = @todo_next;
4253             } ## end loop to mark nodes to delete
4254              
4255             #------------------------------------------------------------
4256             # Prune Tree Step 5. Loop to delete selected alignment tokens
4257             #------------------------------------------------------------
4258 36         68 foreach my $item (@delete_list) {
4259 22         26 my ( $jbeg, $jend, $level_keep ) = @{$item};
  22         39  
4260 22         37 foreach my $jj ( $jbeg .. $jend ) {
4261 28         40 my $line = $rlines->[$jj];
4262 28         35 my @idel;
4263 28         39 my $rtokens = $line->{'rtokens'};
4264 28         33 my $imax = @{$rtokens} - 2;
  28         67  
4265 28         46 foreach my $i ( 0 .. $imax ) {
4266 152         177 my $tok = $rtokens->[$i];
4267 152         194 my ( $raw_tok_uu, $lev, $tag_uu, $tok_count_uu ) =
4268             decode_alignment_token($tok);
4269 152 100       244 if ( $lev > $level_keep ) {
4270 83         109 push @idel, $i;
4271             }
4272             }
4273 28 50       52 if (@idel) {
4274 28         50 delete_selected_tokens( $line, \@idel );
4275             }
4276             }
4277             } ## end loop to delete selected alignment tokens
4278              
4279 36         438 return;
4280             } ## end sub prune_alignment_tree
4281              
4282             sub Dump_tree_groups {
4283              
4284 0     0 0 0 my ( $rgroup, $msg ) = @_;
4285              
4286             # Debug routine
4287 0         0 print "$msg\n";
4288 0         0 local $LIST_SEPARATOR = ')(';
4289 0         0 foreach my $item ( @{$rgroup} ) {
  0         0  
4290 0         0 my @fix = @{$item};
  0         0  
4291 0 0       0 foreach my $val (@fix) { $val = "undef" unless ( defined($val) ); }
  0         0  
4292 0         0 $fix[4] = "...";
4293 0         0 print "(@fix)\n";
4294             }
4295 0         0 return;
4296             } ## end sub Dump_tree_groups
4297              
4298             # This test did not give sufficiently better results to use as an update,
4299             # but the flag is kept as a starting point for future testing.
4300 44     44   332 use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
  44         99  
  44         82889  
4301              
4302             sub is_marginal_match {
4303              
4304 290     290 0 786 my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
4305              
4306             # Decide if we should undo some or all of the common alignments of a
4307             # group of just two lines.
4308              
4309             # Given:
4310             # $line_0 and $line_1 - the two lines
4311             # $group_level = the indentation level of the group being processed
4312             # $imax_align = the maximum index of the common alignment tokens
4313             # of the two lines
4314             # $imax_prev = the maximum index of the common alignment tokens
4315             # with the line before $line_0 (=-1 of does not exist)
4316              
4317             # Return:
4318             # $is_marginal = true if the two lines should NOT be fully aligned
4319             # = false if the two lines can remain fully aligned
4320             # $imax_align = the index of the highest alignment token shared by
4321             # these two lines to keep if the match is marginal.
4322              
4323             # When we have an alignment group of just two lines like this, we are
4324             # working in the twilight zone of what looks good and what looks bad.
4325             # This routine is a collection of rules which work have been found to
4326             # work fairly well, but it will need to be updated from time to time.
4327              
4328 290         520 my $is_marginal = 0;
4329              
4330             #---------------------------------------
4331             # Always align certain special cases ...
4332             #---------------------------------------
4333 290 100 100     3173 if (
      100        
4334              
4335             # always keep alignments of a terminal else or ternary
4336             defined( $line_1->{'j_terminal_match'} )
4337              
4338             # always align lists
4339             || $line_0->{'list_type'}
4340              
4341             # always align hanging side comments
4342             || $line_1->{'is_hanging_side_comment'}
4343              
4344             )
4345             {
4346 134         361 return ( $is_marginal, $imax_align );
4347             }
4348              
4349 156         325 my $jmax_0 = $line_0->{'jmax'};
4350 156         295 my $jmax_1 = $line_1->{'jmax'};
4351 156         415 my $rtokens_1 = $line_1->{'rtokens'};
4352             ## my $rtokens_0 = $line_0->{'rtokens'};
4353 156         323 my $rfield_lengths_0 = $line_0->{'rfield_lengths'};
4354 156         252 my $rfield_lengths_1 = $line_1->{'rfield_lengths'};
4355 156         284 my $rpatterns_0 = $line_0->{'rpatterns'};
4356 156         259 my $rpatterns_1 = $line_1->{'rpatterns'};
4357 156         280 my $imax_next = $line_1->{'imax_pair'};
4358              
4359             # We will scan the alignment tokens and set a flag '$is_marginal' if
4360             # it seems that the an alignment would look bad.
4361 156         243 my $max_pad = 0;
4362 156         273 my $saw_good_alignment = 0;
4363 156         253 my $saw_if_or; # if we saw an 'if' or 'or' at group level
4364 156         308 my $raw_tokb = EMPTY_STRING; # first token seen at group level
4365 156         363 my $jfirst_bad;
4366             my $line_ending_fat_comma; # is last token just a '=>' ?
4367 156         0 my $j0_eq_pad;
4368 156         257 my $j0_max_pad = 0;
4369              
4370 156         461 foreach my $j ( 0 .. $jmax_1 - 2 ) {
4371 207         544 my ( $raw_tok, $lev, $tag_uu, $tok_count_uu ) =
4372             decode_alignment_token( $rtokens_1->[$j] );
4373 207 100 66     901 if ( $raw_tok && $lev == $group_level ) {
4374 179 100       433 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
  143         1338  
4375 179   100     786 $saw_if_or ||= $is_if_or{$raw_tok};
4376             }
4377              
4378             # When the first of the two lines ends in a bare '=>' this will
4379             # probably be marginal match. (For a bare =>, the next field length
4380             # will be 2 or 3, depending on side comment)
4381             $line_ending_fat_comma =
4382 207   100     885 $j == $jmax_1 - 2
4383             && $raw_tok eq '=>'
4384             && $rfield_lengths_0->[ $j + 1 ] <= 3;
4385              
4386 207         366 my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
4387 207 100       547 if ( $j == 0 ) {
4388             $pad += $line_1->{'leading_space_count'} -
4389 148         320 $line_0->{'leading_space_count'};
4390              
4391             # Remember the pad at a leading equals
4392 148 100 66     661 if ( $raw_tok eq '=' && $lev == $group_level ) {
4393 88         145 $j0_eq_pad = $pad;
4394 88         217 $j0_max_pad =
4395             0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
4396 88 100       406 $j0_max_pad = 4 if ( $j0_max_pad < 4 );
4397             }
4398             }
4399              
4400 207 100       489 if ( $pad < 0 ) { $pad = -$pad }
  48         105  
4401 207 100       462 if ( $pad > $max_pad ) { $max_pad = $pad }
  110         211  
4402 207 100 100     940 if ( $is_good_marginal_alignment{$raw_tok}
4403             && !$line_ending_fat_comma )
4404             {
4405 151         267 $saw_good_alignment = 1;
4406             }
4407             else {
4408 56 100       369 $jfirst_bad = $j unless ( defined($jfirst_bad) );
4409             }
4410 207         360 my $pat_0 = $rpatterns_0->[$j];
4411 207         345 my $pat_1 = $rpatterns_1->[$j];
4412 207 100 100     713 if ( $pat_0 ne $pat_1 && length($pat_0) eq length($pat_1) ) {
4413 3         8 $pat_0 =~ tr/n/Q/;
4414 3         7 $pat_1 =~ tr/n/Q/;
4415             }
4416 207 100       539 if ( $pat_0 ne $pat_1 ) {
4417              
4418             # Flag this as a marginal match since patterns differ.
4419             # Normally, we will not allow just two lines to match if
4420             # marginal. But we can allow matching in some specific cases.
4421              
4422 44 100       135 $jfirst_bad = $j if ( !defined($jfirst_bad) );
4423 44 100       123 $is_marginal = 1 if ( $is_marginal == 0 );
4424 44 100       166 if ( $raw_tok eq '=' ) {
4425              
4426             # Here is an example of a marginal match:
4427             # $done{$$op} = 1;
4428             # $op = compile_bblock($op);
4429             # The left tokens are both identifiers, but
4430             # one accesses a hash and the other doesn't.
4431             # We'll let this be a tentative match and undo
4432             # it later if we don't find more than 2 lines
4433             # in the group.
4434 13         37 $is_marginal = 2;
4435             }
4436             }
4437             }
4438              
4439 156 50 66     724 $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
4440              
4441             # Turn off the "marginal match" flag in some cases...
4442             # A "marginal match" occurs when the alignment tokens agree
4443             # but there are differences in the other tokens (patterns).
4444             # If we leave the marginal match flag set, then the rule is that we
4445             # will align only if there are more than two lines in the group.
4446             # We will turn of the flag if we almost have a match
4447             # and either we have seen a good alignment token or we
4448             # just need a small pad (2 spaces) to fit. These rules are
4449             # the result of experimentation. Tokens which misaligned by just
4450             # one or two characters are annoying. On the other hand,
4451             # large gaps to less important alignment tokens are also annoying.
4452 156 100 100     512 if ( $is_marginal == 1
      100        
4453             && ( $saw_good_alignment || $max_pad < 3 ) )
4454             {
4455 24         45 $is_marginal = 0;
4456             }
4457              
4458             # We will use the line endings to help decide on alignments...
4459             # See if the lines end with semicolons...
4460 156         274 my $sc_term0;
4461             my $sc_term1;
4462 156 50 33     634 if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
4463              
4464             # shouldn't happen
4465             }
4466             else {
4467 156         345 my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
4468 156         333 my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
4469 156         1071 $sc_term0 = $pat0 =~ /;b?$/;
4470 156         550 $sc_term1 = $pat1 =~ /;b?$/;
4471             }
4472              
4473 156 100 100     627 if ( !$is_marginal && !$sc_term0 ) {
4474              
4475             # First line of assignment should be semicolon terminated.
4476             # For example, do not align here:
4477             # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4478             # $$href{-NUM_DIRS} = 0;
4479 37 100       119 if ( $is_assignment{$raw_tokb} ) {
4480 1         3 $is_marginal = 1;
4481             }
4482             }
4483              
4484             # Try to avoid some undesirable alignments of opening tokens
4485             # for example, the space between grep and { here:
4486             # return map { ( $_ => $_ ) }
4487             # grep { /$handles/ } $self->_get_delegate_method_list;
4488             $is_marginal ||=
4489 156   100     1308 ( $raw_tokb eq '(' || $raw_tokb eq '{' )
      100        
4490             && $jmax_1 == 2
4491             && $sc_term0 ne $sc_term1;
4492              
4493             #---------------------------------------
4494             # return if this is not a marginal match
4495             #---------------------------------------
4496 156 100       344 if ( !$is_marginal ) {
4497 135         511 return ( $is_marginal, $imax_align );
4498             }
4499              
4500             # Undo the marginal match flag in certain cases,
4501              
4502             # Two lines with a leading equals-like operator are allowed to
4503             # align if the patterns to the left of the equals are the same.
4504             # For example the following two lines are a marginal match but have
4505             # the same left side patterns, so we will align the equals.
4506             # my $orig = my $format = "^<<<<< ~~\n";
4507             # my $abc = "abc";
4508             # But these have a different left pattern so they will not be
4509             # aligned
4510             # $xmldoc .= $`;
4511             # $self->{'leftovers'} .= "<bx-seq:seq" . $';
4512              
4513             # First line semicolon terminated but second not, usually ok:
4514             # my $want = "'ab', 'a', 'b'";
4515             # my $got = join( ", ",
4516             # map { defined($_) ? "'$_'" : "undef" }
4517             # @got );
4518             # First line not semicolon terminated, Not OK to match:
4519             # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4520             # $$href{-NUM_DIRS} = 0;
4521 21         45 my $pat0 = $rpatterns_0->[0];
4522 21         45 my $pat1 = $rpatterns_1->[0];
4523              
4524             #---------------------------------------------------------
4525             # Turn off the marginal flag for some types of assignments
4526             #---------------------------------------------------------
4527 21 100       102 if ( $is_assignment{$raw_tokb} ) {
    50          
    50          
4528              
4529             # undo marginal flag if first line is semicolon terminated
4530             # and leading patterns match
4531 14 100       66 if ($sc_term0) {
4532 13         32 $is_marginal = $pat0 ne $pat1;
4533             }
4534             }
4535             elsif ( $raw_tokb eq '=>' ) {
4536              
4537             # undo marginal flag if patterns match
4538 0   0     0 $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
4539             }
4540             elsif ( $raw_tokb eq '=~' ) {
4541              
4542             # undo marginal flag if both lines are semicolon terminated
4543             # and leading patters match
4544 0 0 0     0 if ( $sc_term1 && $sc_term0 ) {
4545 0         0 $is_marginal = $pat0 ne $pat1;
4546             }
4547             }
4548             else {
4549             ##ok: (none of the above)
4550             }
4551              
4552             #-----------------------------------------------------
4553             # Turn off the marginal flag if we saw an 'if' or 'or'
4554             #-----------------------------------------------------
4555              
4556             # A trailing 'if' and 'or' often gives a good alignment
4557             # For example, we can align these:
4558             # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/;
4559             # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
4560              
4561             # or
4562             # $d_in_m[2] = 29 if ( &Date_LeapYear($y) );
4563             # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] );
4564              
4565 21 100       70 if ($saw_if_or) {
4566              
4567             # undo marginal flag if both lines are semicolon terminated
4568 5 50 33     31 if ( $sc_term0 && $sc_term1 ) {
4569 5         11 $is_marginal = 0;
4570             }
4571             }
4572              
4573             # For a marginal match, only keep matches before the first 'bad' match
4574 21 50 100     161 if ( $is_marginal
      66        
4575             && defined($jfirst_bad)
4576             && $imax_align > $jfirst_bad - 1 )
4577             {
4578 0         0 $imax_align = $jfirst_bad - 1;
4579             }
4580              
4581             #----------------------------------------------------------
4582             # Allow sweep to match lines with leading '=' in some cases
4583             #----------------------------------------------------------
4584 21 100 66     151 if ( $imax_align < 0 && defined($j0_eq_pad) ) {
4585              
4586 14 0 50     149 if (
      33        
      33        
4587              
4588             # If there is a following line with leading equals, or
4589             # preceding line with leading equals, then let the sweep align
4590             # them without restriction. For example, the first two lines
4591             # here are a marginal match, but they are followed by a line
4592             # with leading equals, so the sweep-lr logic can align all of
4593             # the lines:
4594              
4595             # $date[1] = $month_to_num{ $date[1] }; # <--line_0
4596             # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4597             # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4598             # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4599              
4600             # Likewise, if we reverse the two pairs we want the same result
4601              
4602             # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4603             # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4604             # $date[1] = $month_to_num{ $date[1] }; # <--line_0
4605             # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4606              
4607             (
4608             $imax_next >= 0
4609             || $imax_prev >= 0
4610             || TEST_MARGINAL_EQ_ALIGNMENT
4611             )
4612             && $j0_eq_pad >= -$j0_max_pad
4613             && $j0_eq_pad <= $j0_max_pad
4614             )
4615             {
4616              
4617             # But do not do this if there is a comma before the '='.
4618             # For example, the first two lines below have commas and
4619             # therefore are not allowed to align with lines 3 & 4:
4620              
4621             # my ( $x, $y ) = $self->Size(); #<--line_0
4622             # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
4623             # my $vx = $right - $left;
4624             # my $vy = $bottom - $top;
4625              
4626 0 0 0     0 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
4627 0         0 $imax_align = 0;
4628             }
4629             }
4630             }
4631              
4632 21         94 return ( $is_marginal, $imax_align );
4633             } ## end sub is_marginal_match
4634              
4635             sub get_extra_leading_spaces {
4636              
4637 443     443 0 1012 my ( $rlines, $rgroups ) = @_;
4638              
4639             #----------------------------------------------------------
4640             # Define any extra indentation space (for the -lp option).
4641             # Here is why:
4642             # If a list has side comments, sub scan_list must dump the
4643             # list before it sees everything. When this happens, it sets
4644             # the indentation to the standard scheme, but notes how
4645             # many spaces it would have liked to use. We may be able
4646             # to recover that space here in the event that all of the
4647             # lines of a list are back together again.
4648             #----------------------------------------------------------
4649              
4650 443 50 33     665 return 0 if ( !@{$rlines} || !@{$rgroups} );
  443         1329  
  443         1283  
4651              
4652 443         1047 my $object = $rlines->[0]->{'indentation'};
4653 443 100       1238 return 0 if ( !ref($object) );
4654 59         114 my $extra_leading_spaces = 0;
4655 59         166 my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
4656 59 100       187 return ($extra_leading_spaces) if ( !$extra_indentation_spaces_wanted );
4657              
4658 13         25 my $min_spaces = $extra_indentation_spaces_wanted;
4659 13 50       34 if ( $min_spaces > 0 ) { $min_spaces = 0 }
  13         21  
4660              
4661             # loop over all groups
4662 13         23 my $ng = -1;
4663 13         16 my $ngroups = @{$rgroups};
  13         36  
4664 13         20 foreach my $item ( @{$rgroups} ) {
  13         24  
4665 32         54 $ng++;
4666 32         36 my ( $jbeg, $jend ) = @{$item};
  32         54  
4667 32         54 foreach my $j ( $jbeg .. $jend ) {
4668 44 100       85 next if ( $j == 0 );
4669              
4670             # all indentation objects must be the same
4671 31 100       92 if ( $object != $rlines->[$j]->{'indentation'} ) {
4672 1         3 return 0;
4673             }
4674             }
4675              
4676             # find the maximum space without exceeding the line length for this group
4677 31         88 my $avail = $rlines->[$jbeg]->get_available_space_on_right();
4678 31 100       63 my $spaces =
4679             ( $avail > $extra_indentation_spaces_wanted )
4680             ? $extra_indentation_spaces_wanted
4681             : $avail;
4682              
4683             #--------------------------------------------------------
4684             # Note: min spaces can be negative; for example with -gnu
4685             # f(
4686             # do { 1; !!(my $x = bless []); }
4687             # );
4688             #--------------------------------------------------------
4689             # The following rule is needed to match older formatting:
4690             # For multiple groups, we will keep spaces non-negative.
4691             # For a single group, we will allow a negative space.
4692 31 50 66     102 if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
  0         0  
4693              
4694             # update the minimum spacing
4695 31 100 66     138 if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
4696 13         27 $extra_leading_spaces = $spaces;
4697             }
4698             }
4699              
4700             # update the indentation object because with -icp the terminal
4701             # ');' will use the same adjustment.
4702 12         65 $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
4703 12         24 return $extra_leading_spaces;
4704             } ## end sub get_extra_leading_spaces
4705              
4706             sub forget_side_comment {
4707 124     124 0 275 my ($self) = @_;
4708 124         268 $self->[_last_side_comment_column_] = 0;
4709 124         216 return;
4710             }
4711              
4712             sub is_good_side_comment_column {
4713              
4714 222     222 0 547 my ( $self, $line, $line_number, $level, $num5 ) = @_;
4715              
4716             # Upon encountering the first side comment of a group, decide if
4717             # a previous side comment should be forgotten. This involves
4718             # checking several rules.
4719              
4720             # Given:
4721             # $line = ref to info hash for the line of interest
4722             # $line_number = number of this line in the output stream
4723             # $level = indentation level of this line
4724             # $num5 = ..see comments below
4725              
4726             # Return:
4727             # true to KEEP old comment location
4728             # false to FORGET old comment location
4729 222         325 my $KEEP = 1;
4730 222         366 my $FORGET = 0;
4731              
4732 222         397 my $rfields = $line->{'rfields'};
4733 222         461 my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4734              
4735             # RULE1: Never forget comment before a hanging side comment
4736 222 100       520 return $KEEP if ($is_hanging_side_comment);
4737              
4738             # RULE2: Forget a side comment after a short line difference,
4739             # where 'short line difference' is computed from a formula.
4740             # Using a smooth formula helps minimize sudden large changes.
4741 210         434 my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
4742 210         437 my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
4743              
4744             # '$num5' is the number of comments in the first 5 lines after the first
4745             # comment. It is needed to keep a compact group of side comments from
4746             # being influenced by a more distant side comment.
4747 210 50       772 $num5 = 1 if ( !$num5 );
4748              
4749             # Some values:
4750              
4751             # $adiff $num5 $short_diff
4752             # 0 * 12
4753             # 1 1 6
4754             # 1 2 4
4755             # 1 3 3
4756             # 1 4 2
4757             # 2 1 4
4758             # 2 2 2
4759             # 2 3 1
4760             # 3 1 3
4761             # 3 2 1
4762              
4763 210         514 my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
4764              
4765 210 100 100     1057 return $FORGET
4766             if ( $line_diff > $short_diff
4767             || !$rOpts_valign_side_comments );
4768              
4769             # RULE3: Forget a side comment if this line is at lower level and
4770             # ends a block
4771 128         205 my $last_sc_level = $self->[_last_side_comment_level_];
4772             return $FORGET
4773             if ( $level < $last_sc_level
4774 128 100 100     452 && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
4775              
4776             # RULE 4: Forget the last side comment if this comment might join a cached
4777             # line ...
4778 110 100       260 if ( my $cached_line_type = get_cached_line_type() ) {
4779              
4780             # ... otherwise side comment alignment will get messed up.
4781             # For example, in the following test script
4782             # with using 'perltidy -sct -act=2', the last comment would try to
4783             # align with the previous and then be in the wrong column when
4784             # the lines are combined:
4785              
4786             # foreach $line (
4787             # [0, 1, 2], [3, 4, 5], [6, 7, 8], # rows
4788             # [0, 3, 6], [1, 4, 7], [2, 5, 8], # columns
4789             # [0, 4, 8], [2, 4, 6]
4790             # ) # diagonals
4791 4 50 33     42 return $FORGET
4792             if ( $cached_line_type == 2 || $cached_line_type == 4 );
4793             }
4794              
4795             # Otherwise, keep it alive
4796 110         236 return $KEEP;
4797             } ## end sub is_good_side_comment_column
4798              
4799             sub align_side_comments {
4800              
4801 222     222 0 493 my ( $self, $rlines, $rgroups ) = @_;
4802              
4803             # Align any side comments in this batch of lines
4804              
4805             # Given:
4806             # $rlines - the lines
4807             # $rgroups - the partition of the lines into groups
4808             #
4809             # We will be working group-by-group because all side comments
4810             # (real or fake) in each group are already aligned. So we just have
4811             # to make alignments between groups wherever possible.
4812              
4813             # An unusual aspect is that within each group we have aligned both real
4814             # and fake side comments. This has the consequence that the lengths of
4815             # long lines without real side comments can cause 'push' all side comments
4816             # to the right. This seems unusual, but testing with and without this
4817             # feature shows that it is usually better this way. Otherwise, side
4818             # comments can be hidden between long lines without side comments and
4819             # thus be harder to read.
4820              
4821 222         459 my $group_level = $self->[_group_level_];
4822 222   100     742 my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
4823             && $group_level == $self->[_last_level_written_];
4824              
4825             # Find groups with side comments, and remember the first nonblank comment
4826 222         381 my $j_sc_beg;
4827             my @todo;
4828 222         378 my $ng = -1;
4829 222         319 foreach my $item ( @{$rgroups} ) {
  222         468  
4830 348         444 $ng++;
4831 348         465 my ( $jbeg, $jend ) = @{$item};
  348         600  
4832 348         656 foreach my $j ( $jbeg .. $jend ) {
4833 403         587 my $line = $rlines->[$j];
4834 403         585 my $jmax = $line->{'jmax'};
4835 403 100       930 if ( $line->{'rfield_lengths'}->[$jmax] ) {
4836              
4837             # this group has a line with a side comment
4838 251         428 push @todo, $ng;
4839 251 100       584 if ( !defined($j_sc_beg) ) {
4840 222         381 $j_sc_beg = $j;
4841             }
4842 251         494 last;
4843             }
4844             }
4845             }
4846              
4847             # done if no groups with side comments
4848 222 50       550 return unless (@todo);
4849              
4850             # Count $num5 = number of comments in the 5 lines after the first comment
4851             # This is an important factor in a decision formula
4852 222         383 my $num5 = 1;
4853 222         365 foreach my $jj ( $j_sc_beg + 1 .. @{$rlines} - 1 ) {
  222         525  
4854 210         307 my $ldiff = $jj - $j_sc_beg;
4855 210 100       390 last if ( $ldiff > 5 );
4856 204         292 my $line = $rlines->[$jj];
4857 204         285 my $jmax = $line->{'jmax'};
4858 204         282 my $sc_len = $line->{'rfield_lengths'}->[$jmax];
4859 204 100       379 next if ( !$sc_len );
4860 121         206 $num5++;
4861             }
4862              
4863             # Forget the old side comment location if necessary
4864 222         379 my $line_0 = $rlines->[$j_sc_beg];
4865 222         1074 my $lnum =
4866             $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
4867 222         772 my $keep_it =
4868             $self->is_good_side_comment_column( $line_0, $lnum, $group_level, $num5 );
4869 222 100       608 my $last_side_comment_column =
4870             $keep_it ? $self->[_last_side_comment_column_] : 0;
4871              
4872             # If there are multiple groups we will do two passes
4873             # so that we can find a common alignment for all groups.
4874 222 100       587 my $MAX_PASS = @todo > 1 ? 2 : 1;
4875              
4876             # Loop over passes
4877 222         324 my $max_comment_column = $last_side_comment_column;
4878 222         444 foreach my $PASS ( 1 .. $MAX_PASS ) {
4879              
4880             # If there are two passes, then on the last pass make the old column
4881             # equal to the largest of the group. This will result in the comments
4882             # being aligned if possible.
4883 246 100       543 if ( $PASS == $MAX_PASS ) {
4884 222         343 $last_side_comment_column = $max_comment_column;
4885             }
4886              
4887             # Loop over the groups with side comments
4888 246         373 my $column_limit;
4889 246         433 foreach my $ngr (@todo) {
4890 304         394 my ( $jbeg, $jend_uu ) = @{ $rgroups->[$ngr] };
  304         536  
4891              
4892             # Note that since all lines in a group have common alignments, we
4893             # just have to work on one of the lines (the first line).
4894 304         482 my $line = $rlines->[$jbeg];
4895 304         477 my $jmax = $line->{'jmax'};
4896 304         441 my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4897             last
4898 304 100 100     831 if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
4899              
4900             # the maximum space without exceeding the line length:
4901 300         917 my $avail = $line->get_available_space_on_right();
4902              
4903             # try to use the previous comment column
4904 300         653 my $side_comment_column = $line->get_column( $jmax - 1 );
4905 300         458 my $move = $last_side_comment_column - $side_comment_column;
4906              
4907             # Remember the maximum possible column of the first line with
4908             # side comment
4909 300 100       723 if ( !defined($column_limit) ) {
4910 246         4069 $column_limit = $side_comment_column + $avail;
4911             }
4912              
4913 300 50       595 next if ( $jmax <= 0 );
4914              
4915             # but if this doesn't work, give up and use the minimum space
4916 300         500 my $min_move = $rOpts_minimum_space_to_comment - 1;
4917 300 100       698 if ( $move > $avail ) {
4918 13         19 $move = $min_move;
4919             }
4920              
4921             # but we want some minimum space to the comment
4922 300 100 100     1019 if ( $move >= 0
      100        
4923             && $j_sc_beg == 0
4924             && $continuing_sc_flow )
4925             {
4926 5         9 $min_move = 0;
4927             }
4928              
4929             # remove constraints on hanging side comments
4930 300 100       596 if ($is_hanging_side_comment) { $min_move = 0 }
  16         25  
4931              
4932 300 100       582 if ( $move < $min_move ) {
4933 215         326 $move = $min_move;
4934             }
4935              
4936             # don't exceed the available space
4937 300 100       598 if ( $move > $avail ) { $move = $avail }
  11         16  
4938              
4939             # We can only increase space, never decrease.
4940 300 100       612 if ( $move < 0 ) { $move = 0 }
  8         10  
4941              
4942             # Discover the largest column on the preliminary pass
4943 300 100       566 if ( $PASS < $MAX_PASS ) {
4944 49         111 my $col = $line->get_column( $jmax - 1 ) + $move;
4945              
4946             # but ignore columns too large for the starting line
4947 49 100 66     212 if ( $col > $max_comment_column && $col < $column_limit ) {
4948 23         50 $max_comment_column = $col;
4949             }
4950             }
4951              
4952             # Make the changes on the final pass
4953             else {
4954 251         876 $line->increase_field_width( $jmax - 1, $move );
4955              
4956             # remember this column for the next group
4957 251         570 $last_side_comment_column = $line->get_column( $jmax - 1 );
4958             }
4959             } ## end loop over groups
4960             } ## end loop over passes
4961              
4962             # Find the last side comment
4963 222         331 my $j_sc_last;
4964 222         357 my $ng_last = $todo[-1];
4965 222         329 my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
  222         445  
4966 222         586 foreach my $jj ( reverse( $jbeg .. $jend ) ) {
4967 225         348 my $line = $rlines->[$jj];
4968 225         338 my $jmax = $line->{'jmax'};
4969 225 100       589 if ( $line->{'rfield_lengths'}->[$jmax] ) {
4970 222         317 $j_sc_last = $jj;
4971 222         414 last;
4972             }
4973             }
4974              
4975             # Save final side comment info for possible use by the next batch
4976 222 50       508 if ( defined($j_sc_last) ) {
4977 222         654 my $line_number =
4978             $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
4979 222         410 $self->[_last_side_comment_column_] = $last_side_comment_column;
4980 222         356 $self->[_last_side_comment_line_number_] = $line_number;
4981 222         388 $self->[_last_side_comment_level_] = $group_level;
4982             }
4983 222         489 return;
4984             } ## end sub align_side_comments
4985              
4986             ###########################################
4987             # CODE SECTION 6: Pad Signed Number Columns
4988             ###########################################
4989              
4990 44     44   336 use constant DEBUG_VSN => 0;
  44         82  
  44         3867  
4991              
4992             my %is_leading_sign_pattern;
4993              
4994             BEGIN {
4995              
4996             # PATTERNS: A pattern is basically the concatenation of all token types in
4997             # the field, with keywords converted to their actual text. The formatter
4998             # has changed things like 'print' to 'priNt' so that all 'n's are numbers.
4999             # The following patterns 'n' can match a signed number of interest.
5000             # Thus 'n'=a signed or unsigned number, 'b'=a space, '}'=one of ) ] }
5001             $is_leading_sign_pattern{$_} = 1
5002 44     44   130102 for ( 'n,', 'n,b', 'nb', 'nb}', 'nb},', 'n},', 'n};' );
5003             }
5004              
5005             sub min_max_median {
5006              
5007 42     42 0 56 my ($rvalues) = @_;
5008              
5009             # Given: $rvalues = ref to an array of numbers
5010             # Return: the min, max, and median
5011 42         53 my $num = @{$rvalues};
  42         51  
5012 42 50       74 return unless ($num);
5013              
5014 42         54 my @sorted = sort { $a <=> $b } @{$rvalues};
  100         191  
  42         105  
5015              
5016 42         47 my $min = $sorted[0];
5017 42         52 my $max = $sorted[-1];
5018 42         70 my $imid = int( $num / 2 );
5019 42 100       86 my $median =
5020             @sorted % 2
5021             ? $sorted[$imid]
5022             : ( $sorted[ $imid - 1 ] + $sorted[$imid] ) / 2;
5023              
5024 42         98 return ( $min, $max, $median );
5025             } ## end sub min_max_median
5026              
5027             sub end_signed_number_column {
5028              
5029 30     30 0 51 my ( $rgroup_lines, $rcol_hash, $ix_last ) = @_;
5030              
5031             # Finish formatting a column of unsigned numbers
5032             # Given:
5033             # $rgroup_lines - the current vertical alignment group of lines
5034             # $rcol_hash - a hash of information about this vertical column
5035             # $ix_last - index of the last line of this vertical column
5036             # Task:
5037             # If this is a mixture of signed and unsigned numbers, then add a
5038             # single space before the unsigned numbers to improve appearance.
5039 30 50       54 return unless ($rcol_hash);
5040 30         44 my $jcol = $rcol_hash->{jcol};
5041 30         40 my $unsigned = $rcol_hash->{unsigned_count};
5042 30         38 my $signed = $rcol_hash->{signed_count};
5043 30         45 my $rsigned_lines = $rcol_hash->{rsigned_lines};
5044              
5045 30 50 33     63 if ( !$signed && $unsigned ) {
5046 0         0 DEVEL_MODE
5047             && Fault("avoid calling without mixed signed and unsigned\n");
5048 0         0 return;
5049             }
5050              
5051 30         40 my $pos_start_number = $rcol_hash->{pos_start_number};
5052 30         54 my $char_end_part1 = $rcol_hash->{char_end_part1};
5053 30         46 my $ix_first = $rcol_hash->{ix_first};
5054 30         74 my $nlines = $ix_last - $ix_first + 1;
5055              
5056             # check for skipped lines, shouldn't happen
5057 30 50       76 if ( $signed + $unsigned != $nlines ) {
5058 0         0 my $line = $rgroup_lines->[$ix_last];
5059 0         0 my $rfields = $line->{'rfields'};
5060 0         0 my $text = join EMPTY_STRING, @{$rfields};
  0         0  
5061 0         0 DEVEL_MODE && Fault(<<EOM);
5062             We seem to have miscounted lines, please check:
5063             signed=$signed
5064             j=$jcol
5065             unsigned=$unsigned
5066             ix_first=$ix_first
5067             ix_last=$ix_last
5068             nlines=$nlines
5069             text=$text
5070             EOM
5071 0         0 return;
5072             }
5073              
5074             #-----------------------------------------------------------------
5075             # Form groups of unsigned numbers from the list of signed numbers.
5076             #-----------------------------------------------------------------
5077 30         40 my @unsigned_subgroups;
5078 30         40 my $ix_last_negative = $ix_first - 1;
5079 30         54 my %is_signed;
5080 30         34 foreach my $ix ( @{$rsigned_lines} ) {
  30         56  
5081 45         72 $is_signed{$ix} = 1;
5082 45         63 my $Nu = $ix - $ix_last_negative - 1;
5083 45 100 100     113 if ( $Nu > 0 && $Nu <= $rOpts_valign_signed_numbers_limit ) {
5084 18         42 push @unsigned_subgroups, [ $ix_last_negative + 1, $ix - 1 ];
5085             }
5086 45         64 $ix_last_negative = $ix;
5087             }
5088              
5089             # Exclude groups with more than about 20 consecutive numbers. Little
5090             # visual improvement is gained by padding more than this, and this avoids
5091             # large numbers of differences in a file when a single line is changed.
5092 30         41 my $Nu = $ix_last - $ix_last_negative;
5093 30 100 100     78 if ( $Nu > 0 && $Nu <= $rOpts_valign_signed_numbers_limit ) {
5094 18         38 push @unsigned_subgroups, [ $ix_last_negative + 1, $ix_last ];
5095             }
5096              
5097 30 100       55 if ( !@unsigned_subgroups ) { return } # shouldn't happen
  9         16  
5098              
5099             #--------------------------------------------
5100             # Find number lengths for irregularity checks
5101             #--------------------------------------------
5102             # Padding signed numbers looks best when the numbers, excluding signs,
5103             # all have about the same length. When the lengths are irregular, with
5104             # mostly longer unsigned numbers, it doesn't look good to do this. So
5105             # we need to filter out these bad-looking cases.
5106              
5107             # The 'field_lengths' are unreliable because they may include some
5108             # arbitrary trailing text; see 'substr.t' So we must look for the end of
5109             # the number at a space, comma, or closing container token. Note that these
5110             # lengths include the length of any signs.
5111 21         39 my @len_unsigned;
5112             my @len_signed;
5113 21         0 my @lengths;
5114 21         38 foreach my $ix ( $ix_first .. $ix_last ) {
5115 102         126 my $line = $rgroup_lines->[$ix];
5116 102         116 my $rfield = $line->{'rfields'};
5117 102         160 my $str = substr( $rfield->[$jcol], $pos_start_number );
5118 102 50       252 if ( $str =~ /^([^\s\,\)\]\}]*)/ ) { $str = $1 }
  102         154  
5119 102         113 my $len = length($str);
5120 102 100       146 if ( $is_signed{$ix} ) { push @len_signed, $len }
  33         44  
5121 69         85 else { push @len_unsigned, $len }
5122 102         260 push @lengths, [ $len, $ix ];
5123             }
5124              
5125 21         51 my ( $min_unsigned_length, $max_unsigned_length, $median_unsigned_length )
5126             = min_max_median( \@len_unsigned );
5127 21         40 my ( $min_signed_length_uu, $max_signed_length, $median_signed_length ) =
5128             min_max_median( \@len_signed );
5129              
5130             # Skip padding if no signed numbers exceed unsigned numbers in length
5131 21 50       63 if ( $max_signed_length <= $min_unsigned_length ) {
    100          
5132 0         0 return;
5133             }
5134              
5135             # If max signed length is greatest - all unsigned values can be padded
5136             elsif ( $max_signed_length > $max_unsigned_length ) {
5137              
5138             # Example:
5139             # %wind_dir = (
5140             # 'n' => [ 1, 0 ],
5141             # 'ne' => [ 1, 1 ],
5142             # 'e' => [ 0, 1 ],
5143             # 'se' => [ -1, 1 ],
5144             # 's' => [ -1, 0 ],
5145             # 'sw' => [ -1, -1 ],
5146             # 'w' => [ 0, -1 ],
5147             # 'nw' => [ 1, -1 ],
5148             # '' => [ 0, 0 ],
5149             # );
5150              
5151             # This is the ideal case - ok to continue and pad
5152             }
5153              
5154             # intermediate case: some signed numbers cannot be padded ...
5155             else {
5156              
5157             # We have to take a closer look.
5158             # Here is an example which looks bad if we do padding like this:
5159             # my %hash = (
5160             # X0 => -12867.098241163,
5161             # X1 => 2.31694338671684, # unsigned w/ excess>0
5162             # X2 => 0.0597726714860419, # max length => excess=0
5163             # Y0 => 30043.1335503155, # unsigned w/ excess>0
5164             # Y1 => 0.0525784981597044, # max length => excess=0
5165             # Y2 => -2.32447131600783,
5166             # );
5167              
5168             # To decide what looks okay, we count 'good' and 'bad' line interfaces:
5169             # X0 - X1 = good (X0 is signed and X1 can move)
5170             # X1 - X2 = bad (x1 can move but x2 cannot)
5171             # X2 - Y0 = bad (x2 cannot move but Y0 can move)
5172             # Y0 - Y1 = bad (Y0 can move but Y1 cannot move)
5173             # Y1 - Y2 = bad (Y1 cannot move and Y2 is signed)
5174             # Result: 4 bad interfaces and 1 good => so we will skip this
5175 11         17 my $good_count = 0;
5176 11         14 my $bad_count = 0;
5177 11         17 foreach my $item (@lengths) {
5178 54         108 $item->[0] = $max_unsigned_length - $item->[0];
5179             }
5180 11         20 my $item0 = shift @lengths;
5181 11         16 my ( $excess, $ix ) = @{$item0};
  11         19  
5182 11 50       22 my $immobile_count = $excess ? 0 : 1;
5183 11         19 foreach my $item (@lengths) {
5184 43         53 my $excess_m = $excess;
5185 43         48 my $ix_m = $ix;
5186 43         42 ( $excess, $ix ) = @{$item};
  43         61  
5187 43 100       73 if ( !$excess ) { $immobile_count++ }
  13         18  
5188              
5189 43 100       64 if ( $is_signed{$ix_m} ) {
5190              
5191             # signed-unsigned interface
5192 15 100       30 if ( !$is_signed{$ix} ) {
5193 13 100       18 if ($excess) { $good_count++ }
  8         12  
5194 5         8 else { $bad_count++ }
5195             }
5196              
5197             # signed-signed: ok, not good or bad
5198             }
5199             else {
5200              
5201             # unsigned-signed interface
5202 28 100       45 if ( $is_signed{$ix} ) {
5203 9 50       13 if ($excess_m) { $good_count++ }
  9         17  
5204 0         0 else { $bad_count++ }
5205             }
5206              
5207             # unsigned-unsigned: bad if different
5208             else {
5209 19 100 100     70 if ( $excess_m xor $excess ) {
5210 7         11 $bad_count++;
5211             }
5212             }
5213             }
5214             }
5215              
5216             # Filter 1: skip if more interfaces are 'bad' than 'good'
5217 11 100       25 if ( $bad_count > $good_count ) {
5218 1         8 return;
5219             }
5220              
5221             # Filter 2: skip in a table with multiple 'bad' interfaces and where
5222             # 'most' of the unsigned lengths are shorter than the signed lengths.
5223             # Using the median value makes this insensitive to small changes.
5224 10 50 66     39 if ( $median_unsigned_length >= $median_signed_length
      33        
5225             && $bad_count > 1
5226             && $immobile_count > 1 )
5227             {
5228 0         0 return;
5229             }
5230              
5231             # Anything that gets past these filters should look ok if padded
5232             }
5233              
5234             #---------------------------------------------
5235             # Compute actual available space for each line
5236             #---------------------------------------------
5237 20         36 my %excess_space;
5238 20         31 my $movable_count = 0;
5239 20         31 foreach my $item (@unsigned_subgroups) {
5240 34         41 my ( $ix_min, $ix_max ) = @{$item};
  34         63  
5241 34         57 foreach my $ix ( $ix_min .. $ix_max ) {
5242 56         85 my $line = $rgroup_lines->[$ix];
5243 56         71 my $leading_space_count = $line->{'leading_space_count'};
5244 56         107 my $jmax = $line->{'jmax'};
5245 56         89 my $rfield_lengths = $line->{'rfield_lengths'};
5246 56 50       90 if ( $jcol >= $jmax ) {
5247              
5248             # shouldn't happen
5249 0         0 DEVEL_MODE && Fault("jcol=$jcol >= jmax=$jmax\n");
5250 0         0 return;
5251             }
5252 56         65 my @alignments = @{ $line->{'ralignments'} };
  56         107  
5253 56         77 my $col = $alignments[$jcol]->{'column'};
5254             my $col_start =
5255             $jcol == 0
5256             ? $leading_space_count
5257 56 100       99 : $alignments[ $jcol - 1 ]->{'column'};
5258 56         62 my $avail = $col - $col_start;
5259 56         70 my $field_length = $rfield_lengths->[$jcol];
5260 56         65 my $excess = $avail - $field_length;
5261 56         127 $excess_space{$ix} = $excess;
5262 56 50       91 if ( $excess > 0 ) { $movable_count++ }
  56         106  
5263             }
5264             }
5265              
5266 20 50       31 return unless ($movable_count);
5267              
5268             # Count the number of signed-unsigned interfaces that would change
5269             # if we do the padding
5270 20         26 my $Nc = 0;
5271 20         73 foreach my $item (@unsigned_subgroups) {
5272 34         42 my ( $ix_min, $ix_max ) = @{$item};
  34         59  
5273 34 100 66     109 $Nc++ if ( $excess_space{$ix_min} > 0 && $ix_min != $ix_first );
5274 34 100 66     105 $Nc++ if ( $excess_space{$ix_max} > 0 && $ix_max != $ix_last );
5275             }
5276              
5277             #--------------------------------------------------------------------
5278             # Sparsity check:
5279             # Give up if the number of interface changes will be below the cutoff
5280             #--------------------------------------------------------------------
5281 20 50       39 if ( $unsigned > $Nc * $rOpts_valign_signed_numbers_limit ) {
5282 0         0 return;
5283             }
5284              
5285             #------------------------------------------------------------------------
5286             # Insert an extra space before the unsigned numbers if space is available
5287             #------------------------------------------------------------------------
5288 20         30 foreach my $item (@unsigned_subgroups) {
5289 34         46 my ( $ix_min, $ix_max ) = @{$item};
  34         48  
5290              
5291 34         61 foreach my $ix ( $ix_min .. $ix_max ) {
5292 56 50       107 next if ( $excess_space{$ix} <= 0 );
5293 56         70 my $line = $rgroup_lines->[$ix];
5294 56         65 my $rfields = $line->{'rfields'};
5295 56         71 my $rfield_lengths = $line->{'rfield_lengths'};
5296 56         99 pad_signed_field(
5297             \$rfields->[$jcol], \$rfield_lengths->[$jcol],
5298             $pos_start_number, $char_end_part1
5299             );
5300             }
5301             }
5302 20         92 return;
5303             } ## end sub end_signed_number_column
5304              
5305             sub pad_signed_field {
5306              
5307 56     56 0 91 my ( $rstr, $rstr_len, $pos_start_number, $char_end_part1 ) = @_;
5308              
5309             # Insert an extra space before a number to highlight algebraic signs
5310             # in a column of numbers.
5311             # Given:
5312             # $rstr = ref to string
5313             # $rstr_len = ref to display width of string (could include wide chars)
5314             # $pos_start_number = string position of the leading digit
5315             # $char_end_part1 = character at $pos_start_number - 1
5316             # Task: update $rstr and $rstr_len with a single space
5317              
5318             # First partition the string into $part1 and $part2, so that the
5319             # number starts at the beginning of part2.
5320 56         77 my $part1 = EMPTY_STRING;
5321 56         66 my $part2 = ${$rstr};
  56         76  
5322 56         70 my $str = ${$rstr};
  56         65  
5323 56 100       95 if ( $pos_start_number > 0 ) {
5324 16         19 my $len = length($str);
5325 16 50       32 if ( $pos_start_number >= $len ) {
5326 0         0 DEVEL_MODE && Fault(<<EOM);
5327             Expection position '$pos_start_number' < length $len of string '$str'
5328             EOM
5329 0         0 return;
5330             }
5331 16         23 $part1 = substr( $str, 0, $pos_start_number );
5332 16         26 $part2 = substr( $str, $pos_start_number );
5333              
5334             # VERIFY that we are inserting a new space after either
5335             # (1) an existing space or
5336             # (2) an opening token.
5337             # Otherwise disaster can occur. An error here implies a programming
5338             # error in defining '$pos_start_number'.
5339              
5340 16         25 my $test_char1 = substr( $part1, -1, 1 );
5341 16 50       37 if ( $test_char1 ne $char_end_part1 ) {
5342 0         0 DEVEL_MODE && Fault(<<EOM);
5343             Expecting '$char_end_part1' but saw '$test_char1' in string '$str'
5344             Probably bad position '$pos_start_number'
5345             EOM
5346 0         0 return;
5347             }
5348             }
5349              
5350             # VERIFY we are inserting a space before a digit character
5351 56         89 my $test_char2 = substr( $part2, 0, 1 );
5352 56 50       96 if ( $is_digit_char{$test_char2} ) {
5353 56         76 ${$rstr} = $part1 . SPACE . $part2;
  56         78  
5354 56         62 ${$rstr_len} += 1;
  56         69  
5355             }
5356             else {
5357 0         0 DEVEL_MODE && Fault(<<EOM);
5358             Expecting test char2 as leading digit but saw '$test_char2' in string '$str'
5359             May be bad position '$pos_start_number'
5360             EOM
5361             }
5362 56         112 return;
5363             } ## end sub pad_signed_field
5364              
5365             sub split_field {
5366              
5367 124     124 0 275 my ( $pat1, $field, $pattern ) = @_;
5368              
5369             # Given;
5370             # $pat1 = first part of a pattern before a numeric type 'n'
5371             # $field = corresponding text field
5372             # $pattern = full pattern
5373             # Return:
5374             # $pos_start_number = position in $field where the number should start
5375             # = 0 if cannot find
5376             # $char_end_part1 = the character preceding $pos_start_number
5377             # $ch_opening = the preceding opening container character, if any
5378              
5379             # We have to find where the possible number starts in this field.
5380             # The safe thing to do is return @fail if anything does not look right.
5381              
5382 124         157 my $pos_start_number = 0;
5383 124         143 my $char_end_part1 = EMPTY_STRING;
5384 124         159 my $ch_opening = EMPTY_STRING;
5385 124         238 my @fail = ( $pos_start_number, $char_end_part1, $ch_opening );
5386              
5387             # Be sure there is just 'n' in the pattern. Multiple terms can occur
5388             # when fields are joined, but since we are jumping into the middle
5389             # of a field it is safest not to try to handle them.
5390 124         211 my $n_count = ( $pattern =~ tr/n/n/ );
5391 124 100 66     377 if ( $n_count && $n_count > 1 ) {
5392 20         54 return @fail;
5393             }
5394              
5395             # Same thing for commas
5396 104         149 my $comma_count = ( $pattern =~ tr/,/,/ );
5397 104 50 66     279 if ( $comma_count && $comma_count > 1 ) {
5398 0         0 return @fail;
5399             }
5400              
5401             # Require 0 or 1 braces
5402 104         158 my $len_field = length($field);
5403 104         134 my $len_pat1 = length($pat1);
5404 104 50 33     297 return @fail unless ( $len_pat1 && $len_field );
5405              
5406             # Look at the pattern ending
5407 104         128 my $ending_b = 0;
5408 104         180 my $ch = substr( $pat1, -1, 1 );
5409 104 100       201 if ( $ch eq 'b' ) {
5410 79         105 $ending_b = 1;
5411 79         121 $ch = substr( $pat1, -2, 1 );
5412 79         156 $char_end_part1 = SPACE;
5413             }
5414              
5415             # handle either '{b' or '{'
5416 104 100       196 if ( $ch eq '{' ) {
5417              
5418             # Only one brace
5419 56         92 my $brace_count = ( $pat1 =~ tr/\{/\{/ );
5420 56 50       154 return @fail if ( $brace_count != 1 );
5421              
5422 56         78 my $i_paren = index( $field, '(' );
5423 56         87 my $i_bracket = index( $field, '[' );
5424 56         71 my $i_brace = index( $field, '{' );
5425 56         65 my $i_opening = length($field);
5426 56 100       124 if ( $i_paren >= 0 ) {
5427 18         24 $i_opening = $i_paren;
5428 18         28 $ch_opening = '(';
5429             }
5430 56 100 66     143 if ( $i_bracket >= 0
5431             && $i_bracket < $i_opening )
5432             {
5433 38         45 $i_opening = $i_bracket;
5434 38         53 $ch_opening = '[';
5435             }
5436 56 50 33     109 if ( $i_brace >= 0 && $i_brace < $i_opening ) {
5437 0         0 $i_opening = $i_brace;
5438 0         0 $ch_opening = '{';
5439             }
5440 56 50 33     155 if ( $i_opening >= 0
5441             && $i_opening < length($field) - 1 )
5442             {
5443 56         73 $pos_start_number = $i_opening + 1 + $ending_b;
5444 56 100       101 $char_end_part1 = $ch_opening
5445             if ( !$ending_b );
5446             }
5447             else {
5448             # strange - could not find the opening token
5449             }
5450             }
5451              
5452             # no braces: maybe '=>b'
5453             else {
5454              
5455             # looking for patterns ending in '=b' or '=>b'
5456 48 50       95 if ( !$ending_b ) { return @fail }
  0         0  
5457              
5458             # find the = in the text
5459 48         74 my $pos_equals = index( $field, '=' );
5460 48 100       129 return @fail if ( $pos_equals < 0 );
5461              
5462             # be sure there are no other '=' in the pattern
5463 37         50 my $equals_count = ( $pat1 =~ tr/=/=/ );
5464 37 100       80 return @fail if ( $equals_count != 1 );
5465              
5466 34 100 66     174 if ( $len_pat1 >= 2 && substr( $pat1, -2, 2 ) eq '=b' ) {
    100 66        
5467 8         12 $pos_start_number = $pos_equals + 2;
5468             }
5469             elsif ( $len_pat1 >= 3 && substr( $pat1, -3, 3 ) eq '=>b' ) {
5470 16         31 $pos_start_number = $pos_equals + 3;
5471             }
5472             else {
5473              
5474             # cannot handle this pattern
5475 10         36 return @fail;
5476             }
5477             }
5478              
5479 80 50 33     246 if ( $pos_start_number <= 0 || $pos_start_number >= $len_field ) {
5480 0         0 return @fail;
5481             }
5482              
5483 80         225 return ( $pos_start_number, $char_end_part1, $ch_opening );
5484             } ## end sub split_field
5485              
5486             sub field_matches_end_pattern {
5487              
5488 264     264 0 353 my ( $field2, $pat2 ) = @_;
5489              
5490             # Check that a possible numeric field matches the ending pattern
5491              
5492             # Given:
5493             # $field2 = the rest of the field after removing any sign
5494             # $pat2 = the end pattern of this field
5495             # Return:
5496             # false if field is definitely non-numeric
5497             # true otherwise
5498              
5499 264         318 my $next_char = substr( $pat2, 1, 1 );
5500 264         271 my $field2_trim = EMPTY_STRING;
5501              
5502             # if pattern is one of: 'n,', 'n,b'
5503 264 100       415 if ( $next_char eq COMMA ) {
    100          
    50          
5504 192         236 my $icomma = index( $field2, COMMA );
5505 192 50       278 if ( $icomma >= 0 ) {
5506 192         268 $field2_trim = substr( $field2, 0, $icomma );
5507             }
5508             }
5509              
5510             # if pattern is one of: 'nb', 'nb}', 'nb},'
5511             elsif ( $next_char eq 'b' ) {
5512 47         57 my $ispace = index( $field2, SPACE );
5513 47 50       79 if ( $ispace >= 0 ) {
5514 47         63 $field2_trim = substr( $field2, 0, $ispace );
5515             }
5516             }
5517              
5518             # if pattern is one of 'n},', 'n};'
5519             elsif ( $next_char eq '}' ) {
5520 25 50       87 if ( $field2 =~ /^([^\)\}\]]+)/ ) {
5521 25         47 $field2_trim = $1;
5522             }
5523             }
5524              
5525             # unrecognized pattern
5526             else {
5527 0         0 DEVEL_MODE && Fault(<<EOM);
5528             Unexpected ending pattern '$pat2' next='$next_char' field2='$field2'
5529             The hash 'is_leading_sign_pattern' seems to have changed but the code
5530             has not been updated to handle it. Please fix.
5531             EOM
5532 0         0 return;
5533             }
5534              
5535 264 50       409 if ( !length($field2_trim) ) {
5536 0         0 DEVEL_MODE
5537             && Fault(
5538             "STRANGE: cannot find end of field=$field2 for pat=$pat2 \n");
5539 0         0 return;
5540             }
5541              
5542             # Reject obviously non-numeric fields just to be sure we did not
5543             # jump into a quote of some kind
5544 264 100       777 if ( $field2_trim !~ /^[\d\.\+\-abcdefpx_]+$/i ) {
5545             DEBUG_VSN
5546 2         3 && print {*STDERR}
5547             "Rejecting match to pat2='$pat2' with next=$next_char field2=$field2 trimmed='$field2_trim'\n";
5548 2         4 return;
5549             }
5550 262         1903 return 1;
5551             } ## end sub field_matches_end_pattern
5552              
5553             sub pad_signed_number_columns {
5554              
5555 70     70 0 199 my ($rgroup_lines) = @_;
5556              
5557             # Given:
5558             # $rgroup_lines = the current vertical alignment group of lines
5559             # Task:
5560             # Look for columns of aligned numeric values, some of whose numbers
5561             # have algebraic signs. Add a leading space to the unsigned
5562             # numbers, if possible, so that the just the signs appear as the first
5563             # character. Example of what we want to do:
5564              
5565             # my @correct = (
5566             # [ 123456.79, 86753090000.868, 11 ],
5567             # [ -123456.79, -86753090000.868, -11 ],
5568             # [ 123456.001, 80.080, 10 ],
5569             # [ -123456.001, -80.080, 0 ],
5570             # [ 10.9, 10.9, 11 ],
5571             # );
5572              
5573             # The logic here is complex because we are working with bits of text
5574             # which have been broken into patterns which are convenient for the
5575             # vertical aligner, but we no longer have the original tokenization
5576             # which would have indicated the precise bounds of numbers. So we
5577             # have to proceed very carefully with lots of checks. There are
5578             # more checks than really necessary now because originally numbers
5579             # and quotes were both indicated with pattern 'Q'. But now numbers are
5580             # uniquely marked as pattern 'n', so there is less risk of an error.
5581             # The extra checks take very little time so they are retained.
5582              
5583 70 50       177 return unless ($rOpts_valign_signed_numbers);
5584              
5585 70         190 my %column_info;
5586             my @columns;
5587              
5588             #----------------
5589             # loop over lines
5590             #----------------
5591 70         117 my $ix_line = -1;
5592 70         119 my $jmax = -1;
5593 70         119 foreach my $line ( @{$rgroup_lines} ) {
  70         143  
5594 349         424 $ix_line++;
5595 349         450 my $jmax_last = $jmax;
5596 349         535 $jmax = $line->{'jmax'};
5597 349         525 my $jmax_change = $jmax ne $jmax_last;
5598              
5599 349         402 my @alignments = @{ $line->{'ralignments'} };
  349         698  
5600 349         512 my $rfields = $line->{'rfields'};
5601 349         463 my $rpatterns = $line->{'rpatterns'};
5602 349         463 my $rtokens = $line->{'rtokens'};
5603              
5604             #-----------------------------------------------
5605             # Check for a reduction in the number of columns
5606             #-----------------------------------------------
5607 349 100       629 if ( $jmax < $jmax_last ) {
5608              
5609 11         23 foreach my $jcol ( keys %column_info ) {
5610              
5611             # end any stranded columns on the right
5612 6 100       16 next if ( $jcol < $jmax );
5613 2         3 my $rcol_hash = $column_info{$jcol};
5614 2 50       6 next unless ($rcol_hash);
5615 2 0 33     4 if ( $rcol_hash->{signed_count}
5616             && $rcol_hash->{unsigned_count} )
5617             {
5618 0         0 end_signed_number_column( $rgroup_lines, $rcol_hash,
5619             $ix_line - 1 );
5620             }
5621 2         6 delete $column_info{$jcol};
5622             }
5623              
5624             # Try to keep the end data column running; test case 'rfc.in'
5625             # The last item in a list will still need a trailing comma.
5626 11         20 my $jcol = $jmax - 1;
5627 11 100 66     55 if ( $jcol >= 0 && $column_info{$jcol} ) {
5628 2         4 my $alignment = $alignments[$jcol];
5629 2         4 my $old_col = $columns[$jcol];
5630 2         4 my $col = $alignment->{column};
5631              
5632 2 0 33     8 if (
      0        
      33        
5633             $col < $old_col
5634              
5635             # only do this if the text has a leading digit
5636             && $rfields->[$jcol] =~ /^([+-]?)\d/
5637              
5638             # and a signed number has been seen - issue c375
5639             && ( $1 || $column_info{$jcol}->{signed_count} )
5640             )
5641             {
5642 0         0 my $spaces_needed = $old_col - $col;
5643 0         0 my $spaces_available =
5644             $line->get_available_space_on_right();
5645 0 0       0 if ( $spaces_available >= $spaces_needed ) {
5646 0         0 $line->increase_field_width( $jcol, $spaces_needed );
5647             }
5648             }
5649             }
5650             }
5651              
5652             #--------------------------------------------
5653             # Loop over fields except last (side comment)
5654             #--------------------------------------------
5655 349         588 for my $jcol ( 0 .. $jmax - 1 ) {
5656              
5657             #-----------------------------------------
5658             # Decide if this is a new alignment column
5659             #-----------------------------------------
5660 785         917 my $alignment = $alignments[$jcol];
5661 785         856 my $old_col = $columns[$jcol];
5662 785         957 my $col = $alignment->{column};
5663 785         919 $columns[$jcol] = $col;
5664 785 100 100     1878 if ( defined($old_col) && $old_col != $col ) {
5665 55         133 foreach my $jcol_old ( keys %column_info ) {
5666 17 100       47 next if ( $jcol_old < $jcol );
5667 11         39 my $rcol_hash = $column_info{$jcol_old};
5668 11 50 66     40 if ( $rcol_hash->{signed_count}
5669             && $rcol_hash->{unsigned_count} )
5670             {
5671 1         5 end_signed_number_column( $rgroup_lines, $rcol_hash,
5672             $ix_line - 1 );
5673             }
5674 11         46 delete $column_info{$jcol_old};
5675             }
5676             }
5677              
5678             # A new padded sign column can only start at an alignment change
5679 785         1022 my $rcol_hash = $column_info{$jcol};
5680              
5681             #------------------------------------------------------------
5682             # Examine this field, looking for signed and unsigned numbers
5683             #------------------------------------------------------------
5684 785         1102 my $field = $rfields->[$jcol];
5685 785         1057 my $pattern = $rpatterns->[$jcol];
5686              
5687 785         793 my $is_signed_number = 0;
5688 785         821 my $is_unsigned_number = 0;
5689              
5690             #--------------------------------------------------------
5691             # set $pos_start_number = index in field of digit or sign
5692             #--------------------------------------------------------
5693 785         769 my $pos_start_number = 0;
5694 785         825 my $char_end_part1 = EMPTY_STRING;
5695 785         785 my $ch_opening = EMPTY_STRING;
5696              
5697             # Set $field_ok to false on encountering any problem
5698             # Do not pad signed and unsigned hash keys
5699 785   66     1911 my $field_ok = length($field) > 0
5700             && substr( $rtokens->[$jcol], 0, 2 ) ne '=>';
5701              
5702 785 100 66     1766 if ( $field_ok && $pattern ) {
5703              
5704             # Split the pattern at the first 'n'
5705             # $pat1 = pattern before the 'n' (if any)
5706             # $pat2 = pattern starting at the 'n'
5707 755         839 my ( $pat1, $pat2 );
5708 755         917 my $posq = index( $pattern, 'n' );
5709 755 100       985 if ( $posq < 0 ) {
5710 333         390 $field_ok = 0;
5711             }
5712             else {
5713             # Just look at up to 3 of the pattern characters
5714             # We require $pat2 to have one of the known patterns
5715 422         498 $pat1 = substr( $pattern, 0, $posq );
5716 422         555 $pat2 = substr( $pattern, $posq, 3 );
5717 422         636 $field_ok = $is_leading_sign_pattern{$pat2};
5718             }
5719              
5720 755 100       1091 if ($field_ok) {
5721              
5722             # If the number starts within the field then we must
5723             # find its offset position.
5724 310 100       511 if ($pat1) {
5725              
5726             # Note: an optimization would be to remember previous
5727             # calls for each column and use them if possible, but
5728             # benchmarking shows that this is not necessary.
5729             # See .ba54 for example coding.
5730 124         274 ( $pos_start_number, $char_end_part1, $ch_opening ) =
5731             split_field( $pat1, $field, $pattern );
5732              
5733 124   33     259 $field_ok ||= $pos_start_number;
5734             }
5735              
5736 310 50       440 if ($field_ok) {
5737              
5738             # look for an optional + or - sign
5739 310         445 my $test_char = substr( $field, $pos_start_number, 1 );
5740 310         329 my $sign;
5741 310 100       516 if ( $is_plus_or_minus{$test_char} ) {
5742 49         64 $sign = $test_char;
5743 49         69 $test_char =
5744             substr( $field, $pos_start_number + 1, 1 );
5745             }
5746              
5747             # and a digit
5748 310 100       535 if ( $is_digit_char{$test_char} ) {
5749 264         274 my $field2;
5750 264 100       342 if ($sign) {
5751 47         94 $is_signed_number = 1;
5752 47         65 $field2 =
5753             substr( $field, $pos_start_number + 1 );
5754             }
5755             else {
5756 217         245 $is_unsigned_number = 1;
5757 217 100       337 $field2 =
5758             $pos_start_number
5759             ? substr( $field, $pos_start_number )
5760             : $field;
5761             }
5762              
5763             # Check for match to ending pattern
5764 264         1516 $field_ok =
5765             field_matches_end_pattern( $field2, $pat2 );
5766             }
5767             else {
5768 46         81 $field_ok = 0;
5769             }
5770             }
5771             }
5772             }
5773              
5774             #----------------------
5775             # Figure out what to do
5776             #----------------------
5777              
5778             # we require a signed or unsigned number field
5779             # which is not a hash key
5780 785   66     1720 $field_ok &&= ( $is_signed_number || $is_unsigned_number );
      66        
5781              
5782             # if a column has not started..
5783 785 100       1110 if ( !$rcol_hash ) {
5784              
5785             # give up if this is cannot start a new column
5786 584 100       1356 next if ( !$field_ok );
5787              
5788             # otherwise continue on to start a new column
5789              
5790             }
5791              
5792             # if a column has been started...
5793             else {
5794              
5795             # and this cannot be added to it
5796 201 50 100     808 if ( !$field_ok
      66        
      66        
5797             || $rcol_hash->{pos_start_number} ne $pos_start_number
5798             || $rcol_hash->{char_end_part1} ne $char_end_part1
5799             || $rcol_hash->{col} ne $col )
5800             {
5801              
5802             # then end the current column and start over
5803 34 0 33     64 if ( $rcol_hash->{signed_count}
5804             && $rcol_hash->{unsigned_count} )
5805             {
5806 0         0 end_signed_number_column( $rgroup_lines, $rcol_hash,
5807             $ix_line - 1 );
5808             }
5809 34         45 delete $column_info{$jcol};
5810 34         75 $rcol_hash = undef;
5811             }
5812             }
5813              
5814 285         295 if (DEBUG_VSN) {
5815             my $exists = defined($rcol_hash);
5816             print
5817             "VSN: line=$ix_line change=$jmax_change jcol=$jcol field=$field exists?=$exists unsigned?=$is_unsigned_number signed?=$is_signed_number\n";
5818             }
5819              
5820             #---------------------------------------
5821             # Either start a new column, if possible
5822             #---------------------------------------
5823 285 100       407 if ( !defined($rcol_hash) ) {
5824              
5825 118 100       208 next if ( !$field_ok );
5826              
5827 95 100       188 my $rsigned_lines = $is_signed_number ? [$ix_line] : [];
5828 95         770 $column_info{$jcol} = {
5829             unsigned_count => $is_unsigned_number,
5830             signed_count => $is_signed_number,
5831             pos_start_number => $pos_start_number,
5832             char_end_part1 => $char_end_part1,
5833             ix_first => $ix_line,
5834             col => $col,
5835             jcol => $jcol,
5836             rsigned_lines => $rsigned_lines,
5837             };
5838             }
5839              
5840             #------------------------------
5841             # or extend the existing column
5842             #------------------------------
5843             else {
5844 167         196 $rcol_hash->{unsigned_count} += $is_unsigned_number;
5845 167         172 $rcol_hash->{signed_count} += $is_signed_number;
5846 167 100       342 if ($is_signed_number) {
5847 35         42 push @{ $rcol_hash->{rsigned_lines} }, $ix_line;
  35         88  
5848             }
5849             }
5850             }
5851             }
5852              
5853             #-------------------------------------
5854             # Loop to finish any remaining columns
5855             #-------------------------------------
5856 70         204 foreach my $jcol ( keys %column_info ) {
5857 48         77 my $rcol_hash = $column_info{$jcol};
5858 48 100 100     135 if ( $rcol_hash->{signed_count} && $rcol_hash->{unsigned_count} ) {
5859 29         56 end_signed_number_column( $rgroup_lines, $rcol_hash, $ix_line );
5860             }
5861             }
5862 70         267 return;
5863             } ## end sub pad_signed_number_columns
5864              
5865             #########################################
5866             # CODE SECTION 7: Pad Wide Equals Columns
5867             #########################################
5868              
5869 44     44   353 use constant DEBUG_WEC => 0;
  44         93  
  44         189259  
5870              
5871             sub end_wide_equals_column {
5872              
5873 12     12 0 25 my ( $rgroup_lines, $rcol_hash, $ix_last ) = @_;
5874              
5875             # Finish formatting a column of wide equals
5876             # Given:
5877             # $rgroup_lines - the current vertical alignment group of lines
5878             # $rcol_hash - a hash of information about this vertical column
5879             # $ix_last - index of the last line of this vertical column
5880              
5881 12 50       19 return unless ($rcol_hash);
5882 12         20 my $jcol = $rcol_hash->{jcol};
5883 12         15 my $col = $rcol_hash->{col};
5884 12         20 my $min_width = $rcol_hash->{min_width};
5885 12         18 my $max_width = $rcol_hash->{max_width};
5886 12         12 my $rwidths = $rcol_hash->{rwidths};
5887 12         15 my $ix_first = $rcol_hash->{ix_first};
5888              
5889             # check for skipped lines, shouldn't happen
5890 12         17 my $nlines = $ix_last - $ix_first + 1;
5891 12         16 my $num = @{$rwidths};
  12         19  
5892 12 50       23 if ( $num != $nlines ) {
5893 0         0 my $line = $rgroup_lines->[$ix_last];
5894 0         0 my $rfields = $line->{'rfields'};
5895 0         0 my $text = join EMPTY_STRING, @{$rfields};
  0         0  
5896 0         0 DEVEL_MODE && Fault(<<EOM);
5897             We seem to have miscounted lines, please check:
5898             nlines=$nlines
5899             num saved=$num
5900             min width=$min_width
5901             max width=$max_width
5902             j=$jcol
5903             ix_first=$ix_first
5904             ix_last=$ix_last
5905             text=$text
5906             EOM
5907 0         0 return;
5908             }
5909              
5910             #------------------------------------------------------
5911             # loop over all lines of this vertical alignment column
5912             #------------------------------------------------------
5913              
5914             my (
5915 12         19 $current_alignment, $starting_colp,
5916             $current_line, @previous_linked_lines
5917             );
5918 12         15 foreach my $item ( @{$rwidths} ) {
  12         21  
5919 50         49 my ( $ix, $width ) = @{$item};
  50         61  
5920 50         57 my $line = $rgroup_lines->[$ix];
5921              
5922             # add leading spaces to the shorter equality tokens to get
5923             # vertical alignment of the '=' signs
5924 50         57 my $jmax = $line->{'jmax'};
5925 50         58 my $jcolp = $jcol + 1;
5926              
5927 50         52 my @alignments = @{ $line->{'ralignments'} };
  50         66  
5928 50         53 my $alignment = $alignments[$jcolp];
5929 50         54 my $colp = $alignment->{column};
5930              
5931             #------------------------------------------------------------
5932             # Transfer column width changes between equivalent alignments
5933             #------------------------------------------------------------
5934              
5935             # This step keeps alignments to the right correct in case the
5936             # alignment object changes but the actual alignment col does not.
5937             # It is extremely rare for this to occur. Issue c353.
5938              
5939             # nothing to do if no more real alignments on right
5940 50 100       110 if ( $jcolp >= $jmax - 1 ) {
    100          
    100          
5941 18         22 $current_alignment = undef;
5942 18         33 $current_line = undef;
5943 18         18 @previous_linked_lines = ();
5944             }
5945              
5946             # handle new rhs alignment
5947             elsif ( !$current_alignment ) {
5948 9         12 $current_alignment = $alignment;
5949 9         10 $current_line = $line;
5950 9         9 $starting_colp = $colp;
5951 9         12 @previous_linked_lines = ();
5952             }
5953              
5954             # handle change in existing alignment
5955             elsif ( refaddr($alignment) != refaddr($current_alignment) ) {
5956              
5957             # change rhs alignment column - new vertical group on right
5958 6 100       9 if ( $starting_colp != $colp ) {
5959 1         2 $starting_colp = $colp;
5960 1         2 @previous_linked_lines = ();
5961             }
5962             else {
5963              
5964             # Same starting alignment col on right, but different alignment
5965             # object. See if we must increase width of this new alignment
5966             # object.
5967 5         6 my $current_colp = $current_alignment->{column};
5968 5 100       17 if ( $current_colp > $colp ) {
5969 3         6 my $excess = $current_colp - $colp;
5970 3         6 my $padding_available =
5971             $line->get_available_space_on_right();
5972 3 50       5 if ( $excess <= $padding_available ) {
5973 3         8 $line->increase_field_width( $jcolp, $excess );
5974 3         5 $colp = $alignment->{column};
5975             }
5976             }
5977              
5978             # remember the previous line in case we have to go back and
5979             # increase its width
5980 5         7 push @previous_linked_lines, $current_line;
5981             }
5982 6         8 $current_alignment = $alignment;
5983 6         7 $current_line = $line;
5984             }
5985             else {
5986             # continuing with same alignment
5987             }
5988              
5989             #-----------------------
5990             # add any needed padding
5991             #-----------------------
5992 50         65 my $pad = $max_width - $width;
5993 50 100       80 if ( $pad > 0 ) {
5994              
5995 33         37 my $rfields = $line->{'rfields'};
5996 33         37 my $rfield_lengths = $line->{'rfield_lengths'};
5997              
5998 33         45 my $lenp = $rfield_lengths->[$jcolp];
5999 33         34 my $avail = $colp - $col;
6000 33         73 my $excess = $lenp + $pad - $avail;
6001              
6002 33 100       46 if ( $excess > 0 ) {
6003              
6004 13         31 my $padding_available = $line->get_available_space_on_right();
6005 13 100       20 if ( $excess <= $padding_available ) {
6006 12         28 $line->increase_field_width( $jcolp, $excess );
6007              
6008             # Increase space of any previous linked lines
6009 12         18 foreach my $line_prev (@previous_linked_lines) {
6010 1         3 $padding_available =
6011             $line_prev->get_available_space_on_right();
6012 1 50       4 if ( $excess <= $padding_available ) {
6013 1         3 $line_prev->increase_field_width( $jcolp, $excess );
6014             }
6015             else {
6016 0         0 last;
6017             }
6018             }
6019             }
6020             else {
6021 1         1 $pad = 0;
6022             }
6023              
6024             }
6025              
6026             # Add spaces
6027 33         64 $rfields->[$jcolp] = ( SPACE x $pad ) . $rfields->[$jcolp];
6028 33         55 $rfield_lengths->[$jcolp] += $pad;
6029             }
6030             }
6031 12         28 return;
6032             } ## end sub end_wide_equals_column
6033              
6034             sub pad_wide_equals_columns {
6035              
6036 12     12 0 15 my ($rgroup_lines) = @_;
6037              
6038             # Given:
6039             # $rgroup_lines = the current vertical alignment group of lines
6040             # Task:
6041             # Look for columns of aligned equals tokens, some of which may be
6042             # things like '-=', '&&=', etc. Increase the field length of the
6043             # previous field by 1 or 2 spaces where necessary and possible so
6044             # that alignment of all '=' occurs. For example, given
6045              
6046             # $j /= 2;
6047             # $pow2 = $pow2 * $pow2;
6048              
6049             # In this case we want to add a leading space '=' term to get
6050             # $j /= 2;
6051             # $pow2 = $pow2 * $pow2;
6052              
6053             # The logic here is somewhat similar to sub pad_signed_number_columns
6054              
6055 12 50       22 return unless ($rOpts_valign_wide_equals);
6056              
6057 12         14 my %column_info;
6058             my @columns;
6059              
6060             #----------------
6061             # loop over lines
6062             #----------------
6063 12         16 my $ix_line = -1;
6064 12         15 my $jmax = -1;
6065 12         12 foreach my $line ( @{$rgroup_lines} ) {
  12         26  
6066 50         56 $ix_line++;
6067 50         58 my $jmax_last = $jmax;
6068 50         71 $jmax = $line->{'jmax'};
6069 50         61 my $jmax_change = $jmax ne $jmax_last;
6070              
6071 50         49 my @alignments = @{ $line->{'ralignments'} };
  50         76  
6072 50         60 my $rfields = $line->{'rfields'};
6073 50         64 my $rtokens = $line->{'rtokens'};
6074              
6075             #-----------------------------------------------
6076             # Check for a reduction in the number of columns
6077             #-----------------------------------------------
6078 50 100       72 if ( $jmax < $jmax_last ) {
6079              
6080 6         11 foreach my $jcol ( keys %column_info ) {
6081              
6082             # end any stranded columns on the right
6083 6 50       15 next if ( $jcol < $jmax );
6084 0         0 my $rcol_hash = $column_info{$jcol};
6085 0 0       0 next unless ($rcol_hash);
6086 0 0       0 if ( $rcol_hash->{max_width} > $rcol_hash->{min_width} ) {
6087 0         0 end_wide_equals_column( $rgroup_lines, $rcol_hash,
6088             $ix_line - 1 );
6089             }
6090 0         0 delete $column_info{$jcol};
6091             }
6092             }
6093              
6094             #--------------------------------------------------
6095             # Loop over fields except last field (side comment)
6096             #--------------------------------------------------
6097 50         75 for my $jcol ( 0 .. $jmax - 1 ) {
6098              
6099             #-----------------------------------------
6100             # Decide if this is a new alignment column
6101             #-----------------------------------------
6102 147         146 my $alignment = $alignments[$jcol];
6103 147         167 my $old_col = $columns[$jcol];
6104 147         151 my $col = $alignment->{column};
6105 147         153 $columns[$jcol] = $col;
6106 147 100 100     273 if ( defined($old_col) && $old_col != $col ) {
6107 14         20 foreach my $jcol_old ( keys %column_info ) {
6108 14 50       23 next if ( $jcol_old < $jcol );
6109 0         0 my $rcol_hash = $column_info{$jcol_old};
6110 0 0       0 if ( $rcol_hash->{max_width} > $rcol_hash->{min_width} ) {
6111 0         0 end_wide_equals_column( $rgroup_lines, $rcol_hash,
6112             $ix_line - 1 );
6113             }
6114 0         0 delete $column_info{$jcol_old};
6115             }
6116             }
6117              
6118             # A new wide equals column can only start at an alignment change
6119 147         227 my $rcol_hash = $column_info{$jcol};
6120              
6121             #------------------------------------------------------
6122             # Examine this field, looking for equals or wide equals
6123             #------------------------------------------------------
6124 147         182 my $field_next = $rfields->[ $jcol + 1 ];
6125 147         166 my $token = $rtokens->[$jcol];
6126              
6127             # See if this is an equals alignment group;
6128             # indicated by alignment token of '=' followed by a digit
6129 147         217 my $len_equals_symbol = 0;
6130 147 100 100     366 if ( length($token) > 1
      100        
6131             && substr( $token, 0, 1 ) eq '='
6132             && $is_digit_char{ substr( $token, 1, 1 ) } )
6133             {
6134              
6135             # find the actual equality symbol which starts the next field
6136             # i.e. '=' or '**=' or '-=' etc. We just need its length.
6137 50         59 my $pos = index( $field_next, '=' );
6138 50 50 33     110 if ( $pos >= 0 && $pos <= 2 ) {
6139 50         61 $len_equals_symbol = $pos + 1;
6140             }
6141             }
6142              
6143             # if a column has not started..
6144 147 100       166 if ( !$rcol_hash ) {
6145              
6146             # give up if this is cannot start a new column
6147 109 100       214 next if ( !$len_equals_symbol );
6148              
6149             # otherwise continue on to start a new column
6150              
6151             }
6152              
6153             # if a column has been started...
6154             else {
6155              
6156             # and this cannot be added to it
6157 38 50 33     106 if ( !$len_equals_symbol || $rcol_hash->{col} ne $col ) {
6158              
6159             # then end the current column and start over
6160 0 0       0 if ( $rcol_hash->{max_width} > $rcol_hash->{min_width} ) {
6161 0         0 end_wide_equals_column( $rgroup_lines, $rcol_hash,
6162             $ix_line - 1 );
6163             }
6164 0         0 delete $column_info{$jcol};
6165 0         0 $rcol_hash = undef;
6166             }
6167             }
6168              
6169 50         48 if (DEBUG_WEC) {
6170             my $exists = defined($rcol_hash);
6171             print
6172             "WEA: line=$ix_line change=$jmax_change jcol=$jcol field=$field_next exists?=$exists equals?=$len_equals_symbol\n";
6173             }
6174              
6175             #---------------------------------------
6176             # Either start a new column, if possible
6177             #---------------------------------------
6178 50 100       77 if ( !defined($rcol_hash) ) {
6179              
6180 12 50       21 next if ( !$len_equals_symbol );
6181              
6182 12         105 $column_info{$jcol} = {
6183             ix_first => $ix_line,
6184             col => $col,
6185             jcol => $jcol,
6186             min_width => $len_equals_symbol,
6187             max_width => $len_equals_symbol,
6188             rwidths => [ [ $ix_line, $len_equals_symbol ] ],
6189             };
6190             }
6191              
6192             #------------------------------
6193             # or extend the existing column
6194             #------------------------------
6195             else {
6196 38 100       59 if ( $len_equals_symbol > $rcol_hash->{max_width} ) {
6197 9         20 $rcol_hash->{max_width} = $len_equals_symbol;
6198             }
6199 38 100       64 if ( $len_equals_symbol < $rcol_hash->{min_width} ) {
6200 4         12 $rcol_hash->{min_width} = $len_equals_symbol;
6201             }
6202 38         36 push @{ $rcol_hash->{rwidths} },
  38         131  
6203             [ $ix_line, $len_equals_symbol ];
6204             }
6205             }
6206             }
6207              
6208             #-------------------------------------
6209             # Loop to finish any remaining columns
6210             #-------------------------------------
6211 12         28 foreach my $jcol ( keys %column_info ) {
6212 12         18 my $rcol_hash = $column_info{$jcol};
6213 12 50       24 if ( $rcol_hash->{max_width} > $rcol_hash->{min_width} ) {
6214 12         30 end_wide_equals_column( $rgroup_lines, $rcol_hash, $ix_line );
6215             }
6216             }
6217 12         53 return;
6218             } ## end sub pad_wide_equals_columns
6219              
6220             ###############################
6221             # CODE SECTION 8: Output Step A
6222             ###############################
6223              
6224             sub valign_output_step_A {
6225              
6226 3589     3589 0 6239 my ( $self, $rinput_hash ) = @_;
6227              
6228             #------------------------------------------------------------
6229             # This is Step A in writing vertically aligned lines.
6230             # The line is prepared according to the alignments which have
6231             # been found. Then it is shipped to the next step.
6232             #------------------------------------------------------------
6233              
6234             my (
6235              
6236             $line,
6237             $min_ci_gap,
6238             $do_not_align,
6239             $group_leader_length,
6240             $extra_leading_spaces,
6241             $level,
6242             $maximum_line_length,
6243              
6244             ) =
6245              
6246 3589         9224 @{$rinput_hash}{
6247 3589         5608 qw(
6248             line
6249             min_ci_gap
6250             do_not_align
6251             group_leader_length
6252             extra_leading_spaces
6253             level
6254             maximum_line_length
6255             )
6256             };
6257              
6258             my (
6259              
6260             $rfields,
6261             $rfield_lengths,
6262             $leading_space_count,
6263             $outdent_long_lines,
6264             $maximum_field_index,
6265             $rvertical_tightness_flags,
6266             $Kend,
6267             $level_end,
6268              
6269             ) =
6270              
6271 3589         10202 @{$line}{
6272 3589         5755 qw(
6273             rfields
6274             rfield_lengths
6275             leading_space_count
6276             outdent_long_lines
6277             jmax
6278             rvertical_tightness_flags
6279             Kend
6280             level_end
6281             )
6282             };
6283              
6284             # Check for valid hash keys at end of lifetime of $line during development
6285 3589         4160 DEVEL_MODE
6286             && check_keys( $line, \%valid_LINE_keys,
6287             "Checking line keys at valign_output_step_A", 1 );
6288              
6289             # add any extra spaces
6290 3589 100       6450 if ( $leading_space_count > $group_leader_length ) {
6291 49         83 $leading_space_count += $min_ci_gap;
6292             }
6293              
6294 3589         5905 my $str = $rfields->[0];
6295 3589         5056 my $str_len = $rfield_lengths->[0];
6296              
6297 3589         4384 my @alignments = @{ $line->{'ralignments'} };
  3589         7394  
6298 3589 50       7385 if ( @alignments != $maximum_field_index + 1 ) {
6299              
6300             # Shouldn't happen: sub install_new_alignments makes jmax alignments
6301 0         0 my $jmax_alignments = @alignments - 1;
6302 0         0 if (DEVEL_MODE) {
6303             Fault(
6304             "alignment jmax=$jmax_alignments should equal $maximum_field_index\n"
6305             );
6306             }
6307 0         0 $do_not_align = 1;
6308             }
6309              
6310             # loop to concatenate all fields of this line and needed padding
6311 3589         4921 my $total_pad_count = 0;
6312 3589         6555 for my $j ( 1 .. $maximum_field_index ) {
6313              
6314             # skip zero-length side comments
6315             last
6316             if (
6317 8643 100 66     22760 ( $j == $maximum_field_index )
      100        
6318             && ( !defined( $rfields->[$j] )
6319             || ( $rfield_lengths->[$j] == 0 ) )
6320             );
6321              
6322             # compute spaces of padding before this field
6323 5402         8526 my $col = $alignments[ $j - 1 ]->{'column'};
6324 5402         6582 my $pad = $col - ( $str_len + $leading_space_count );
6325              
6326 5402 50       8189 if ($do_not_align) {
6327 0 0       0 $pad =
6328             ( $j < $maximum_field_index )
6329             ? 0
6330             : $rOpts_minimum_space_to_comment - 1;
6331             }
6332              
6333             # if the -fpsc flag is set, move the side comment to the selected
6334             # column if and only if it is possible, ignoring constraints on
6335             # line length and minimum space to comment
6336 5402 100 100     9463 if ( $rOpts_fixed_position_side_comment
6337             && $j == $maximum_field_index )
6338             {
6339 9         20 my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
6340 9 50       22 if ( $newpad >= 0 ) { $pad = $newpad; }
  9         17  
6341             }
6342              
6343             # accumulate the padding
6344 5402 100       8776 if ( $pad > 0 ) { $total_pad_count += $pad; }
  1590         2042  
6345              
6346             # only add padding when we have a finite field;
6347             # this avoids extra terminal spaces if we have empty fields
6348 5402 100       8187 if ( $rfield_lengths->[$j] > 0 ) {
6349 5391         9186 $str .= SPACE x $total_pad_count;
6350 5391         6257 $str_len += $total_pad_count;
6351 5391         5906 $total_pad_count = 0;
6352 5391         7199 $str .= $rfields->[$j];
6353 5391         7440 $str_len += $rfield_lengths->[$j];
6354             }
6355             else {
6356 11         19 $total_pad_count = 0;
6357             }
6358             }
6359              
6360 3589         4980 my $side_comment_length = $rfield_lengths->[$maximum_field_index];
6361              
6362             # ship this line off
6363 3589         25672 $self->valign_output_step_B(
6364             {
6365             leading_space_count => $leading_space_count + $extra_leading_spaces,
6366             line => $str,
6367             line_length => $str_len,
6368             side_comment_length => $side_comment_length,
6369             outdent_long_lines => $outdent_long_lines,
6370             rvertical_tightness_flags => $rvertical_tightness_flags,
6371             level => $level,
6372             level_end => $level_end,
6373             Kend => $Kend,
6374             maximum_line_length => $maximum_line_length,
6375             }
6376             );
6377 3589         16625 return;
6378             } ## end sub valign_output_step_A
6379              
6380             sub combine_fields {
6381              
6382 16     16 0 42 my ( $line_0, $line_1, $imax_align ) = @_;
6383              
6384             # Given:
6385             # $line_0, $line_1 = two adjacent lines
6386             # $imax_align = index of last alignment wanted
6387              
6388             # Task:
6389             # We have a group of two lines for which we do not want to align tokens
6390             # between index $imax_align and the side comment. So we will delete fields
6391             # between $imax_align and the side comment. Alignments have already
6392             # been set so we have to adjust them.
6393              
6394 16 50       46 if ( !defined($imax_align) ) { $imax_align = -1 }
  0         0  
6395              
6396             # First delete the unwanted tokens
6397 16         43 my $jmax_old = $line_0->{'jmax'};
6398 16         58 my @idel = ( $imax_align + 1 .. $jmax_old - 2 );
6399 16 50       47 return if ( !@idel );
6400              
6401             # Get old alignments before any changes are made
6402 16         28 my @old_alignments = @{ $line_0->{'ralignments'} };
  16         47  
6403              
6404 16         40 foreach my $line ( $line_0, $line_1 ) {
6405 32         86 delete_selected_tokens( $line, \@idel );
6406             }
6407              
6408             # Now adjust the alignments. Note that the side comment alignment
6409             # is always at jmax-1, and there is an ending alignment at jmax.
6410 16         28 my @new_alignments;
6411 16 50       56 if ( $imax_align >= 0 ) {
6412 0         0 @new_alignments[ 0 .. $imax_align ] =
6413             @old_alignments[ 0 .. $imax_align ];
6414             }
6415              
6416 16         41 my $jmax_new = $line_0->{'jmax'};
6417              
6418 16         45 $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
6419 16         57 $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
6420 16         43 $line_0->{'ralignments'} = \@new_alignments;
6421 16         45 $line_1->{'ralignments'} = \@new_alignments;
6422 16         72 return;
6423             } ## end sub combine_fields
6424              
6425             sub get_output_line_number {
6426              
6427             # Return the output line number to external modules.
6428             # The output line number reported to a caller =
6429             # the number of items still in the buffer +
6430             # the number of items written.
6431 70     70 0 136 my $self = shift;
6432 70         190 return $self->group_line_count() +
6433             $self->[_file_writer_object_]->get_output_line_number();
6434             } ## end sub get_output_line_number
6435              
6436             ###############################
6437             # CODE SECTION 9: Output Step B
6438             ###############################
6439              
6440             { ## closure for sub valign_output_step_B
6441              
6442             # These are values for a cache used by valign_output_step_B.
6443             my $cached_line_text;
6444             my $cached_line_text_length;
6445             my $cached_line_type;
6446             my $cached_line_opening_flag;
6447             my $cached_line_closing_flag;
6448             my $cached_seqno;
6449             my $cached_line_valid;
6450             my $cached_line_leading_space_count;
6451             my $cached_seqno_string;
6452             my $cached_line_Kend;
6453             my $cached_line_maximum_length;
6454              
6455             # These are passed to step_C:
6456             my $seqno_string;
6457             my $last_nonblank_seqno_string;
6458              
6459             sub set_last_nonblank_seqno_string {
6460 398     398 0 738 my ($val) = @_;
6461 398         610 $last_nonblank_seqno_string = $val;
6462 398         537 return;
6463             }
6464              
6465             sub get_cached_line_opening_flag {
6466 227     227 0 366 return $cached_line_opening_flag;
6467             }
6468              
6469             sub get_cached_line_type {
6470 8510     8510 0 13557 return $cached_line_type;
6471             }
6472              
6473             sub set_cached_line_valid {
6474 3     3 0 6 my ($val) = @_;
6475 3         5 $cached_line_valid = $val;
6476 3         6 return;
6477             }
6478              
6479             sub get_cached_seqno {
6480 157     157 0 370 return $cached_seqno;
6481             }
6482              
6483             sub initialize_step_B_cache {
6484              
6485             # valign_output_step_B cache:
6486 648     648 0 1509 $cached_line_text = EMPTY_STRING;
6487 648         1132 $cached_line_text_length = 0;
6488 648         1194 $cached_line_type = 0;
6489 648         1055 $cached_line_opening_flag = 0;
6490 648         997 $cached_line_closing_flag = 0;
6491 648         979 $cached_seqno = 0;
6492 648         983 $cached_line_valid = 0;
6493 648         925 $cached_line_leading_space_count = 0;
6494 648         1011 $cached_seqno_string = EMPTY_STRING;
6495 648         1065 $cached_line_Kend = undef;
6496 648         890 $cached_line_maximum_length = undef;
6497              
6498             # These vars hold a string of sequence numbers joined together used by
6499             # the cache
6500 648         1145 $seqno_string = EMPTY_STRING;
6501 648         1062 $last_nonblank_seqno_string = EMPTY_STRING;
6502 648         993 return;
6503             } ## end sub initialize_step_B_cache
6504              
6505             sub _flush_step_B_cache {
6506              
6507 2217     2217   4038 my ($self) = @_;
6508              
6509             # Send any text in the step_B cache on to step_C
6510 2217 100       4391 if ($cached_line_type) {
6511 1         2 $seqno_string = $cached_seqno_string;
6512 1         6 $self->valign_output_step_C(
6513             $seqno_string,
6514             $last_nonblank_seqno_string,
6515              
6516             $cached_line_text,
6517             $cached_line_leading_space_count,
6518             $self->[_last_level_written_],
6519             $cached_line_Kend,
6520             );
6521 1         1 $cached_line_type = 0;
6522 1         2 $cached_line_text = EMPTY_STRING;
6523 1         2 $cached_line_text_length = 0;
6524 1         1 $cached_seqno_string = EMPTY_STRING;
6525 1         2 $cached_line_Kend = undef;
6526 1         2 $cached_line_maximum_length = undef;
6527             }
6528 2217         3087 return;
6529             } ## end sub _flush_step_B_cache
6530              
6531             sub handle_cached_line {
6532              
6533 162     162 0 417 my ( $self, $rinput, $leading_string, $leading_string_length ) = @_;
6534              
6535             # handle a cached line ..
6536             # either append the current line to it or write it out
6537              
6538             # The cached line will either be:
6539             # - passed along to step_C, or
6540             # - or combined with the current line
6541              
6542 162         296 my $last_level_written = $self->[_last_level_written_];
6543              
6544 162         282 my $leading_space_count = $rinput->{leading_space_count};
6545 162         293 my $str = $rinput->{line};
6546 162         277 my $str_length = $rinput->{line_length};
6547 162         273 my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
6548 162         284 my $level = $rinput->{level};
6549 162         305 my $level_end = $rinput->{level_end};
6550 162         254 my $maximum_line_length = $rinput->{maximum_line_length};
6551              
6552 162         255 my ( $open_or_close, $seqno_beg );
6553 162 50       2666 if ($rvertical_tightness_flags) {
6554              
6555 162         217 $open_or_close = $rvertical_tightness_flags->{_vt_type};
6556 162         297 $seqno_beg = $rvertical_tightness_flags->{_vt_seqno_beg};
6557             }
6558              
6559             # Dump an invalid cached line
6560 162 100 100     562 if ( !$cached_line_valid ) {
    100          
6561 93         232 $self->valign_output_step_C(
6562             $seqno_string,
6563             $last_nonblank_seqno_string,
6564              
6565             $cached_line_text,
6566             $cached_line_leading_space_count,
6567             $last_level_written,
6568             $cached_line_Kend,
6569             );
6570             }
6571              
6572             # Handle cached line ending in OPENING tokens
6573             elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
6574              
6575 32         81 my $gap = $leading_space_count - $cached_line_text_length;
6576              
6577             # handle option of just one tight opening per line:
6578 32 100       86 if ( $cached_line_opening_flag == 1 ) {
6579 14 50 33     69 if ( defined($open_or_close) && $open_or_close == 1 ) {
6580 0         0 $gap = -1;
6581             }
6582             }
6583              
6584             # Do not join the lines if this might produce a one-line
6585             # container which exceeds the maximum line length. This is
6586             # necessary prevent blinking, particularly with the combination
6587             # -xci -pvt=2. In that case a one-line block alternately forms
6588             # and breaks, causing -xci to alternately turn on and off (case
6589             # b765).
6590             # Patched to fix cases b656 b862 b971 b972: always do the check
6591             # if the maximum line length changes (due to -vmll).
6592 32 50 33     207 if (
      66        
6593             $gap >= 0
6594             && ( $maximum_line_length != $cached_line_maximum_length
6595             || ( defined($level_end) && $level > $level_end ) )
6596             )
6597             {
6598 0         0 my $test_line_length =
6599             $cached_line_text_length + $gap + $str_length;
6600              
6601             # Add a small tolerance in the length test (fixes case b862)
6602 0 0       0 if ( $test_line_length > $cached_line_maximum_length - 2 ) {
6603 0         0 $gap = -1;
6604             }
6605             }
6606              
6607             # NOTE: using defined() since $seqno_beg can be 0 for -bbvt
6608 32 100 66     121 if ( $gap >= 0 && defined($seqno_beg) ) {
6609 20         28 $maximum_line_length = $cached_line_maximum_length;
6610 20         55 $leading_string = $cached_line_text . SPACE x $gap;
6611 20         33 $leading_string_length = $cached_line_text_length + $gap;
6612 20         30 $leading_space_count = $cached_line_leading_space_count;
6613 20         44 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
6614 20         56 $level = $last_level_written;
6615             }
6616             else {
6617 12         44 $self->valign_output_step_C(
6618             $seqno_string,
6619             $last_nonblank_seqno_string,
6620              
6621             $cached_line_text,
6622             $cached_line_leading_space_count,
6623             $last_level_written,
6624             $cached_line_Kend,
6625             );
6626             }
6627             }
6628              
6629             # Handle cached line ending in CLOSING tokens
6630             else {
6631 37         101 my $test_line =
6632             $cached_line_text . SPACE x $cached_line_closing_flag . $str;
6633 37         81 my $test_line_length =
6634             $cached_line_text_length +
6635             $cached_line_closing_flag +
6636             $str_length;
6637 37 100 66     379 if (
      66        
      100        
6638              
6639             # The new line must start with container
6640             $seqno_beg
6641              
6642             # The container combination must be okay..
6643             && (
6644              
6645             # okay to combine like types
6646             ( $open_or_close == $cached_line_type )
6647              
6648             # closing block brace may append to non-block
6649             || ( $cached_line_type == 2 && $open_or_close == 4 )
6650              
6651             # something like ');'
6652             || ( !$open_or_close && $cached_line_type == 2 )
6653              
6654             )
6655              
6656             # The combined line must fit
6657             && ( $test_line_length <= $cached_line_maximum_length )
6658             )
6659             {
6660              
6661 33         72 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
6662              
6663             # Patch to outdent closing tokens ending # in ');' If we
6664             # are joining a line like ');' to a previous stacked set of
6665             # closing tokens, then decide if we may outdent the
6666             # combined stack to the indentation of the ');'. Since we
6667             # should not normally outdent any of the other tokens more
6668             # than the indentation of the lines that contained them, we
6669             # will only do this if all of the corresponding opening
6670             # tokens were on the same line. This can happen with -sot
6671             # and -sct.
6672              
6673             # For example, it is ok here:
6674             # __PACKAGE__->load_components( qw(
6675             # PK::Auto
6676             # Core
6677             # ));
6678             #
6679             # But, for example, we do not outdent in this example
6680             # because that would put the closing sub brace out farther
6681             # than the opening sub brace:
6682             #
6683             # perltidy -sot -sct
6684             # $c->Tk::bind(
6685             # '<Control-f>' => sub {
6686             # my ($c) = @_;
6687             # my $e = $c->XEvent;
6688             # itemsUnderArea $c;
6689             # } );
6690             #
6691 33 100 100     236 if ( $str =~ /^\);/
6692             && $cached_line_text =~ /^[\)\}\]\s]*$/ )
6693             {
6694              
6695             # The way to tell this is if the stacked sequence
6696             # numbers of this output line are the reverse of the
6697             # stacked sequence numbers of the previous non-blank
6698             # line of sequence numbers. So we can join if the
6699             # previous nonblank string of tokens is the mirror
6700             # image. For example if stack )}] is 13:8:6 then we
6701             # are looking for a leading stack like [{( which
6702             # is 6:8:13. We only need to check the two ends,
6703             # because the intermediate tokens must fall in order.
6704             # Note on speed: having to split on colons and
6705             # eliminate multiple colons might appear to be slow,
6706             # but it's not an issue because we almost never come
6707             # through here. In a typical file we don't.
6708              
6709 4         11 $seqno_string =~ s/^:+//;
6710 4         8 $last_nonblank_seqno_string =~ s/^:+//;
6711 4         21 $seqno_string =~ s/:+/:/g;
6712 4         13 $last_nonblank_seqno_string =~ s/:+/:/g;
6713              
6714             # how many spaces can we outdent?
6715 4         8 my $diff =
6716             $cached_line_leading_space_count - $leading_space_count;
6717 4 100 33     44 if ( $diff > 0
      66        
6718             && length($seqno_string)
6719             && length($last_nonblank_seqno_string) ==
6720             length($seqno_string) )
6721             {
6722 3         14 my @seqno_last =
6723             ( split /:/, $last_nonblank_seqno_string );
6724 3         9 my @seqno_now = ( split /:/, $seqno_string );
6725 3 50 33     32 if ( @seqno_now
      33        
      33        
6726             && @seqno_last
6727             && $seqno_now[-1] == $seqno_last[0]
6728             && $seqno_now[0] == $seqno_last[-1] )
6729             {
6730              
6731             # OK to outdent ..
6732             # for absolute safety, be sure we only remove
6733             # whitespace
6734 3         8 my $ws = substr( $test_line, 0, $diff );
6735 3 50 33     22 if ( ( length($ws) == $diff )
6736             && $ws =~ /^\s+$/ )
6737             {
6738              
6739 3         9 $test_line = substr( $test_line, $diff );
6740 3         5 $cached_line_leading_space_count -= $diff;
6741 3         18 $last_level_written =
6742             $self->level_change(
6743             $cached_line_leading_space_count,
6744             $diff, $last_level_written );
6745 3         12 $self->reduce_valign_buffer_indentation($diff);
6746             }
6747              
6748             # shouldn't happen, but not critical:
6749             ##else {
6750             ## ERROR transferring indentation here
6751             ##}
6752             }
6753             }
6754             }
6755              
6756             # Change the args to look like we received the combined line
6757 33         56 $str = $test_line;
6758 33         54 $str_length = $test_line_length;
6759 33         49 $leading_string = EMPTY_STRING;
6760 33         44 $leading_string_length = 0;
6761 33         47 $leading_space_count = $cached_line_leading_space_count;
6762 33         41 $level = $last_level_written;
6763 33         58 $maximum_line_length = $cached_line_maximum_length;
6764             }
6765             else {
6766 4         12 $self->valign_output_step_C(
6767             $seqno_string,
6768             $last_nonblank_seqno_string,
6769              
6770             $cached_line_text,
6771             $cached_line_leading_space_count,
6772             $last_level_written,
6773             $cached_line_Kend,
6774             );
6775             }
6776             }
6777 162         761 return ( $str, $str_length, $leading_string, $leading_string_length,
6778             $leading_space_count, $level, $maximum_line_length );
6779              
6780             } ## end sub handle_cached_line
6781              
6782             sub valign_output_step_B {
6783              
6784 8399     8399 0 13836 my ( $self, $rinput ) = @_;
6785              
6786             #---------------------------------------------------------
6787             # This is Step B in writing vertically aligned lines.
6788             # Vertical tightness is applied according to preset flags.
6789             # In particular this routine handles stacking of opening
6790             # and closing tokens.
6791             #---------------------------------------------------------
6792              
6793             ##Note: key 'level_end' not needed
6794             my (
6795              
6796             $leading_space_count,
6797             $str,
6798             $str_length,
6799             $side_comment_length,
6800             $outdent_long_lines,
6801             $rvertical_tightness_flags,
6802             $level,
6803             $Kend,
6804             $maximum_line_length,
6805              
6806             ) =
6807              
6808 8399         22282 @{$rinput}{
6809 8399         12316 qw(
6810             leading_space_count
6811             line
6812             line_length
6813             side_comment_length
6814             outdent_long_lines
6815             rvertical_tightness_flags
6816             level
6817             Kend
6818             maximum_line_length
6819             )
6820             };
6821              
6822             # Useful -gcs test cases for wide characters are
6823             # perl527/(method.t.2, reg_mesg.t, mime-header.t)
6824              
6825             # handle outdenting of long lines:
6826 8399         10637 my $is_outdented_line;
6827 8399 100       16337 if ($outdent_long_lines) {
6828 371         649 my $excess =
6829             $str_length -
6830             $side_comment_length +
6831             $leading_space_count -
6832             $maximum_line_length;
6833 371 100       863 if ( $excess > 0 ) {
6834 11         20 $leading_space_count = 0;
6835 11         20 my $file_writer_object = $self->[_file_writer_object_];
6836 11         41 my $last_outdented_line_at =
6837             $file_writer_object->get_output_line_number();
6838 11         23 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
6839              
6840 11         30 my $outdented_line_count = $self->[_outdented_line_count_];
6841 11 100       30 if ( !$outdented_line_count ) {
6842 4         11 $self->[_first_outdented_line_at_] =
6843             $last_outdented_line_at;
6844             }
6845 11         18 $outdented_line_count++;
6846 11         20 $self->[_outdented_line_count_] = $outdented_line_count;
6847 11         17 $is_outdented_line = 1;
6848             }
6849             }
6850              
6851             # Make preliminary leading whitespace. It could get changed
6852             # later by entabbing, so we have to keep track of any changes
6853             # to the leading_space_count from here on.
6854 8399 100       17961 my $leading_string =
6855             $leading_space_count > 0
6856             ? ( SPACE x $leading_space_count )
6857             : EMPTY_STRING;
6858 8399         10786 my $leading_string_length = length($leading_string);
6859              
6860             # Unpack any recombination data; it was packed by
6861             # sub 'Formatter::set_vertical_tightness_flags'
6862              
6863             # old hash Meaning
6864             # index key
6865             #
6866             # 0 _vt_type: 1=opening non-block 2=closing non-block
6867             # 3=opening block brace 4=closing block brace
6868             #
6869             # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
6870             # 1b _vt_closing_flag: spaces of padding to use if closing
6871             # 2 _vt_seqno: sequence number of container
6872             # 3 _vt_valid flag: do not append if this flag is false. Will be
6873             # true if appropriate -vt flag is set. Otherwise, Will be
6874             # made true only for 2 line container in parens with -lp
6875             # 4 _vt_seqno_beg: sequence number of first token of line
6876             # 5 _vt_seqno_end: sequence number of last token of line
6877             # 6 _vt_min_lines: min number of lines for joining opening cache,
6878             # 0=no constraint
6879             # 7 _vt_max_lines: max number of lines for joining opening cache,
6880             # 0=no constraint
6881              
6882 8399         11858 my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
6883             $seqno_beg, $seqno_end );
6884 8399 100       13999 if ($rvertical_tightness_flags) {
6885              
6886 796         1227 $open_or_close = $rvertical_tightness_flags->{_vt_type};
6887 796         1111 $opening_flag = $rvertical_tightness_flags->{_vt_opening_flag};
6888 796         1139 $closing_flag = $rvertical_tightness_flags->{_vt_closing_flag};
6889 796         1085 $seqno = $rvertical_tightness_flags->{_vt_seqno};
6890 796         1050 $valid = $rvertical_tightness_flags->{_vt_valid_flag};
6891 796         1083 $seqno_beg = $rvertical_tightness_flags->{_vt_seqno_beg};
6892 796         1103 $seqno_end = $rvertical_tightness_flags->{_vt_seqno_end};
6893             }
6894              
6895 8399 100       15108 $seqno_string = defined($seqno_end) ? $seqno_end : EMPTY_STRING;
6896              
6897             # handle any cached line ..
6898             # either append this line to it or write it out
6899             # Note: the function length() is used in this next test out of caution.
6900             # All testing has shown that the variable $cached_line_text_length is
6901             # correct, but its calculation is complex and a loss of cached text
6902             # would be a disaster.
6903 8399 100       14471 if ( length($cached_line_text) ) {
6904              
6905             (
6906 162         533 $str,
6907             $str_length,
6908             $leading_string,
6909             $leading_string_length,
6910             $leading_space_count,
6911             $level,
6912             $maximum_line_length
6913              
6914             ) = $self->handle_cached_line( $rinput, $leading_string,
6915             $leading_string_length );
6916              
6917 162         323 $cached_line_type = 0;
6918 162         269 $cached_line_text = EMPTY_STRING;
6919 162         224 $cached_line_text_length = 0;
6920 162         273 $cached_line_Kend = undef;
6921 162         216 $cached_line_maximum_length = undef;
6922              
6923             }
6924              
6925             # make the line to be written
6926 8399         12276 my $line = $leading_string . $str;
6927 8399         10539 my $line_length = $leading_string_length + $str_length;
6928              
6929             # Safety check: be sure that a line to be cached as a stacked block
6930             # brace line ends in the appropriate opening or closing block brace.
6931             # This should always be the case if the caller set flags correctly.
6932             # Code '3' is for -sobb, code '4' is for -scbb.
6933 8399 100       13462 if ($open_or_close) {
6934 163 50 66     985 if ( $open_or_close == 3 && $line !~ /\{\s*$/
      66        
      33        
6935             || $open_or_close == 4 && $line !~ /\}\s*$/ )
6936             {
6937 0         0 $open_or_close = 0;
6938             }
6939             }
6940              
6941             # write or cache this line ...
6942             # fix for case b999: do not cache an outdented line
6943             # fix for b1378: do not cache an empty line
6944 8399 100 66     19097 if ( !$open_or_close
      66        
      33        
6945             || $side_comment_length > 0
6946             || $is_outdented_line
6947             || !$line_length )
6948             {
6949 8236         18579 $self->valign_output_step_C(
6950             $seqno_string,
6951             $last_nonblank_seqno_string,
6952              
6953             $line,
6954             $leading_space_count,
6955             $level,
6956             $Kend,
6957             );
6958             }
6959             else {
6960 163         255 $cached_line_text = $line;
6961 163         241 $cached_line_text_length = $line_length;
6962 163         206 $cached_line_type = $open_or_close;
6963 163         225 $cached_line_opening_flag = $opening_flag;
6964 163         239 $cached_line_closing_flag = $closing_flag;
6965 163         226 $cached_seqno = $seqno;
6966 163         251 $cached_line_valid = $valid;
6967 163         252 $cached_line_leading_space_count = $leading_space_count;
6968 163         248 $cached_seqno_string = $seqno_string;
6969 163         230 $cached_line_Kend = $Kend;
6970 163         238 $cached_line_maximum_length = $maximum_line_length;
6971             }
6972              
6973 8399         14027 $self->[_last_level_written_] = $level;
6974 8399         10352 $self->[_last_side_comment_length_] = $side_comment_length;
6975 8399         15303 return;
6976             } ## end sub valign_output_step_B
6977             }
6978              
6979             ################################
6980             # CODE SECTION 10: Output Step C
6981             ################################
6982              
6983             { ## closure for sub valign_output_step_C
6984              
6985             # Vertical alignment buffer used by valign_output_step_C
6986             my $valign_buffer_filling;
6987             my @valign_buffer;
6988              
6989             sub initialize_valign_buffer {
6990 648     648 0 1361 @valign_buffer = ();
6991 648         1177 $valign_buffer_filling = EMPTY_STRING;
6992 648         985 return;
6993             }
6994              
6995             sub dump_valign_buffer {
6996              
6997 2219     2219 0 3613 my ($self) = @_;
6998              
6999             # Send all lines in the current buffer on to step_D
7000 2219 100       4207 if (@valign_buffer) {
7001 2         6 foreach (@valign_buffer) {
7002 7         14 $self->valign_output_step_D( @{$_} );
  7         19  
7003             }
7004 2         7 @valign_buffer = ();
7005             }
7006 2219         3205 $valign_buffer_filling = EMPTY_STRING;
7007 2219         2950 return;
7008             } ## end sub dump_valign_buffer
7009              
7010             sub reduce_valign_buffer_indentation {
7011              
7012 3     3 0 7 my ( $self, $diff ) = @_;
7013              
7014             # Reduce the leading indentation of lines in the current
7015             # buffer by $diff spaces
7016 3 100 66     15 if ( $valign_buffer_filling && $diff ) {
7017 2         4 my $max_valign_buffer = @valign_buffer;
7018 2         8 foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
7019             my ( $line, $leading_space_count, $level, $Kend ) =
7020 7         27 @{ $valign_buffer[$i] };
  7         16  
7021 7         23 my $ws = substr( $line, 0, $diff );
7022 7 50 33     38 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
7023 7         14 $line = substr( $line, $diff );
7024             }
7025 7 50       17 if ( $leading_space_count >= $diff ) {
7026 7         8 $leading_space_count -= $diff;
7027 7         17 $level =
7028             $self->level_change( $leading_space_count, $diff,
7029             $level );
7030             }
7031 7         25 $valign_buffer[$i] =
7032             [ $line, $leading_space_count, $level, $Kend ];
7033             }
7034             }
7035 3         9 return;
7036             } ## end sub reduce_valign_buffer_indentation
7037              
7038             sub valign_output_step_C {
7039              
7040             my (
7041 8346     8346 0 21046 $self,
7042             $seqno_string,
7043             $last_nonblank_seqno_string,
7044              
7045             @args_to_D,
7046             ) = @_;
7047              
7048             #-----------------------------------------------------------------------
7049             # This is Step C in writing vertically aligned lines.
7050             # Lines are either stored in a buffer or passed along to the next step.
7051             # The reason for storing lines is that we may later want to reduce their
7052             # indentation when -sot and -sct are both used.
7053             #-----------------------------------------------------------------------
7054              
7055             # Dump any saved lines if we see a line with an unbalanced opening or
7056             # closing token.
7057 8346 100 100     16612 $self->dump_valign_buffer()
7058             if ( $seqno_string && $valign_buffer_filling );
7059              
7060             # Either store or write this line
7061 8346 100       12275 if ($valign_buffer_filling) {
7062 7         20 push @valign_buffer, [@args_to_D];
7063             }
7064             else {
7065 8339         16596 $self->valign_output_step_D(@args_to_D);
7066             }
7067              
7068             # For lines starting or ending with opening or closing tokens..
7069 8346 100       13655 if ($seqno_string) {
7070 398         608 $last_nonblank_seqno_string = $seqno_string;
7071 398         1065 set_last_nonblank_seqno_string($seqno_string);
7072              
7073             # Start storing lines when we see a line with multiple stacked
7074             # opening tokens.
7075             # patch for RT #94354, requested by Colin Williams
7076 398 100 100     1461 if ( index( $seqno_string, ':' ) >= 0
      100        
7077             && $seqno_string =~ /^\d+(\:+\d+)+$/
7078             && $args_to_D[0] !~ /^[\}\)\]\:\?]/ )
7079             {
7080              
7081             # This test is efficient but a little subtle: The first test
7082             # says that we have multiple sequence numbers and hence
7083             # multiple opening or closing tokens in this line. The second
7084             # part of the test rejects stacked closing and ternary tokens.
7085             # So if we get here then we should have stacked unbalanced
7086             # opening tokens.
7087              
7088             # Here is a complex example:
7089              
7090             # Foo($Bar[0], { # (side comment)
7091             # baz => 1,
7092             # });
7093              
7094             # The first line has sequence 6::4. It does not begin with
7095             # a closing token or ternary, so it passes the test and must be
7096             # stacked opening tokens.
7097              
7098             # The last line has sequence 4:6 but is a stack of closing
7099             # tokens, so it gets rejected.
7100              
7101             # Note that the sequence number of an opening token for a qw
7102             # quote is a negative number and will be rejected. For
7103             # example, for the following line: skip_symbols([qw(
7104             # $seqno_string='10:5:-1'. It would be okay to accept it but I
7105             # decided not to do this after testing.
7106              
7107 8         19 $valign_buffer_filling = $seqno_string;
7108              
7109             }
7110             }
7111 8346         14176 return;
7112             } ## end sub valign_output_step_C
7113             }
7114              
7115             ###############################
7116             # CODE SECTION 11: Output Step D
7117             ###############################
7118              
7119             sub add_leading_tabs {
7120              
7121 45     45 0 79 my ( $self, $line, $leading_space_count, $level ) = @_;
7122              
7123             # Convert leading whitespace to use tabs if -et or -t are set
7124              
7125             # Given:
7126             # $line = the line of text to be written, without any tabs
7127             # $leading_whitespace = expected number of leading blank spaces
7128             # $level = indentation level (needed for -t)
7129              
7130             # Return:
7131             # $line = the line with possible leading tabs
7132              
7133 45         56 my $trimmed_line = $line;
7134 45         211 $trimmed_line =~ s/^ [^\S\n]+ //gxm;
7135              
7136             # Check for discrepancy in actual leading white spaces with estimate
7137 45 50       101 if ( length($line) != length($trimmed_line) + $leading_space_count ) {
7138              
7139             # If $leading_space_count is zero, then this routine must not
7140             # be called because we might be in a quote of some kind
7141 0 0       0 if ( $leading_space_count <= 0 ) {
7142 0         0 DEVEL_MODE && Fault(<<EOM);
7143             should not be here with leading space count = $leading_space_count
7144             EOM
7145 0         0 return $line;
7146             }
7147              
7148 0         0 my $leading_space_count_test = length($line) - length($trimmed_line);
7149              
7150             # Skip tabbing if actual whitespace is less than expected
7151 0 0       0 if ( $leading_space_count_test < $leading_space_count ) {
7152 0         0 DEBUG_TABS
7153             && $self->warning(<<EOM);
7154             Error entabbing: expected count=$leading_space_count but only found $leading_space_count_test for line:
7155             '$line'
7156             EOM
7157 0         0 return $line;
7158             }
7159              
7160             # Use actual whitespace if it exceeds prediction. This mainly
7161             # occurs at hanging side comments.
7162 0         0 $leading_space_count = $leading_space_count_test;
7163             }
7164              
7165             #----------------------------------
7166             # Handle --entab-leading-whitespace
7167             #----------------------------------
7168 45 50 0     76 if ($rOpts_entab_leading_whitespace) {
    0          
7169              
7170 45         65 my $space_count =
7171             $leading_space_count % $rOpts_entab_leading_whitespace;
7172 45         73 my $tab_count =
7173             int( $leading_space_count / $rOpts_entab_leading_whitespace );
7174 45         76 my $leading_string = "\t" x $tab_count . SPACE x $space_count;
7175 45         79 $line = $leading_string . $trimmed_line;
7176             }
7177              
7178             #-----------------------------------------------
7179             # Handle -t (one tab per level; not recommended)
7180             #-----------------------------------------------
7181             elsif ( $rOpts_tabs && $level ) {
7182              
7183 0         0 my $leading_string = ( "\t" x $level );
7184 0         0 my $space_count = $leading_space_count - $level * $rOpts_indent_columns;
7185              
7186             # shouldn't happen:
7187 0 0       0 if ( $space_count < 0 ) {
7188              
7189             # But it could be an outdented comment
7190 0 0       0 if ( $line !~ /^\s*#/ ) {
7191 0         0 DEBUG_TABS
7192             && $self->warning(
7193             "Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
7194             );
7195             }
7196 0         0 $leading_string = ( SPACE x $leading_space_count );
7197             }
7198             else {
7199 0         0 $leading_string .= ( SPACE x $space_count );
7200             }
7201 0         0 $line = $leading_string . $trimmed_line;
7202             }
7203              
7204             # nothing to do; we should have skipped a call to this sub
7205             else {
7206 0         0 if (DEVEL_MODE) {
7207             Fault(
7208             "in tab sub but neither -t nor -et set: check flag 'require_tabs'\n"
7209             );
7210             }
7211             }
7212 45         78 return $line;
7213             } ## end sub add_leading_tabs
7214              
7215             sub valign_output_step_D {
7216              
7217 8346     8346 0 15342 my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
7218              
7219             #----------------------------------------------------------------
7220             # This is Step D in writing vertically aligned lines.
7221             # It is the end of the vertical alignment pipeline.
7222             # Write one vertically aligned line of code to the output object.
7223             #----------------------------------------------------------------
7224              
7225             # Convert leading whitespace to use tabs if requested.
7226 8346 100 100     16010 if ( $require_tabs && $leading_space_count > 0 ) {
7227 45         91 $line = $self->add_leading_tabs( $line, $leading_space_count, $level );
7228             }
7229              
7230 8346         11382 my $file_writer_object = $self->[_file_writer_object_];
7231 8346         33881 $file_writer_object->write_code_line( $line . "\n", $Kend );
7232              
7233 8346         14303 return;
7234             } ## end sub valign_output_step_D
7235              
7236             ##########################
7237             # CODE SECTION 12: Summary
7238             ##########################
7239              
7240             sub report_anything_unusual {
7241 648     648 0 1028 my $self = shift;
7242 648         1155 my $logger_object = $self->[_logger_object_];
7243 648 100       1644 return if ( !$logger_object );
7244              
7245 646         1168 my $outdented_line_count = $self->[_outdented_line_count_];
7246 646 100       1621 if ( $outdented_line_count > 0 ) {
7247 23         106 $logger_object->write_logfile_entry(
7248             "$outdented_line_count long lines were outdented:\n");
7249 23         50 my $first_outdented_line_at = $self->[_first_outdented_line_at_];
7250 23         90 $logger_object->write_logfile_entry(
7251             " First at output line $first_outdented_line_at\n");
7252              
7253 23 100       77 if ( $outdented_line_count > 1 ) {
7254 7         12 my $last_outdented_line_at = $self->[_last_outdented_line_at_];
7255 7         21 $logger_object->write_logfile_entry(
7256             " Last at output line $last_outdented_line_at\n");
7257             }
7258             $logger_object->write_logfile_entry(
7259 23         67 " use -noll to prevent outdenting, -l=n to increase line length\n"
7260             );
7261 23         53 $logger_object->write_logfile_entry("\n");
7262             }
7263 646         1209 return;
7264             } ## end sub report_anything_unusual
7265              
7266             } ## end package Perl::Tidy::VerticalAligner
7267             1;