File Coverage

blib/lib/Perl/Tidy/Formatter.pm
Criterion Covered Total %
statement 8704 10269 84.7
branch 4106 5584 73.5
condition 3205 4716 67.9
subroutine 351 386 90.9
pod 0 285 0.0
total 16366 21240 77.0


line stmt bran cond sub pod time code
1             ####################################################################
2             #
3             # The Perl::Tidy::Formatter package adds indentation, whitespace, and
4             # line breaks to the token stream
5             #
6             #####################################################################
7              
8             # Index...
9             # CODE SECTION 1: Preliminary code, global definitions and sub new
10             # sub new
11             # CODE SECTION 2: Some Basic Utilities
12             # CODE SECTION 3: Check and process options
13             # sub check_options
14             # CODE SECTION 4: Receive lines from the tokenizer
15             # sub write_line
16             # CODE SECTION 5: Pre-process the entire file
17             # sub finish_formatting
18             # CODE SECTION 6: Process line-by-line
19             # sub process_all_lines
20             # CODE SECTION 7: Process lines of code
21             # process_line_of_CODE
22             # CODE SECTION 8: Utilities for setting breakpoints
23             # sub set_forced_breakpoint
24             # CODE SECTION 9: Process batches of code
25             # sub grind_batch_of_CODE
26             # CODE SECTION 10: Code to break long statements
27             # sub break_long_lines
28             # CODE SECTION 11: Code to break long lists
29             # sub break_lists
30             # CODE SECTION 12: Code for setting indentation
31             # CODE SECTION 13: Preparing batch of lines for vertical alignment
32             # sub convey_batch_to_vertical_aligner
33             # CODE SECTION 14: Code for creating closing side comments
34             # sub add_closing_side_comment
35             # CODE SECTION 15: Summarize
36             # sub wrapup
37              
38             #######################################################################
39             # CODE SECTION 1: Preliminary code and global definitions up to sub new
40             #######################################################################
41              
42             package Perl::Tidy::Formatter;
43 39     39   303 use strict;
  39         83  
  39         1362  
44 39     39   205 use warnings;
  39         93  
  39         1231  
45              
46             # DEVEL_MODE gets switched on during automated testing for extra checking
47 39     39   205 use constant DEVEL_MODE => 0;
  39         78  
  39         2299  
48 39     39   267 use constant EMPTY_STRING => q{};
  39         75  
  39         1975  
49 39     39   236 use constant SPACE => q{ };
  39         78  
  39         2081  
50              
51             { #<<< A non-indenting brace to contain all lexical variables
52              
53 39     39   272 use Carp;
  39         120  
  39         2665  
54 39     39   277 use English qw( -no_match_vars );
  39         95  
  39         266  
55 39     39   15046 use List::Util qw( min max first ); # min, max first are in Perl 5.8
  39         89  
  39         43234  
56             our $VERSION = '20230912';
57              
58             # The Tokenizer will be loaded with the Formatter
59             ##use Perl::Tidy::Tokenizer; # for is_keyword()
60              
61             sub AUTOLOAD {
62              
63             # Catch any undefined sub calls so that we are sure to get
64             # some diagnostic information. This sub should never be called
65             # except for a programming error.
66 0     0   0 our $AUTOLOAD;
67 0 0       0 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
68 0         0 my ( $pkg, $fname, $lno ) = caller();
69 0         0 my $my_package = __PACKAGE__;
70 0         0 print {*STDERR} <<EOM;
  0         0  
71             ======================================================================
72             Error detected in package '$my_package', version $VERSION
73             Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
74             Called from package: '$pkg'
75             Called from File '$fname' at line '$lno'
76             This error is probably due to a recent programming change
77             ======================================================================
78             EOM
79 0         0 exit 1;
80             } ## end sub AUTOLOAD
81              
82             sub DESTROY {
83 561     561   1427 my $self = shift;
84 561         2675 $self->_decrement_count();
85 561         39279 return;
86             }
87              
88             sub Die {
89 0     0 0 0 my ($msg) = @_;
90 0         0 Perl::Tidy::Die($msg);
91 0         0 croak "unexpected return from Perl::Tidy::Die";
92             }
93              
94             sub Warn {
95 0     0 0 0 my ($msg) = @_;
96 0         0 Perl::Tidy::Warn($msg);
97 0         0 return;
98             }
99              
100             sub Fault {
101 0     0 0 0 my ($msg) = @_;
102              
103             # This routine is called for errors that really should not occur
104             # except if there has been a bug introduced by a recent program change.
105             # Please add comments at calls to Fault to explain why the call
106             # should not occur, and where to look to fix it.
107 0         0 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
108 0         0 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
109 0         0 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
110 0         0 my $pkg = __PACKAGE__;
111              
112 0         0 my $input_stream_name = get_input_stream_name();
113              
114 0         0 Die(<<EOM);
115             ==============================================================================
116             While operating on input stream with name: '$input_stream_name'
117             A fault was detected at line $line0 of sub '$subroutine1'
118             in file '$filename1'
119             which was called from line $line1 of sub '$subroutine2'
120             Message: '$msg'
121             This is probably an error introduced by a recent programming change.
122             $pkg reports VERSION='$VERSION'.
123             ==============================================================================
124             EOM
125              
126             # We shouldn't get here, but this return is to keep Perl-Critic from
127             # complaining.
128 0         0 return;
129             } ## end sub Fault
130              
131             sub Fault_Warn {
132 0     0 0 0 my ($msg) = @_;
133              
134             # This is the same as Fault except that it calls Warn instead of Die
135             # and returns.
136 0         0 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
137 0         0 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
138 0         0 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
139 0         0 my $input_stream_name = get_input_stream_name();
140              
141 0         0 Warn(<<EOM);
142             ==============================================================================
143             While operating on input stream with name: '$input_stream_name'
144             A fault was detected at line $line0 of sub '$subroutine1'
145             in file '$filename1'
146             which was called from line $line1 of sub '$subroutine2'
147             Message: '$msg'
148             This is probably an error introduced by a recent programming change.
149             Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
150             ==============================================================================
151             EOM
152              
153 0         0 return;
154             } ## end sub Fault_Warn
155              
156             sub Exit {
157 0     0 0 0 my ($msg) = @_;
158 0         0 Perl::Tidy::Exit($msg);
159 0         0 croak "unexpected return from Perl::Tidy::Exit";
160             }
161              
162             # Global variables ...
163             my (
164              
165             #-----------------------------------------------------------------
166             # Section 1: Global variables which are either always constant or
167             # are constant after being configured by user-supplied
168             # parameters. They remain constant as a file is being processed.
169             # The INITIALIZER comment tells the sub responsible for initializing
170             # each variable. Failure to initialize or re-initialize a global
171             # variable can cause bugs which are hard to locate.
172             #-----------------------------------------------------------------
173              
174             # INITIALIZER: sub check_options
175             $rOpts,
176              
177             # short-cut option variables
178             # INITIALIZER: sub initialize_global_option_vars
179             $rOpts_add_newlines,
180             $rOpts_add_whitespace,
181             $rOpts_add_trailing_commas,
182             $rOpts_blank_lines_after_opening_block,
183             $rOpts_block_brace_tightness,
184             $rOpts_block_brace_vertical_tightness,
185             $rOpts_brace_follower_vertical_tightness,
186             $rOpts_break_after_labels,
187             $rOpts_break_at_old_attribute_breakpoints,
188             $rOpts_break_at_old_comma_breakpoints,
189             $rOpts_break_at_old_keyword_breakpoints,
190             $rOpts_break_at_old_logical_breakpoints,
191             $rOpts_break_at_old_semicolon_breakpoints,
192             $rOpts_break_at_old_ternary_breakpoints,
193             $rOpts_break_open_compact_parens,
194             $rOpts_closing_side_comments,
195             $rOpts_closing_side_comment_else_flag,
196             $rOpts_closing_side_comment_maximum_text,
197             $rOpts_comma_arrow_breakpoints,
198             $rOpts_continuation_indentation,
199             $rOpts_cuddled_paren_brace,
200             $rOpts_delete_closing_side_comments,
201             $rOpts_delete_old_whitespace,
202             $rOpts_delete_side_comments,
203             $rOpts_delete_trailing_commas,
204             $rOpts_delete_weld_interfering_commas,
205             $rOpts_extended_continuation_indentation,
206             $rOpts_format_skipping,
207             $rOpts_freeze_whitespace,
208             $rOpts_function_paren_vertical_alignment,
209             $rOpts_fuzzy_line_length,
210             $rOpts_ignore_old_breakpoints,
211             $rOpts_ignore_side_comment_lengths,
212             $rOpts_ignore_perlcritic_comments,
213             $rOpts_indent_closing_brace,
214             $rOpts_indent_columns,
215             $rOpts_indent_only,
216             $rOpts_keep_interior_semicolons,
217             $rOpts_line_up_parentheses,
218             $rOpts_logical_padding,
219             $rOpts_maximum_consecutive_blank_lines,
220             $rOpts_maximum_fields_per_table,
221             $rOpts_maximum_line_length,
222             $rOpts_one_line_block_semicolons,
223             $rOpts_opening_brace_always_on_right,
224             $rOpts_outdent_keywords,
225             $rOpts_outdent_labels,
226             $rOpts_outdent_long_comments,
227             $rOpts_outdent_long_quotes,
228             $rOpts_outdent_static_block_comments,
229             $rOpts_recombine,
230             $rOpts_short_concatenation_item_length,
231             $rOpts_space_prototype_paren,
232             $rOpts_stack_closing_block_brace,
233             $rOpts_static_block_comments,
234             $rOpts_add_missing_else,
235             $rOpts_warn_missing_else,
236             $rOpts_tee_block_comments,
237             $rOpts_tee_pod,
238             $rOpts_tee_side_comments,
239             $rOpts_variable_maximum_line_length,
240             $rOpts_valign_code,
241             $rOpts_valign_side_comments,
242             $rOpts_valign_if_unless,
243             $rOpts_whitespace_cycle,
244             $rOpts_extended_block_tightness,
245             $rOpts_extended_line_up_parentheses,
246              
247             # Static hashes
248             # INITIALIZER: BEGIN block
249             %is_assignment,
250             %is_non_list_type,
251             %is_if_unless_and_or_last_next_redo_return,
252             %is_if_elsif_else_unless_while_until_for_foreach,
253             %is_if_unless_while_until_for_foreach,
254             %is_last_next_redo_return,
255             %is_if_unless,
256             %is_if_elsif,
257             %is_if_unless_elsif,
258             %is_if_unless_elsif_else,
259             %is_elsif_else,
260             %is_and_or,
261             %is_chain_operator,
262             %is_block_without_semicolon,
263             %ok_to_add_semicolon_for_block_type,
264             %is_opening_type,
265             %is_closing_type,
266             %is_opening_token,
267             %is_closing_token,
268             %is_ternary,
269             %is_equal_or_fat_comma,
270             %is_counted_type,
271             %is_opening_sequence_token,
272             %is_closing_sequence_token,
273             %matching_token,
274             %is_container_label_type,
275             %is_die_confess_croak_warn,
276             %is_my_our_local,
277             %is_soft_keep_break_type,
278             %is_indirect_object_taker,
279             @all_operators,
280             %is_do_follower,
281             %is_anon_sub_brace_follower,
282             %is_anon_sub_1_brace_follower,
283             %is_other_brace_follower,
284              
285             # INITIALIZER: sub check_options
286             $controlled_comma_style,
287             %keep_break_before_type,
288             %keep_break_after_type,
289             %outdent_keyword,
290             %keyword_paren_inner_tightness,
291             %container_indentation_options,
292             %tightness,
293             %line_up_parentheses_control_hash,
294             $line_up_parentheses_control_is_lxpl,
295              
296             # These can be modified by grep-alias-list
297             # INITIALIZER: sub initialize_grep_and_friends
298             %is_sort_map_grep,
299             %is_sort_map_grep_eval,
300             %is_sort_map_grep_eval_do,
301             %is_block_with_ci,
302             %is_keyword_returning_list,
303             %block_type_map, # initialized in BEGIN, but may be changed
304             %want_one_line_block, # may be changed in prepare_cuddled_block_types
305              
306             # INITIALIZER: sub prepare_cuddled_block_types
307             $rcuddled_block_types,
308              
309             # INITIALIZER: sub initialize_whitespace_hashes
310             %binary_ws_rules,
311             %want_left_space,
312             %want_right_space,
313              
314             # INITIALIZER: sub initialize_bond_strength_hashes
315             %right_bond_strength,
316             %left_bond_strength,
317              
318             # INITIALIZER: sub initialize_token_break_preferences
319             %want_break_before,
320             %break_before_container_types,
321              
322             # INITIALIZER: sub initialize_space_after_keyword
323             %space_after_keyword,
324              
325             # INITIALIZER: sub initialize_extended_block_tightness_list
326             %extended_block_tightness_list,
327              
328             # INITIALIZED BY initialize_global_option_vars
329             %opening_vertical_tightness,
330             %closing_vertical_tightness,
331             %closing_token_indentation,
332             $some_closing_token_indentation,
333             %opening_token_right,
334             %stack_opening_token,
335             %stack_closing_token,
336              
337             # INITIALIZER: sub initialize_weld_nested_exclusion_rules
338             %weld_nested_exclusion_rules,
339              
340             # INITIALIZER: sub initialize_weld_fat_comma_rules
341             %weld_fat_comma_rules,
342              
343             # INITIALIZER: sub initialize_trailing_comma_rules
344             %trailing_comma_rules,
345              
346             # regex patterns for text identification.
347             # Most can be configured by user parameters.
348             # Most are initialized in a sub make_**_pattern during configuration.
349              
350             # INITIALIZER: sub make_sub_matching_pattern
351             $SUB_PATTERN,
352             $ASUB_PATTERN,
353             %matches_ASUB,
354              
355             # INITIALIZER: make_static_block_comment_pattern
356             $static_block_comment_pattern,
357              
358             # INITIALIZER: sub make_static_side_comment_pattern
359             $static_side_comment_pattern,
360              
361             # INITIALIZER: make_format_skipping_pattern
362             $format_skipping_pattern_begin,
363             $format_skipping_pattern_end,
364              
365             # INITIALIZER: sub make_non_indenting_brace_pattern
366             $non_indenting_brace_pattern,
367              
368             # INITIALIZER: sub make_bl_pattern
369             $bl_exclusion_pattern,
370              
371             # INITIALIZER: make_bl_pattern
372             $bl_pattern,
373              
374             # INITIALIZER: sub make_bli_pattern
375             $bli_exclusion_pattern,
376              
377             # INITIALIZER: sub make_bli_pattern
378             $bli_pattern,
379              
380             # INITIALIZER: sub make_block_brace_vertical_tightness_pattern
381             $block_brace_vertical_tightness_pattern,
382              
383             # INITIALIZER: sub make_blank_line_pattern
384             $blank_lines_after_opening_block_pattern,
385             $blank_lines_before_closing_block_pattern,
386              
387             # INITIALIZER: sub make_keyword_group_list_pattern
388             $keyword_group_list_pattern,
389             $keyword_group_list_comment_pattern,
390              
391             # INITIALIZER: sub make_closing_side_comment_prefix
392             $closing_side_comment_prefix_pattern,
393              
394             # INITIALIZER: sub make_closing_side_comment_list_pattern
395             $closing_side_comment_list_pattern,
396              
397             # Table to efficiently find indentation and max line length
398             # from level.
399             # INITIALIZER: sub initialize_line_length_vars
400             @maximum_line_length_at_level,
401             @maximum_text_length_at_level,
402             $stress_level_alpha,
403             $stress_level_beta,
404             $high_stress_level,
405              
406             # Total number of sequence items in a weld, for quick checks
407             # INITIALIZER: weld_containers
408             $total_weld_count,
409              
410             #--------------------------------------------------------
411             # Section 2: Work arrays for the current batch of tokens.
412             #--------------------------------------------------------
413              
414             # These are re-initialized for each batch of code
415             # INITIALIZER: sub initialize_batch_variables
416             $max_index_to_go,
417             @block_type_to_go,
418             @type_sequence_to_go,
419             @forced_breakpoint_to_go,
420             @token_lengths_to_go,
421             @summed_lengths_to_go,
422             @levels_to_go,
423             @leading_spaces_to_go,
424             @reduced_spaces_to_go,
425             @mate_index_to_go,
426             @ci_levels_to_go,
427             @nesting_depth_to_go,
428             @nobreak_to_go,
429             @old_breakpoint_to_go,
430             @tokens_to_go,
431             @K_to_go,
432             @types_to_go,
433             @inext_to_go,
434             @parent_seqno_to_go,
435              
436             # forced breakpoint variables associated with each batch of code
437             $forced_breakpoint_count,
438             $forced_breakpoint_undo_count,
439             $index_max_forced_break,
440             );
441              
442 0         0 BEGIN {
443              
444             # Index names for token variables.
445             # Do not combine with other BEGIN blocks (c101).
446 39     39   12772 my $i = 0;
447             use constant {
448 39         5264 _CI_LEVEL_ => $i++,
449             _CUMULATIVE_LENGTH_ => $i++,
450             _LINE_INDEX_ => $i++,
451             _KNEXT_SEQ_ITEM_ => $i++,
452             _LEVEL_ => $i++,
453             _TOKEN_ => $i++,
454             _TOKEN_LENGTH_ => $i++,
455             _TYPE_ => $i++,
456             _TYPE_SEQUENCE_ => $i++,
457              
458             # Number of token variables; must be last in list:
459             _NVARS => $i++,
460 39     39   402 };
  39         129  
461             } ## end BEGIN
462              
463 0         0 BEGIN {
464              
465             # Index names for $self variables.
466             # Do not combine with other BEGIN blocks (c101).
467 39     39   3121 my $i = 0;
468             use constant {
469 39         31784 _rlines_ => $i++,
470             _rLL_ => $i++,
471             _Klimit_ => $i++,
472             _rdepth_of_opening_seqno_ => $i++,
473             _rSS_ => $i++,
474             _Iss_opening_ => $i++,
475             _Iss_closing_ => $i++,
476             _rblock_type_of_seqno_ => $i++,
477             _ris_asub_block_ => $i++,
478             _ris_sub_block_ => $i++,
479             _K_opening_container_ => $i++,
480             _K_closing_container_ => $i++,
481             _K_opening_ternary_ => $i++,
482             _K_closing_ternary_ => $i++,
483             _K_first_seq_item_ => $i++,
484             _rtype_count_by_seqno_ => $i++,
485             _ris_function_call_paren_ => $i++,
486             _rlec_count_by_seqno_ => $i++,
487             _ris_broken_container_ => $i++,
488             _ris_permanently_broken_ => $i++,
489             _rblank_and_comment_count_ => $i++,
490             _rhas_list_ => $i++,
491             _rhas_broken_list_ => $i++,
492             _rhas_broken_list_with_lec_ => $i++,
493             _rfirst_comma_line_index_ => $i++,
494             _rhas_code_block_ => $i++,
495             _rhas_broken_code_block_ => $i++,
496             _rhas_ternary_ => $i++,
497             _ris_excluded_lp_container_ => $i++,
498             _rlp_object_by_seqno_ => $i++,
499             _rwant_reduced_ci_ => $i++,
500             _rno_xci_by_seqno_ => $i++,
501             _rbrace_left_ => $i++,
502             _ris_bli_container_ => $i++,
503             _rparent_of_seqno_ => $i++,
504             _rchildren_of_seqno_ => $i++,
505             _ris_list_by_seqno_ => $i++,
506             _ris_cuddled_closing_brace_ => $i++,
507             _rbreak_container_ => $i++,
508             _rshort_nested_ => $i++,
509             _length_function_ => $i++,
510             _is_encoded_data_ => $i++,
511             _fh_tee_ => $i++,
512             _sink_object_ => $i++,
513             _file_writer_object_ => $i++,
514             _vertical_aligner_object_ => $i++,
515             _logger_object_ => $i++,
516             _radjusted_levels_ => $i++,
517             _this_batch_ => $i++,
518              
519             _ris_special_identifier_token_ => $i++,
520             _last_output_short_opening_token_ => $i++,
521              
522             _last_line_leading_type_ => $i++,
523             _last_line_leading_level_ => $i++,
524              
525             _added_semicolon_count_ => $i++,
526             _first_added_semicolon_at_ => $i++,
527             _last_added_semicolon_at_ => $i++,
528              
529             _deleted_semicolon_count_ => $i++,
530             _first_deleted_semicolon_at_ => $i++,
531             _last_deleted_semicolon_at_ => $i++,
532              
533             _embedded_tab_count_ => $i++,
534             _first_embedded_tab_at_ => $i++,
535             _last_embedded_tab_at_ => $i++,
536              
537             _first_tabbing_disagreement_ => $i++,
538             _last_tabbing_disagreement_ => $i++,
539             _tabbing_disagreement_count_ => $i++,
540             _in_tabbing_disagreement_ => $i++,
541             _first_brace_tabbing_disagreement_ => $i++,
542             _in_brace_tabbing_disagreement_ => $i++,
543              
544             _saw_VERSION_in_this_file_ => $i++,
545             _saw_END_or_DATA_ => $i++,
546              
547             _rK_weld_left_ => $i++,
548             _rK_weld_right_ => $i++,
549             _rweld_len_right_at_K_ => $i++,
550              
551             _rspecial_side_comment_type_ => $i++,
552              
553             _rseqno_controlling_my_ci_ => $i++,
554             _ris_seqno_controlling_ci_ => $i++,
555             _save_logfile_ => $i++,
556             _maximum_level_ => $i++,
557             _maximum_level_at_line_ => $i++,
558             _maximum_BLOCK_level_ => $i++,
559             _maximum_BLOCK_level_at_line_ => $i++,
560              
561             _rKrange_code_without_comments_ => $i++,
562             _rbreak_before_Kfirst_ => $i++,
563             _rbreak_after_Klast_ => $i++,
564             _converged_ => $i++,
565              
566             _rstarting_multiline_qw_seqno_by_K_ => $i++,
567             _rending_multiline_qw_seqno_by_K_ => $i++,
568             _rKrange_multiline_qw_by_seqno_ => $i++,
569             _rmultiline_qw_has_extra_level_ => $i++,
570              
571             _rcollapsed_length_by_seqno_ => $i++,
572             _rbreak_before_container_by_seqno_ => $i++,
573             _roverride_cab3_ => $i++,
574             _ris_assigned_structure_ => $i++,
575             _ris_short_broken_eval_block_ => $i++,
576             _ris_bare_trailing_comma_by_seqno_ => $i++,
577              
578             _rseqno_non_indenting_brace_by_ix_ => $i++,
579             _rmax_vertical_tightness_ => $i++,
580              
581             _no_vertical_tightness_flags_ => $i++,
582              
583             _LAST_SELF_INDEX_ => $i - 1,
584 39     39   342 };
  39         91  
585             } ## end BEGIN
586              
587 0         0 BEGIN {
588              
589             # Index names for batch variables.
590             # Do not combine with other BEGIN blocks (c101).
591             # These are stored in _this_batch_, which is a sub-array of $self.
592 39     39   1374 my $i = 0;
593             use constant {
594 39         5090 _starting_in_quote_ => $i++,
595             _ending_in_quote_ => $i++,
596             _is_static_block_comment_ => $i++,
597             _ri_first_ => $i++,
598             _ri_last_ => $i++,
599             _do_not_pad_ => $i++,
600             _peak_batch_size_ => $i++,
601             _batch_count_ => $i++,
602             _rix_seqno_controlling_ci_ => $i++,
603             _batch_CODE_type_ => $i++,
604             _ri_starting_one_line_block_ => $i++,
605             _runmatched_opening_indexes_ => $i++,
606             _lp_object_count_this_batch_ => $i++,
607 39     39   291 };
  39         109  
608             } ## end BEGIN
609              
610             BEGIN {
611              
612             # Sequence number assigned to the root of sequence tree.
613             # The minimum of the actual sequences numbers is 4, so we can use 1
614 39     39   277 use constant SEQ_ROOT => 1;
  39         90  
  39         2400  
615              
616             # Codes for insertion and deletion of blanks
617 39     39   261 use constant DELETE => 0;
  39         86  
  39         2329  
618 39     39   266 use constant STABLE => 1;
  39         71  
  39         2103  
619 39     39   238 use constant INSERT => 2;
  39         88  
  39         2238  
620              
621             # whitespace codes
622 39     39   263 use constant WS_YES => 1;
  39         74  
  39         2055  
623 39     39   236 use constant WS_OPTIONAL => 0;
  39         82  
  39         2173  
624 39     39   238 use constant WS_NO => -1;
  39         74  
  39         2300  
625              
626             # Token bond strengths.
627 39     39   272 use constant NO_BREAK => 10_000;
  39         72  
  39         2349  
628 39     39   270 use constant VERY_STRONG => 100;
  39         128  
  39         2151  
629 39     39   256 use constant STRONG => 2.1;
  39         110  
  39         2304  
630 39     39   307 use constant NOMINAL => 1.1;
  39         133  
  39         2705  
631 39     39   277 use constant WEAK => 0.8;
  39         105  
  39         2023  
632 39     39   236 use constant VERY_WEAK => 0.55;
  39         72  
  39         2296  
633              
634             # values for testing indexes in output array
635 39     39   266 use constant UNDEFINED_INDEX => -1;
  39         105  
  39         2085  
636              
637             # Maximum number of little messages; probably need not be changed.
638 39     39   237 use constant MAX_NAG_MESSAGES => 6;
  39         77  
  39         2073  
639              
640             # This is the decimal range of printable characters in ASCII. It is used to
641             # make quick preliminary checks before resorting to using a regex.
642 39     39   289 use constant ORD_PRINTABLE_MIN => 33;
  39         82  
  39         2188  
643 39     39   257 use constant ORD_PRINTABLE_MAX => 126;
  39         75  
  39         36467  
644              
645             # Initialize constant hashes ...
646 39     39   179 my @q;
647              
648 39         192 @q = qw(
649             = **= += *= &= <<= &&=
650             -= /= |= >>= ||= //=
651             .= %= ^=
652             x=
653             );
654 39         530 @is_assignment{@q} = (1) x scalar(@q);
655              
656             # a hash needed by break_lists for efficiency:
657 39         157 push @q, qw{ ; < > ~ f };
658 39         416 @is_non_list_type{@q} = (1) x scalar(@q);
659              
660 39         180 @q = qw(is if unless and or err last next redo return);
661 39         403 @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
662              
663             # These block types may have text between the keyword and opening
664             # curly. Note: 'else' does not, but must be included to allow trailing
665             # if/elsif text to be appended.
666             # patch for SWITCH/CASE: added 'case' and 'when'
667 39         161 @q = qw(if elsif else unless while until for foreach case when catch);
668 39         303 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
669             (1) x scalar(@q);
670              
671 39         121 @q = qw(if unless while until for foreach);
672 39         155 @is_if_unless_while_until_for_foreach{@q} =
673             (1) x scalar(@q);
674              
675 39         116 @q = qw(last next redo return);
676 39         185 @is_last_next_redo_return{@q} = (1) x scalar(@q);
677              
678             # Map related block names into a common name to allow vertical alignment
679             # used by sub make_alignment_patterns. Note: this is normally unchanged,
680             # but it contains 'grep' and can be re-initialized in
681             # sub initialize_grep_and_friends in a testing mode.
682 39         448 %block_type_map = (
683             'unless' => 'if',
684             'else' => 'if',
685             'elsif' => 'if',
686             'when' => 'if',
687             'default' => 'if',
688             'case' => 'if',
689             'sort' => 'map',
690             'grep' => 'map',
691             );
692              
693 39         171 @q = qw(if unless);
694 39         166 @is_if_unless{@q} = (1) x scalar(@q);
695              
696 39         159 @q = qw(if elsif);
697 39         134 @is_if_elsif{@q} = (1) x scalar(@q);
698              
699 39         121 @q = qw(if unless elsif);
700 39         1500 @is_if_unless_elsif{@q} = (1) x scalar(@q);
701              
702 39         190 @q = qw(if unless elsif else);
703 39         119 @is_if_unless_elsif_else{@q} = (1) x scalar(@q);
704              
705 39         81 @q = qw(elsif else);
706 39         92 @is_elsif_else{@q} = (1) x scalar(@q);
707              
708 39         78 @q = qw(and or err);
709 39         99 @is_and_or{@q} = (1) x scalar(@q);
710              
711             # Identify certain operators which often occur in chains.
712             # Note: the minus (-) causes a side effect of padding of the first line in
713             # something like this (by sub set_logical_padding):
714             # Checkbutton => 'Transmission checked',
715             # -variable => \$TRANS
716             # This usually improves appearance so it seems ok.
717 39         143 @q = qw(&& || and or : ? . + - * /);
718 39         343 @is_chain_operator{@q} = (1) x scalar(@q);
719              
720             # Operators that the user can request break before or after.
721             # Note that some are keywords
722 39         279 @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
723             = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
724             . : ? && || and or err xor
725             );
726              
727             # We can remove semicolons after blocks preceded by these keywords
728 39         232 @q =
729             qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
730             unless while until for foreach given when default);
731 39         337 @is_block_without_semicolon{@q} = (1) x scalar(@q);
732              
733             # We will allow semicolons to be added within these block types
734             # as well as sub and package blocks.
735             # NOTES:
736             # 1. Note that these keywords are omitted:
737             # switch case given when default sort map grep
738             # 2. It is also ok to add for sub and package blocks and a labeled block
739             # 3. But not okay for other perltidy types including:
740             # { } ; G t
741             # 4. Test files: blktype.t, blktype1.t, semicolon.t
742 39         232 @q =
743             qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
744             unless do while until eval for foreach );
745 39         292 @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
746              
747             # 'L' is token for opening { at hash key
748 39         142 @q = qw< L { ( [ >;
749 39         158 @is_opening_type{@q} = (1) x scalar(@q);
750              
751             # 'R' is token for closing } at hash key
752 39         91 @q = qw< R } ) ] >;
753 39         123 @is_closing_type{@q} = (1) x scalar(@q);
754              
755 39         89 @q = qw< { ( [ >;
756 39         125 @is_opening_token{@q} = (1) x scalar(@q);
757              
758 39         164 @q = qw< } ) ] >;
759 39         162 @is_closing_token{@q} = (1) x scalar(@q);
760              
761 39         107 @q = qw( ? : );
762 39         189 @is_ternary{@q} = (1) x scalar(@q);
763              
764 39         140 @q = qw< { ( [ ? >;
765 39         213 @is_opening_sequence_token{@q} = (1) x scalar(@q);
766              
767 39         145 @q = qw< } ) ] : >;
768 39         141 @is_closing_sequence_token{@q} = (1) x scalar(@q);
769              
770 39         288 %matching_token = (
771             '{' => '}',
772             '(' => ')',
773             '[' => ']',
774             '?' => ':',
775              
776             '}' => '{',
777             ')' => '(',
778             ']' => '[',
779             ':' => '?',
780             );
781              
782             # a hash needed by sub break_lists for labeling containers
783 39         165 @q = qw( k => && || ? : . );
784 39         256 @is_container_label_type{@q} = (1) x scalar(@q);
785              
786 39         160 @q = qw( die confess croak warn );
787 39         141 @is_die_confess_croak_warn{@q} = (1) x scalar(@q);
788              
789 39         84 @q = qw( my our local );
790 39         120 @is_my_our_local{@q} = (1) x scalar(@q);
791              
792             # Braces -bbht etc must follow these. Note: experimentation with
793             # including a simple comma shows that it adds little and can lead
794             # to poor formatting in complex lists.
795 39         72 @q = qw( = => );
796 39         103 @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
797              
798 39         79 @q = qw( => ; h f );
799 39         81 push @q, ',';
800 39         139 @is_counted_type{@q} = (1) x scalar(@q);
801              
802             # Tokens where --keep-old-break-xxx flags make soft breaks instead
803             # of hard breaks. See b1433 and b1436.
804             # NOTE: $type is used as the hash key for now; if other container tokens
805             # are added it might be necessary to use a token/type mixture.
806 39         123 @q = qw# -> ? : && || + - / * #;
807 39         199 @is_soft_keep_break_type{@q} = (1) x scalar(@q);
808              
809             # these functions allow an identifier in the indirect object slot
810 39         103 @q = qw( print printf sort exec system say);
811 39         253 @is_indirect_object_taker{@q} = (1) x scalar(@q);
812              
813             # Define here tokens which may follow the closing brace of a do statement
814             # on the same line, as in:
815             # } while ( $something);
816 39         143 my @dof = qw(until while unless if ; : );
817 39         90 push @dof, ',';
818 39         181 @is_do_follower{@dof} = (1) x scalar(@dof);
819              
820             # what can follow a multi-line anonymous sub definition closing curly:
821 39         133 my @asf = qw# ; : => or and && || ~~ !~~ ) #;
822 39         124 push @asf, ',';
823 39         226 @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
824              
825             # what can follow a one-line anonymous sub closing curly:
826             # one-line anonymous subs also have ']' here...
827             # see tk3.t and PP.pm
828 39         217 my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
829 39         177 push @asf1, ',';
830 39         337 @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
831              
832             # What can follow a closing curly of a block
833             # which is not an if/elsif/else/do/sort/map/grep/eval/sub
834             # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
835 39         181 my @obf = qw# ; : => or and && || ) #;
836 39         95 push @obf, ',';
837 39         76650 @is_other_brace_follower{@obf} = (1) x scalar(@obf);
838              
839             } ## end BEGIN
840              
841             { ## begin closure to count instances
842              
843             # methods to count instances
844             my $_count = 0;
845 561     561   2621 sub _increment_count { return ++$_count }
846 561     561   1219 sub _decrement_count { return --$_count }
847             } ## end closure to count instances
848              
849             sub new {
850              
851 561     561 0 3383 my ( $class, @args ) = @_;
852              
853             # we are given an object with a write_line() method to take lines
854 561         5208 my %defaults = (
855             sink_object => undef,
856             diagnostics_object => undef,
857             logger_object => undef,
858             length_function => undef,
859             is_encoded_data => EMPTY_STRING,
860             fh_tee => undef,
861             );
862 561         4490 my %args = ( %defaults, @args );
863              
864 561         2113 my $length_function = $args{length_function};
865 561         1597 my $is_encoded_data = $args{is_encoded_data};
866 561         1272 my $fh_tee = $args{fh_tee};
867 561         1346 my $logger_object = $args{logger_object};
868 561         1238 my $diagnostics_object = $args{diagnostics_object};
869              
870             # we create another object with a get_line() and peek_ahead() method
871 561         1589 my $sink_object = $args{sink_object};
872 561         4195 my $file_writer_object =
873             Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
874              
875             # initialize closure variables...
876 561         2908 set_logger_object($logger_object);
877 561         2604 set_diagnostics_object($diagnostics_object);
878 561         2892 initialize_lp_vars();
879 561         2942 initialize_csc_vars();
880 561         3141 initialize_break_lists();
881 561         2960 initialize_undo_ci();
882 561         2900 initialize_process_line_of_CODE();
883 561         2942 initialize_grind_batch_of_CODE();
884 561         2608 initialize_get_final_indentation();
885 561         2433 initialize_postponed_breakpoint();
886 561         2587 initialize_batch_variables();
887 561         2702 initialize_write_line();
888              
889 561         5125 my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
890             rOpts => $rOpts,
891             file_writer_object => $file_writer_object,
892             logger_object => $logger_object,
893             diagnostics_object => $diagnostics_object,
894             );
895              
896 561         3505 write_logfile_entry("\nStarting tokenization pass...\n");
897              
898 561 100       4006 if ( $rOpts->{'entab-leading-whitespace'} ) {
    50          
899 2         16 write_logfile_entry(
900             "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
901             );
902             }
903             elsif ( $rOpts->{'tabs'} ) {
904 0         0 write_logfile_entry("Indentation will be with a tab character\n");
905             }
906             else {
907 559         3207 write_logfile_entry(
908             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
909             }
910              
911             # Initialize the $self array reference.
912             # To add an item, first add a constant index in the BEGIN block above.
913 561         2601 my $self = [];
914              
915             # Basic data structures...
916 561         2000 $self->[_rlines_] = []; # = ref to array of lines of the file
917              
918             # 'rLL' = reference to the continuous liner array of all tokens in a file.
919             # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
920             # 'LL' stuck because it is easy to type. The 'rLL' array is updated
921             # by sub 'respace_tokens' during reformatting. The indexes in 'rLL' begin
922             # with '$K' by convention.
923 561         1613 $self->[_rLL_] = [];
924 561         1570 $self->[_Klimit_] = undef; # = maximum K index for rLL.
925              
926             # Indexes into the rLL list
927 561         1931 $self->[_K_opening_container_] = {};
928 561         1874 $self->[_K_closing_container_] = {};
929 561         1657 $self->[_K_opening_ternary_] = {};
930 561         1655 $self->[_K_closing_ternary_] = {};
931 561         1565 $self->[_K_first_seq_item_] = undef; # K of first token with a sequence #
932              
933             # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
934             # numbers with + or - indicating opening or closing. This list represents
935             # the entire container tree and is invariant under reformatting. It can be
936             # used to quickly travel through the tree. Indexes in the rSS array begin
937             # with '$I' by convention. The 'Iss' arrays give the indexes in this list
938             # of opening and closing sequence numbers.
939 561         1538 $self->[_rSS_] = [];
940 561         1572 $self->[_Iss_opening_] = [];
941 561         1851 $self->[_Iss_closing_] = [];
942              
943             # Arrays to help traverse the tree
944 561         1394 $self->[_rdepth_of_opening_seqno_] = [];
945 561         1448 $self->[_rblock_type_of_seqno_] = {};
946 561         1406 $self->[_ris_asub_block_] = {};
947 561         1414 $self->[_ris_sub_block_] = {};
948              
949             # Mostly list characteristics and processing flags
950 561         1396 $self->[_rtype_count_by_seqno_] = {};
951 561         1440 $self->[_ris_function_call_paren_] = {};
952 561         1607 $self->[_rlec_count_by_seqno_] = {};
953 561         1589 $self->[_ris_broken_container_] = {};
954 561         1409 $self->[_ris_permanently_broken_] = {};
955 561         1446 $self->[_rblank_and_comment_count_] = {};
956 561         1534 $self->[_rhas_list_] = {};
957 561         1513 $self->[_rhas_broken_list_] = {};
958 561         1496 $self->[_rhas_broken_list_with_lec_] = {};
959 561         1453 $self->[_rfirst_comma_line_index_] = {};
960 561         1434 $self->[_rhas_code_block_] = {};
961 561         1882 $self->[_rhas_broken_code_block_] = {};
962 561         1670 $self->[_rhas_ternary_] = {};
963 561         1460 $self->[_ris_excluded_lp_container_] = {};
964 561         1527 $self->[_rlp_object_by_seqno_] = {};
965 561         1359 $self->[_rwant_reduced_ci_] = {};
966 561         1468 $self->[_rno_xci_by_seqno_] = {};
967 561         1640 $self->[_rbrace_left_] = {};
968 561         1457 $self->[_ris_bli_container_] = {};
969 561         1494 $self->[_rparent_of_seqno_] = {};
970 561         1404 $self->[_rchildren_of_seqno_] = {};
971 561         1430 $self->[_ris_list_by_seqno_] = {};
972 561         1372 $self->[_ris_cuddled_closing_brace_] = {};
973              
974 561         1378 $self->[_rbreak_container_] = {}; # prevent one-line blocks
975 561         1661 $self->[_rshort_nested_] = {}; # blocks not forced open
976 561         1262 $self->[_length_function_] = $length_function;
977 561         1376 $self->[_is_encoded_data_] = $is_encoded_data;
978              
979             # Some objects...
980 561         1380 $self->[_fh_tee_] = $fh_tee;
981 561         1313 $self->[_sink_object_] = $sink_object;
982 561         1283 $self->[_file_writer_object_] = $file_writer_object;
983 561         1225 $self->[_vertical_aligner_object_] = $vertical_aligner_object;
984 561         1209 $self->[_logger_object_] = $logger_object;
985              
986             # Reference to the batch being processed
987 561         1666 $self->[_this_batch_] = [];
988              
989             # Memory of processed text...
990 561         1455 $self->[_ris_special_identifier_token_] = {};
991 561         1313 $self->[_last_line_leading_level_] = 0;
992 561         1400 $self->[_last_line_leading_type_] = '#';
993 561         1312 $self->[_last_output_short_opening_token_] = 0;
994 561         1291 $self->[_added_semicolon_count_] = 0;
995 561         1248 $self->[_first_added_semicolon_at_] = 0;
996 561         1300 $self->[_last_added_semicolon_at_] = 0;
997 561         1322 $self->[_deleted_semicolon_count_] = 0;
998 561         1333 $self->[_first_deleted_semicolon_at_] = 0;
999 561         1612 $self->[_last_deleted_semicolon_at_] = 0;
1000 561         1333 $self->[_embedded_tab_count_] = 0;
1001 561         1255 $self->[_first_embedded_tab_at_] = 0;
1002 561         1310 $self->[_last_embedded_tab_at_] = 0;
1003 561         1249 $self->[_first_tabbing_disagreement_] = 0;
1004 561         1236 $self->[_last_tabbing_disagreement_] = 0;
1005 561         1246 $self->[_tabbing_disagreement_count_] = 0;
1006 561         1342 $self->[_in_tabbing_disagreement_] = 0;
1007 561         1690 $self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
1008 561         1505 $self->[_saw_END_or_DATA_] = 0;
1009 561         1312 $self->[_first_brace_tabbing_disagreement_] = undef;
1010 561         1308 $self->[_in_brace_tabbing_disagreement_] = undef;
1011              
1012             # Hashes related to container welding...
1013 561         1466 $self->[_radjusted_levels_] = [];
1014              
1015             # Weld data structures
1016 561         2020 $self->[_rK_weld_left_] = {};
1017 561         1805 $self->[_rK_weld_right_] = {};
1018 561         1598 $self->[_rweld_len_right_at_K_] = {};
1019              
1020             # -xci stuff
1021 561         1486 $self->[_rseqno_controlling_my_ci_] = {};
1022 561         1445 $self->[_ris_seqno_controlling_ci_] = {};
1023              
1024 561         1384 $self->[_rspecial_side_comment_type_] = {};
1025 561         1393 $self->[_maximum_level_] = 0;
1026 561         1278 $self->[_maximum_level_at_line_] = 0;
1027 561         1267 $self->[_maximum_BLOCK_level_] = 0;
1028 561         1227 $self->[_maximum_BLOCK_level_at_line_] = 0;
1029              
1030 561         1415 $self->[_rKrange_code_without_comments_] = [];
1031 561         1373 $self->[_rbreak_before_Kfirst_] = {};
1032 561         1382 $self->[_rbreak_after_Klast_] = {};
1033 561         1758 $self->[_converged_] = 0;
1034              
1035             # qw stuff
1036 561         1547 $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
1037 561         1584 $self->[_rending_multiline_qw_seqno_by_K_] = {};
1038 561         1625 $self->[_rKrange_multiline_qw_by_seqno_] = {};
1039 561         1631 $self->[_rmultiline_qw_has_extra_level_] = {};
1040              
1041 561         1452 $self->[_rcollapsed_length_by_seqno_] = {};
1042 561         1486 $self->[_rbreak_before_container_by_seqno_] = {};
1043 561         1559 $self->[_roverride_cab3_] = {};
1044 561         1479 $self->[_ris_assigned_structure_] = {};
1045 561         1398 $self->[_ris_short_broken_eval_block_] = {};
1046 561         1540 $self->[_ris_bare_trailing_comma_by_seqno_] = {};
1047              
1048 561         1464 $self->[_rseqno_non_indenting_brace_by_ix_] = {};
1049 561         1374 $self->[_rmax_vertical_tightness_] = {};
1050              
1051 561         1395 $self->[_no_vertical_tightness_flags_] = 0;
1052              
1053             # This flag will be updated later by a call to get_save_logfile()
1054 561         1585 $self->[_save_logfile_] = defined($logger_object);
1055              
1056             # Be sure all variables in $self have been initialized above. To find the
1057             # correspondence of index numbers and array names, copy a list to a file
1058             # and use the unix 'nl' command to number lines 1..
1059 561         1084 if (DEVEL_MODE) {
1060             my @non_existant;
1061             foreach ( 0 .. _LAST_SELF_INDEX_ ) {
1062             if ( !exists( $self->[$_] ) ) {
1063             push @non_existant, $_;
1064             }
1065             }
1066             if (@non_existant) {
1067             Fault("These indexes in self not initialized: (@non_existant)\n");
1068             }
1069             }
1070              
1071 561         1561 bless $self, $class;
1072              
1073             # Safety check..this is not a class yet
1074 561 50       2362 if ( _increment_count() > 1 ) {
1075 0         0 confess
1076             "Attempt to create more than 1 object in $class, which is not a true class yet\n";
1077             }
1078 561         4397 return $self;
1079             } ## end sub new
1080              
1081             ######################################
1082             # CODE SECTION 2: Some Basic Utilities
1083             ######################################
1084              
1085             sub check_rLL {
1086              
1087             # Verify that the rLL array has not been auto-vivified
1088 0     0 0 0 my ( $self, $msg ) = @_;
1089 0         0 my $rLL = $self->[_rLL_];
1090 0         0 my $Klimit = $self->[_Klimit_];
1091 0         0 my $num = @{$rLL};
  0         0  
1092 0 0 0     0 if ( ( defined($Klimit) && $Klimit != $num - 1 )
      0        
      0        
1093             || ( !defined($Klimit) && $num > 0 ) )
1094             {
1095              
1096             # This fault can occur if the array has been accessed for an index
1097             # greater than $Klimit, which is the last token index. Just accessing
1098             # the array above index $Klimit, not setting a value, can cause @rLL to
1099             # increase beyond $Klimit. If this occurs, the problem can be located
1100             # by making calls to this routine at different locations in
1101             # sub 'finish_formatting'.
1102 0 0       0 $Klimit = 'undef' if ( !defined($Klimit) );
1103 0 0       0 $msg = EMPTY_STRING unless $msg;
1104 0         0 Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n");
1105             }
1106 0         0 return;
1107             } ## end sub check_rLL
1108              
1109             sub check_keys {
1110 0     0 0 0 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
1111              
1112             # Check the keys of a hash:
1113             # $rtest = ref to hash to test
1114             # $rvalid = ref to hash with valid keys
1115              
1116             # $msg = a message to write in case of error
1117             # $exact_match defines the type of check:
1118             # = false: test hash must not have unknown key
1119             # = true: test hash must have exactly same keys as known hash
1120             my @unknown_keys =
1121 0         0 grep { !exists $rvalid->{$_} } keys %{$rtest};
  0         0  
  0         0  
1122             my @missing_keys =
1123 0         0 grep { !exists $rtest->{$_} } keys %{$rvalid};
  0         0  
  0         0  
1124 0         0 my $error = @unknown_keys;
1125 0 0 0     0 if ($exact_match) { $error ||= @missing_keys }
  0         0  
1126 0 0       0 if ($error) {
1127 0         0 local $LIST_SEPARATOR = ')(';
1128 0         0 my @expected_keys = sort keys %{$rvalid};
  0         0  
1129 0         0 @unknown_keys = sort @unknown_keys;
1130 0         0 Fault(<<EOM);
1131             ------------------------------------------------------------------------
1132             Program error detected checking hash keys
1133             Message is: '$msg'
1134             Expected keys: (@expected_keys)
1135             Unknown key(s): (@unknown_keys)
1136             Missing key(s): (@missing_keys)
1137             ------------------------------------------------------------------------
1138             EOM
1139             }
1140 0         0 return;
1141             } ## end sub check_keys
1142              
1143             sub check_token_array {
1144 0     0 0 0 my $self = shift;
1145              
1146             # Check for errors in the array of tokens. This is only called
1147             # when the DEVEL_MODE flag is set, so this Fault will only occur
1148             # during code development.
1149 0         0 my $rLL = $self->[_rLL_];
1150 0         0 foreach my $KK ( 0 .. @{$rLL} - 1 ) {
  0         0  
1151 0         0 my $nvars = @{ $rLL->[$KK] };
  0         0  
1152 0 0       0 if ( $nvars != _NVARS ) {
1153 0         0 my $NVARS = _NVARS;
1154 0         0 my $type = $rLL->[$KK]->[_TYPE_];
1155 0 0       0 $type = '*' unless defined($type);
1156              
1157             # The number of variables per token node is _NVARS and was set when
1158             # the array indexes were generated. So if the number of variables
1159             # is different we have done something wrong, like not store all of
1160             # them in sub 'write_line' when they were received from the
1161             # tokenizer.
1162 0         0 Fault(
1163             "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
1164             );
1165             }
1166 0         0 foreach my $var ( _TOKEN_, _TYPE_ ) {
1167 0 0       0 if ( !defined( $rLL->[$KK]->[$var] ) ) {
1168 0         0 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
1169              
1170             # This is a simple check that each token has some basic
1171             # variables. In other words, that there are no holes in the
1172             # array of tokens. Sub 'write_line' pushes tokens into the
1173             # $rLL array, so this should guarantee no gaps.
1174 0         0 Fault("Undefined variable $var for K=$KK, line=$iline\n");
1175             }
1176             }
1177             }
1178 0         0 return;
1179             } ## end sub check_token_array
1180              
1181             { ## begin closure check_line_hashes
1182              
1183             # This code checks that no auto-vivification occurs in the 'line' hash
1184              
1185             my %valid_line_hash;
1186              
1187             BEGIN {
1188              
1189             # These keys are defined for each line in the formatter
1190             # Each line must have exactly these quantities
1191 39     39   311 my @valid_line_keys = qw(
1192             _curly_brace_depth
1193             _ending_in_quote
1194             _guessed_indentation_level
1195             _line_number
1196             _line_text
1197             _line_type
1198             _paren_depth
1199             _quote_character
1200             _rK_range
1201             _square_bracket_depth
1202             _starting_in_quote
1203             _ended_in_blank_token
1204             _code_type
1205              
1206             _ci_level_0
1207             _level_0
1208             _nesting_blocks_0
1209             _nesting_tokens_0
1210             );
1211              
1212 39         118518 @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
1213             } ## end BEGIN
1214              
1215             sub check_line_hashes {
1216 0     0 0 0 my $self = shift;
1217 0         0 my $rlines = $self->[_rlines_];
1218 0         0 foreach my $rline ( @{$rlines} ) {
  0         0  
1219 0         0 my $iline = $rline->{_line_number};
1220 0         0 my $line_type = $rline->{_line_type};
1221 0         0 check_keys( $rline, \%valid_line_hash,
1222             "Checkpoint: line number =$iline, line_type=$line_type", 1 );
1223             }
1224 0         0 return;
1225             } ## end sub check_line_hashes
1226             } ## end closure check_line_hashes
1227              
1228             { ## begin closure for logger routines
1229             my $logger_object;
1230              
1231             # Called once per file to initialize the logger object
1232             sub set_logger_object {
1233 561     561 0 1623 $logger_object = shift;
1234 561         1244 return;
1235             }
1236              
1237             sub get_logger_object {
1238 0     0 0 0 return $logger_object;
1239             }
1240              
1241             sub get_input_stream_name {
1242 0     0 0 0 my $input_stream_name = EMPTY_STRING;
1243 0 0       0 if ($logger_object) {
1244 0         0 $input_stream_name = $logger_object->get_input_stream_name();
1245             }
1246 0         0 return $input_stream_name;
1247             } ## end sub get_input_stream_name
1248              
1249             # interface to Perl::Tidy::Logger routines
1250             sub warning {
1251 0     0 0 0 my ( $msg, $msg_line_number ) = @_;
1252 0 0       0 if ($logger_object) {
1253 0         0 $logger_object->warning( $msg, $msg_line_number );
1254             }
1255 0         0 return;
1256             }
1257              
1258             sub complain {
1259 0     0 0 0 my ( $msg, $msg_line_number ) = @_;
1260 0 0       0 if ($logger_object) {
1261 0         0 $logger_object->complain( $msg, $msg_line_number );
1262             }
1263 0         0 return;
1264             } ## end sub complain
1265              
1266             sub write_logfile_entry {
1267 3035     3035 0 7381 my @msg = @_;
1268 3035 100       7315 if ($logger_object) {
1269 3025         8548 $logger_object->write_logfile_entry(@msg);
1270             }
1271 3035         6109 return;
1272             } ## end sub write_logfile_entry
1273              
1274             sub get_saw_brace_error {
1275 561 100   561 0 2277 if ($logger_object) {
1276 559         2725 return $logger_object->get_saw_brace_error();
1277             }
1278 2         8 return;
1279             } ## end sub get_saw_brace_error
1280              
1281             sub we_are_at_the_last_line {
1282 561 100   561 0 2144 if ($logger_object) {
1283 559         3544 $logger_object->we_are_at_the_last_line();
1284             }
1285 561         1177 return;
1286             } ## end sub we_are_at_the_last_line
1287              
1288             } ## end closure for logger routines
1289              
1290             { ## begin closure for diagnostics routines
1291             my $diagnostics_object;
1292              
1293             # Called once per file to initialize the diagnostics object
1294             sub set_diagnostics_object {
1295 561     561 0 1519 $diagnostics_object = shift;
1296 561         1143 return;
1297             }
1298              
1299             # Available for debugging but not currently used:
1300             sub write_diagnostics {
1301 0     0 0 0 my ( $msg, $line_number ) = @_;
1302 0 0       0 if ($diagnostics_object) {
1303 0         0 $diagnostics_object->write_diagnostics( $msg, $line_number );
1304             }
1305 0         0 return;
1306             } ## end sub write_diagnostics
1307             } ## end closure for diagnostics routines
1308              
1309             sub get_convergence_check {
1310 5     5 0 14 my ($self) = @_;
1311 5         25 return $self->[_converged_];
1312             }
1313              
1314             sub get_output_line_number {
1315 43     43 0 81 my ($self) = @_;
1316 43         79 my $vao = $self->[_vertical_aligner_object_];
1317 43         168 return $vao->get_output_line_number();
1318             }
1319              
1320             sub want_blank_line {
1321 21     21 0 57 my $self = shift;
1322 21         80 $self->flush();
1323 21         128 my $file_writer_object = $self->[_file_writer_object_];
1324 21         128 $file_writer_object->want_blank_line();
1325 21         54 return;
1326             } ## end sub want_blank_line
1327              
1328             sub write_unindented_line {
1329 259     259 0 598 my ( $self, $line ) = @_;
1330 259         735 $self->flush();
1331 259         564 my $file_writer_object = $self->[_file_writer_object_];
1332 259         892 $file_writer_object->write_line($line);
1333 259         553 return;
1334             } ## end sub write_unindented_line
1335              
1336             sub consecutive_nonblank_lines {
1337 1     1 0 4 my ($self) = @_;
1338 1         3 my $file_writer_object = $self->[_file_writer_object_];
1339 1         3 my $vao = $self->[_vertical_aligner_object_];
1340 1         7 return $file_writer_object->get_consecutive_nonblank_lines() +
1341             $vao->get_cached_line_count();
1342             } ## end sub consecutive_nonblank_lines
1343              
1344             sub split_words {
1345              
1346             # given a string containing words separated by whitespace,
1347             # return the list of words
1348 7291     7291 0 14432 my ($str) = @_;
1349 7291 100       26550 return unless $str;
1350 2289         8141 $str =~ s/\s+$//;
1351 2289         4943 $str =~ s/^\s+//;
1352 2289         10778 return split( /\s+/, $str );
1353             } ## end sub split_words
1354              
1355             ###########################################
1356             # CODE SECTION 3: Check and process options
1357             ###########################################
1358              
1359             sub check_options {
1360              
1361             # This routine is called to check the user-supplied run parameters
1362             # and to configure the control hashes to them.
1363 560     560 0 1777 $rOpts = shift;
1364              
1365 560         1566 $controlled_comma_style = 0;
1366              
1367 560         3711 initialize_whitespace_hashes();
1368 560         3892 initialize_bond_strength_hashes();
1369              
1370             # This function must be called early to get hashes with grep initialized
1371 560         3243 initialize_grep_and_friends();
1372              
1373             # Make needed regex patterns for matching text.
1374             # NOTE: sub_matching_patterns must be made first because later patterns use
1375             # them; see RT #133130.
1376 560         4371 make_sub_matching_pattern(); # must be first pattern made
1377 560         3006 make_static_block_comment_pattern();
1378 560         2846 make_static_side_comment_pattern();
1379 560         2752 make_closing_side_comment_prefix();
1380 560         3733 make_closing_side_comment_list_pattern();
1381 560         2609 $format_skipping_pattern_begin =
1382             make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
1383 560         2104 $format_skipping_pattern_end =
1384             make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
1385 560         4236 make_non_indenting_brace_pattern();
1386              
1387             # If closing side comments ARE selected, then we can safely
1388             # delete old closing side comments unless closing side comment
1389             # warnings are requested. This is a good idea because it will
1390             # eliminate any old csc's which fall below the line count threshold.
1391             # We cannot do this if warnings are turned on, though, because we
1392             # might delete some text which has been added. So that must
1393             # be handled when comments are created. And we cannot do this
1394             # with -io because -csc will be skipped altogether.
1395 560 100       3415 if ( $rOpts->{'closing-side-comments'} ) {
    50          
1396 4 50 33     39 if ( !$rOpts->{'closing-side-comment-warnings'}
1397             && !$rOpts->{'indent-only'} )
1398             {
1399 4         14 $rOpts->{'delete-closing-side-comments'} = 1;
1400             }
1401             }
1402              
1403             # If closing side comments ARE NOT selected, but warnings ARE
1404             # selected and we ARE DELETING csc's, then we will pretend to be
1405             # adding with a huge interval. This will force the comments to be
1406             # generated for comparison with the old comments, but not added.
1407             elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
1408 0 0       0 if ( $rOpts->{'delete-closing-side-comments'} ) {
1409 0         0 $rOpts->{'delete-closing-side-comments'} = 0;
1410 0         0 $rOpts->{'closing-side-comments'} = 1;
1411 0         0 $rOpts->{'closing-side-comment-interval'} = 100_000_000;
1412             }
1413             }
1414             else {
1415             ## ok - no -csc issues
1416             }
1417              
1418 560         1600 my $comment = $rOpts->{'add-missing-else-comment'};
1419 560 100       2022 if ( !$comment ) {
1420 558         1453 $comment = "##FIXME - added with perltidy -ame";
1421             }
1422             else {
1423 2         8 $comment = substr( $comment, 0, 60 );
1424 2         8 $comment =~ s/^\s+//;
1425 2         9 $comment =~ s/\s+$//;
1426 2         7 $comment =~ s/\n/ /g;
1427 2 50       12 if ( substr( $comment, 0, 1 ) ne '#' ) {
1428 0         0 $comment = '#' . $comment;
1429             }
1430             }
1431 560         1712 $rOpts->{'add-missing-else-comment'} = $comment;
1432              
1433 560         3062 make_bli_pattern();
1434              
1435 560         3033 make_bl_pattern();
1436              
1437 560         3433 make_block_brace_vertical_tightness_pattern();
1438              
1439 560         2473 make_blank_line_pattern();
1440              
1441 560         2519 make_keyword_group_list_pattern();
1442              
1443 560         2683 prepare_cuddled_block_types();
1444              
1445 560 50       2387 if ( $rOpts->{'dump-cuddled-block-list'} ) {
1446 0         0 dump_cuddled_block_list(*STDOUT);
1447 0         0 Exit(0);
1448             }
1449              
1450             # -xlp implies -lp
1451 560 100       2568 if ( $rOpts->{'extended-line-up-parentheses'} ) {
1452 3   100     19 $rOpts->{'line-up-parentheses'} ||= 1;
1453             }
1454              
1455 560 100       2264 if ( $rOpts->{'line-up-parentheses'} ) {
1456              
1457 30 50 33     370 if ( $rOpts->{'indent-only'}
      33        
1458             || !$rOpts->{'add-newlines'}
1459             || !$rOpts->{'delete-old-newlines'} )
1460             {
1461 0         0 Warn(<<EOM);
1462             -----------------------------------------------------------------------
1463             Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
1464            
1465             The -lp indentation logic requires that perltidy be able to coordinate
1466             arbitrarily large numbers of line breakpoints. This isn't possible
1467             with these flags.
1468             -----------------------------------------------------------------------
1469             EOM
1470 0         0 $rOpts->{'line-up-parentheses'} = 0;
1471 0         0 $rOpts->{'extended-line-up-parentheses'} = 0;
1472             }
1473              
1474 30 50       182 if ( $rOpts->{'whitespace-cycle'} ) {
1475 0         0 Warn(<<EOM);
1476             Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
1477             EOM
1478 0         0 $rOpts->{'whitespace-cycle'} = 0;
1479             }
1480             }
1481              
1482             # At present, tabs are not compatible with the line-up-parentheses style
1483             # (it would be possible to entab the total leading whitespace
1484             # just prior to writing the line, if desired).
1485 560 50 66     2407 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
1486 0         0 Warn(<<EOM);
1487             Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
1488             EOM
1489 0         0 $rOpts->{'tabs'} = 0;
1490             }
1491              
1492             # Likewise, tabs are not compatible with outdenting..
1493 560 50 66     2380 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
1494 0         0 Warn(<<EOM);
1495             Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
1496             EOM
1497 0         0 $rOpts->{'tabs'} = 0;
1498             }
1499              
1500 560 50 66     4096 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
1501 0         0 Warn(<<EOM);
1502             Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
1503             EOM
1504 0         0 $rOpts->{'tabs'} = 0;
1505             }
1506              
1507 560 100       2216 if ( !$rOpts->{'space-for-semicolon'} ) {
1508 13         43 $want_left_space{'f'} = -1;
1509             }
1510              
1511 560 100       2478 if ( $rOpts->{'space-terminal-semicolon'} ) {
1512 2         7 $want_left_space{';'} = 1;
1513             }
1514              
1515             # We should put an upper bound on any -sil=n value. Otherwise enormous
1516             # files could be created by mistake.
1517 560         2151 for ( $rOpts->{'starting-indentation-level'} ) {
1518 560 50 33     2940 if ( $_ && $_ > 100 ) {
1519 0         0 Warn(<<EOM);
1520             The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
1521             EOM
1522 0         0 $_ = 0;
1523             }
1524             }
1525              
1526             # Require -msp > 0 to avoid future parsing problems (issue c147)
1527 560         2005 for ( $rOpts->{'minimum-space-to-comment'} ) {
1528 560 50 33     4093 if ( !$_ || $_ <= 0 ) { $_ = 1 }
  0         0  
1529             }
1530              
1531             # implement outdenting preferences for keywords
1532 560         2751 %outdent_keyword = ();
1533 560         2729 my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
1534 560 100       3120 if ( !@okw ) {
1535 559         2850 @okw = qw(next last redo goto return); # defaults
1536             }
1537              
1538             # FUTURE: if not a keyword, assume that it is an identifier
1539 560         2075 foreach (@okw) {
1540 2796 50       7862 if ( Perl::Tidy::Tokenizer::is_keyword($_) ) {
1541 2796         7091 $outdent_keyword{$_} = 1;
1542             }
1543             else {
1544 0         0 Warn("ignoring '$_' in -okwl list; not a perl keyword");
1545             }
1546             }
1547              
1548             # setup hash for -kpit option
1549 560         2019 %keyword_paren_inner_tightness = ();
1550 560         1935 my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
1551 560 100 66     4248 if ( defined($kpit_value) && $kpit_value != 1 ) {
1552             my @kpit =
1553 2         10 split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
1554 2 100       17 if ( !@kpit ) {
1555 1         5 @kpit = qw(if elsif unless while until for foreach); # defaults
1556             }
1557              
1558             # we will allow keywords and user-defined identifiers
1559 2         9 foreach (@kpit) {
1560 9         19 $keyword_paren_inner_tightness{$_} = $kpit_value;
1561             }
1562             }
1563              
1564             # implement user whitespace preferences
1565 560 100       2381 if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
1566 5         35 @want_left_space{@q} = (1) x scalar(@q);
1567             }
1568              
1569 560 100       2666 if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
1570 5         26 @want_right_space{@q} = (1) x scalar(@q);
1571             }
1572              
1573 560 100       2510 if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
1574 6         45 @want_left_space{@q} = (-1) x scalar(@q);
1575             }
1576              
1577 560 100       2461 if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
1578 7         38 @want_right_space{@q} = (-1) x scalar(@q);
1579             }
1580 560 50       2691 if ( $rOpts->{'dump-want-left-space'} ) {
1581 0         0 dump_want_left_space(*STDOUT);
1582 0         0 Exit(0);
1583             }
1584              
1585 560 50       2263 if ( $rOpts->{'dump-want-right-space'} ) {
1586 0         0 dump_want_right_space(*STDOUT);
1587 0         0 Exit(0);
1588             }
1589              
1590 560         3243 initialize_space_after_keyword();
1591              
1592 560         2915 initialize_extended_block_tightness_list();
1593              
1594 560         3148 initialize_token_break_preferences();
1595              
1596             #--------------------------------------------------------------
1597             # The combination -lp -iob -vmll -bbx=2 can be unstable (b1266)
1598             #--------------------------------------------------------------
1599             # The -vmll and -lp parameters do not really work well together.
1600             # To avoid instabilities, we will change any -bbx=2 to -bbx=1 (stable).
1601             # NOTE: we could make this more precise by looking at any exclusion
1602             # flags for -lp, and allowing -bbx=2 for excluded types.
1603 560 0 66     2371 if ( $rOpts->{'variable-maximum-line-length'}
      33        
1604             && $rOpts->{'ignore-old-breakpoints'}
1605             && $rOpts->{'line-up-parentheses'} )
1606             {
1607 0         0 my @changed;
1608 0         0 foreach my $key ( keys %break_before_container_types ) {
1609 0 0       0 if ( $break_before_container_types{$key} == 2 ) {
1610 0         0 $break_before_container_types{$key} = 1;
1611 0         0 push @changed, $key;
1612             }
1613             }
1614 0 0       0 if (@changed) {
1615              
1616             # we could write a warning here
1617             }
1618             }
1619              
1620             #-----------------------------------------------------------
1621             # The combination -lp -vmll can be unstable if -ci<2 (b1267)
1622             #-----------------------------------------------------------
1623             # The -vmll and -lp parameters do not really work well together.
1624             # This is a very crude fix for an unusual parameter combination.
1625 560 50 66     2267 if ( $rOpts->{'variable-maximum-line-length'}
      33        
1626             && $rOpts->{'line-up-parentheses'}
1627             && $rOpts->{'continuation-indentation'} < 2 )
1628             {
1629 0         0 $rOpts->{'continuation-indentation'} = 2;
1630             ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
1631             }
1632              
1633             #-----------------------------------------------------------
1634             # The combination -lp -vmll -atc -dtc can be unstable
1635             #-----------------------------------------------------------
1636             # This fixes b1386 b1387 b1388 which had -wtc='b'
1637             # Updated to to include any -wtc to fix b1426
1638 560 0 66     2200 if ( $rOpts->{'variable-maximum-line-length'}
      33        
      0        
      0        
1639             && $rOpts->{'line-up-parentheses'}
1640             && $rOpts->{'add-trailing-commas'}
1641             && $rOpts->{'delete-trailing-commas'}
1642             && $rOpts->{'want-trailing-commas'} )
1643             {
1644 0         0 $rOpts->{'delete-trailing-commas'} = 0;
1645             ## Issuing a warning message causes trouble with test cases, and this combo is
1646             ## so rare that it is unlikely to not occur in practice. So skip warning.
1647             ## Warn(
1648             ##"The combination -vmll -lp -atc -dtc can be unstable; turning off -dtc\n"
1649             ## );
1650             }
1651              
1652 560         1598 %container_indentation_options = ();
1653 560         3509 foreach my $pair (
1654             [ 'break-before-hash-brace-and-indent', '{' ],
1655             [ 'break-before-square-bracket-and-indent', '[' ],
1656             [ 'break-before-paren-and-indent', '(' ],
1657             )
1658             {
1659 1680         2659 my ( $key, $tok ) = @{$pair};
  1680         3796  
1660 1680         3766 my $opt = $rOpts->{$key};
1661 1680 50 66     7448 if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
      66        
1662             {
1663              
1664             # (1) -lp is not compatible with opt=2, silently set to opt=0
1665             # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
1666             # (3) set opt=0 if -i < -ci (can be unstable, case b1355)
1667 5 100       18 if ( $opt == 2 ) {
1668 3 100 66     19 if (
1669             $rOpts->{'line-up-parentheses'}
1670             || ( $rOpts->{'indent-columns'} <=
1671             $rOpts->{'continuation-indentation'} )
1672             )
1673             {
1674 1         5 $opt = 0;
1675             }
1676             }
1677 5         13 $container_indentation_options{$tok} = $opt;
1678             }
1679             }
1680              
1681 560         3168 $right_bond_strength{'{'} = WEAK;
1682 560         2091 $left_bond_strength{'{'} = VERY_STRONG;
1683              
1684             # make -l=0 equal to -l=infinite
1685 560 100       2310 if ( !$rOpts->{'maximum-line-length'} ) {
1686 4         14 $rOpts->{'maximum-line-length'} = 1_000_000;
1687             }
1688              
1689             # make -lbl=0 equal to -lbl=infinite
1690 560 50       2634 if ( !$rOpts->{'long-block-line-count'} ) {
1691 0         0 $rOpts->{'long-block-line-count'} = 1_000_000;
1692             }
1693              
1694             # hashes used to simplify setting whitespace
1695             %tightness = (
1696             '{' => $rOpts->{'brace-tightness'},
1697             '}' => $rOpts->{'brace-tightness'},
1698             '(' => $rOpts->{'paren-tightness'},
1699             ')' => $rOpts->{'paren-tightness'},
1700             '[' => $rOpts->{'square-bracket-tightness'},
1701 560         5057 ']' => $rOpts->{'square-bracket-tightness'},
1702             );
1703              
1704 560 100       2368 if ( $rOpts->{'ignore-old-breakpoints'} ) {
1705              
1706 2         5 my @conflicts;
1707 2 50       16 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
1708 0         0 $rOpts->{'break-at-old-method-breakpoints'} = 0;
1709 0         0 push @conflicts, '--break-at-old-method-breakpoints (-bom)';
1710             }
1711 2 50       9 if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
1712 0         0 $rOpts->{'break-at-old-comma-breakpoints'} = 0;
1713 0         0 push @conflicts, '--break-at-old-comma-breakpoints (-boc)';
1714             }
1715 2 50       11 if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
1716 0         0 $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
1717 0         0 push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)';
1718             }
1719 2 50       9 if ( $rOpts->{'keep-old-breakpoints-before'} ) {
1720 0         0 $rOpts->{'keep-old-breakpoints-before'} = EMPTY_STRING;
1721 0         0 push @conflicts, '--keep-old-breakpoints-before (-kbb)';
1722             }
1723 2 50       9 if ( $rOpts->{'keep-old-breakpoints-after'} ) {
1724 0         0 $rOpts->{'keep-old-breakpoints-after'} = EMPTY_STRING;
1725 0         0 push @conflicts, '--keep-old-breakpoints-after (-kba)';
1726             }
1727              
1728 2 50       8 if (@conflicts) {
1729 0         0 my $msg = join( "\n ",
1730             " Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:",
1731             @conflicts )
1732             . "\n";
1733 0         0 Warn($msg);
1734             }
1735              
1736             # Note: These additional parameters are made inactive by -iob.
1737             # They are silently turned off here because they are on by default.
1738             # We would generate unexpected warnings if we issued a warning.
1739 2         6 $rOpts->{'break-at-old-keyword-breakpoints'} = 0;
1740 2         5 $rOpts->{'break-at-old-logical-breakpoints'} = 0;
1741 2         5 $rOpts->{'break-at-old-ternary-breakpoints'} = 0;
1742 2         4 $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
1743             }
1744              
1745 560         1554 %keep_break_before_type = ();
1746 560         3758 initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'},
1747             'kbb', \%keep_break_before_type );
1748              
1749 560         1735 %keep_break_after_type = ();
1750 560         2673 initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
1751             'kba', \%keep_break_after_type );
1752              
1753             # Modify %keep_break_before and %keep_break_after to avoid conflicts
1754             # with %want_break_before; fixes b1436.
1755             # This became necessary after breaks for some tokens were converted
1756             # from hard to soft (see b1433).
1757             # We could do this for all tokens, but to minimize changes to existing
1758             # code we currently only do this for the soft break tokens.
1759 560         2795 foreach my $key ( keys %keep_break_before_type ) {
1760 2 50 66     17 if ( defined( $want_break_before{$key} )
      66        
1761             && !$want_break_before{$key}
1762             && $is_soft_keep_break_type{$key} )
1763             {
1764 0         0 $keep_break_after_type{$key} = $keep_break_before_type{$key};
1765 0         0 delete $keep_break_before_type{$key};
1766             }
1767             }
1768 560         2193 foreach my $key ( keys %keep_break_after_type ) {
1769 1 0 33     5 if ( defined( $want_break_before{$key} )
      0        
1770             && $want_break_before{$key}
1771             && $is_soft_keep_break_type{$key} )
1772             {
1773 0         0 $keep_break_before_type{$key} = $keep_break_after_type{$key};
1774 0         0 delete $keep_break_after_type{$key};
1775             }
1776             }
1777              
1778 560   66     3622 $controlled_comma_style ||= $keep_break_before_type{','};
1779 560   66     3415 $controlled_comma_style ||= $keep_break_after_type{','};
1780              
1781 560         2758 initialize_global_option_vars();
1782              
1783 560         2494 initialize_line_length_vars(); # after 'initialize_global_option_vars'
1784              
1785 560         3305 initialize_trailing_comma_rules(); # after 'initialize_line_length_vars'
1786              
1787 560         2892 initialize_weld_nested_exclusion_rules();
1788              
1789 560         2670 initialize_weld_fat_comma_rules();
1790              
1791 560         1390 %line_up_parentheses_control_hash = ();
1792 560         1300 $line_up_parentheses_control_is_lxpl = 1;
1793 560         1462 my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
1794 560         1344 my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'};
1795 560 50 66     2286 if ( $lpxl && $lpil ) {
1796 0         0 Warn( <<EOM );
1797             You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored
1798             EOM
1799             }
1800 560 100       2635 if ($lpxl) {
    100          
1801 3         6 $line_up_parentheses_control_is_lxpl = 1;
1802             initialize_line_up_parentheses_control_hash(
1803 3         14 $rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' );
1804             }
1805             elsif ($lpil) {
1806 1         3 $line_up_parentheses_control_is_lxpl = 0;
1807             initialize_line_up_parentheses_control_hash(
1808 1         6 $rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' );
1809             }
1810             else {
1811             ## ok - neither -lpxl nor -lpil
1812             }
1813              
1814 560         2658 return;
1815             } ## end sub check_options
1816              
1817 39     39   375 use constant ALIGN_GREP_ALIASES => 0;
  39         95  
  39         105761  
1818              
1819             sub initialize_grep_and_friends {
1820              
1821             # Initialize or re-initialize hashes with 'grep' and grep aliases. This
1822             # must be done after each set of options because new grep aliases may be
1823             # used.
1824              
1825             # re-initialize the hashes ... this is critical!
1826 560     560 0 3113 %is_sort_map_grep = ();
1827              
1828 560         2374 my @q = qw(sort map grep);
1829 560         2752 @is_sort_map_grep{@q} = (1) x scalar(@q);
1830              
1831 560         2036 my $olbxl = $rOpts->{'one-line-block-exclusion-list'};
1832 560         1261 my %is_olb_exclusion_word;
1833 560 100       2566 if ( defined($olbxl) ) {
1834 2         11 my @list = split_words($olbxl);
1835 2 50       12 if (@list) {
1836 2         11 @is_olb_exclusion_word{@list} = (1) x scalar(@list);
1837             }
1838             }
1839              
1840             # Make the list of block types which may be re-formed into one line.
1841             # They will be modified with the grep-alias-list below and
1842             # by sub 'prepare_cuddled_block_types'.
1843             # Note that it is essential to always re-initialize the hash here:
1844 560         2838 %want_one_line_block = ();
1845 560 100       2245 if ( !$is_olb_exclusion_word{'*'} ) {
1846 559         2327 foreach (qw(sort map grep eval)) {
1847 2236 100       5120 if ( !$is_olb_exclusion_word{$_} ) { $want_one_line_block{$_} = 1 }
  2235         5760  
1848             }
1849             }
1850              
1851             # Note that any 'grep-alias-list' string has been preprocessed to be a
1852             # trimmed, space-separated list.
1853 560         1945 my $str = $rOpts->{'grep-alias-list'};
1854 560         5588 my @grep_aliases = split /\s+/, $str;
1855              
1856 560 50       2806 if (@grep_aliases) {
1857              
1858 560         3364 @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
1859              
1860 560 100       2776 if ( $want_one_line_block{'grep'} ) {
1861 559         2859 @{want_one_line_block}{@grep_aliases} = (1) x scalar(@grep_aliases);
1862             }
1863             }
1864              
1865             ##@q = qw(sort map grep eval);
1866 560         4658 %is_sort_map_grep_eval = %is_sort_map_grep;
1867 560         2085 $is_sort_map_grep_eval{'eval'} = 1;
1868              
1869             ##@q = qw(sort map grep eval do);
1870 560         4132 %is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
1871 560         2082 $is_sort_map_grep_eval_do{'do'} = 1;
1872              
1873             # These block types can take ci. This is used by the -xci option.
1874             # Note that the 'sub' in this list is an anonymous sub. To be more correct
1875             # we could remove sub and use ASUB pattern to also handle a
1876             # prototype/signature. But that would slow things down and would probably
1877             # never be useful.
1878             ##@q = qw( do sub eval sort map grep );
1879 560         4311 %is_block_with_ci = %is_sort_map_grep_eval_do;
1880 560         2087 $is_block_with_ci{'sub'} = 1;
1881              
1882 560         3045 %is_keyword_returning_list = ();
1883 560         2829 @q = qw(
1884             grep
1885             keys
1886             map
1887             reverse
1888             sort
1889             split
1890             );
1891 560         2261 push @q, @grep_aliases;
1892 560         4106 @is_keyword_returning_list{@q} = (1) x scalar(@q);
1893              
1894             # This code enables vertical alignment of grep aliases for testing. It has
1895             # not been found to be beneficial, so it is off by default. But it is
1896             # useful for precise testing of the grep alias coding.
1897 560         1249 if (ALIGN_GREP_ALIASES) {
1898             %block_type_map = (
1899             'unless' => 'if',
1900             'else' => 'if',
1901             'elsif' => 'if',
1902             'when' => 'if',
1903             'default' => 'if',
1904             'case' => 'if',
1905             'sort' => 'map',
1906             'grep' => 'map',
1907             );
1908             foreach (@q) {
1909             $block_type_map{$_} = 'map' unless ( $_ eq 'map' );
1910             }
1911             }
1912 560         2317 return;
1913             } ## end sub initialize_grep_and_friends
1914              
1915             sub initialize_weld_nested_exclusion_rules {
1916 560     560 0 1473 %weld_nested_exclusion_rules = ();
1917              
1918 560         1449 my $opt_name = 'weld-nested-exclusion-list';
1919 560         1637 my $str = $rOpts->{$opt_name};
1920 560 100       1976 return unless ($str);
1921 4         22 $str =~ s/^\s+//;
1922 4         18 $str =~ s/\s+$//;
1923 4 50       16 return unless ($str);
1924              
1925             # There are four container tokens.
1926 4         28 my %token_keys = (
1927             '(' => '(',
1928             '[' => '[',
1929             '{' => '{',
1930             'q' => 'q',
1931             );
1932              
1933             # We are parsing an exclusion list for nested welds. The list is a string
1934             # with spaces separating any number of items. Each item consists of three
1935             # pieces of information:
1936             # <optional position> <optional type> <type of container>
1937             # < ^ or . > < k or K > < ( [ { >
1938              
1939             # The last character is the required container type and must be one of:
1940             # ( = paren
1941             # [ = square bracket
1942             # { = brace
1943              
1944             # An optional leading position indicator:
1945             # ^ means the leading token position in the weld
1946             # . means a secondary token position in the weld
1947             # no position indicator means all positions match
1948              
1949             # An optional alphanumeric character between the position and container
1950             # token selects to which the rule applies:
1951             # k = any keyword
1952             # K = any non-keyword
1953             # f = function call
1954             # F = not a function call
1955             # w = function or keyword
1956             # W = not a function or keyword
1957             # no letter means any preceding type matches
1958              
1959             # Examples:
1960             # ^( - the weld must not start with a paren
1961             # .( - the second and later tokens may not be parens
1962             # ( - no parens in weld
1963             # ^K( - exclude a leading paren not preceded by a keyword
1964             # .k( - exclude a secondary paren preceded by a keyword
1965             # [ { - exclude all brackets and braces
1966              
1967 4         25 my @items = split /\s+/, $str;
1968 4         13 my $msg1;
1969             my $msg2;
1970 4         14 foreach my $item (@items) {
1971 9         20 my $item_save = $item;
1972 9         20 my $tok = chop($item);
1973 9         17 my $key = $token_keys{$tok};
1974 9 50       31 if ( !defined($key) ) {
1975 0         0 $msg1 .= " '$item_save'";
1976 0         0 next;
1977             }
1978 9 100       28 if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
1979 8         19 $weld_nested_exclusion_rules{$key} = [];
1980             }
1981 9         18 my $rflags = $weld_nested_exclusion_rules{$key};
1982              
1983             # A 'q' means do not weld quotes
1984 9 100       27 if ( $tok eq 'q' ) {
1985 1         3 $rflags->[0] = '*';
1986 1         3 $rflags->[1] = '*';
1987 1         4 next;
1988             }
1989              
1990 8         13 my $pos = '*';
1991 8         18 my $select = '*';
1992 8 100       21 if ($item) {
1993 5 50       33 if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
1994 5 50       29 $pos = $1 if ($1);
1995 5 100       35 $select = $2 if ($2);
1996             }
1997             else {
1998 0         0 $msg1 .= " '$item_save'";
1999 0         0 next;
2000             }
2001             }
2002              
2003 8         13 my $err;
2004 8 100 100     41 if ( $pos eq '^' || $pos eq '*' ) {
2005 6 50 33     21 if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
2006 0         0 $err = 1;
2007             }
2008 6         14 $rflags->[0] = $select;
2009             }
2010 8 100 100     39 if ( $pos eq '.' || $pos eq '*' ) {
2011 5 50 33     19 if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
2012 0         0 $err = 1;
2013             }
2014 5         14 $rflags->[1] = $select;
2015             }
2016 8 50       27 if ($err) { $msg2 .= " '$item_save'"; }
  0         0  
2017             }
2018 4 50       15 if ($msg1) {
2019 0         0 Warn(<<EOM);
2020             Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2021             $msg1
2022             EOM
2023             }
2024 4 50       15 if ($msg2) {
2025 0         0 Warn(<<EOM);
2026             Multiple specifications were encountered in the --weld-nested-exclusion-list for:
2027             $msg2
2028             Only the last will be used.
2029             EOM
2030             }
2031 4         14 return;
2032             } ## end sub initialize_weld_nested_exclusion_rules
2033              
2034             sub initialize_weld_fat_comma_rules {
2035              
2036             # Initialize a hash controlling which opening token types can be
2037             # welded around a fat comma
2038 560     560 0 1486 %weld_fat_comma_rules = ();
2039              
2040             # The -wfc flag turns on welding of '=>' after an opening paren
2041 560 100       2133 if ( $rOpts->{'weld-fat-comma'} ) { $weld_fat_comma_rules{'('} = 1 }
  1         5  
2042              
2043             # This could be generalized in the future by introducing a parameter
2044             # -weld-fat-comma-after=str (-wfca=str), where str contains any of:
2045             # * { [ (
2046             # to indicate which opening parens may weld to a subsequent '=>'
2047              
2048             # The flag -wfc would then be equivalent to -wfca='('
2049              
2050             # This has not been done because it is not yet clear how useful
2051             # this generalization would be.
2052 560         1158 return;
2053             } ## end sub initialize_weld_fat_comma_rules
2054              
2055             sub initialize_line_up_parentheses_control_hash {
2056 4     4 0 18 my ( $str, $opt_name ) = @_;
2057 4 50       20 return unless ($str);
2058 4         33 $str =~ s/^\s+//;
2059 4         23 $str =~ s/\s+$//;
2060 4 50       17 return unless ($str);
2061              
2062             # The format is space separated items, where each item must consist of a
2063             # string with a token type preceded by an optional text token and followed
2064             # by an integer:
2065             # For example:
2066             # W(1
2067             # = (flag1)(key)(flag2), where
2068             # flag1 = 'W'
2069             # key = '('
2070             # flag2 = '1'
2071              
2072 4         24 my @items = split /\s+/, $str;
2073 4         15 my $msg1;
2074             my $msg2;
2075 4         19 foreach my $item (@items) {
2076 10         21 my $item_save = $item;
2077 10         20 my ( $flag1, $key, $flag2 );
2078 10 50       56 if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
2079 10 100       41 $flag1 = $1 if $1;
2080 10 50       35 $key = $2 if $2;
2081 10 100       40 $flag2 = $3 if $3;
2082             }
2083             else {
2084 0         0 $msg1 .= " '$item_save'";
2085 0         0 next;
2086             }
2087              
2088 10 50       25 if ( !defined($key) ) {
2089 0         0 $msg1 .= " '$item_save'";
2090 0         0 next;
2091             }
2092              
2093             # Check for valid flag1
2094 10 100       29 if ( !defined($flag1) ) { $flag1 = '*' }
  7         13  
2095              
2096 10 50       36 if ( $flag1 !~ /^[kKfFwW\*]$/ ) {
2097 0         0 $msg1 .= " '$item_save'";
2098 0         0 next;
2099             }
2100              
2101             # Check for valid flag2
2102             # 0 or blank: ignore container contents
2103             # 1 all containers with sublists match
2104             # 2 all containers with sublists, code blocks or ternary operators match
2105             # ... this could be extended in the future
2106 10 100       26 if ( !defined($flag2) ) { $flag2 = 0 }
  7         13  
2107              
2108 10 50       35 if ( $flag2 !~ /^[012]$/ ) {
2109 0         0 $msg1 .= " '$item_save'";
2110 0         0 next;
2111             }
2112              
2113 10 50       33 if ( !defined( $line_up_parentheses_control_hash{$key} ) ) {
2114 10         26 $line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ];
2115 10         31 next;
2116             }
2117              
2118             # check for multiple conflicting specifications
2119 0         0 my $rflags = $line_up_parentheses_control_hash{$key};
2120 0         0 my $err;
2121 0 0 0     0 if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
2122 0         0 $err = 1;
2123 0         0 $rflags->[0] = $flag1;
2124             }
2125 0 0 0     0 if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
2126 0         0 $err = 1;
2127 0         0 $rflags->[1] = $flag2;
2128             }
2129 0 0       0 $msg2 .= " '$item_save'" if ($err);
2130 0         0 next;
2131             }
2132 4 50       39 if ($msg1) {
2133 0         0 Warn(<<EOM);
2134             Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2135             $msg1
2136             EOM
2137             }
2138 4 50       20 if ($msg2) {
2139 0         0 Warn(<<EOM);
2140             Multiple specifications were encountered in the $opt_name at:
2141             $msg2
2142             Only the last will be used.
2143             EOM
2144             }
2145              
2146             # Speedup: we can turn off -lp if it is not actually used
2147 4 100       16 if ($line_up_parentheses_control_is_lxpl) {
2148 3         9 my $all_off = 1;
2149 3         10 foreach my $key (qw# ( { [ #) {
2150 5         14 my $rflags = $line_up_parentheses_control_hash{$key};
2151 5 50       17 if ( defined($rflags) ) {
2152 5         13 my ( $flag1, $flag2 ) = @{$rflags};
  5         12  
2153 5 100 66     32 if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
  2         7  
  2         5  
2154 3 50       14 if ($flag2) { $all_off = 0; last }
  0         0  
  0         0  
2155             }
2156             }
2157 3 100       14 if ($all_off) {
2158 1         4 $rOpts->{'line-up-parentheses'} = EMPTY_STRING;
2159             }
2160             }
2161              
2162 4         21 return;
2163             } ## end sub initialize_line_up_parentheses_control_hash
2164              
2165             sub initialize_space_after_keyword {
2166              
2167             # default keywords for which space is introduced before an opening paren
2168             # (at present, including them messes up vertical alignment)
2169 560     560 0 5252 my @sak = qw(my local our and or xor err eq ne if else elsif until
2170             unless while for foreach return switch case given when catch);
2171 560         1810 %space_after_keyword = map { $_ => 1 } @sak;
  12880         30303  
2172              
2173             # first remove any or all of these if desired
2174 560 100       3380 if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
2175              
2176             # -nsak='*' selects all the above keywords
2177 1 50 33     14 if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
  0         0  
2178 1         12 @space_after_keyword{@q} = (0) x scalar(@q);
2179             }
2180              
2181             # then allow user to add to these defaults
2182 560 100       2709 if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
2183 1         5 @space_after_keyword{@q} = (1) x scalar(@q);
2184             }
2185              
2186 560         2043 return;
2187             } ## end sub initialize_space_after_keyword
2188              
2189             sub initialize_extended_block_tightness_list {
2190              
2191             # Setup the control hash for --extended-block-tightness
2192              
2193             # keywords taking indirect objects:
2194 560     560 0 3529 my @k_list = keys %is_indirect_object_taker;
2195              
2196             # type symbols which may precede an opening block brace
2197 560         2816 my @t_list = qw($ @ % & *);
2198 560         1571 push @t_list, '$#';
2199              
2200 560         2137 my @all = ( @k_list, @t_list );
2201              
2202             # We will build the selection in %hash
2203             # By default the option is 'on' for keywords only (-xbtl='k')
2204 560         1355 my %hash;
2205 560         3671 @hash{@k_list} = (1) x scalar(@k_list);
2206 560         3597 @hash{@t_list} = (0) x scalar(@t_list);
2207              
2208             # This can be overridden with -xbtl="..."
2209 560         1746 my $long_name = 'extended-block-tightness-list';
2210 560 100       2668 if ( $rOpts->{$long_name} ) {
2211 2         12 my @words = split_words( $rOpts->{$long_name} );
2212 2         7 my @unknown;
2213              
2214             # Turn everything off
2215 2         12 @hash{@all} = (0) x scalar(@all);
2216              
2217             # Then turn on selections
2218 2         7 foreach my $word (@words) {
2219              
2220             # 'print' etc turns on a specific word or symbol
2221 4 100       26 if ( defined( $hash{$word} ) ) { $hash{$word} = 1; }
  2 50       5  
    100          
    50          
2222              
2223             # 'k' turns on all keywords
2224             elsif ( $word eq 'k' ) {
2225 0         0 @hash{@k_list} = (1) x scalar(@k_list);
2226             }
2227              
2228             # 't' turns on all symbols
2229             elsif ( $word eq 't' ) {
2230 1         6 @hash{@t_list} = (1) x scalar(@t_list);
2231             }
2232              
2233             # 'kt' same as 'k' and 't' for convenience
2234             elsif ( $word eq 'kt' ) {
2235 1         5 @hash{@all} = (1) x scalar(@all);
2236             }
2237              
2238             # Anything else is an error
2239 0         0 else { push @unknown, $word }
2240             }
2241 2 50       10 if (@unknown) {
2242 0         0 my $num = @unknown;
2243 0         0 local $LIST_SEPARATOR = SPACE;
2244 0         0 Warn(<<EOM);
2245             $num unrecognized keyword(s) were input with --$long_name :
2246             @unknown
2247             EOM
2248             }
2249             }
2250              
2251             # Transfer the result to the global hash
2252 560         4684 %extended_block_tightness_list = %hash;
2253              
2254 560         2875 return;
2255             } ## end sub initialize_extended_block_tightness_list
2256              
2257             sub initialize_token_break_preferences {
2258              
2259             # implement user break preferences
2260             my $break_after = sub {
2261 562     562   2294 my @toks = @_;
2262 562         2050 foreach my $tok (@toks) {
2263 124 100       219 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
  2         11  
2264 124 50       202 if ( $tok eq ',' ) { $controlled_comma_style = 1 }
  0         0  
2265 124         189 my $lbs = $left_bond_strength{$tok};
2266 124         169 my $rbs = $right_bond_strength{$tok};
2267 124 100 33     467 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
      66        
2268 22         53 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
2269             ( $lbs, $rbs );
2270             }
2271             }
2272 562         1222 return;
2273 560     560 0 4793 };
2274              
2275             my $break_before = sub {
2276 561     561   2104 my @toks = @_;
2277 561         1896 foreach my $tok (@toks) {
2278 370 50       653 if ( $tok eq ',' ) { $controlled_comma_style = 1 }
  0         0  
2279 370         587 my $lbs = $left_bond_strength{$tok};
2280 370         523 my $rbs = $right_bond_strength{$tok};
2281 370 100 33     1381 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
      66        
2282 361         770 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
2283             ( $lbs, $rbs );
2284             }
2285             }
2286 561         1305 return;
2287 560         3225 };
2288              
2289 560 100       2505 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
2290             $break_before->(@all_operators)
2291 560 100       2203 if ( $rOpts->{'break-before-all-operators'} );
2292              
2293 560         2503 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
2294 560         2404 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
2295              
2296             # make note if breaks are before certain key types
2297 560         7529 %want_break_before = ();
2298 560         2232 foreach my $tok ( @all_operators, ',' ) {
2299             $want_break_before{$tok} =
2300 24080         55505 $left_bond_strength{$tok} < $right_bond_strength{$tok};
2301             }
2302              
2303             # Coordinate ?/: breaks, which must be similar
2304             # The small strength 0.01 which is added is 1% of the strength of one
2305             # indentation level and seems to work okay.
2306 560 100       3748 if ( !$want_break_before{':'} ) {
2307 2         7 $want_break_before{'?'} = $want_break_before{':'};
2308 2         8 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
2309 2         6 $left_bond_strength{'?'} = NO_BREAK;
2310             }
2311              
2312             # Only make a hash entry for the next parameters if values are defined.
2313             # That allows a quick check to be made later.
2314 560         2008 %break_before_container_types = ();
2315 560         1822 for ( $rOpts->{'break-before-hash-brace'} ) {
2316 560 100 66     2825 $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
2317             }
2318 560         1951 for ( $rOpts->{'break-before-square-bracket'} ) {
2319 560 50 33     2557 $break_before_container_types{'['} = $_ if $_ && $_ > 0;
2320             }
2321 560         1718 for ( $rOpts->{'break-before-paren'} ) {
2322 560 100 66     2569 $break_before_container_types{'('} = $_ if $_ && $_ > 0;
2323             }
2324 560         7745 return;
2325             } ## end sub initialize_token_break_preferences
2326              
2327 39     39   360 use constant DEBUG_KB => 0;
  39         104  
  39         58137  
2328              
2329             sub initialize_keep_old_breakpoints {
2330 1120     1120 0 3575 my ( $str, $short_name, $rkeep_break_hash ) = @_;
2331 1120 100       3175 return unless $str;
2332              
2333 2         4 my %flags = ();
2334 2         7 my @list = split_words($str);
2335 2         6 if ( DEBUG_KB && @list ) {
2336             local $LIST_SEPARATOR = SPACE;
2337             print <<EOM;
2338             DEBUG_KB entering for '$short_name' with str=$str\n";
2339             list is: @list;
2340             EOM
2341             }
2342              
2343             # Ignore kbb='(' and '[' and '{': can cause unstable math formatting
2344             # (issues b1346, b1347, b1348) and likewise ignore kba=')' and ']' and '}'
2345             # Also always ignore ? and : (b1440 and b1433-b1439)
2346 2 100       9 if ( $short_name eq 'kbb' ) {
    50          
2347 1         3 @list = grep { !m/[\(\[\{\?\:]/ } @list;
  2         9  
2348             }
2349             elsif ( $short_name eq 'kba' ) {
2350 1         3 @list = grep { !m/[\)\]\}\?\:]/ } @list;
  1         6  
2351             }
2352             else {
2353 0         0 Fault(<<EOM);
2354             Bad call arg - received short name '$short_name' but expecting 'kbb' or 'kba'
2355             EOM
2356             }
2357              
2358             # pull out any any leading container code, like f( or *{
2359             # For example: 'f(' becomes flags hash entry '(' => 'f'
2360 2         6 foreach my $item (@list) {
2361 3 50       12 if ( $item =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) {
2362 0         0 $item = $2;
2363 0         0 $flags{$2} = $1;
2364             }
2365             }
2366              
2367 2         4 my @unknown_types;
2368 2         6 foreach my $type (@list) {
2369 3 50       12 if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) {
2370 0         0 push @unknown_types, $type;
2371             }
2372             }
2373              
2374 2 50       6 if (@unknown_types) {
2375 0         0 my $num = @unknown_types;
2376 0         0 local $LIST_SEPARATOR = SPACE;
2377 0         0 Warn(<<EOM);
2378             $num unrecognized token types were input with --$short_name :
2379             @unknown_types
2380             EOM
2381             }
2382              
2383 2         6 @{$rkeep_break_hash}{@list} = (1) x scalar(@list);
  2         6  
2384              
2385 2         7 foreach my $key ( keys %flags ) {
2386 0         0 my $flag = $flags{$key};
2387              
2388 0 0 0     0 if ( length($flag) != 1 ) {
    0 0        
    0 0        
      0        
2389 0         0 Warn(<<EOM);
2390             Multiple entries given for '$key' in '$short_name'
2391             EOM
2392             }
2393             elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) {
2394 0         0 Warn(<<EOM);
2395             Unknown flag '$flag' given for '$key' in '$short_name'
2396             EOM
2397             }
2398             elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) {
2399 0         0 Warn(<<EOM);
2400             Unknown flag '$flag' given for '$key' in '$short_name'
2401             EOM
2402             }
2403             else {
2404             ## ok - no error seen
2405             }
2406              
2407 0         0 $rkeep_break_hash->{$key} = $flag;
2408             }
2409              
2410 2         5 if ( DEBUG_KB && @list ) {
2411             my @tmp = %flags;
2412             local $LIST_SEPARATOR = SPACE;
2413             print <<EOM;
2414              
2415             DEBUG_KB -$short_name flag: $str
2416             final keys: @list
2417             special flags: @tmp
2418             EOM
2419              
2420             }
2421              
2422 2         5 return;
2423              
2424             } ## end sub initialize_keep_old_breakpoints
2425              
2426             sub initialize_global_option_vars {
2427              
2428             #------------------------------------------------------------
2429             # Make global vars for frequently used options for efficiency
2430             #------------------------------------------------------------
2431              
2432 560     560 0 1750 $rOpts_add_newlines = $rOpts->{'add-newlines'};
2433 560         1456 $rOpts_add_trailing_commas = $rOpts->{'add-trailing-commas'};
2434 560         1406 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
2435             $rOpts_blank_lines_after_opening_block =
2436 560         1370 $rOpts->{'blank-lines-after-opening-block'};
2437 560         1558 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
2438             $rOpts_block_brace_vertical_tightness =
2439 560         1418 $rOpts->{'block-brace-vertical-tightness'};
2440             $rOpts_brace_follower_vertical_tightness =
2441 560         1558 $rOpts->{'brace-follower-vertical-tightness'};
2442 560         1547 $rOpts_break_after_labels = $rOpts->{'break-after-labels'};
2443             $rOpts_break_at_old_attribute_breakpoints =
2444 560         1452 $rOpts->{'break-at-old-attribute-breakpoints'};
2445             $rOpts_break_at_old_comma_breakpoints =
2446 560         1443 $rOpts->{'break-at-old-comma-breakpoints'};
2447             $rOpts_break_at_old_keyword_breakpoints =
2448 560         1564 $rOpts->{'break-at-old-keyword-breakpoints'};
2449             $rOpts_break_at_old_logical_breakpoints =
2450 560         1488 $rOpts->{'break-at-old-logical-breakpoints'};
2451             $rOpts_break_at_old_semicolon_breakpoints =
2452 560         1436 $rOpts->{'break-at-old-semicolon-breakpoints'};
2453             $rOpts_break_at_old_ternary_breakpoints =
2454 560         1573 $rOpts->{'break-at-old-ternary-breakpoints'};
2455 560         1568 $rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'};
2456 560         1388 $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
2457             $rOpts_closing_side_comment_else_flag =
2458 560         1553 $rOpts->{'closing-side-comment-else-flag'};
2459             $rOpts_closing_side_comment_maximum_text =
2460 560         1404 $rOpts->{'closing-side-comment-maximum-text'};
2461 560         1439 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
2462 560         1341 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
2463 560         1320 $rOpts_cuddled_paren_brace = $rOpts->{'cuddled-paren-brace'};
2464             $rOpts_delete_closing_side_comments =
2465 560         1386 $rOpts->{'delete-closing-side-comments'};
2466 560         1341 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
2467             $rOpts_extended_continuation_indentation =
2468 560         1283 $rOpts->{'extended-continuation-indentation'};
2469 560         1202 $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
2470 560         1317 $rOpts_delete_trailing_commas = $rOpts->{'delete-trailing-commas'};
2471             $rOpts_delete_weld_interfering_commas =
2472 560         1237 $rOpts->{'delete-weld-interfering-commas'};
2473 560         1397 $rOpts_format_skipping = $rOpts->{'format-skipping'};
2474 560         1438 $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
2475             $rOpts_function_paren_vertical_alignment =
2476 560         1233 $rOpts->{'function-paren-vertical-alignment'};
2477 560         1415 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
2478 560         1318 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
2479             $rOpts_ignore_side_comment_lengths =
2480 560         1402 $rOpts->{'ignore-side-comment-lengths'};
2481 560         1419 $rOpts_ignore_perlcritic_comments = $rOpts->{'ignore-perlcritic-comments'};
2482 560         1338 $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
2483 560         1238 $rOpts_indent_columns = $rOpts->{'indent-columns'};
2484 560         1271 $rOpts_indent_only = $rOpts->{'indent-only'};
2485 560         1196 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
2486 560         1350 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
2487 560         1316 $rOpts_extended_block_tightness = $rOpts->{'extended-block-tightness'};
2488             $rOpts_extended_line_up_parentheses =
2489 560         1454 $rOpts->{'extended-line-up-parentheses'};
2490 560         1327 $rOpts_logical_padding = $rOpts->{'logical-padding'};
2491             $rOpts_maximum_consecutive_blank_lines =
2492 560         1365 $rOpts->{'maximum-consecutive-blank-lines'};
2493 560         1364 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
2494 560         1268 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
2495 560         1200 $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
2496             $rOpts_opening_brace_always_on_right =
2497 560         1343 $rOpts->{'opening-brace-always-on-right'};
2498 560         1293 $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'};
2499 560         1344 $rOpts_outdent_labels = $rOpts->{'outdent-labels'};
2500 560         1274 $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
2501 560         1342 $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'};
2502             $rOpts_outdent_static_block_comments =
2503 560         1271 $rOpts->{'outdent-static-block-comments'};
2504 560         1401 $rOpts_recombine = $rOpts->{'recombine'};
2505             $rOpts_short_concatenation_item_length =
2506 560         1233 $rOpts->{'short-concatenation-item-length'};
2507 560         1313 $rOpts_space_prototype_paren = $rOpts->{'space-prototype-paren'};
2508 560         1228 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
2509 560         1358 $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
2510 560         1221 $rOpts_add_missing_else = $rOpts->{'add-missing-else'};
2511 560         1325 $rOpts_warn_missing_else = $rOpts->{'warn-missing-else'};
2512 560         1242 $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
2513 560         1219 $rOpts_tee_pod = $rOpts->{'tee-pod'};
2514 560         1185 $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
2515 560         1221 $rOpts_valign_code = $rOpts->{'valign-code'};
2516 560         1319 $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
2517 560         1303 $rOpts_valign_if_unless = $rOpts->{'valign-if-unless'};
2518             $rOpts_variable_maximum_line_length =
2519 560         1411 $rOpts->{'variable-maximum-line-length'};
2520              
2521             # Note that both opening and closing tokens can access the opening
2522             # and closing flags of their container types.
2523             %opening_vertical_tightness = (
2524             '(' => $rOpts->{'paren-vertical-tightness'},
2525             '{' => $rOpts->{'brace-vertical-tightness'},
2526             '[' => $rOpts->{'square-bracket-vertical-tightness'},
2527             ')' => $rOpts->{'paren-vertical-tightness'},
2528             '}' => $rOpts->{'brace-vertical-tightness'},
2529 560         4226 ']' => $rOpts->{'square-bracket-vertical-tightness'},
2530             );
2531              
2532             %closing_vertical_tightness = (
2533             '(' => $rOpts->{'paren-vertical-tightness-closing'},
2534             '{' => $rOpts->{'brace-vertical-tightness-closing'},
2535             '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
2536             ')' => $rOpts->{'paren-vertical-tightness-closing'},
2537             '}' => $rOpts->{'brace-vertical-tightness-closing'},
2538 560         3910 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
2539             );
2540              
2541             # assume flag for '>' same as ')' for closing qw quotes
2542             %closing_token_indentation = (
2543             ')' => $rOpts->{'closing-paren-indentation'},
2544             '}' => $rOpts->{'closing-brace-indentation'},
2545             ']' => $rOpts->{'closing-square-bracket-indentation'},
2546 560         3163 '>' => $rOpts->{'closing-paren-indentation'},
2547             );
2548              
2549             # flag indicating if any closing tokens are indented
2550             $some_closing_token_indentation =
2551             $rOpts->{'closing-paren-indentation'}
2552             || $rOpts->{'closing-brace-indentation'}
2553             || $rOpts->{'closing-square-bracket-indentation'}
2554 560   66     5979 || $rOpts->{'indent-closing-brace'};
2555              
2556             %opening_token_right = (
2557             '(' => $rOpts->{'opening-paren-right'},
2558             '{' => $rOpts->{'opening-hash-brace-right'},
2559 560         2890 '[' => $rOpts->{'opening-square-bracket-right'},
2560             );
2561              
2562             %stack_opening_token = (
2563             '(' => $rOpts->{'stack-opening-paren'},
2564             '{' => $rOpts->{'stack-opening-hash-brace'},
2565 560         2510 '[' => $rOpts->{'stack-opening-square-bracket'},
2566             );
2567              
2568             %stack_closing_token = (
2569             ')' => $rOpts->{'stack-closing-paren'},
2570             '}' => $rOpts->{'stack-closing-hash-brace'},
2571 560         2263 ']' => $rOpts->{'stack-closing-square-bracket'},
2572             );
2573 560         1192 return;
2574             } ## end sub initialize_global_option_vars
2575              
2576             sub initialize_line_length_vars {
2577              
2578             # Create a table of maximum line length vs level for later efficient use.
2579             # We will make the tables very long to be sure it will not be exceeded.
2580             # But we have to choose a fixed length. A check will be made at the start
2581             # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
2582             # my standard test problems have indentation levels of about 150, so this
2583             # should be fairly large. If the choice of a maximum level ever becomes
2584             # an issue then these table values could be returned in a sub with a simple
2585             # memoization scheme.
2586              
2587             # Also create a table of the maximum spaces available for text due to the
2588             # level only. If a line has continuation indentation, then that space must
2589             # be subtracted from the table value. This table is used for preliminary
2590             # estimates in welding, extended_ci, BBX, and marking short blocks.
2591 39     39   357 use constant LEVEL_TABLE_MAX => 1000;
  39         89  
  39         68674  
2592              
2593             # The basic scheme:
2594 560     560 0 1989 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
2595 560560         697842 my $indent = $level * $rOpts_indent_columns;
2596 560560         768551 $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
2597 560560         816796 $maximum_text_length_at_level[$level] =
2598             $rOpts_maximum_line_length - $indent;
2599             }
2600              
2601             # Correct the maximum_text_length table if the -wc=n flag is used
2602 560         3365 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
2603 560 100       2482 if ($rOpts_whitespace_cycle) {
2604 2 50       11 if ( $rOpts_whitespace_cycle > 0 ) {
2605 2         10 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
2606 2002         2451 my $level_mod = $level % $rOpts_whitespace_cycle;
2607 2002         2448 my $indent = $level_mod * $rOpts_indent_columns;
2608 2002         2818 $maximum_text_length_at_level[$level] =
2609             $rOpts_maximum_line_length - $indent;
2610             }
2611             }
2612             else {
2613 0         0 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
2614             }
2615             }
2616              
2617             # Correct the tables if the -vmll flag is used. These values override the
2618             # previous values.
2619 560 100       2146 if ($rOpts_variable_maximum_line_length) {
2620 1         6 foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
2621 1001         1426 $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
2622 1001         1422 $maximum_line_length_at_level[$level] =
2623             $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
2624             }
2625             }
2626              
2627             # Define two measures of indentation level, alpha and beta, at which some
2628             # formatting features come under stress and need to start shutting down.
2629             # Some combination of the two will be used to shut down different
2630             # formatting features.
2631             # Put a reasonable upper limit on stress level (say 100) in case the
2632             # whitespace-cycle variable is used.
2633 560         4055 my $stress_level_limit = min( 100, LEVEL_TABLE_MAX );
2634              
2635             # Find stress_level_alpha, targeted at very short maximum line lengths.
2636 560         1521 $stress_level_alpha = $stress_level_limit + 1;
2637 560         2036 foreach my $level_test ( 0 .. $stress_level_limit ) {
2638 10608         14821 my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
2639 10608         14776 my $excess_inside_space =
2640             $max_len -
2641             $rOpts_continuation_indentation -
2642             $rOpts_indent_columns - 8;
2643 10608 100       19533 if ( $excess_inside_space <= 0 ) {
2644 547         2119 $stress_level_alpha = $level_test;
2645 547         1884 last;
2646             }
2647             }
2648              
2649             # Find stress level beta, a stress level targeted at formatting
2650             # at deep levels near the maximum line length. We start increasing
2651             # from zero and stop at the first level which shows no more space.
2652              
2653             # 'const' is a fixed number of spaces for a typical variable.
2654             # Cases b1197-b1204 work ok with const=12 but not with const=8
2655 560         1939 my $const = 16;
2656 560         2584 my $denom = max( 1, $rOpts_indent_columns );
2657 560         1448 $stress_level_beta = 0;
2658 560         1709 foreach my $level ( 0 .. $stress_level_limit ) {
2659 8979         16328 my $remaining_cycles = max(
2660             0,
2661             (
2662             $maximum_text_length_at_level[$level] -
2663             $rOpts_continuation_indentation - $const
2664             ) / $denom
2665             );
2666 8979 100       16839 last if ( $remaining_cycles <= 3 ); # 2 does not work
2667 8432         11897 $stress_level_beta = $level;
2668             }
2669              
2670             # This is a combined level which works well for turning off formatting
2671             # features in most cases:
2672 560         3343 $high_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
2673              
2674 560         1659 return;
2675             } ## end sub initialize_line_length_vars
2676              
2677             sub initialize_trailing_comma_rules {
2678              
2679             # Setup control hash for trailing commas
2680              
2681             # -wtc=s defines desired trailing comma policy:
2682             #
2683             # =" " stable
2684             # [ both -atc and -dtc ignored ]
2685             # =0 : none
2686             # [requires -dtc; -atc ignored]
2687             # =1 or * : all
2688             # [requires -atc; -dtc ignored]
2689             # =m : multiline lists require trailing comma
2690             # if -atc set => will add missing multiline trailing commas
2691             # if -dtc set => will delete trailing single line commas
2692             # =b or 'bare' (multiline) lists require trailing comma
2693             # if -atc set => will add missing bare trailing commas
2694             # if -dtc set => will delete non-bare trailing commas
2695             # =h or 'hash': single column stable bare lists require trailing comma
2696             # if -atc set will add these
2697             # if -dtc set will delete other trailing commas
2698              
2699             #-------------------------------------------------------------------
2700             # This routine must be called after the alpha and beta stress levels
2701             # have been defined in sub 'initialize_line_length_vars'.
2702             #-------------------------------------------------------------------
2703              
2704 560     560 0 1815 %trailing_comma_rules = ();
2705              
2706 560         3271 my $rvalid_flags = [qw(0 1 * m b h i)];
2707              
2708 560         1769 my $option = $rOpts->{'want-trailing-commas'};
2709              
2710 560 100       2055 if ($option) {
2711 6         25 $option =~ s/^\s+//;
2712 6         29 $option =~ s/\s+$//;
2713             }
2714              
2715             # We need to use length() here because '0' is a possible option
2716 560 100 66     2414 if ( defined($option) && length($option) ) {
2717 7         20 my $error_message;
2718             my %rule_hash;
2719 7         15 my @q = @{$rvalid_flags};
  7         31  
2720 7         15 my %is_valid_flag;
2721 7         54 @is_valid_flag{@q} = (1) x scalar(@q);
2722              
2723             # handle single character control, such as -wtc='b'
2724 7 50       39 if ( length($option) == 1 ) {
2725 7         28 foreach (qw< ) ] } >) {
2726 21         70 $rule_hash{$_} = [ $option, EMPTY_STRING ];
2727             }
2728             }
2729              
2730             # handle multi-character control(s), such as -wtc='[m' or -wtc='k(m'
2731             else {
2732 0         0 my @parts = split /\s+/, $option;
2733 0         0 foreach my $part (@parts) {
2734 0 0 0     0 if ( length($part) >= 2 && length($part) <= 3 ) {
2735 0         0 my $val = substr( $part, -1, 1 );
2736 0         0 my $key_o = substr( $part, -2, 1 );
2737 0 0       0 if ( $is_opening_token{$key_o} ) {
2738 0         0 my $paren_flag = EMPTY_STRING;
2739 0 0       0 if ( length($part) == 3 ) {
2740 0         0 $paren_flag = substr( $part, 0, 1 );
2741             }
2742 0         0 my $key = $matching_token{$key_o};
2743 0         0 $rule_hash{$key} = [ $val, $paren_flag ];
2744             }
2745             else {
2746 0         0 $error_message .= "Unrecognized term: '$part'\n";
2747             }
2748             }
2749             else {
2750 0         0 $error_message .= "Unrecognized term: '$part'\n";
2751             }
2752             }
2753             }
2754              
2755             # check for valid control characters
2756 7 50       40 if ( !$error_message ) {
2757 7         35 foreach my $key ( keys %rule_hash ) {
2758 21         39 my $item = $rule_hash{$key};
2759 21         36 my ( $val, $paren_flag ) = @{$item};
  21         49  
2760 21 50 66     89 if ( $val && !$is_valid_flag{$val} ) {
2761 0         0 my $valid_str = join( SPACE, @{$rvalid_flags} );
  0         0  
2762 0         0 $error_message .=
2763             "Unexpected value '$val'; must be one of: $valid_str\n";
2764 0         0 last;
2765             }
2766 21 50       64 if ($paren_flag) {
2767 0 0       0 if ( $paren_flag !~ /^[kKfFwW]$/ ) {
2768 0         0 $error_message .=
2769             "Unexpected paren flag '$paren_flag'; must be one of: k K f F w W\n";
2770 0         0 last;
2771             }
2772 0 0       0 if ( $key ne ')' ) {
2773 0         0 $error_message .=
2774             "paren flag '$paren_flag' is only allowed before a '('\n";
2775 0         0 last;
2776             }
2777             }
2778             }
2779             }
2780              
2781 7 50       96 if ($error_message) {
2782 0         0 Warn(<<EOM);
2783             Error parsing --want-trailing-commas='$option':
2784             $error_message
2785             EOM
2786             }
2787              
2788             # Set the control hash if no errors
2789             else {
2790 7         51 %trailing_comma_rules = %rule_hash;
2791             }
2792             }
2793              
2794             # Both adding and deleting commas can lead to instability in extreme cases
2795 560 100 100     2394 if ( $rOpts_add_trailing_commas && $rOpts_delete_trailing_commas ) {
2796              
2797             # If the possible instability is significant, then we can turn off
2798             # -dtc as a defensive measure to prevent it.
2799              
2800             # We must turn off -dtc for very small values of --whitespace-cycle
2801             # to avoid instability. A minimum value of -wc=3 fixes b1393, but a
2802             # value of 4 is used here for safety. This parameter is seldom used,
2803             # and much larger than this when used, so the cutoff value is not
2804             # critical.
2805 4 50 33     28 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle <= 4 ) {
2806 0         0 $rOpts_delete_trailing_commas = 0;
2807             }
2808             }
2809              
2810 560         1620 return;
2811             } ## end sub initialize_trailing_comma_rules
2812              
2813             sub initialize_whitespace_hashes {
2814              
2815             # This is called once before formatting begins to initialize these global
2816             # hashes, which control the use of whitespace around tokens:
2817             #
2818             # %binary_ws_rules
2819             # %want_left_space
2820             # %want_right_space
2821             # %space_after_keyword
2822             #
2823             # Many token types are identical to the tokens themselves.
2824             # See the tokenizer for a complete list. Here are some special types:
2825             # k = perl keyword
2826             # f = semicolon in for statement
2827             # m = unary minus
2828             # p = unary plus
2829             # Note that :: is excluded since it should be contained in an identifier
2830             # Note that '->' is excluded because it never gets space
2831             # parentheses and brackets are excluded since they are handled specially
2832             # curly braces are included but may be overridden by logic, such as
2833             # newline logic.
2834              
2835             # NEW_TOKENS: create a whitespace rule here. This can be as
2836             # simple as adding your new letter to @spaces_both_sides, for
2837             # example.
2838              
2839             # fix for c250: added space rules new package type 'P' and sub type 'S'
2840 560     560 0 9754 my @spaces_both_sides = qw#
2841             + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
2842             .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
2843             &&= ||= //= <=> A k f w F n C Y U G v P S
2844             #;
2845              
2846 560         2993 my @spaces_left_side = qw<
2847             t ! ~ m p { \ h pp mm Z j
2848             >;
2849 560         1584 push( @spaces_left_side, '#' ); # avoids warning message
2850              
2851 560         2611 my @spaces_right_side = qw<
2852             ; } ) ] R J ++ -- **=
2853             >;
2854 560         1399 push( @spaces_right_side, ',' ); # avoids warning message
2855              
2856 560         13802 %want_left_space = ();
2857 560         7845 %want_right_space = ();
2858 560         9472 %binary_ws_rules = ();
2859              
2860             # Note that we setting defaults here. Later in processing
2861             # the values of %want_left_space and %want_right_space
2862             # may be overridden by any user settings specified by the
2863             # -wls and -wrs parameters. However the binary_whitespace_rules
2864             # are hardwired and have priority.
2865 560         19175 @want_left_space{@spaces_both_sides} =
2866             (1) x scalar(@spaces_both_sides);
2867 560         7774 @want_right_space{@spaces_both_sides} =
2868             (1) x scalar(@spaces_both_sides);
2869 560         6463 @want_left_space{@spaces_left_side} =
2870             (1) x scalar(@spaces_left_side);
2871 560         3307 @want_right_space{@spaces_left_side} =
2872             (-1) x scalar(@spaces_left_side);
2873 560         4862 @want_left_space{@spaces_right_side} =
2874             (-1) x scalar(@spaces_right_side);
2875 560         2790 @want_right_space{@spaces_right_side} =
2876             (1) x scalar(@spaces_right_side);
2877 560         2242 $want_left_space{'->'} = WS_NO;
2878 560         1611 $want_right_space{'->'} = WS_NO;
2879 560         1510 $want_left_space{'**'} = WS_NO;
2880 560         1461 $want_right_space{'**'} = WS_NO;
2881 560         1644 $want_right_space{'CORE::'} = WS_NO;
2882              
2883             # These binary_ws_rules are hardwired and have priority over the above
2884             # settings. It would be nice to allow adjustment by the user,
2885             # but it would be complicated to specify.
2886             #
2887             # hash type information must stay tightly bound
2888             # as in : ${xxxx}
2889 560         2112 $binary_ws_rules{'i'}{'L'} = WS_NO;
2890 560         1749 $binary_ws_rules{'i'}{'{'} = WS_YES;
2891 560         1756 $binary_ws_rules{'k'}{'{'} = WS_YES;
2892 560         1641 $binary_ws_rules{'U'}{'{'} = WS_YES;
2893 560         1628 $binary_ws_rules{'i'}{'['} = WS_NO;
2894 560         1704 $binary_ws_rules{'R'}{'L'} = WS_NO;
2895 560         1503 $binary_ws_rules{'R'}{'{'} = WS_NO;
2896 560         1688 $binary_ws_rules{'t'}{'L'} = WS_NO;
2897 560         1485 $binary_ws_rules{'t'}{'{'} = WS_NO;
2898 560         1533 $binary_ws_rules{'t'}{'='} = WS_OPTIONAL; # for signatures; fixes b1123
2899 560         1633 $binary_ws_rules{'}'}{'L'} = WS_NO;
2900 560         1570 $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
2901 560         1874 $binary_ws_rules{'$'}{'L'} = WS_NO;
2902 560         1532 $binary_ws_rules{'$'}{'{'} = WS_NO;
2903 560         1781 $binary_ws_rules{'@'}{'L'} = WS_NO;
2904 560         1497 $binary_ws_rules{'@'}{'{'} = WS_NO;
2905 560         1619 $binary_ws_rules{'='}{'L'} = WS_YES;
2906 560         1684 $binary_ws_rules{'J'}{'J'} = WS_YES;
2907              
2908             # the following includes ') {'
2909             # as in : if ( xxx ) { yyy }
2910 560         1656 $binary_ws_rules{']'}{'L'} = WS_NO;
2911 560         1475 $binary_ws_rules{']'}{'{'} = WS_NO;
2912 560         1708 $binary_ws_rules{')'}{'{'} = WS_YES;
2913 560         1503 $binary_ws_rules{')'}{'['} = WS_NO;
2914 560         1499 $binary_ws_rules{']'}{'['} = WS_NO;
2915 560         1454 $binary_ws_rules{']'}{'{'} = WS_NO;
2916 560         1447 $binary_ws_rules{'}'}{'['} = WS_NO;
2917 560         1396 $binary_ws_rules{'R'}{'['} = WS_NO;
2918              
2919 560         1380 $binary_ws_rules{']'}{'++'} = WS_NO;
2920 560         1441 $binary_ws_rules{']'}{'--'} = WS_NO;
2921 560         1455 $binary_ws_rules{')'}{'++'} = WS_NO;
2922 560         1446 $binary_ws_rules{')'}{'--'} = WS_NO;
2923              
2924 560         1418 $binary_ws_rules{'R'}{'++'} = WS_NO;
2925 560         1438 $binary_ws_rules{'R'}{'--'} = WS_NO;
2926              
2927 560         1575 $binary_ws_rules{'i'}{'Q'} = WS_YES;
2928 560         1636 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
2929              
2930 560         1479 $binary_ws_rules{'i'}{'('} = WS_NO;
2931              
2932 560         1438 $binary_ws_rules{'w'}{'('} = WS_NO;
2933 560         1440 $binary_ws_rules{'w'}{'{'} = WS_YES;
2934 560         3930 return;
2935              
2936             } ## end sub initialize_whitespace_hashes
2937              
2938             { #<<< begin closure set_whitespace_flags
2939              
2940             my %is_special_ws_type;
2941             my %is_wCUG;
2942             my %is_wi;
2943              
2944             BEGIN {
2945              
2946             # The following hash is used to skip over needless if tests.
2947             # Be sure to update it when adding new checks in its block.
2948 39     39   248 my @q = qw(k w C m - Q);
2949 39         144 push @q, '#';
2950 39         312 @is_special_ws_type{@q} = (1) x scalar(@q);
2951              
2952             # These hashes replace slower regex tests
2953 39         125 @q = qw( w C U G );
2954 39         134 @is_wCUG{@q} = (1) x scalar(@q);
2955              
2956 39         91 @q = qw( w i );
2957 39         1114 @is_wi{@q} = (1) x scalar(@q);
2958             } ## end BEGIN
2959              
2960 39     39   318 use constant DEBUG_WHITE => 0;
  39         107  
  39         124172  
2961              
2962             # Hashes to set spaces around container tokens according to their
2963             # sequence numbers. These are set as keywords are examined.
2964             # They are controlled by the -kpit and -kpitl flags.
2965             my %opening_container_inside_ws;
2966             my %closing_container_inside_ws;
2967              
2968             sub set_whitespace_flags {
2969              
2970             # This routine is called once per file to set whitespace flags for that
2971             # file. This routine examines each pair of nonblank tokens and sets a flag
2972             # indicating if white space is needed.
2973             #
2974             # $rwhitespace_flags->[$j] is a flag indicating whether a white space
2975             # BEFORE token $j is needed, with the following values:
2976             #
2977             # WS_NO = -1 do not want a space BEFORE token $j
2978             # WS_OPTIONAL= 0 optional space or $j is a whitespace
2979             # WS_YES = 1 want a space BEFORE token $j
2980             #
2981              
2982 558     558 0 1336 my $self = shift;
2983              
2984 558         1212 my $j_tight_closing_paren = -1;
2985 558         1464 my $rLL = $self->[_rLL_];
2986 558         1144 my $jmax = @{$rLL} - 1;
  558         1655  
2987              
2988 558         1468 %opening_container_inside_ws = ();
2989 558         1260 %closing_container_inside_ws = ();
2990              
2991 558         1285 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
2992              
2993 558         1810 my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
2994 558         1505 my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
2995 558         1348 my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
2996              
2997 558         1347 my $rwhitespace_flags = [];
2998 558         1331 my $ris_function_call_paren = {};
2999              
3000 558 100       1877 return $rwhitespace_flags if ( $jmax < 0 );
3001              
3002 554         2411 my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
3003              
3004 554         1535 my $last_token = SPACE;
3005 554         1399 my $last_type = 'b';
3006              
3007 554         1241 my $last_token_dbg = SPACE;
3008 554         1309 my $last_type_dbg = 'b';
3009              
3010 554         1132 my $rtokh_last = [ @{ $rLL->[0] } ];
  554         2487  
3011 554         1723 $rtokh_last->[_TOKEN_] = $last_token;
3012 554         1447 $rtokh_last->[_TYPE_] = $last_type;
3013 554         1308 $rtokh_last->[_TYPE_SEQUENCE_] = EMPTY_STRING;
3014 554         1468 $rtokh_last->[_LINE_INDEX_] = 0;
3015              
3016 554         1132 my $rtokh_last_last = $rtokh_last;
3017              
3018             # This will identify braces to be treated as blocks for the -xbt flag
3019 554         1087 my %block_type_for_tightness;
3020              
3021 554         2324 my ( $ws_1, $ws_2, $ws_3, $ws_4 );
3022              
3023             # main loop over all tokens to define the whitespace flags
3024 554         0 my $last_type_is_opening;
3025 554         0 my ( $token, $type );
3026 554         1193 my $j = -1;
3027 554         1075 foreach my $rtokh ( @{$rLL} ) {
  554         1561  
3028              
3029 51322         63400 $j++;
3030              
3031 51322         84482 $type = $rtokh->[_TYPE_];
3032 51322 100       86820 if ( $type eq 'b' ) {
3033 15320         23483 $rwhitespace_flags->[$j] = WS_OPTIONAL;
3034 15320         22784 next;
3035             }
3036              
3037 36002         54686 $token = $rtokh->[_TOKEN_];
3038              
3039 36002         44665 my $ws;
3040              
3041             #---------------------------------------------------------------
3042             # Whitespace Rules Section 1:
3043             # Handle space on the inside of opening braces.
3044             #---------------------------------------------------------------
3045              
3046             # /^[L\{\(\[]$/
3047 36002 100       57843 if ($last_type_is_opening) {
3048              
3049 4385         7416 $last_type_is_opening = 0;
3050              
3051 4385         7649 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
3052 4385         7045 my $block_type = $rblock_type_of_seqno->{$seqno};
3053 4385         6905 my $last_seqno = $rtokh_last->[_TYPE_SEQUENCE_];
3054             my $last_block_type = $rblock_type_of_seqno->{$last_seqno}
3055 4385   100     12621 || $block_type_for_tightness{$last_seqno};
3056              
3057 4385         6200 $j_tight_closing_paren = -1;
3058              
3059             # let us keep empty matched braces together: () {} []
3060             # except for BLOCKS
3061 4385 100       10204 if ( $token eq $matching_token{$last_token} ) {
3062 223 100       741 if ($block_type) {
3063 49         111 $ws = WS_YES;
3064             }
3065             else {
3066 174         401 $ws = WS_NO;
3067             }
3068             }
3069             else {
3070              
3071             # we're considering the right of an opening brace
3072             # tightness = 0 means always pad inside with space
3073             # tightness = 1 means pad inside if "complex"
3074             # tightness = 2 means never pad inside with space
3075              
3076 4162         5703 my $tightness;
3077 4162 100 66     10600 if ( $last_block_type && $last_token eq '{' ) {
3078 955         1753 $tightness = $rOpts_block_brace_tightness;
3079             }
3080 3207         5452 else { $tightness = $tightness{$last_token} }
3081              
3082             #=============================================================
3083             # Patch for test problem <<snippets/fabrice_bug.in>>
3084             # We must always avoid spaces around a bare word beginning
3085             # with ^ as in:
3086             # my $before = ${^PREMATCH};
3087             # Because all of the following cause an error in perl:
3088             # my $before = ${ ^PREMATCH };
3089             # my $before = ${ ^PREMATCH};
3090             # my $before = ${^PREMATCH };
3091             # So if brace tightness flag is -bt=0 we must temporarily reset
3092             # to bt=1. Note that here we must set tightness=1 and not 2 so
3093             # that the closing space is also avoided
3094             # (via the $j_tight_closing_paren flag in coding)
3095 4162 100 100     10869 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
  5         15  
3096              
3097             #=============================================================
3098              
3099 4162 100       9271 if ( $tightness <= 0 ) {
    100          
3100 915         1619 $ws = WS_YES;
3101             }
3102             elsif ( $tightness > 1 ) {
3103 198         352 $ws = WS_NO;
3104             }
3105             else {
3106              
3107             # find the index of the closing token
3108             my $j_closing =
3109 3049         5902 $self->[_K_closing_container_]->{$last_seqno};
3110              
3111             # If the closing token is less than five characters ahead
3112             # we must take a closer look
3113 3049 100 66     13922 if ( defined($j_closing)
      66        
3114             && $j_closing - $j < 5
3115             && $rLL->[$j_closing]->[_TYPE_SEQUENCE_] eq
3116             $last_seqno )
3117             {
3118 1191         4987 $ws =
3119             ws_in_container( $j, $j_closing, $rLL, $type, $token,
3120             $last_token );
3121 1191 100       3038 if ( $ws == WS_NO ) {
3122 999         1794 $j_tight_closing_paren = $j_closing;
3123             }
3124             }
3125             else {
3126 1858         3226 $ws = WS_YES;
3127             }
3128             }
3129             }
3130              
3131             # check for special cases which override the above rules
3132 4385 100 66     9528 if ( %opening_container_inside_ws && $last_seqno ) {
3133 23         31 my $ws_override = $opening_container_inside_ws{$last_seqno};
3134 23 100       47 if ($ws_override) { $ws = $ws_override }
  6         14  
3135             }
3136              
3137 4385         6047 $ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws
3138             if DEBUG_WHITE;
3139              
3140             } ## end setting space flag inside opening tokens
3141              
3142             #---------------------------------------------------------------
3143             # Whitespace Rules Section 2:
3144             # Special checks for certain types ...
3145             #---------------------------------------------------------------
3146             # The hash '%is_special_ws_type' significantly speeds up this routine,
3147             # but be sure to update it if a new check is added.
3148             # Currently has types: qw(k w C m - Q #)
3149 36002 100       84173 if ( $is_special_ws_type{$type} ) {
    100          
    100          
3150              
3151 8350 100 100     25066 if ( $type eq 'k' ) {
    100 66        
    100          
    100          
    50          
3152              
3153             # Keywords 'for', 'foreach' are special cases for -kpit since
3154             # the opening paren does not always immediately follow the
3155             # keyword. So we have to search forward for the paren in this
3156             # case. I have limited the search to 10 tokens ahead, just in
3157             # case somebody has a big file and no opening paren. This
3158             # should be enough for all normal code. Added the level check
3159             # to fix b1236.
3160 2806 50 100     8476 if ( $is_for_foreach{$token}
      66        
      66        
3161             && %keyword_paren_inner_tightness
3162             && defined( $keyword_paren_inner_tightness{$token} )
3163             && $j < $jmax )
3164             {
3165 1         2 my $level = $rLL->[$j]->[_LEVEL_];
3166 1         3 my $jp = $j;
3167             ## NOTE: we might use the KNEXT variable to avoid this loop
3168             ## but profiling shows that little would be saved
3169 1         4 foreach my $inc ( 1 .. 9 ) {
3170 3         4 $jp++;
3171 3 50       8 last if ( $jp > $jmax );
3172 3 50       9 last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236
3173 3 100       9 next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
3174 1         4 my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
3175 1         5 set_container_ws_by_keyword( $token, $seqno_p );
3176 1         3 last;
3177             }
3178             }
3179             }
3180              
3181             # handle a comment
3182             elsif ( $type eq '#' ) {
3183              
3184             # newline before block comment ($j==0), and
3185             # space before side comment ($j>0), so ..
3186 1091         1879 $ws = WS_YES;
3187              
3188             #---------------------------------
3189             # Nothing more to do for a comment
3190             #---------------------------------
3191 1091         2144 $rwhitespace_flags->[$j] = $ws;
3192 1091         2395 next;
3193             }
3194              
3195             # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
3196             # allow a space between a backslash and single or double quote
3197             # to avoid fooling html formatters
3198             elsif ( $type eq 'Q' ) {
3199 2489 100 66     6460 if ( $last_type eq '\\' && $token =~ /^[\"\']/ ) {
3200 11 50       43 $ws =
    100          
    100          
3201             !$rOpts_space_backslash_quote ? WS_NO
3202             : $rOpts_space_backslash_quote == 1 ? WS_OPTIONAL
3203             : $rOpts_space_backslash_quote == 2 ? WS_YES
3204             : WS_YES;
3205             }
3206             }
3207              
3208             # retain any space between '-' and bare word
3209             elsif ( $type eq 'w' || $type eq 'C' ) {
3210 1575 100       3598 $ws = WS_OPTIONAL if $last_type eq '-';
3211             }
3212              
3213             # retain any space between '-' and bare word; for example
3214             # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
3215             # $myhash{USER-NAME}='steve';
3216             elsif ( $type eq 'm' || $type eq '-' ) {
3217 389 100       920 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
3218             }
3219              
3220             else {
3221             # A type $type was entered in %is_special_ws_type but
3222             # there is no code block to handle it. Either remove it
3223             # from the hash or add a code block to handle it.
3224 0         0 DEVEL_MODE && Fault("no code to handle type $type\n");
3225             }
3226             } ## end elsif ( $is_special_ws_type{$type} ...
3227              
3228             #---------------------------------------------------------------
3229             # Whitespace Rules Section 3:
3230             # Handle space on inside of closing brace pairs.
3231             #---------------------------------------------------------------
3232              
3233             # /[\}\)\]R]/
3234             elsif ( $is_closing_type{$type} ) {
3235              
3236 4385         8960 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
3237 4385 100       9120 if ( $j == $j_tight_closing_paren ) {
3238              
3239 999         1769 $j_tight_closing_paren = -1;
3240 999         1650 $ws = WS_NO;
3241             }
3242             else {
3243              
3244 3386 100       7202 if ( !defined($ws) ) {
3245              
3246 3163         4282 my $tightness;
3247             my $block_type = $rblock_type_of_seqno->{$seqno}
3248 3163   100     9407 || $block_type_for_tightness{$seqno};
3249              
3250 3163 100 66     9015 if ( $block_type && $token eq '}' ) {
3251 953         1738 $tightness = $rOpts_block_brace_tightness;
3252             }
3253 2210         3931 else { $tightness = $tightness{$token} }
3254              
3255 3163 100       6085 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
3256             }
3257             }
3258              
3259             # check for special cases which override the above rules
3260 4385 100 66     9355 if ( %closing_container_inside_ws && $seqno ) {
3261 23         36 my $ws_override = $closing_container_inside_ws{$seqno};
3262 23 100       45 if ($ws_override) { $ws = $ws_override }
  6         10  
3263             }
3264              
3265 4385         5835 $ws_4 = $ws_3 = $ws_2 = $ws
3266             if DEBUG_WHITE;
3267             } ## end setting space flag inside closing tokens
3268              
3269             #---------------------------------------------------------------
3270             # Whitespace Rules Section 4:
3271             #---------------------------------------------------------------
3272             # /^[L\{\(\[]$/
3273             elsif ( $is_opening_type{$type} ) {
3274              
3275 4385         7361 $last_type_is_opening = 1;
3276              
3277 4385 100 100     17157 if ( $token eq '(' ) {
    100 100        
3278              
3279 2122         4342 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
3280              
3281             # This will have to be tweaked as tokenization changes.
3282             # We usually want a space at '} (', for example:
3283             # <<snippets/space1.in>>
3284             # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
3285             #
3286             # But not others:
3287             # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
3288             # At present, the above & block is marked as type L/R so this
3289             # case won't go through here.
3290 2122 100 100     17871 if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
  8 100 66     23  
    100 66        
    100 66        
    50 66        
      33        
3291              
3292             # NOTE: some older versions of Perl had occasional problems if
3293             # spaces are introduced between keywords or functions and
3294             # opening parens. So the default is not to do this except is
3295             # certain cases. The current Perl seems to tolerate spaces.
3296              
3297             # Space between keyword and '('
3298             elsif ( $last_type eq 'k' ) {
3299             $ws = WS_NO
3300             unless ( $rOpts_space_keyword_paren
3301 635 100 100     3180 || $space_after_keyword{$last_token} );
3302              
3303             # Set inside space flag if requested
3304 635         1830 set_container_ws_by_keyword( $last_token, $seqno );
3305             }
3306              
3307             # Space between function and '('
3308             # -----------------------------------------------------
3309             # 'w' and 'i' checks for something like:
3310             # myfun( &myfun( ->myfun(
3311             # -----------------------------------------------------
3312              
3313             # Note that at this point an identifier may still have a
3314             # leading arrow, but the arrow will be split off during token
3315             # respacing. After that, the token may become a bare word
3316             # without leading arrow. The point is, it is best to mark
3317             # function call parens right here before that happens.
3318             # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
3319             # NOTE: this would be the place to allow spaces between
3320             # repeated parens, like () () (), as in case c017, but I
3321             # decided that would not be a good idea.
3322              
3323             # Updated to allow detached '->' from tokenizer (issue c140)
3324             elsif (
3325              
3326             # /^[wCUG]$/
3327             $is_wCUG{$last_type}
3328              
3329             || (
3330              
3331             # /^[wi]$/
3332             $is_wi{$last_type}
3333              
3334             && (
3335              
3336             # with prefix '->' or '&'
3337             $last_token =~ /^([\&]|->)/
3338              
3339             # or preceding token '->' (see b1337; c140)
3340             || $rtokh_last_last->[_TYPE_] eq '->'
3341              
3342             # or preceding sub call operator token '&'
3343             || ( $rtokh_last_last->[_TYPE_] eq 't'
3344             && $rtokh_last_last->[_TOKEN_] =~ /^\&\s*$/ )
3345             )
3346             )
3347             )
3348             {
3349 848 100       2170 $ws =
3350             $rOpts_space_function_paren
3351             ? $self->ws_space_function_paren( $j, $rtokh_last_last )
3352             : WS_NO;
3353              
3354 848         2664 set_container_ws_by_keyword( $last_token, $seqno );
3355 848         2294 $ris_function_call_paren->{$seqno} = 1;
3356             }
3357              
3358             # space between something like $i and ( in 'snippets/space2.in'
3359             # for $i ( 0 .. 20 ) {
3360             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
3361 37         108 $ws = WS_YES;
3362             }
3363              
3364             # allow constant function followed by '()' to retain no space
3365             elsif ($last_type eq 'C'
3366             && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
3367             {
3368 0         0 $ws = WS_NO;
3369             }
3370             else {
3371             # ok - opening paren not covered by a special rule
3372             }
3373             }
3374              
3375             # patch for SWITCH/CASE: make space at ']{' optional
3376             # since the '{' might begin a case or when block
3377             elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
3378 2         7 $ws = WS_OPTIONAL;
3379             }
3380             else {
3381             # ok - opening type not covered by a special rule
3382             }
3383              
3384             # keep space between 'sub' and '{' for anonymous sub definition,
3385             # be sure type = 'k' (added for c140)
3386 4385 100       9066 if ( $type eq '{' ) {
3387 3711 100 66     8826 if ( $last_token eq 'sub' && $last_type eq 'k' ) {
3388 161         310 $ws = WS_YES;
3389             }
3390              
3391             # this is needed to avoid no space in '){'
3392 3711 100 100     8484 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
  262         468  
3393              
3394             # avoid any space before the brace or bracket in something like
3395             # @opts{'a','b',...}
3396 3711 50 66     9241 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
3397 0         0 $ws = WS_NO;
3398             }
3399             }
3400              
3401             # The --extended-block-tightness option allows certain braces
3402             # to be treated as blocks just for setting inner whitespace
3403 4385 100 100     9021 if ( $rOpts_extended_block_tightness && $token eq '{' ) {
3404 60         100 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
3405 60 100 100     216 if ( !$rblock_type_of_seqno->{$seqno}
3406             && $extended_block_tightness_list{$last_token} )
3407             {
3408              
3409             # Ok - make this brace a block type for tightness only
3410 32         79 $block_type_for_tightness{$seqno} = $last_token;
3411             }
3412             }
3413             } ## end elsif ( $is_opening_type{$type} ) {
3414              
3415             else {
3416             # ok: $type not opening, closing, or covered by a special rule
3417             }
3418              
3419             # always preserve whatever space was used after a possible
3420             # filehandle (except _) or here doc operator
3421 34911 100 100     112974 if (
      66        
3422             (
3423             ( $last_type eq 'Z' && $last_token ne '_' )
3424             || $last_type eq 'h'
3425             )
3426             && $type ne '#' # no longer required due to early exit for '#' above
3427             )
3428             {
3429             # no space for '$ {' even if '$' is marked as type 'Z', issue c221
3430 48 50 66     479 if ( $last_type eq 'Z' && $last_token eq '$' && $token eq '{' ) {
      33        
3431 0         0 $ws = WS_NO;
3432             }
3433             else {
3434 48         120 $ws = WS_OPTIONAL;
3435             }
3436             }
3437              
3438 34911         42604 $ws_4 = $ws_3 = $ws
3439             if DEBUG_WHITE;
3440              
3441 34911 100       58019 if ( !defined($ws) ) {
3442              
3443             #---------------------------------------------------------------
3444             # Whitespace Rules Section 4:
3445             # Use the binary rule table.
3446             #---------------------------------------------------------------
3447 24773 100       48822 if ( defined( $binary_ws_rules{$last_type}{$type} ) ) {
3448 1103         2236 $ws = $binary_ws_rules{$last_type}{$type};
3449 1103         1793 $ws_4 = $ws if DEBUG_WHITE;
3450             }
3451              
3452             #---------------------------------------------------------------
3453             # Whitespace Rules Section 5:
3454             # Apply default rules not covered above.
3455             #---------------------------------------------------------------
3456              
3457             # If we fall through to here, look at the pre-defined hash tables
3458             # for the two tokens, and:
3459             # if (they are equal) use the common value
3460             # if (either is zero or undef) use the other
3461             # if (either is -1) use it
3462             # That is,
3463             # left vs right
3464             # 1 vs 1 --> 1
3465             # 0 vs 0 --> 0
3466             # -1 vs -1 --> -1
3467             #
3468             # 0 vs -1 --> -1
3469             # 0 vs 1 --> 1
3470             # 1 vs 0 --> 1
3471             # -1 vs 0 --> -1
3472             #
3473             # -1 vs 1 --> -1
3474             # 1 vs -1 --> -1
3475             else {
3476 23670         36800 my $wl = $want_left_space{$type};
3477 23670         34205 my $wr = $want_right_space{$last_type};
3478 23670 100       45339 if ( !defined($wl) ) {
    100          
3479 6269 100       11883 $ws = defined($wr) ? $wr : 0;
3480             }
3481             elsif ( !defined($wr) ) {
3482 5709         8758 $ws = $wl;
3483             }
3484             else {
3485 11692 100 66     34868 $ws =
3486             ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
3487             }
3488             }
3489             }
3490              
3491             # Treat newline as a whitespace. Otherwise, we might combine
3492             # 'Send' and '-recipients' here according to the above rules:
3493             # <<snippets/space3.in>>
3494             # my $msg = new Fax::Send
3495             # -recipients => $to,
3496             # -data => $data;
3497 34911 100 100     63150 if ( !$ws
3498             && $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
3499             {
3500 258         480 $ws = WS_YES;
3501             }
3502              
3503 34911         53064 $rwhitespace_flags->[$j] = $ws;
3504              
3505             # remember non-blank, non-comment tokens
3506 34911         46763 $last_token = $token;
3507 34911         43727 $last_type = $type;
3508 34911         43522 $rtokh_last_last = $rtokh_last;
3509 34911         42727 $rtokh_last = $rtokh;
3510              
3511             # Programming note: for some reason, it is very much faster to 'next'
3512             # out of this loop here than to put the DEBUG coding in a block.
3513             # But note that the debug code must then update its own copies
3514             # of $last_token and $last_type.
3515 34911         52560 next if ( !DEBUG_WHITE );
3516              
3517 0         0 my $str = substr( $last_token_dbg, 0, 15 );
3518 0         0 $str .= SPACE x ( 16 - length($str) );
3519 0 0       0 if ( !defined($ws_1) ) { $ws_1 = "*" }
  0         0  
3520 0 0       0 if ( !defined($ws_2) ) { $ws_2 = "*" }
  0         0  
3521 0 0       0 if ( !defined($ws_3) ) { $ws_3 = "*" }
  0         0  
3522 0 0       0 if ( !defined($ws_4) ) { $ws_4 = "*" }
  0         0  
3523 0         0 print {*STDOUT}
  0         0  
3524             "NEW WHITE: i=$j $str $last_type_dbg $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
3525              
3526             # reset for next pass
3527 0         0 $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
3528              
3529 0         0 $last_token_dbg = $token;
3530 0         0 $last_type_dbg = $type;
3531              
3532             } ## end main loop
3533              
3534 554 100       4643 if ( $rOpts->{'tight-secret-operators'} ) {
3535 1         6 new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
3536             }
3537 554         1956 $self->[_ris_function_call_paren_] = $ris_function_call_paren;
3538 554         5924 return $rwhitespace_flags;
3539              
3540             } ## end sub set_whitespace_flags
3541              
3542             sub set_container_ws_by_keyword {
3543              
3544 1484     1484 0 3692 my ( $word, $sequence_number ) = @_;
3545 1484 100       3722 return unless (%keyword_paren_inner_tightness);
3546              
3547             # We just saw a keyword (or other function name) followed by an opening
3548             # paren. Now check to see if the following paren should have special
3549             # treatment for its inside space. If so we set a hash value using the
3550             # sequence number as key.
3551 12 50 33     47 if ( $word && $sequence_number ) {
3552 12         27 my $tightness = $keyword_paren_inner_tightness{$word};
3553 12 100 66     43 if ( defined($tightness) && $tightness != 1 ) {
3554 6 50       18 my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
3555 6         12 $opening_container_inside_ws{$sequence_number} = $ws_flag;
3556 6         13 $closing_container_inside_ws{$sequence_number} = $ws_flag;
3557             }
3558             }
3559 12         18 return;
3560             } ## end sub set_container_ws_by_keyword
3561              
3562             sub ws_in_container {
3563              
3564 1191     1191 0 3277 my ( $j, $j_closing, $rLL, $type, $token, $last_token ) = @_;
3565              
3566             # Given:
3567             # $j = index of token following an opening container token
3568             # $type, $token = the type and token at index $j
3569             # $j_closing = closing token of the container
3570             # $last_token = the opening token of the container
3571             # Return:
3572             # WS_NO if there is just one token in the container (with exceptions)
3573             # WS_YES otherwise
3574              
3575             #------------------------------------
3576             # Look forward for the closing token;
3577             #------------------------------------
3578 1191 50       3013 if ( $j + 1 > $j_closing ) { return WS_NO }
  0         0  
3579              
3580             # Patch to count '-foo' as single token so that
3581             # each of $a{-foo} and $a{foo} and $a{'foo'} do
3582             # not get spaces with default formatting.
3583 1191         1865 my $j_here = $j;
3584 1191 50 66     3493 ++$j_here
      66        
3585             if ( $token eq '-'
3586             && $last_token eq '{'
3587             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
3588              
3589             # Patch to count a sign separated from a number as a single token, as
3590             # in the following line. Otherwise, it takes two steps to converge:
3591             # deg2rad(- 0.5)
3592 1191 0 66     5068 if ( ( $type eq 'm' || $type eq 'p' )
      66        
      66        
      33        
      33        
3593             && $j < $j_closing + 1
3594             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
3595             && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
3596             && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
3597             {
3598 0         0 $j_here = $j + 2;
3599             }
3600              
3601             # $j_next is where a closing token should be if the container has
3602             # just a "single" token
3603 1191 50       2887 if ( $j_here + 1 > $j_closing ) { return WS_NO }
  0         0  
3604 1191 100       3438 my $j_next =
3605             ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
3606             ? $j_here + 2
3607             : $j_here + 1;
3608              
3609             #-----------------------------------------------------------------
3610             # Now decide: if we get to the closing token we will keep it tight
3611             #-----------------------------------------------------------------
3612 1191 100 100     4512 if (
3613             $j_next == $j_closing
3614              
3615             # OLD PROBLEM: but watch out for this: [ [ ] (misc.t)
3616             # No longer necessary because of the previous check on sequence numbers
3617             ##&& $last_token ne $token
3618              
3619             # double diamond is usually spaced
3620             && $token ne '<<>>'
3621              
3622             )
3623             {
3624 999         2350 return WS_NO;
3625             }
3626              
3627 192         545 return WS_YES;
3628              
3629             } ## end sub ws_in_container
3630              
3631             sub ws_space_function_paren {
3632              
3633 32     32 0 73 my ( $self, $j, $rtokh_last_last ) = @_;
3634              
3635             # Called if --space-function-paren is set to see if it might cause
3636             # a problem. The manual warns the user about potential problems with
3637             # this flag. Here we just try to catch one common problem.
3638              
3639             # Given:
3640             # $j = index of '(' after function name
3641             # Return:
3642             # WS_NO if no space
3643             # WS_YES otherwise
3644              
3645             # This was added to fix for issue c166. Ignore -sfp at a possible indirect
3646             # object location. For example, do not convert this:
3647             # print header() ...
3648             # to this:
3649             # print header () ...
3650             # because in this latter form, header may be taken to be a file handle
3651             # instead of a function call.
3652              
3653             # Start with the normal value for -sfp:
3654 32         49 my $ws = WS_YES;
3655              
3656             # now check to be sure we don't cause a problem:
3657 32         59 my $type_ll = $rtokh_last_last->[_TYPE_];
3658 32         90 my $tok_ll = $rtokh_last_last->[_TOKEN_];
3659              
3660             # NOTE: this is just a minimal check. For example, we might also check
3661             # for something like this:
3662             # print ( header ( ..
3663 32 50 66     104 if ( $type_ll eq 'k' && $is_indirect_object_taker{$tok_ll} ) {
3664 0         0 $ws = WS_NO;
3665             }
3666              
3667 32         68 return $ws;
3668              
3669             } ## end sub ws_space_function_paren
3670              
3671             } ## end closure set_whitespace_flags
3672              
3673             sub dump_want_left_space {
3674 0     0 0 0 my $fh = shift;
3675 0         0 local $LIST_SEPARATOR = "\n";
3676 0         0 $fh->print(<<EOM);
3677             These values are the main control of whitespace to the left of a token type;
3678             They may be altered with the -wls parameter.
3679             For a list of token types, use perltidy --dump-token-types (-dtt)
3680             1 means the token wants a space to its left
3681             -1 means the token does not want a space to its left
3682             ------------------------------------------------------------------------
3683             EOM
3684 0         0 foreach my $key ( sort keys %want_left_space ) {
3685 0         0 $fh->print("$key\t$want_left_space{$key}\n");
3686             }
3687 0         0 return;
3688             } ## end sub dump_want_left_space
3689              
3690             sub dump_want_right_space {
3691 0     0 0 0 my $fh = shift;
3692 0         0 local $LIST_SEPARATOR = "\n";
3693 0         0 $fh->print(<<EOM);
3694             These values are the main control of whitespace to the right of a token type;
3695             They may be altered with the -wrs parameter.
3696             For a list of token types, use perltidy --dump-token-types (-dtt)
3697             1 means the token wants a space to its right
3698             -1 means the token does not want a space to its right
3699             ------------------------------------------------------------------------
3700             EOM
3701 0         0 foreach my $key ( sort keys %want_right_space ) {
3702 0         0 $fh->print("$key\t$want_right_space{$key}\n");
3703             }
3704 0         0 return;
3705             } ## end sub dump_want_right_space
3706              
3707             { ## begin closure is_essential_whitespace
3708              
3709             my %is_sort_grep_map;
3710             my %is_for_foreach;
3711             my %is_digraph;
3712             my %is_trigraph;
3713             my %essential_whitespace_filter_l1;
3714             my %essential_whitespace_filter_r1;
3715             my %essential_whitespace_filter_l2;
3716             my %essential_whitespace_filter_r2;
3717             my %is_type_with_space_before_bareword;
3718             my %is_special_variable_char;
3719              
3720             BEGIN {
3721              
3722 39     39   210 my @q;
3723              
3724             # NOTE: This hash is like the global %is_sort_map_grep, but it ignores
3725             # grep aliases on purpose, since here we are looking parens, not braces
3726 39         186 @q = qw(sort grep map);
3727 39         203 @is_sort_grep_map{@q} = (1) x scalar(@q);
3728              
3729 39         109 @q = qw(for foreach);
3730 39         128 @is_for_foreach{@q} = (1) x scalar(@q);
3731              
3732 39         358 @q = qw(
3733             .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
3734             <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
3735             );
3736 39         1026 @is_digraph{@q} = (1) x scalar(@q);
3737              
3738 39         262 @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
3739 39         441 @is_trigraph{@q} = (1) x scalar(@q);
3740              
3741             # These are used as a speedup filters for sub is_essential_whitespace.
3742              
3743             # Filter 1:
3744             # These left side token types USUALLY do not require a space:
3745 39         183 @q = qw( ; { } [ ] L R );
3746 39         112 push @q, ',';
3747 39         70 push @q, ')';
3748 39         90 push @q, '(';
3749 39         193 @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
3750              
3751             # BUT some might if followed by these right token types
3752 39         104 @q = qw( pp mm << <<= h );
3753 39         169 @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
3754              
3755             # Filter 2:
3756             # These right side filters usually do not require a space
3757 39         106 @q = qw( ; ] R } );
3758 39         82 push @q, ',';
3759 39         88 push @q, ')';
3760 39         144 @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
3761              
3762             # BUT some might if followed by these left token types
3763 39         108 @q = qw( h Z );
3764 39         107 @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
3765              
3766             # Keep a space between certain types and any bareword:
3767             # Q: keep a space between a quote and a bareword to prevent the
3768             # bareword from becoming a quote modifier.
3769             # &: do not remove space between an '&' and a bare word because
3770             # it may turn into a function evaluation, like here
3771             # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
3772             # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
3773 39         85 @q = qw( Q & );
3774 39         96 @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
3775              
3776             # These are the only characters which can (currently) form special
3777             # variables, like $^W: (issue c066, c068).
3778 39         240 @q =
3779             qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
3780 39         33703 @{is_special_variable_char}{@q} = (1) x scalar(@q);
3781              
3782             } ## end BEGIN
3783              
3784             sub is_essential_whitespace {
3785              
3786             # Essential whitespace means whitespace which cannot be safely deleted
3787             # without risking the introduction of a syntax error.
3788             # We are given three tokens and their types:
3789             # ($tokenl, $typel) is the token to the left of the space in question
3790             # ($tokenr, $typer) is the token to the right of the space in question
3791             # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
3792             #
3793             # Note1: This routine should almost never need to be changed. It is
3794             # for avoiding syntax problems rather than for formatting.
3795              
3796             # Note2: The -mangle option causes large numbers of calls to this
3797             # routine and therefore is a good test. So if a change is made, be sure
3798             # to use nytprof to profile with both old and revised coding using the
3799             # -mangle option and check differences.
3800              
3801 6262     6262 0 14697 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
3802              
3803             # This is potentially a very slow routine but the following quick
3804             # filters typically catch and handle over 90% of the calls.
3805              
3806             # Filter 1: usually no space required after common types ; , [ ] { } ( )
3807             return
3808             if ( $essential_whitespace_filter_l1{$typel}
3809 6262 100 100     27204 && !$essential_whitespace_filter_r1{$typer} );
3810              
3811             # Filter 2: usually no space before common types ; ,
3812             return
3813             if ( $essential_whitespace_filter_r2{$typer}
3814 1304 100 66     5712 && !$essential_whitespace_filter_l2{$typel} );
3815              
3816             # Filter 3: Handle side comments: a space is only essential if the left
3817             # token ends in '$' For example, we do not want to create $#foo below:
3818              
3819             # sub t086
3820             # ( #foo)))
3821             # $ #foo)))
3822             # a #foo)))
3823             # ) #foo)))
3824             # { ... }
3825              
3826             # Also, I prefer not to put a ? and # together because ? used to be
3827             # a pattern delimiter and spacing was used if guessing was needed.
3828              
3829 1009 100       2657 if ( $typer eq '#' ) {
3830              
3831 6 100 66     41 return 1
      66        
3832             if ( $tokenl
3833             && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
3834 4         9 return;
3835             }
3836              
3837 1003   100     5091 my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
3838 1003         1889 my $tokenr_is_open_paren = $tokenr eq '(';
3839 1003         2189 my $token_joined = $tokenl . $tokenr;
3840 1003         1776 my $tokenl_is_dash = $tokenl eq '-';
3841              
3842             my $result =
3843              
3844             # never combine two bare words or numbers
3845             # examples: and ::ok(1)
3846             # return ::spw(...)
3847             # for bla::bla:: abc
3848             # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3849             # $input eq"quit" to make $inputeq"quit"
3850             # my $size=-s::SINK if $file; <==OK but we won't do it
3851             # don't join something like: for bla::bla:: abc
3852             # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3853             ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
3854             && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
3855              
3856             # do not combine a number with a concatenation dot
3857             # example: pom.caputo:
3858             # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
3859             || $typel eq 'n' && $tokenr eq '.'
3860             || $typer eq 'n' && $tokenl eq '.'
3861              
3862             # cases of a space before a bareword...
3863             || (
3864             $tokenr_is_bareword && (
3865              
3866             # do not join a minus with a bare word, because you might form
3867             # a file test operator. Example from Complex.pm:
3868             # if (CORE::abs($z - i) < $eps);
3869             # "z-i" would be taken as a file test.
3870             $tokenl_is_dash && length($tokenr) == 1
3871              
3872             # and something like this could become ambiguous without space
3873             # after the '-':
3874             # use constant III=>1;
3875             # $a = $b - III;
3876             # and even this:
3877             # $a = - III;
3878             || $tokenl_is_dash && $typer =~ /^[wC]$/
3879              
3880             # keep space between types Q & and a bareword
3881             || $is_type_with_space_before_bareword{$typel}
3882              
3883             # +-: binary plus and minus before a bareword could get
3884             # converted into unary plus and minus on next pass through the
3885             # tokenizer. This can lead to blinkers: cases b660 b670 b780
3886             # b781 b787 b788 b790 So we keep a space unless the +/- clearly
3887             # follows an operator
3888             || ( ( $typel eq '+' || $typel eq '-' )
3889             && $typell !~ /^[niC\)\}\]R]$/ )
3890              
3891             # keep a space between a token ending in '$' and any word;
3892             # this caused trouble: "die @$ if $@"
3893             || $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$'
3894              
3895             # don't combine $$ or $# with any alphanumeric
3896             # (testfile mangle.t with --mangle)
3897             || $tokenl eq '$$'
3898             || $tokenl eq '$#'
3899              
3900             )
3901             ) ## end $tokenr_is_bareword
3902              
3903             # OLD, not used
3904             # '= -' should not become =- or you will get a warning
3905             # about reversed -=
3906             # || ($tokenr eq '-')
3907              
3908             # do not join a bare word with a minus, like between 'Send' and
3909             # '-recipients' here <<snippets/space3.in>>
3910             # my $msg = new Fax::Send
3911             # -recipients => $to,
3912             # -data => $data;
3913             # This is the safest thing to do. If we had the token to the right of
3914             # the minus we could do a better check.
3915             #
3916             # And do not combine a bareword and a quote, like this:
3917             # oops "Your login, $Bad_Login, is not valid";
3918             # It can cause a syntax error if oops is a sub
3919             || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
3920              
3921             # perl is very fussy about spaces before <<
3922             || substr( $tokenr, 0, 2 ) eq '<<'
3923              
3924             # avoid combining tokens to create new meanings. Example:
3925             # $a+ +$b must not become $a++$b
3926             || ( $is_digraph{$token_joined} )
3927             || $is_trigraph{$token_joined}
3928              
3929             # another example: do not combine these two &'s:
3930             # allow_options & &OPT_EXECCGI
3931             || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
3932              
3933             # retain any space after possible filehandle
3934             # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
3935             # but no space for '$ {' even if '$' is marked as type 'Z', issue c221
3936             || ( $typel eq 'Z' && !( $tokenl eq '$' && $tokenr eq '{' ) )
3937              
3938             # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
3939             # space after type Y. Otherwise, it will get parsed as type 'Z' later
3940             # and any space would have to be added back manually if desired.
3941             || $typel eq 'Y'
3942              
3943             # Perl is sensitive to whitespace after the + here:
3944             # $b = xvals $a + 0.1 * yvals $a;
3945             || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
3946              
3947             || (
3948             $tokenr_is_open_paren && (
3949              
3950             # keep paren separate in 'use Foo::Bar ()'
3951             ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
3952              
3953             # OLD: keep any space between filehandle and paren:
3954             # file mangle.t with --mangle:
3955             # NEW: this test is no longer necessary here (moved above)
3956             ## || $typel eq 'Y'
3957              
3958             # must have space between grep and left paren; "grep(" will fail
3959             || $is_sort_grep_map{$tokenl}
3960              
3961             # don't stick numbers next to left parens, as in:
3962             #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
3963             || $typel eq 'n'
3964             )
3965             ) ## end $tokenr_is_open_paren
3966              
3967             # retain any space after here doc operator ( hereerr.t)
3968             || $typel eq 'h'
3969              
3970             # be careful with a space around ++ and --, to avoid ambiguity as to
3971             # which token it applies
3972             || ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/
3973             || ( $typel eq '++' || $typel eq '--' )
3974             && $tokenr !~ /^[\;\}\)\]]/
3975              
3976             # need space after foreach my; for example, this will fail in
3977             # older versions of Perl:
3978             # foreach my$ft(@filetypes)...
3979             || (
3980             $tokenl eq 'my'
3981              
3982             && substr( $tokenr, 0, 1 ) eq '$'
3983              
3984             # /^(for|foreach)$/
3985             && $is_for_foreach{$tokenll}
3986             )
3987              
3988             # Keep space after like $^ if needed to avoid forming a different
3989             # special variable (issue c068). For example:
3990             # my $aa = $^ ? "none" : "ok";
3991             || ( $typel eq 'i'
3992             && length($tokenl) == 2
3993             && substr( $tokenl, 1, 1 ) eq '^'
3994 1003   33     44113 && $is_special_variable_char{ substr( $tokenr, 0, 1 ) } )
3995              
3996             # We must be sure that a space between a ? and a quoted string
3997             # remains if the space before the ? remains. [Loca.pm, lockarea]
3998             # ie,
3999             # $b=join $comma ? ',' : ':', @_; # ok
4000             # $b=join $comma?',' : ':', @_; # ok!
4001             # $b=join $comma ?',' : ':', @_; # error!
4002             # Not really required:
4003             ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
4004              
4005             # Space stacked labels...
4006             # Not really required: Perl seems to accept non-spaced labels.
4007             ## || $typel eq 'J' && $typer eq 'J'
4008              
4009             ; # the value of this long logic sequence is the result we want
4010 1003         3458 return $result;
4011             } ## end sub is_essential_whitespace
4012             } ## end closure is_essential_whitespace
4013              
4014             { ## begin closure new_secret_operator_whitespace
4015              
4016             my %secret_operators;
4017             my %is_leading_secret_token;
4018              
4019             BEGIN {
4020              
4021             # token lists for perl secret operators as compiled by Philippe Bruhat
4022             # at: https://metacpan.org/module/perlsecret
4023 39     39   647 %secret_operators = (
4024             'Goatse' => [qw#= ( ) =#], #=( )=
4025             'Venus1' => [qw#0 +#], # 0+
4026             'Venus2' => [qw#+ 0#], # +0
4027             'Enterprise' => [qw#) x ! !#], # ()x!!
4028             'Kite1' => [qw#~ ~ <>#], # ~~<>
4029             'Kite2' => [qw#~~ <>#], # ~~<>
4030             'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
4031             'Bang bang ' => [qw#! !#], # !!
4032             );
4033              
4034             # The following operators and constants are not included because they
4035             # are normally kept tight by perltidy:
4036             # ~~ <~>
4037             #
4038              
4039             # Make a lookup table indexed by the first token of each operator:
4040             # first token => [list, list, ...]
4041 39         213 foreach my $value ( values(%secret_operators) ) {
4042 312         519 my $tok = $value->[0];
4043 312         397 push @{ $is_leading_secret_token{$tok} }, $value;
  312         77378  
4044             }
4045             } ## end BEGIN
4046              
4047             sub new_secret_operator_whitespace {
4048              
4049 1     1 0 5 my ( $rlong_array, $rwhitespace_flags ) = @_;
4050              
4051             # Loop over all tokens in this line
4052 1         3 my ( $token, $type );
4053 1         3 my $jmax = @{$rlong_array} - 1;
  1         3  
4054 1         4 foreach my $j ( 0 .. $jmax ) {
4055              
4056 9         14 $token = $rlong_array->[$j]->[_TOKEN_];
4057 9         13 $type = $rlong_array->[$j]->[_TYPE_];
4058              
4059             # Skip unless this token might start a secret operator
4060 9 100       19 next if ( $type eq 'b' );
4061 6 100       20 next unless ( $is_leading_secret_token{$token} );
4062              
4063             # Loop over all secret operators with this leading token
4064 2         5 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
  2         5  
4065 2         6 my $jend = $j - 1;
4066 2         4 foreach my $tok ( @{$rpattern} ) {
  2         5  
4067 4         6 $jend++;
4068 4 100 66     17 $jend++
4069              
4070             if ( $jend <= $jmax
4071             && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
4072 4 100 66     20 if ( $jend > $jmax
4073             || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
4074             {
4075 1         3 $jend = undef;
4076 1         2 last;
4077             }
4078             }
4079              
4080 2 100       7 if ($jend) {
4081              
4082             # set flags to prevent spaces within this operator
4083 1         4 foreach my $jj ( $j + 1 .. $jend ) {
4084 1         5 $rwhitespace_flags->[$jj] = WS_NO;
4085             }
4086 1         3 $j = $jend;
4087 1         2 last;
4088             }
4089             } ## End Loop over all operators
4090             } ## End loop over all tokens
4091 1         2 return;
4092             } ## end sub new_secret_operator_whitespace
4093             } ## end closure new_secret_operator_whitespace
4094              
4095             { ## begin closure set_bond_strengths
4096              
4097             # These routines and variables are involved in deciding where to break very
4098             # long lines.
4099              
4100             # NEW_TOKENS must add bond strength rules
4101              
4102             my %is_good_keyword_breakpoint;
4103             my %is_container_token;
4104              
4105             my %binary_bond_strength_nospace;
4106             my %binary_bond_strength;
4107             my %nobreak_lhs;
4108             my %nobreak_rhs;
4109              
4110             my @bias_tokens;
4111             my %bias_hash;
4112             my %bias;
4113             my $delta_bias;
4114              
4115             sub initialize_bond_strength_hashes {
4116              
4117 560     560 0 1292 my @q;
4118 560         2730 @q = qw(if unless while until for foreach);
4119 560         3291 @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
4120              
4121 560         2558 @q = qw/ ( [ { } ] ) /;
4122 560         2731 @is_container_token{@q} = (1) x scalar(@q);
4123              
4124             # The decision about where to break a line depends upon a "bond
4125             # strength" between tokens. The LOWER the bond strength, the MORE
4126             # likely a break. A bond strength may be any value but to simplify
4127             # things there are several pre-defined strength levels:
4128              
4129             # NO_BREAK => 10000;
4130             # VERY_STRONG => 100;
4131             # STRONG => 2.1;
4132             # NOMINAL => 1.1;
4133             # WEAK => 0.8;
4134             # VERY_WEAK => 0.55;
4135              
4136             # The strength values are based on trial-and-error, and need to be
4137             # tweaked occasionally to get desired results. Some comments:
4138             #
4139             # 1. Only relative strengths are important. small differences
4140             # in strengths can make big formatting differences.
4141             # 2. Each indentation level adds one unit of bond strength.
4142             # 3. A value of NO_BREAK makes an unbreakable bond
4143             # 4. A value of VERY_WEAK is the strength of a ','
4144             # 5. Values below NOMINAL are considered ok break points.
4145             # 6. Values above NOMINAL are considered poor break points.
4146             #
4147             # The bond strengths should roughly follow precedence order where
4148             # possible. If you make changes, please check the results very
4149             # carefully on a variety of scripts. Testing with the -extrude
4150             # options is particularly helpful in exercising all of the rules.
4151              
4152             # Wherever possible, bond strengths are defined in the following
4153             # tables. There are two main stages to setting bond strengths and
4154             # two types of tables:
4155             #
4156             # The first stage involves looking at each token individually and
4157             # defining left and right bond strengths, according to if we want
4158             # to break to the left or right side, and how good a break point it
4159             # is. For example tokens like =, ||, && make good break points and
4160             # will have low strengths, but one might want to break on either
4161             # side to put them at the end of one line or beginning of the next.
4162             #
4163             # The second stage involves looking at certain pairs of tokens and
4164             # defining a bond strength for that particular pair. This second
4165             # stage has priority.
4166              
4167             #---------------------------------------------------------------
4168             # Bond Strength BEGIN Section 1.
4169             # Set left and right bond strengths of individual tokens.
4170             #---------------------------------------------------------------
4171              
4172             # NOTE: NO_BREAK's set in this section first are HINTS which will
4173             # probably not be honored. Essential NO_BREAKS's should be set in
4174             # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
4175             # of this subroutine.
4176              
4177             # Note that we are setting defaults in this section. The user
4178             # cannot change bond strengths but can cause the left and right
4179             # bond strengths of any token type to be swapped through the use of
4180             # the -wba and -wbb flags. In this way the user can determine if a
4181             # breakpoint token should appear at the end of one line or the
4182             # beginning of the next line.
4183              
4184 560         12997 %right_bond_strength = ();
4185 560         10876 %left_bond_strength = ();
4186 560         4308 %binary_bond_strength_nospace = ();
4187 560         10355 %binary_bond_strength = ();
4188 560         1755 %nobreak_lhs = ();
4189 560         1801 %nobreak_rhs = ();
4190              
4191             # The hash keys in this section are token types, plus the text of
4192             # certain keywords like 'or', 'and'.
4193              
4194             # no break around possible filehandle
4195 560         2168 $left_bond_strength{'Z'} = NO_BREAK;
4196 560         1625 $right_bond_strength{'Z'} = NO_BREAK;
4197              
4198             # never put a bare word on a new line:
4199             # example print (STDERR, "bla"); will fail with break after (
4200 560         1690 $left_bond_strength{'w'} = NO_BREAK;
4201              
4202             # blanks always have infinite strength to force breaks after
4203             # real tokens
4204 560         1718 $right_bond_strength{'b'} = NO_BREAK;
4205              
4206             # try not to break on exponentiation
4207 560         2160 @q = qw# ** .. ... <=> #;
4208 560         2458 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4209 560         2231 @right_bond_strength{@q} = (STRONG) x scalar(@q);
4210              
4211             # The comma-arrow has very low precedence but not a good break point
4212 560         1684 $left_bond_strength{'=>'} = NO_BREAK;
4213 560         1643 $right_bond_strength{'=>'} = NOMINAL;
4214              
4215             # ok to break after label
4216 560         1500 $left_bond_strength{'J'} = NO_BREAK;
4217 560         1530 $right_bond_strength{'J'} = NOMINAL;
4218 560         1504 $left_bond_strength{'j'} = STRONG;
4219 560         1443 $right_bond_strength{'j'} = STRONG;
4220 560         1526 $left_bond_strength{'A'} = STRONG;
4221 560         1525 $right_bond_strength{'A'} = STRONG;
4222              
4223 560         1563 $left_bond_strength{'->'} = STRONG;
4224 560         1484 $right_bond_strength{'->'} = VERY_STRONG;
4225              
4226 560         1444 $left_bond_strength{'CORE::'} = NOMINAL;
4227 560         1404 $right_bond_strength{'CORE::'} = NO_BREAK;
4228              
4229             # Fix for c250: added strengths for new type 'P'
4230             # Note: these are working okay, but may eventually need to be
4231             # adjusted or even removed.
4232 560         1471 $left_bond_strength{'P'} = NOMINAL;
4233 560         1426 $right_bond_strength{'P'} = NOMINAL;
4234              
4235             # breaking AFTER modulus operator is ok:
4236 560         1564 @q = qw< % >;
4237 560         1741 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4238 560         1671 @right_bond_strength{@q} =
4239             ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
4240              
4241             # Break AFTER math operators * and /
4242 560         1973 @q = qw< * / x >;
4243 560         2154 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4244 560         1947 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
4245              
4246             # Break AFTER weakest math operators + and -
4247             # Make them weaker than * but a bit stronger than '.'
4248 560         1866 @q = qw< + - >;
4249 560         1957 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4250 560         1791 @right_bond_strength{@q} =
4251             ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
4252              
4253             # Define left strength of unary plus and minus (fixes case b511)
4254 560         1957 $left_bond_strength{p} = $left_bond_strength{'+'};
4255 560         1748 $left_bond_strength{m} = $left_bond_strength{'-'};
4256              
4257             # And make right strength of unary plus and minus very high.
4258             # Fixes cases b670 b790
4259 560         1492 $right_bond_strength{p} = NO_BREAK;
4260 560         1615 $right_bond_strength{m} = NO_BREAK;
4261              
4262             # breaking BEFORE these is just ok:
4263 560         1780 @q = qw# >> << #;
4264 560         1855 @right_bond_strength{@q} = (STRONG) x scalar(@q);
4265 560         1739 @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
4266              
4267             # breaking before the string concatenation operator seems best
4268             # because it can be hard to see at the end of a line
4269 560         1673 $right_bond_strength{'.'} = STRONG;
4270 560         1618 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
4271              
4272 560         2152 @q = qw< } ] ) R >;
4273 560         2155 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4274 560         2161 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
4275              
4276             # make these a little weaker than nominal so that they get
4277             # favored for end-of-line characters
4278 560         2552 @q = qw< != == =~ !~ ~~ !~~ >;
4279 560         2492 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4280 560         2549 @right_bond_strength{@q} =
4281             ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
4282              
4283             # break AFTER these
4284 560         2540 @q = qw# < > | & >= <= #;
4285 560         2572 @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
4286 560         2298 @right_bond_strength{@q} =
4287             ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
4288              
4289             # breaking either before or after a quote is ok
4290             # but bias for breaking before a quote
4291 560         1591 $left_bond_strength{'Q'} = NOMINAL;
4292 560         1574 $right_bond_strength{'Q'} = NOMINAL + 0.02;
4293 560         1549 $left_bond_strength{'q'} = NOMINAL;
4294 560         1467 $right_bond_strength{'q'} = NOMINAL;
4295              
4296             # starting a line with a keyword is usually ok
4297 560         1636 $left_bond_strength{'k'} = NOMINAL;
4298              
4299             # we usually want to bond a keyword strongly to what immediately
4300             # follows, rather than leaving it stranded at the end of a line
4301 560         1397 $right_bond_strength{'k'} = STRONG;
4302              
4303 560         1412 $left_bond_strength{'G'} = NOMINAL;
4304 560         1366 $right_bond_strength{'G'} = STRONG;
4305              
4306             # assignment operators
4307 560         3701 @q = qw(
4308             = **= += *= &= <<= &&=
4309             -= /= |= >>= ||= //=
4310             .= %= ^=
4311             x=
4312             );
4313              
4314             # Default is to break AFTER various assignment operators
4315 560         4195 @left_bond_strength{@q} = (STRONG) x scalar(@q);
4316 560         3877 @right_bond_strength{@q} =
4317             ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
4318              
4319             # Default is to break BEFORE '&&' and '||' and '//'
4320             # set strength of '||' to same as '=' so that chains like
4321             # $a = $b || $c || $d will break before the first '||'
4322 560         1786 $right_bond_strength{'||'} = NOMINAL;
4323 560         1960 $left_bond_strength{'||'} = $right_bond_strength{'='};
4324              
4325             # same thing for '//'
4326 560         1648 $right_bond_strength{'//'} = NOMINAL;
4327 560         1552 $left_bond_strength{'//'} = $right_bond_strength{'='};
4328              
4329             # set strength of && a little higher than ||
4330 560         1495 $right_bond_strength{'&&'} = NOMINAL;
4331 560         1871 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
4332              
4333 560         1439 $left_bond_strength{';'} = VERY_STRONG;
4334 560         1582 $right_bond_strength{';'} = VERY_WEAK;
4335 560         1480 $left_bond_strength{'f'} = VERY_STRONG;
4336              
4337             # make right strength of for ';' a little less than '='
4338             # to make for contents break after the ';' to avoid this:
4339             # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
4340             # $number_of_fields )
4341             # and make it weaker than ',' and 'and' too
4342 560         1470 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
4343              
4344             # The strengths of ?/: should be somewhere between
4345             # an '=' and a quote (NOMINAL),
4346             # make strength of ':' slightly less than '?' to help
4347             # break long chains of ? : after the colons
4348 560         1440 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
4349 560         1480 $right_bond_strength{':'} = NO_BREAK;
4350 560         1887 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
4351 560         1424 $right_bond_strength{'?'} = NO_BREAK;
4352              
4353 560         1580 $left_bond_strength{','} = VERY_STRONG;
4354 560         1424 $right_bond_strength{','} = VERY_WEAK;
4355              
4356             # remaining digraphs and trigraphs not defined above
4357 560         2586 @q = qw( :: <> ++ --);
4358 560         2389 @left_bond_strength{@q} = (WEAK) x scalar(@q);
4359 560         2193 @right_bond_strength{@q} = (STRONG) x scalar(@q);
4360              
4361             # Set bond strengths of certain keywords
4362             # make 'or', 'err', 'and' slightly weaker than a ','
4363 560         1745 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
4364 560         1617 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
4365 560         1542 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
4366 560         1549 $left_bond_strength{'xor'} = VERY_WEAK - 0.01;
4367              
4368 560         1681 @q = qw(ne eq);
4369 560         2420 @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
4370              
4371 560         2011 @q = qw(lt gt le ge);
4372 560         3000 @left_bond_strength{@q} = ( 0.9 * NOMINAL + 0.1 * STRONG ) x scalar(@q);
4373              
4374 560         2416 @q = qw(and or err xor ne eq);
4375 560         2355 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
4376              
4377             #---------------------------------------------------------------
4378             # Bond Strength BEGIN Section 2.
4379             # Set binary rules for bond strengths between certain token types.
4380             #---------------------------------------------------------------
4381              
4382             # We have a little problem making tables which apply to the
4383             # container tokens. Here is a list of container tokens and
4384             # their types:
4385             #
4386             # type tokens // meaning
4387             # { {, [, ( // indent
4388             # } }, ], ) // outdent
4389             # [ [ // left non-structural [ (enclosing an array index)
4390             # ] ] // right non-structural square bracket
4391             # ( ( // left non-structural paren
4392             # ) ) // right non-structural paren
4393             # L { // left non-structural curly brace (enclosing a key)
4394             # R } // right non-structural curly brace
4395             #
4396             # Some rules apply to token types and some to just the token
4397             # itself. We solve the problem by combining type and token into a
4398             # new hash key for the container types.
4399             #
4400             # If a rule applies to a token 'type' then we need to make rules
4401             # for each of these 'type.token' combinations:
4402             # Type Type.Token
4403             # { {{, {[, {(
4404             # [ [[
4405             # ( ((
4406             # L L{
4407             # } }}, }], })
4408             # ] ]]
4409             # ) ))
4410             # R R}
4411             #
4412             # If a rule applies to a token then we need to make rules for
4413             # these 'type.token' combinations:
4414             # Token Type.Token
4415             # { {{, L{
4416             # [ {[, [[
4417             # ( {(, ((
4418             # } }}, R}
4419             # ] }], ]]
4420             # ) }), ))
4421              
4422             # allow long lines before final { in an if statement, as in:
4423             # if (..........
4424             # ..........)
4425             # {
4426             #
4427             # Otherwise, the line before the { tends to be too short.
4428              
4429 560         2023 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
4430 560         1834 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
4431              
4432             # break on something like '} (', but keep this stronger than a ','
4433             # example is in 'howe.pl'
4434 560         1658 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
4435 560         1661 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
4436              
4437             # keep matrix and hash indices together
4438             # but make them a little below STRONG to allow breaking open
4439             # something like {'some-word'}{'some-very-long-word'} at the }{
4440             # (bracebrk.t)
4441 560         1772 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
4442 560         1639 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
4443 560         1558 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
4444 560         1469 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
4445              
4446             # increase strength to the point where a break in the following
4447             # will be after the opening paren rather than at the arrow:
4448             # $a->$b($c);
4449 560         5859 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
4450              
4451             # Added for c140 to make 'w ->' and 'i ->' behave the same
4452 560         1615 $binary_bond_strength{'w'}{'->'} = 1.45 * STRONG;
4453              
4454             # Note that the following alternative strength would make the break at the
4455             # '->' rather than opening the '('. Both have advantages and disadvantages.
4456             # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
4457              
4458 560         1487 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4459 560         1586 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4460 560         1680 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4461 560         1683 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4462 560         1397 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4463 560         1475 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
4464              
4465 560         1405 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
4466 560         1433 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
4467 560         1516 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
4468 560         1462 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
4469              
4470             #---------------------------------------------------------------
4471             # Binary NO_BREAK rules
4472             #---------------------------------------------------------------
4473              
4474             # use strict requires that bare word and => not be separated
4475 560         1664 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
4476 560         1614 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
4477              
4478             # Never break between a bareword and a following paren because
4479             # perl may give an error. For example, if a break is placed
4480             # between 'to_filehandle' and its '(' the following line will
4481             # give a syntax error [Carp.pm]: my( $no) =fileno(
4482             # to_filehandle( $in)) ;
4483 560         1551 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
4484 560         1617 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
4485 560         1591 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
4486 560         1540 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
4487              
4488             # use strict requires that bare word within braces not start new
4489             # line
4490 560         1641 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
4491              
4492 560         1458 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
4493              
4494             # The following two rules prevent a syntax error caused by breaking up
4495             # a construction like '{-y}'. The '-' quotes the 'y' and prevents
4496             # it from being taken as a transliteration. We have to keep
4497             # token types 'L m w' together to prevent this error.
4498 560         1552 $binary_bond_strength{'L{'}{'m'} = NO_BREAK;
4499 560         1573 $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
4500              
4501             # keep 'bareword-' together, but only if there is no space between
4502             # the word and dash. Do not keep together if there is a space.
4503             # example 'use perl6-alpha'
4504 560         1528 $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
4505              
4506             # use strict requires that bare word and => not be separated
4507 560         1540 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
4508              
4509             # use strict does not allow separating type info from trailing { }
4510             # testfile is readmail.pl
4511 560         1674 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
4512 560         1438 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
4513              
4514             # Fix for c250: set strength for new 'S' to be same as 'i'
4515             # testfile is test11/Hub.pm
4516 560         1601 $binary_bond_strength{'S'}{'L{'} = NO_BREAK;
4517              
4518             # As a defensive measure, do not break between a '(' and a
4519             # filehandle. In some cases, this can cause an error. For
4520             # example, the following program works:
4521             # my $msg="hi!\n";
4522             # print
4523             # ( STDOUT
4524             # $msg
4525             # );
4526             #
4527             # But this program fails:
4528             # my $msg="hi!\n";
4529             # print
4530             # (
4531             # STDOUT
4532             # $msg
4533             # );
4534             #
4535             # This is normally only a problem with the 'extrude' option
4536 560         1418 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
4537 560         1621 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
4538              
4539             # never break between sub name and opening paren
4540 560         1608 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
4541 560         1481 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
4542              
4543             # keep '}' together with ';'
4544 560         1514 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
4545              
4546             # Breaking before a ++ can cause perl to guess wrong. For
4547             # example the following line will cause a syntax error
4548             # with -extrude if we break between '$i' and '++' [fixstyle2]
4549             # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
4550 560         1405 $nobreak_lhs{'++'} = NO_BREAK;
4551              
4552             # Do not break before a possible file handle
4553 560         1403 $nobreak_lhs{'Z'} = NO_BREAK;
4554              
4555             # use strict hates bare words on any new line. For
4556             # example, a break before the underscore here provokes the
4557             # wrath of use strict:
4558             # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
4559 560         1441 $nobreak_rhs{'F'} = NO_BREAK;
4560 560         1389 $nobreak_rhs{'CORE::'} = NO_BREAK;
4561              
4562             # To prevent the tokenizer from switching between types 'w' and 'G' we
4563             # need to avoid breaking between type 'G' and the following code block
4564             # brace. Fixes case b929.
4565 560         1583 $nobreak_rhs{G} = NO_BREAK;
4566              
4567             #---------------------------------------------------------------
4568             # Bond Strength BEGIN Section 3.
4569             # Define tables and values for applying a small bias to the above
4570             # values.
4571             #---------------------------------------------------------------
4572             # Adding a small 'bias' to strengths is a simple way to make a line
4573             # break at the first of a sequence of identical terms. For
4574             # example, to force long string of conditional operators to break
4575             # with each line ending in a ':', we can add a small number to the
4576             # bond strength of each ':' (colon.t)
4577 560         3327 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
4578 560         1745 %bias_hash = map { $_ => 0 } @bias_tokens;
  3920         10459  
4579 560         2125 $delta_bias = 0.0001; # a very small strength level
4580 560         1604 return;
4581              
4582             } ## end sub initialize_bond_strength_hashes
4583              
4584 39     39   357 use constant DEBUG_BOND => 0;
  39         82  
  39         72898  
4585              
4586             sub set_bond_strengths {
4587              
4588 1113     1113 0 2608 my ($self) = @_;
4589              
4590             #-----------------------------------------------------------------
4591             # Define a 'bond strength' for each token pair in an output batch.
4592             # See comments above for definition of bond strength.
4593             #-----------------------------------------------------------------
4594              
4595 1113         2303 my $rbond_strength_to_go = [];
4596              
4597 1113         2429 my $rLL = $self->[_rLL_];
4598 1113         2250 my $rK_weld_right = $self->[_rK_weld_right_];
4599 1113         2220 my $rK_weld_left = $self->[_rK_weld_left_];
4600 1113         2160 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
4601              
4602             # patch-its always ok to break at end of line
4603 1113         2203 $nobreak_to_go[$max_index_to_go] = 0;
4604              
4605             # we start a new set of bias values for each line
4606 1113         10107 %bias = %bias_hash;
4607              
4608 1113         2949 my $code_bias = -.01; # bias for closing block braces
4609              
4610 1113         2197 my $type = 'b';
4611 1113         2237 my $token = SPACE;
4612 1113         1871 my $token_length = 1;
4613 1113         1783 my $last_type;
4614 1113         2329 my $last_nonblank_type = $type;
4615 1113         1958 my $last_nonblank_token = $token;
4616 1113         2697 my $list_str = $left_bond_strength{'?'};
4617              
4618 1113         3809 my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 );
4619              
4620 1113         0 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
4621             $next_nonblank_type, $next_token, $next_type,
4622             $total_nesting_depth, );
4623              
4624             # main loop to compute bond strengths between each pair of tokens
4625 1113         3379 foreach my $i ( 0 .. $max_index_to_go ) {
4626 31257         42769 $last_type = $type;
4627 31257 100       54715 if ( $type ne 'b' ) {
4628 18551         24484 $last_nonblank_type = $type;
4629 18551         24497 $last_nonblank_token = $token;
4630             }
4631 31257         43843 $type = $types_to_go[$i];
4632              
4633             # strength on both sides of a blank is the same
4634 31257 100 66     70035 if ( $type eq 'b' && $last_type ne 'b' ) {
4635 11593         26389 $rbond_strength_to_go->[$i] = $rbond_strength_to_go->[ $i - 1 ];
4636 11593   100     39093 $nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257
4637 11593         19061 next;
4638             }
4639              
4640 19664         27848 $token = $tokens_to_go[$i];
4641 19664         26865 $token_length = $token_lengths_to_go[$i];
4642 19664         26894 $block_type = $block_type_to_go[$i];
4643 19664         26031 $i_next = $i + 1;
4644 19664         28199 $next_type = $types_to_go[$i_next];
4645 19664         27109 $next_token = $tokens_to_go[$i_next];
4646 19664         27800 $total_nesting_depth = $nesting_depth_to_go[$i_next];
4647 19664 100       35120 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
4648 19664         32482 $next_nonblank_type = $types_to_go[$i_next_nonblank];
4649 19664         27188 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
4650              
4651 19664         27368 my $seqno = $type_sequence_to_go[$i];
4652 19664         26888 my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
4653              
4654             # We are computing the strength of the bond between the current
4655             # token and the NEXT token.
4656              
4657             #---------------------------------------------------------------
4658             # Bond Strength Section 1:
4659             # First Approximation.
4660             # Use minimum of individual left and right tabulated bond
4661             # strengths.
4662             #---------------------------------------------------------------
4663 19664         33537 my $bsr = $right_bond_strength{$type};
4664 19664         32575 my $bsl = $left_bond_strength{$next_nonblank_type};
4665              
4666             # define right bond strengths of certain keywords
4667 19664 100       35449 if ( $type eq 'k' ) {
4668 1229 100       3448 if ( defined( $right_bond_strength{$token} ) ) {
4669 157         369 $bsr = $right_bond_strength{$token};
4670             }
4671             }
4672              
4673             # set terminal bond strength to the nominal value
4674             # this will cause good preceding breaks to be retained
4675 19664 100       34181 if ( $i_next_nonblank > $max_index_to_go ) {
4676 1113         2436 $bsl = NOMINAL;
4677              
4678             # But weaken the bond at a 'missing terminal comma'. If an
4679             # optional comma is missing at the end of a broken list, use
4680             # the strength of a comma anyway to make formatting the same as
4681             # if it were there. Fixes issue c133.
4682 1113 100 100     5907 if ( !defined($bsr) || $bsr > VERY_WEAK ) {
4683 558         1696 my $seqno_px = $parent_seqno_to_go[$max_index_to_go];
4684 558 100       1712 if ( $ris_list_by_seqno->{$seqno_px} ) {
4685 72         184 my $KK = $K_to_go[$max_index_to_go];
4686 72         368 my $Kn = $self->K_next_nonblank($KK);
4687 72         223 my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
4688 72 100 100     402 if ( $seqno_n && $seqno_n eq $seqno_px ) {
4689 17         48 $bsl = VERY_WEAK;
4690             }
4691             }
4692             }
4693             }
4694              
4695             # define left bond strengths of certain keywords
4696 19664 100       34215 if ( $next_nonblank_type eq 'k' ) {
4697 731 100       2583 if ( defined( $left_bond_strength{$next_nonblank_token} ) ) {
4698 157         402 $bsl = $left_bond_strength{$next_nonblank_token};
4699             }
4700             }
4701              
4702             # Use the minimum of the left and right strengths. Note: it might
4703             # seem that we would want to keep a NO_BREAK if either token has
4704             # this value. This didn't work, for example because in an arrow
4705             # list, it prevents the comma from separating from the following
4706             # bare word (which is probably quoted by its arrow). So necessary
4707             # NO_BREAK's have to be handled as special cases in the final
4708             # section.
4709 19664 100       33773 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
  5685         8237  
4710 19664 100       33211 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
  4294         6250  
4711 19664 100       34831 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
4712 19664         24109 $bond_str_1 = $bond_str if (DEBUG_BOND);
4713              
4714             #---------------------------------------------------------------
4715             # Bond Strength Section 2:
4716             # Apply hardwired rules..
4717             #---------------------------------------------------------------
4718              
4719             # Patch to put terminal or clauses on a new line: Weaken the bond
4720             # at an || followed by die or similar keyword to make the terminal
4721             # or clause fall on a new line, like this:
4722             #
4723             # my $class = shift
4724             # || die "Cannot add broadcast: No class identifier found";
4725             #
4726             # Otherwise the break will be at the previous '=' since the || and
4727             # = have the same starting strength and the or is biased, like
4728             # this:
4729             #
4730             # my $class =
4731             # shift || die "Cannot add broadcast: No class identifier found";
4732             #
4733             # In any case if the user places a break at either the = or the ||
4734             # it should remain there.
4735 19664 100 100     57843 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
      100        
4736              
4737             # /^(die|confess|croak|warn)$/
4738 89 100       482 if ( $is_die_confess_croak_warn{$next_nonblank_token} ) {
4739 4 50 33     43 if ( $want_break_before{$token} && $i > 0 ) {
4740 4         15 $rbond_strength_to_go->[ $i - 1 ] -= $delta_bias;
4741              
4742             # keep bond strength of a token and its following blank
4743             # the same
4744 4 100 66     24 if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
4745 1         6 $rbond_strength_to_go->[ $i - 2 ] -= $delta_bias;
4746             }
4747             }
4748             else {
4749 0         0 $bond_str -= $delta_bias;
4750             }
4751             }
4752             }
4753              
4754             # good to break after end of code blocks
4755 19664 100 100     42008 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
      100        
4756              
4757 194         450 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
4758 194         397 $code_bias += $delta_bias;
4759             }
4760              
4761 19664 100       33720 if ( $type eq 'k' ) {
4762              
4763             # allow certain control keywords to stand out
4764 1229 100 100     3446 if ( $next_nonblank_type eq 'k'
4765             && $is_last_next_redo_return{$token} )
4766             {
4767 5         12 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
4768             }
4769              
4770             # Don't break after keyword my. This is a quick fix for a
4771             # rare problem with perl. An example is this line from file
4772             # Container.pm:
4773              
4774             # foreach my $question( Debian::DebConf::ConfigDb::gettree(
4775             # $this->{'question'} ) )
4776              
4777 1229 100       2776 if ( $token eq 'my' ) {
4778 234         555 $bond_str = NO_BREAK;
4779             }
4780              
4781             }
4782              
4783 19664 100 100     48233 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
    100          
4784              
4785 730 100       2299 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
4786 65 50       215 $bond_str = $list_str if ( $bond_str > $list_str );
4787             }
4788              
4789             # keywords like 'unless', 'if', etc, within statements
4790             # make good breaks
4791 730 100       2040 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
4792 20         71 $bond_str = VERY_WEAK / 1.05;
4793             }
4794             }
4795              
4796             # try not to break before a comma-arrow
4797             elsif ( $next_nonblank_type eq '=>' ) {
4798 890 100       2444 if ( $bond_str < STRONG ) { $bond_str = STRONG }
  185         336  
4799             }
4800             else {
4801             ## ok - not special
4802             }
4803              
4804             #---------------------------------------------------------------
4805             # Additional hardwired NOBREAK rules
4806             #---------------------------------------------------------------
4807              
4808             # map1.t -- correct for a quirk in perl
4809 19664 50 100     42754 if ( $token eq '('
      100        
      66        
4810             && $next_nonblank_type eq 'i'
4811             && $last_nonblank_type eq 'k'
4812             && $is_sort_map_grep{$last_nonblank_token} )
4813              
4814             # /^(sort|map|grep)$/ )
4815             {
4816 0         0 $bond_str = NO_BREAK;
4817             }
4818              
4819             # extrude.t: do not break before paren at:
4820             # -l pid_filename(
4821 19664 100 100     36655 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
4822 2         6 $bond_str = NO_BREAK;
4823             }
4824              
4825             # OLD COMMENT: In older version of perl, use strict can cause
4826             # problems with breaks before bare words following opening parens.
4827             # For example, this will fail under older versions if a break is
4828             # made between '(' and 'MAIL':
4829              
4830             # use strict; open( MAIL, "a long filename or command"); close MAIL;
4831              
4832             # NEW COMMENT: Third fix for b1213:
4833             # This option does not seem to be needed any longer, and it can
4834             # cause instabilities. It can be turned off, but to minimize
4835             # changes to existing formatting it is retained only in the case
4836             # where the previous token was 'open' and there was no line break.
4837             # Even this could eventually be removed if it causes instability.
4838 19664 100       38149 if ( $type eq '{' ) {
    100          
4839              
4840 2369 50 100     8895 if ( $token eq '('
      100        
      66        
      33        
4841             && $next_nonblank_type eq 'w'
4842             && $last_nonblank_type eq 'k'
4843             && $last_nonblank_token eq 'open'
4844             && !$old_breakpoint_to_go[$i] )
4845             {
4846 0         0 $bond_str = NO_BREAK;
4847             }
4848             }
4849              
4850             # Do not break between a possible filehandle and a ? or / and do
4851             # not introduce a break after it if there is no blank
4852             # (extrude.t)
4853             elsif ( $type eq 'Z' ) {
4854              
4855             # don't break..
4856 2 100 66     41 if (
      66        
      33        
      66        
4857              
4858             # if there is no blank and we do not want one. Examples:
4859             # print $x++ # do not break after $x
4860             # print HTML"HELLO" # break ok after HTML
4861             (
4862             $next_type ne 'b'
4863             && defined( $want_left_space{$next_type} )
4864             && $want_left_space{$next_type} == WS_NO
4865             )
4866              
4867             # or we might be followed by the start of a quote,
4868             # and this is not an existing breakpoint; fixes c039.
4869             || !$old_breakpoint_to_go[$i]
4870             && substr( $next_nonblank_token, 0, 1 ) eq '/'
4871              
4872             )
4873             {
4874 1         3 $bond_str = NO_BREAK;
4875             }
4876             }
4877             else {
4878             ## ok - not special
4879             }
4880              
4881             # Breaking before a ? before a quote can cause trouble if
4882             # they are not separated by a blank.
4883             # Example: a syntax error occurs if you break before the ? here
4884             # my$logic=join$all?' && ':' || ',@regexps;
4885             # From: Professional_Perl_Programming_Code/multifind.pl
4886 19664 100       44983 if ( $next_nonblank_type eq '?' ) {
    100          
    100          
4887 125 100       765 $bond_str = NO_BREAK
4888             if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
4889             }
4890              
4891             # Breaking before a . followed by a number
4892             # can cause trouble if there is no intervening space
4893             # Example: a syntax error occurs if you break before the .2 here
4894             # $str .= pack($endian.2, ensurrogate($ord));
4895             # From: perl58/Unicode.pm
4896             elsif ( $next_nonblank_type eq '.' ) {
4897 116 50       390 $bond_str = NO_BREAK
4898             if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
4899             }
4900              
4901             # Fix for c039
4902             elsif ( $type eq 'w' ) {
4903 996 50 66     5416 $bond_str = NO_BREAK
      33        
4904             if ( !$old_breakpoint_to_go[$i]
4905             && substr( $next_nonblank_token, 0, 1 ) eq '/'
4906             && $next_nonblank_type ne '//' );
4907             }
4908             else {
4909             ## ok - not special
4910             }
4911              
4912 19664         24013 $bond_str_2 = $bond_str if (DEBUG_BOND);
4913              
4914             #---------------------------------------------------------------
4915             # End of hardwired rules
4916             #---------------------------------------------------------------
4917              
4918             #---------------------------------------------------------------
4919             # Bond Strength Section 3:
4920             # Apply table rules. These have priority over the above
4921             # hardwired rules.
4922             #---------------------------------------------------------------
4923              
4924 19664         24646 my $tabulated_bond_str;
4925 19664         26742 my $ltype = $type;
4926 19664         25475 my $rtype = $next_nonblank_type;
4927 19664 100 100     40543 if ( $seqno && $is_container_token{$token} ) {
4928 4953         7888 $ltype = $type . $token;
4929             }
4930              
4931 19664 100 100     41926 if ( $next_nonblank_seqno
4932             && $is_container_token{$next_nonblank_token} )
4933             {
4934 4836         8225 $rtype = $next_nonblank_type . $next_nonblank_token;
4935              
4936             # Alternate Fix #1 for issue b1299. This version makes the
4937             # decision as soon as possible. See Alternate Fix #2 also.
4938             # Do not separate a bareword identifier from its paren: b1299
4939             # This is currently needed for stability because if the bareword
4940             # gets separated from a preceding '->' and following '(' then
4941             # the tokenizer may switch from type 'i' to type 'w'. This
4942             # patch will prevent this by keeping it adjacent to its '('.
4943             ## if ( $next_nonblank_token eq '('
4944             ## && $ltype eq 'i'
4945             ## && substr( $token, 0, 1 ) =~ /^\w$/ )
4946             ## {
4947             ## $ltype = 'w';
4948             ## }
4949             }
4950              
4951             # apply binary rules which apply regardless of space between tokens
4952 19664 100       47324 if ( $binary_bond_strength{$ltype}{$rtype} ) {
4953 1655         3256 $bond_str = $binary_bond_strength{$ltype}{$rtype};
4954 1655         2557 $tabulated_bond_str = $bond_str;
4955             }
4956              
4957             # apply binary rules which apply only if no space between tokens
4958 19664 100       39114 if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
4959 255         448 $bond_str = $binary_bond_strength{$ltype}{$next_type};
4960 255         419 $tabulated_bond_str = $bond_str;
4961             }
4962              
4963 19664 100 100     58088 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
4964 48         157 $bond_str = NO_BREAK;
4965 48         106 $tabulated_bond_str = $bond_str;
4966             }
4967              
4968 19664         24200 $bond_str_3 = $bond_str if (DEBUG_BOND);
4969              
4970             # If the hardwired rules conflict with the tabulated bond
4971             # strength then there is an inconsistency that should be fixed
4972             DEBUG_BOND
4973             && $tabulated_bond_str
4974             && $bond_str_1
4975             && $bond_str_1 != $bond_str_2
4976             && $bond_str_2 != $tabulated_bond_str
4977 19664         23515 && do {
4978             print {*STDOUT}
4979             "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
4980             };
4981              
4982             #-----------------------------------------------------------------
4983             # Bond Strength Section 4:
4984             # Modify strengths of certain tokens which often occur in sequence
4985             # by adding a small bias to each one in turn so that the breaks
4986             # occur from left to right.
4987             #
4988             # Note that we only changing strengths by small amounts here,
4989             # and usually increasing, so we should not be altering any NO_BREAKs.
4990             # Other routines which check for NO_BREAKs will use a tolerance
4991             # of one to avoid any problem.
4992             #-----------------------------------------------------------------
4993              
4994             # The bias tables use special keys:
4995             # $type - if not keyword
4996             # $token - if keyword, but map some keywords together
4997 19664 50       35116 my $left_key =
    100          
4998             $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
4999 19664 50       32778 my $right_key =
    100          
5000             $next_nonblank_type eq 'k'
5001             ? $next_nonblank_token eq 'err'
5002             ? 'or'
5003             : $next_nonblank_token
5004             : $next_nonblank_type;
5005              
5006             # bias left token
5007 19664 100       37962 if ( defined( $bias{$left_key} ) ) {
5008 452 100       1569 if ( !$want_break_before{$left_key} ) {
5009 30         68 $bias{$left_key} += $delta_bias;
5010 30         60 $bond_str += $bias{$left_key};
5011             }
5012             }
5013              
5014             # bias right token
5015 19664 100       35072 if ( defined( $bias{$right_key} ) ) {
5016 451 100       1737 if ( $want_break_before{$right_key} ) {
5017              
5018             # for leading '.' align all but 'short' quotes; the idea
5019             # is to not place something like "\n" on a single line.
5020 421 100       1119 if ( $right_key eq '.' ) {
5021              
5022             my $is_short_quote = $last_nonblank_type eq '.'
5023             && ( $token_length <=
5024             $rOpts_short_concatenation_item_length )
5025 115   66     638 && !$is_closing_token{$token};
5026              
5027 115 100       275 if ( !$is_short_quote ) {
5028 75         162 $bias{$right_key} += $delta_bias;
5029             }
5030             }
5031             else {
5032 306         716 $bias{$right_key} += $delta_bias;
5033             }
5034 421         795 $bond_str += $bias{$right_key};
5035             }
5036             }
5037              
5038 19664         23545 $bond_str_4 = $bond_str if (DEBUG_BOND);
5039              
5040             #---------------------------------------------------------------
5041             # Bond Strength Section 5:
5042             # Fifth Approximation.
5043             # Take nesting depth into account by adding the nesting depth
5044             # to the bond strength.
5045             #---------------------------------------------------------------
5046 19664         24994 my $strength;
5047              
5048 19664 100 100     54206 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
5049 18258 100       29282 if ( $total_nesting_depth > 0 ) {
5050 14655         22264 $strength = $bond_str + $total_nesting_depth;
5051             }
5052             else {
5053 3603         5454 $strength = $bond_str;
5054             }
5055             }
5056             else {
5057 1406         2252 $strength = NO_BREAK;
5058              
5059             # For critical code such as lines with here targets we must
5060             # be absolutely sure that we do not allow a break. So for
5061             # these the nobreak flag exceeds 1 as a signal. Otherwise we
5062             # can run into trouble when small tolerances are added.
5063 1406 100 100     4476 $strength += 1
5064             if ( $nobreak_to_go[$i] && $nobreak_to_go[$i] > 1 );
5065             }
5066              
5067             #---------------------------------------------------------------
5068             # Bond Strength Section 6:
5069             # Sixth Approximation. Welds.
5070             #---------------------------------------------------------------
5071              
5072             # Do not allow a break within welds
5073 19664 100 100     37663 if ( $total_weld_count && $seqno ) {
5074 383         671 my $KK = $K_to_go[$i];
5075 383 100 66     1425 if ( $rK_weld_right->{$KK} ) {
    100          
5076 68         180 $strength = NO_BREAK;
5077             }
5078              
5079             # But encourage breaking after opening welded tokens
5080             elsif ($rK_weld_left->{$KK}
5081             && $is_opening_token{$token} )
5082             {
5083 27         95 $strength -= 1;
5084             }
5085             else {
5086             ## ok - not welded left or right
5087             }
5088             }
5089              
5090             # always break after side comment
5091 19664 100       34296 if ( $type eq '#' ) { $strength = 0 }
  50         147  
5092              
5093 19664         38540 $rbond_strength_to_go->[$i] = $strength;
5094              
5095             # Fix for case c001: be sure NO_BREAK's are enforced by later
5096             # routines, except at a '?' because '?' as quote delimiter is
5097             # deprecated.
5098 19664 100 100     43678 if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
5099 2744   100     7042 $nobreak_to_go[$i] ||= 1;
5100             }
5101              
5102 19664         32031 DEBUG_BOND && do {
5103             my $str = substr( $token, 0, 15 );
5104             $str .= SPACE x ( 16 - length($str) );
5105             print {*STDOUT}
5106             "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
5107              
5108             # reset for next pass
5109             $bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef;
5110             };
5111              
5112             } ## end main loop
5113 1113         4545 return $rbond_strength_to_go;
5114             } ## end sub set_bond_strengths
5115             } ## end closure set_bond_strengths
5116              
5117             sub bad_pattern {
5118 2244     2244 0 4618 my ($pattern) = @_;
5119              
5120             # See if a pattern will compile.
5121             # Note: this sub is also called from Tokenizer
5122 2244         3874 my $regex = eval { qr/$pattern/ };
  2244         44872  
5123 2244         11779 return $EVAL_ERROR;
5124             }
5125              
5126             { ## begin closure prepare_cuddled_block_types
5127              
5128             my %no_cuddle;
5129              
5130             # Add keywords here which really should not be cuddled
5131             BEGIN {
5132 39     39   272 my @q = qw(if unless for foreach while);
5133 39         20062 @no_cuddle{@q} = (1) x scalar(@q);
5134             }
5135              
5136             sub prepare_cuddled_block_types {
5137              
5138             # the cuddled-else style, if used, is controlled by a hash that
5139             # we construct here
5140              
5141             # Include keywords here which should not be cuddled
5142              
5143 560     560 0 1398 my $cuddled_string = EMPTY_STRING;
5144 560 100       2316 if ( $rOpts->{'cuddled-else'} ) {
5145              
5146             # set the default
5147             $cuddled_string = 'elsif else continue catch finally'
5148 12 50       69 unless ( $rOpts->{'cuddled-block-list-exclusive'} );
5149              
5150             # This is the old equivalent but more complex version
5151             # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
5152              
5153             # Add users other blocks to be cuddled
5154 12         41 my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
5155 12 100       47 if ($cuddled_block_list) {
5156 2         8 $cuddled_string .= SPACE . $cuddled_block_list;
5157             }
5158              
5159             }
5160              
5161             # If we have a cuddled string of the form
5162             # 'try-catch-finally'
5163              
5164             # we want to prepare a hash of the form
5165              
5166             # $rcuddled_block_types = {
5167             # 'try' => {
5168             # 'catch' => 1,
5169             # 'finally' => 1
5170             # },
5171             # };
5172              
5173             # use -dcbl to dump this hash
5174              
5175             # Multiple such strings are input as a space or comma separated list
5176              
5177             # If we get two lists with the same leading type, such as
5178             # -cbl = "-try-catch-finally -try-catch-otherwise"
5179             # then they will get merged as follows:
5180             # $rcuddled_block_types = {
5181             # 'try' => {
5182             # 'catch' => 1,
5183             # 'finally' => 2,
5184             # 'otherwise' => 1,
5185             # },
5186             # };
5187             # This will allow either type of chain to be followed.
5188              
5189 560         1551 $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
5190 560         2034 my @cuddled_strings = split /\s+/, $cuddled_string;
5191              
5192 560         1821 $rcuddled_block_types = {};
5193              
5194             # process each dash-separated string...
5195 560         1605 my $string_count = 0;
5196 560         1961 foreach my $string (@cuddled_strings) {
5197 66 50       152 next unless $string;
5198 66         151 my @words = split /-+/, $string; # allow multiple dashes
5199              
5200             # we could look for and report possible errors here...
5201 66 50       152 next if ( @words <= 0 );
5202              
5203             # allow either '-continue' or *-continue' for arbitrary starting type
5204 66         118 my $start = '*';
5205              
5206             # a single word without dashes is a secondary block type
5207 66 50       168 if ( @words > 1 ) {
5208 0         0 $start = shift @words;
5209             }
5210              
5211             # always make an entry for the leading word. If none follow, this
5212             # will still prevent a wildcard from matching this word.
5213 66 100       161 if ( !defined( $rcuddled_block_types->{$start} ) ) {
5214 12         45 $rcuddled_block_types->{$start} = {};
5215             }
5216              
5217             # The count gives the original word order in case we ever want it.
5218 66         96 $string_count++;
5219 66         105 my $word_count = 0;
5220 66         122 foreach my $word (@words) {
5221 66 50       128 next unless $word;
5222 66 50       182 if ( $no_cuddle{$word} ) {
5223 0         0 Warn(
5224             "## Ignoring keyword '$word' in -cbl; does not seem right\n"
5225             );
5226 0         0 next;
5227             }
5228 66         103 $word_count++;
5229 66         189 $rcuddled_block_types->{$start}->{$word} =
5230             1; #"$string_count.$word_count";
5231              
5232             # git#9: Remove this word from the list of desired one-line
5233             # blocks
5234 66         211 $want_one_line_block{$word} = 0;
5235             }
5236             }
5237 560         1327 return;
5238             } ## end sub prepare_cuddled_block_types
5239             } ## end closure prepare_cuddled_block_types
5240              
5241             sub dump_cuddled_block_list {
5242 0     0 0 0 my ($fh) = @_;
5243              
5244             # ORIGINAL METHOD: Here is the format of the cuddled block type hash
5245             # which controls this routine
5246             # my $rcuddled_block_types = {
5247             # 'if' => {
5248             # 'else' => 1,
5249             # 'elsif' => 1
5250             # },
5251             # 'try' => {
5252             # 'catch' => 1,
5253             # 'finally' => 1
5254             # },
5255             # };
5256              
5257             # SIMPLIFIED METHOD: the simplified method uses a wildcard for
5258             # the starting block type and puts all cuddled blocks together:
5259             # my $rcuddled_block_types = {
5260             # '*' => {
5261             # 'else' => 1,
5262             # 'elsif' => 1
5263             # 'catch' => 1,
5264             # 'finally' => 1
5265             # },
5266             # };
5267              
5268             # Both methods work, but the simplified method has proven to be adequate and
5269             # easier to manage.
5270              
5271 0         0 my $cuddled_string = $rOpts->{'cuddled-block-list'};
5272 0 0       0 $cuddled_string = EMPTY_STRING unless $cuddled_string;
5273              
5274 0         0 my $flags = EMPTY_STRING;
5275 0 0       0 $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
5276 0         0 $flags .= " -cbl='$cuddled_string'";
5277              
5278 0 0       0 if ( !$rOpts->{'cuddled-else'} ) {
5279 0         0 $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
5280             }
5281              
5282 0         0 $fh->print(<<EOM);
5283             ------------------------------------------------------------------------
5284             Hash of cuddled block types prepared for a run with these parameters:
5285             $flags
5286             ------------------------------------------------------------------------
5287             EOM
5288              
5289 39     39   28906 use Data::Dumper;
  39         286759  
  39         121617  
5290 0         0 $fh->print( Dumper($rcuddled_block_types) );
5291              
5292 0         0 $fh->print(<<EOM);
5293             ------------------------------------------------------------------------
5294             EOM
5295 0         0 return;
5296             } ## end sub dump_cuddled_block_list
5297              
5298             sub make_static_block_comment_pattern {
5299              
5300             # create the pattern used to identify static block comments
5301 560     560 0 1833 $static_block_comment_pattern = '^\s*##';
5302              
5303             # allow the user to change it
5304 560 100       2733 if ( $rOpts->{'static-block-comment-prefix'} ) {
5305 1         3 my $prefix = $rOpts->{'static-block-comment-prefix'};
5306 1         5 $prefix =~ s/^\s*//;
5307 1         4 my $pattern = $prefix;
5308              
5309             # user may give leading caret to force matching left comments only
5310 1 50       6 if ( $prefix !~ /^\^#/ ) {
5311 1 50       5 if ( $prefix !~ /^#/ ) {
5312 0         0 Die(
5313             "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
5314             );
5315             }
5316 1         4 $pattern = '^\s*' . $prefix;
5317             }
5318 1 50       6 if ( bad_pattern($pattern) ) {
5319 0         0 Die(
5320             "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
5321             );
5322             }
5323 1         3 $static_block_comment_pattern = $pattern;
5324             }
5325 560         1063 return;
5326             } ## end sub make_static_block_comment_pattern
5327              
5328             sub make_format_skipping_pattern {
5329 1120     1120 0 3208 my ( $opt_name, $default ) = @_;
5330 1120         2728 my $param = $rOpts->{$opt_name};
5331 1120 100       3152 if ( !$param ) { $param = $default }
  1118         2219  
5332 1120         4520 $param =~ s/^\s*//;
5333 1120 50       5146 if ( $param !~ /^#/ ) {
5334 0         0 Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
5335             }
5336 1120         3345 my $pattern = '^' . $param . '\s';
5337 1120 50       3624 if ( bad_pattern($pattern) ) {
5338 0         0 Die(
5339             "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
5340             );
5341             }
5342 1120         3525 return $pattern;
5343             } ## end sub make_format_skipping_pattern
5344              
5345             sub make_non_indenting_brace_pattern {
5346              
5347             # Create the pattern used to identify static side comments.
5348             # Note that we are ending the pattern in a \s. This will allow
5349             # the pattern to be followed by a space and some text, or a newline.
5350             # The pattern is used in sub 'non_indenting_braces'
5351 560     560 0 1605 $non_indenting_brace_pattern = '^#<<<\s';
5352              
5353             # allow the user to change it
5354 560 100       2735 if ( $rOpts->{'non-indenting-brace-prefix'} ) {
5355 1         3 my $prefix = $rOpts->{'non-indenting-brace-prefix'};
5356 1         8 $prefix =~ s/^\s*//;
5357 1 50       8 if ( $prefix !~ /^#/ ) {
5358 0         0 Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
5359             }
5360 1         6 my $pattern = '^' . $prefix . '\s';
5361 1 50       5 if ( bad_pattern($pattern) ) {
5362 0         0 Die(
5363             "ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
5364             );
5365             }
5366 1         11 $non_indenting_brace_pattern = $pattern;
5367             }
5368 560         1213 return;
5369             } ## end sub make_non_indenting_brace_pattern
5370              
5371             sub make_closing_side_comment_list_pattern {
5372              
5373             # turn any input list into a regex for recognizing selected block types
5374 560     560 0 1588 $closing_side_comment_list_pattern = '^\w+';
5375 560 50 66     2403 if ( defined( $rOpts->{'closing-side-comment-list'} )
5376             && $rOpts->{'closing-side-comment-list'} )
5377             {
5378             $closing_side_comment_list_pattern =
5379 1         9 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
5380             }
5381 560         1099 return;
5382             } ## end sub make_closing_side_comment_list_pattern
5383              
5384             sub make_sub_matching_pattern {
5385              
5386             # Patterns for standardizing matches to block types for regular subs and
5387             # anonymous subs. Examples
5388             # 'sub process' is a named sub
5389             # 'sub ::m' is a named sub
5390             # 'sub' is an anonymous sub
5391             # 'sub:' is a label, not a sub
5392             # 'sub :' is a label, not a sub ( block type will be <sub:> )
5393             # sub'_ is a named sub ( block type will be <sub '_> )
5394             # 'substr' is a keyword
5395             # So note that named subs always have a space after 'sub'
5396 560     560 0 2312 $SUB_PATTERN = '^sub\s'; # match normal sub
5397 560         1608 $ASUB_PATTERN = '^sub$'; # match anonymous sub
5398 560         2386 %matches_ASUB = ( 'sub' => 1 );
5399              
5400             # Fix the patterns to include any sub aliases:
5401             # Note that any 'sub-alias-list' has been preprocessed to
5402             # be a trimmed, space-separated list which includes 'sub'
5403             # for example, it might be 'sub method fun'
5404 560         1380 my @words;
5405 560         1864 my $sub_alias_list = $rOpts->{'sub-alias-list'};
5406 560 100       1966 if ($sub_alias_list) {
5407 3         25 @words = split /\s+/, $sub_alias_list;
5408             }
5409             else {
5410 557         1962 push @words, 'sub';
5411             }
5412              
5413             # add 'method' unless use-feature='noclass' is set.
5414 560 50 33     3231 if ( !defined( $rOpts->{'use-feature'} )
5415             || $rOpts->{'use-feature'} !~ /\bnoclass\b/ )
5416             {
5417 560         1674 push @words, 'method';
5418             }
5419              
5420             # Note (see also RT #133130): These patterns are used by
5421             # sub make_block_pattern, which is used for making most patterns.
5422             # So this sub needs to be called before other pattern-making routines.
5423 560 50       2547 if ( @words > 1 ) {
5424              
5425             # Two ways are provided to match an anonymous sub:
5426             # $ASUB_PATTERN - with a regex (old method, slow)
5427             # %matches_ASUB - with a hash lookup (new method, faster)
5428              
5429 560         2292 @matches_ASUB{@words} = (1) x scalar(@words);
5430 560         2535 my $alias_list = join '|', keys %matches_ASUB;
5431 560         5114 $SUB_PATTERN =~ s/sub/\($alias_list\)/;
5432 560         3546 $ASUB_PATTERN =~ s/sub/\($alias_list\)/;
5433             }
5434 560         1634 return;
5435             } ## end sub make_sub_matching_pattern
5436              
5437             sub make_bl_pattern {
5438              
5439             # Set defaults lists to retain historical default behavior for -bl:
5440 560     560 0 1521 my $bl_list_string = '*';
5441 560         1444 my $bl_exclusion_list_string = 'sort map grep eval asub';
5442              
5443 560 50 66     2269 if ( defined( $rOpts->{'brace-left-list'} )
5444             && $rOpts->{'brace-left-list'} )
5445             {
5446 1         4 $bl_list_string = $rOpts->{'brace-left-list'};
5447             }
5448 560 100       2114 if ( $bl_list_string =~ /\bsub\b/ ) {
5449             $rOpts->{'opening-sub-brace-on-new-line'} ||=
5450 1   33     8 $rOpts->{'opening-brace-on-new-line'};
5451             }
5452 560 100       2180 if ( $bl_list_string =~ /\basub\b/ ) {
5453             $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
5454 1   33     7 $rOpts->{'opening-brace-on-new-line'};
5455             }
5456              
5457 560         1654 $bl_pattern = make_block_pattern( '-bll', $bl_list_string );
5458              
5459             # for -bl, a list with '*' turns on -sbl and -asbl
5460 560 100       3841 if ( $bl_pattern =~ /\.\*/ ) {
5461             $rOpts->{'opening-sub-brace-on-new-line'} ||=
5462 559   100     4506 $rOpts->{'opening-brace-on-new-line'};
5463             $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
5464 559   66     3695 $rOpts->{'opening-anonymous-brace-on-new-line'};
5465             }
5466              
5467 560 50 66     2362 if ( defined( $rOpts->{'brace-left-exclusion-list'} )
5468             && $rOpts->{'brace-left-exclusion-list'} )
5469             {
5470 1         6 $bl_exclusion_list_string = $rOpts->{'brace-left-exclusion-list'};
5471 1 50       6 if ( $bl_exclusion_list_string =~ /\bsub\b/ ) {
5472 0         0 $rOpts->{'opening-sub-brace-on-new-line'} = 0;
5473             }
5474 1 50       6 if ( $bl_exclusion_list_string =~ /\basub\b/ ) {
5475 0         0 $rOpts->{'opening-anonymous-sub-brace-on-new-line'} = 0;
5476             }
5477             }
5478              
5479             $bl_exclusion_pattern =
5480 560         1897 make_block_pattern( '-blxl', $bl_exclusion_list_string );
5481 560         1977 return;
5482             } ## end sub make_bl_pattern
5483              
5484             sub make_bli_pattern {
5485              
5486             # default list of block types for which -bli would apply
5487 560     560 0 1396 my $bli_list_string = 'if else elsif unless while for foreach do : sub';
5488 560         1641 my $bli_exclusion_list_string = SPACE;
5489              
5490 560 50 66     2392 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
5491             && $rOpts->{'brace-left-and-indent-list'} )
5492             {
5493 3         11 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
5494             }
5495              
5496 560         2615 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
5497              
5498 560 50 66     2916 if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} )
5499             && $rOpts->{'brace-left-and-indent-exclusion-list'} )
5500             {
5501             $bli_exclusion_list_string =
5502 1         5 $rOpts->{'brace-left-and-indent-exclusion-list'};
5503             }
5504             $bli_exclusion_pattern =
5505 560         1850 make_block_pattern( '-blixl', $bli_exclusion_list_string );
5506 560         1896 return;
5507             } ## end sub make_bli_pattern
5508              
5509             sub make_keyword_group_list_pattern {
5510              
5511             # turn any input list into a regex for recognizing selected block types.
5512             # Here are the defaults:
5513 560     560 0 1480 $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
5514 560         1509 $keyword_group_list_comment_pattern = EMPTY_STRING;
5515 560 0 33     2307 if ( defined( $rOpts->{'keyword-group-blanks-list'} )
5516             && $rOpts->{'keyword-group-blanks-list'} )
5517             {
5518 0         0 my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
5519 0         0 my @keyword_list;
5520             my @comment_list;
5521 0         0 foreach my $word (@words) {
5522 0 0 0     0 if ( $word eq 'BC' || $word eq 'SBC' ) {
5523 0         0 push @comment_list, $word;
5524 0 0       0 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
  0         0  
5525             }
5526             else {
5527 0         0 push @keyword_list, $word;
5528             }
5529             }
5530             $keyword_group_list_pattern =
5531 0         0 make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
5532 0         0 $keyword_group_list_comment_pattern =
5533             make_block_pattern( '-kgbl', join( SPACE, @comment_list ) );
5534             }
5535 560         1172 return;
5536             } ## end sub make_keyword_group_list_pattern
5537              
5538             sub make_block_brace_vertical_tightness_pattern {
5539              
5540             # turn any input list into a regex for recognizing selected block types
5541 560     560 0 1661 $block_brace_vertical_tightness_pattern =
5542             '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
5543 560 0 33     2430 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
5544             && $rOpts->{'block-brace-vertical-tightness-list'} )
5545             {
5546             $block_brace_vertical_tightness_pattern =
5547             make_block_pattern( '-bbvtl',
5548 0         0 $rOpts->{'block-brace-vertical-tightness-list'} );
5549             }
5550 560         1134 return;
5551             } ## end sub make_block_brace_vertical_tightness_pattern
5552              
5553             sub make_blank_line_pattern {
5554              
5555 560     560 0 1689 $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
5556 560         1411 my $key = 'blank-lines-before-closing-block-list';
5557 560 50 66     2242 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
5558             $blank_lines_before_closing_block_pattern =
5559 1         5 make_block_pattern( '-blbcl', $rOpts->{$key} );
5560             }
5561              
5562 560         1379 $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
5563 560         1306 $key = 'blank-lines-after-opening-block-list';
5564 560 50 66     2323 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
5565             $blank_lines_after_opening_block_pattern =
5566 1         5 make_block_pattern( '-blaol', $rOpts->{$key} );
5567             }
5568 560         1179 return;
5569             } ## end sub make_blank_line_pattern
5570              
5571             sub make_block_pattern {
5572              
5573             # given a string of block-type keywords, return a regex to match them
5574             # The only tricky part is that labels are indicated with a single ':'
5575             # and the 'sub' token text may have additional text after it (name of
5576             # sub).
5577             #
5578             # Example:
5579             #
5580             # input string: "if else elsif unless while for foreach do : sub";
5581             # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
5582              
5583             # Minor Update:
5584             #
5585             # To distinguish between anonymous subs and named subs, use 'sub' to
5586             # indicate a named sub, and 'asub' to indicate an anonymous sub
5587              
5588 2243     2243 0 5396 my ( $abbrev, $string ) = @_;
5589 2243         5052 my @list = split_words($string);
5590 2243         4619 my @words = ();
5591 2243         3595 my %seen;
5592 2243         5020 for my $i (@list) {
5593 8934 100       17162 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
  563         1868  
  563         2845  
5594 8371 50       16082 next if $seen{$i};
5595 8371         16036 $seen{$i} = 1;
5596 8371 100       34997 if ( $i eq 'sub' ) {
    100          
    50          
    50          
    100          
    50          
5597             }
5598             elsif ( $i eq 'asub' ) {
5599             }
5600             elsif ( $i eq ';' ) {
5601 0         0 push @words, ';';
5602             }
5603             elsif ( $i eq '{' ) {
5604 0         0 push @words, '\{';
5605             }
5606             elsif ( $i eq ':' ) {
5607 557         3054 push @words, '\w+:';
5608             }
5609             elsif ( $i =~ /^\w/ ) {
5610 6695         12162 push @words, $i;
5611             }
5612             else {
5613 0         0 Warn("unrecognized block type $i after $abbrev, ignoring\n");
5614             }
5615             }
5616              
5617             # Fix 2 for c091, prevent the pattern from matching an empty string
5618             # '1 ' is an impossible block name.
5619 1680 100       5510 if ( !@words ) { push @words, "1 " }
  561         2370  
5620              
5621 1680         6505 my $pattern = '(' . join( '|', @words ) . ')$';
5622 1680         3244 my $sub_patterns = EMPTY_STRING;
5623 1680 100       4407 if ( $seen{'sub'} ) {
5624 559         1742 $sub_patterns .= '|' . $SUB_PATTERN;
5625             }
5626 1680 100       4574 if ( $seen{'asub'} ) {
5627 560         2378 $sub_patterns .= '|' . $ASUB_PATTERN;
5628             }
5629 1680 100       3970 if ($sub_patterns) {
5630 1118         3171 $pattern = '(' . $pattern . $sub_patterns . ')';
5631             }
5632 1680         4159 $pattern = '^' . $pattern;
5633 1680         7054 return $pattern;
5634             } ## end sub make_block_pattern
5635              
5636             sub make_static_side_comment_pattern {
5637              
5638             # create the pattern used to identify static side comments
5639 560     560 0 1575 $static_side_comment_pattern = '^##';
5640              
5641             # allow the user to change it
5642 560 50       2520 if ( $rOpts->{'static-side-comment-prefix'} ) {
5643 0         0 my $prefix = $rOpts->{'static-side-comment-prefix'};
5644 0         0 $prefix =~ s/^\s*//;
5645 0         0 my $pattern = '^' . $prefix;
5646 0 0       0 if ( bad_pattern($pattern) ) {
5647 0         0 Die(
5648             "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
5649             );
5650             }
5651 0         0 $static_side_comment_pattern = $pattern;
5652             }
5653 560         1263 return;
5654             } ## end sub make_static_side_comment_pattern
5655              
5656             sub make_closing_side_comment_prefix {
5657              
5658             # Be sure we have a valid closing side comment prefix
5659 560     560 0 1701 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
5660 560         1277 my $csc_prefix_pattern;
5661 560 100       2266 if ( !defined($csc_prefix) ) {
5662 558         1416 $csc_prefix = '## end';
5663 558         1437 $csc_prefix_pattern = '^##\s+end';
5664             }
5665             else {
5666 2         6 my $test_csc_prefix = $csc_prefix;
5667 2 50       17 if ( $test_csc_prefix !~ /^#/ ) {
5668 0         0 $test_csc_prefix = '#' . $test_csc_prefix;
5669             }
5670              
5671             # make a regex to recognize the prefix
5672 2         6 my $test_csc_prefix_pattern = $test_csc_prefix;
5673              
5674             # escape any special characters
5675 2         8 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
5676              
5677 2         8 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
5678              
5679             # allow exact number of intermediate spaces to vary
5680 2         13 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
5681              
5682             # make sure we have a good pattern
5683             # if we fail this we probably have an error in escaping
5684             # characters.
5685              
5686 2 50       11 if ( bad_pattern($test_csc_prefix_pattern) ) {
5687              
5688             # shouldn't happen..must have screwed up escaping, above
5689 0         0 if (DEVEL_MODE) {
5690             Fault(<<EOM);
5691             Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'
5692             EOM
5693             }
5694              
5695             # just warn and keep going with defaults
5696             Warn(
5697 0         0 "Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
5698             );
5699 0         0 Warn("Please consider using a simpler -cscp prefix\n");
5700 0         0 Warn("Using default -cscp instead; please check output\n");
5701             }
5702             else {
5703 2         9 $csc_prefix = $test_csc_prefix;
5704 2         6 $csc_prefix_pattern = $test_csc_prefix_pattern;
5705             }
5706             }
5707 560         1698 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
5708 560         1567 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
5709 560         1554 return;
5710             } ## end sub make_closing_side_comment_prefix
5711              
5712             ##################################################
5713             # CODE SECTION 4: receive lines from the tokenizer
5714             ##################################################
5715              
5716             { ## begin closure write_line
5717              
5718             my $nesting_depth;
5719              
5720             # Variables used by sub check_sequence_numbers:
5721             my $last_seqno;
5722             my %saw_opening_seqno;
5723             my %saw_closing_seqno;
5724             my $initial_seqno;
5725              
5726             sub initialize_write_line {
5727              
5728 561     561 0 1446 $nesting_depth = undef;
5729              
5730 561         1378 $last_seqno = SEQ_ROOT;
5731 561         1305 %saw_opening_seqno = ();
5732 561         1366 %saw_closing_seqno = ();
5733              
5734 561         1111 return;
5735             } ## end sub initialize_write_line
5736              
5737             sub check_sequence_numbers {
5738              
5739             # Routine for checking sequence numbers. This only needs to be
5740             # done occasionally in DEVEL_MODE to be sure everything is working
5741             # correctly.
5742 0     0 0 0 my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_;
5743 0         0 my $jmax = @{$rtokens} - 1;
  0         0  
5744 0 0       0 return if ( $jmax < 0 );
5745 0         0 foreach my $j ( 0 .. $jmax ) {
5746 0         0 my $seqno = $rtype_sequence->[$j];
5747 0         0 my $token = $rtokens->[$j];
5748 0         0 my $type = $rtoken_type->[$j];
5749 0 0       0 $seqno = EMPTY_STRING unless ( defined($seqno) );
5750 0         0 my $err_msg =
5751             "Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n";
5752              
5753 0 0       0 if ( !$seqno ) {
5754              
5755             # Sequence numbers are generated for opening tokens, so every opening
5756             # token should be sequenced. Closing tokens will be unsequenced
5757             # if they do not have a matching opening token.
5758 0 0 0     0 if ( $is_opening_sequence_token{$token}
      0        
5759             && $type ne 'q'
5760             && $type ne 'Q' )
5761             {
5762 0         0 Fault(
5763             <<EOM
5764             $err_msg Unexpected opening token without sequence number
5765             EOM
5766             );
5767             }
5768             }
5769             else {
5770              
5771             # Save starting seqno to identify sequence method:
5772             # New method starts with 2 and has continuous numbering
5773             # Old method starts with >2 and may have gaps
5774 0 0       0 if ( !defined($initial_seqno) ) { $initial_seqno = $seqno }
  0         0  
5775              
5776 0 0       0 if ( $is_opening_sequence_token{$token} ) {
    0          
5777              
5778             # New method should have continuous numbering
5779 0 0 0     0 if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) {
5780 0         0 Fault(
5781             <<EOM
5782             $err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno
5783             EOM
5784             );
5785             }
5786 0         0 $last_seqno = $seqno;
5787              
5788             # Numbers must be unique
5789 0 0       0 if ( $saw_opening_seqno{$seqno} ) {
5790 0         0 my $lno = $saw_opening_seqno{$seqno};
5791 0         0 Fault(
5792             <<EOM
5793             $err_msg Already saw an opening tokens at line $lno with this sequence number
5794             EOM
5795             );
5796             }
5797 0         0 $saw_opening_seqno{$seqno} = $input_line_no;
5798             }
5799              
5800             # only one closing item per seqno
5801             elsif ( $is_closing_sequence_token{$token} ) {
5802 0 0       0 if ( $saw_closing_seqno{$seqno} ) {
5803 0         0 my $lno = $saw_closing_seqno{$seqno};
5804 0         0 Fault(
5805             <<EOM
5806             $err_msg Already saw a closing token with this seqno at line $lno
5807             EOM
5808             );
5809             }
5810 0         0 $saw_closing_seqno{$seqno} = $input_line_no;
5811              
5812             # Every closing seqno must have an opening seqno
5813 0 0       0 if ( !$saw_opening_seqno{$seqno} ) {
5814 0         0 Fault(
5815             <<EOM
5816             $err_msg Saw a closing token but no opening token with this seqno
5817             EOM
5818             );
5819             }
5820             }
5821              
5822             # Sequenced items must be opening or closing
5823             else {
5824 0         0 Fault(
5825             <<EOM
5826             $err_msg Unexpected token type with a sequence number
5827             EOM
5828             );
5829             }
5830             }
5831             }
5832 0         0 return;
5833             } ## end sub check_sequence_numbers
5834              
5835             sub store_block_type {
5836 972     972 0 2610 my ( $self, $block_type, $seqno ) = @_;
5837              
5838 972 50       2549 return if ( !$block_type );
5839              
5840             # Save the type of a block in a hash using sequence number as key
5841 972         2550 $self->[_rblock_type_of_seqno_]->{$seqno} = $block_type;
5842              
5843             # and save named subs and anynymous subs in separate hashes so that
5844             # we only have to do the pattern tests once.
5845 972 100       9792 if ( $matches_ASUB{$block_type} ) {
    100          
5846 173         499 $self->[_ris_asub_block_]->{$seqno} = 1;
5847             }
5848             elsif ( $block_type =~ /$SUB_PATTERN/ ) {
5849 118         813 $self->[_ris_sub_block_]->{$seqno} = 1;
5850             }
5851             else {
5852             ## ok - not a sub
5853             }
5854 972         2075 return;
5855             } ## end sub store_block_type
5856              
5857             # hash keys which are common to old and new line_of_tokens
5858             my @common_keys;
5859              
5860             BEGIN {
5861 39     39   67415 @common_keys = qw(
5862             _curly_brace_depth
5863             _ending_in_quote
5864             _guessed_indentation_level
5865             _line_number
5866             _line_text
5867             _line_type
5868             _paren_depth
5869             _quote_character
5870             _square_bracket_depth
5871             _starting_in_quote
5872             );
5873             }
5874              
5875             sub write_line {
5876              
5877             # This routine receives lines one-by-one from the tokenizer and stores
5878             # them in a format suitable for further processing. After the last
5879             # line has been sent, the tokenizer will call sub 'finish_formatting'
5880             # to do the actual formatting.
5881              
5882 7666     7666 0 15082 my ( $self, $line_of_tokens_old ) = @_;
5883              
5884 7666         13167 my $rLL = $self->[_rLL_];
5885 7666         14518 my $line_of_tokens = {};
5886              
5887             # copy common hash key values
5888 7666         13169 @{$line_of_tokens}{@common_keys} = @{$line_of_tokens_old}{@common_keys};
  7666         50204  
  7666         26260  
5889              
5890 7666         15960 my $line_type = $line_of_tokens_old->{_line_type};
5891 7666         10997 my $tee_output;
5892              
5893 7666         12506 my $Klimit = $self->[_Klimit_];
5894 7666         11128 my $Kfirst;
5895              
5896             # Handle line of non-code
5897 7666 100       16595 if ( $line_type ne 'CODE' ) {
5898 173   66     1045 $tee_output ||= $rOpts_tee_pod
      66        
5899             && substr( $line_type, 0, 3 ) eq 'POD';
5900              
5901 173         371 $line_of_tokens->{_level_0} = 0;
5902 173         336 $line_of_tokens->{_ci_level_0} = 0;
5903 173         331 $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
5904 173         351 $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
5905 173         360 $line_of_tokens->{_ended_in_blank_token} = undef;
5906              
5907             }
5908              
5909             # Handle line of code
5910             else {
5911              
5912 7493         11923 my $rtokens = $line_of_tokens_old->{_rtokens};
5913 7493         10587 my $jmax = @{$rtokens} - 1;
  7493         13019  
5914              
5915 7493 100       15411 if ( $jmax >= 0 ) {
5916              
5917 6687 100       14279 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
5918              
5919             #----------------------------
5920             # get the tokens on this line
5921             #----------------------------
5922 6687         19950 $self->write_line_inner_loop( $line_of_tokens_old,
5923             $line_of_tokens );
5924              
5925             # update Klimit for added tokens
5926 6687         9062 $Klimit = @{$rLL} - 1;
  6687         11750  
5927              
5928             } ## end if ( $jmax >= 0 )
5929             else {
5930              
5931             # blank line
5932 806         2212 $line_of_tokens->{_level_0} = 0;
5933 806         1922 $line_of_tokens->{_ci_level_0} = 0;
5934 806         1651 $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
5935 806         1770 $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
5936 806         1539 $line_of_tokens->{_ended_in_blank_token} = undef;
5937              
5938             }
5939              
5940 7493   66     39013 $tee_output ||=
      66        
5941             $rOpts_tee_block_comments
5942             && $jmax == 0
5943             && $rLL->[$Kfirst]->[_TYPE_] eq '#';
5944              
5945 7493   100     28180 $tee_output ||=
      100        
5946             $rOpts_tee_side_comments
5947             && defined($Kfirst)
5948             && $Klimit > $Kfirst
5949             && $rLL->[$Klimit]->[_TYPE_] eq '#';
5950              
5951             } ## end if ( $line_type eq 'CODE')
5952              
5953             # Finish storing line variables
5954 7666         29286 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
5955 7666         13578 $self->[_Klimit_] = $Klimit;
5956 7666         12560 my $rlines = $self->[_rlines_];
5957 7666         11085 push @{$rlines}, $line_of_tokens;
  7666         14283  
5958              
5959 7666 100       16038 if ($tee_output) {
5960 5         7 my $fh_tee = $self->[_fh_tee_];
5961 5         10 my $line_text = $line_of_tokens_old->{_line_text};
5962 5 50       22 $fh_tee->print($line_text) if ($fh_tee);
5963             }
5964              
5965 7666         67677 return;
5966             } ## end sub write_line
5967              
5968             sub write_line_inner_loop {
5969 6687     6687 0 12550 my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_;
5970              
5971             #---------------------------------------------------------------------
5972             # Copy the tokens on one line received from the tokenizer to their new
5973             # storage locations.
5974             #---------------------------------------------------------------------
5975              
5976             # Input parameters:
5977             # $line_of_tokens_old = line received from tokenizer
5978             # $line_of_tokens = line of tokens being formed for formatter
5979              
5980 6687         10574 my $rtokens = $line_of_tokens_old->{_rtokens};
5981 6687         9203 my $jmax = @{$rtokens} - 1;
  6687         10591  
5982 6687 50       15396 if ( $jmax < 0 ) {
5983              
5984             # safety check; shouldn't happen
5985 0         0 DEVEL_MODE && Fault("unexpected jmax=$jmax\n");
5986 0         0 return;
5987             }
5988              
5989 6687         11083 my $line_index = $line_of_tokens_old->{_line_number} - 1;
5990 6687         10271 my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
5991 6687         10105 my $rblock_type = $line_of_tokens_old->{_rblock_type};
5992 6687         10740 my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
5993 6687         9995 my $rlevels = $line_of_tokens_old->{_rlevels};
5994              
5995 6687         9910 my $rLL = $self->[_rLL_];
5996 6687         10453 my $rSS = $self->[_rSS_];
5997 6687         10306 my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
5998              
5999 6687         9214 DEVEL_MODE
6000             && check_sequence_numbers( $rtokens, $rtoken_type,
6001             $rtype_sequence, $line_index + 1 );
6002              
6003             # Find the starting nesting depth ...
6004             # It must be the value of variable 'level' of the first token
6005             # because the nesting depth is used as a token tag in the
6006             # vertical aligner and is compared to actual levels.
6007             # So vertical alignment problems will occur with any other
6008             # starting value.
6009 6687 100       14078 if ( !defined($nesting_depth) ) {
6010 557         1476 $nesting_depth = $rlevels->[0];
6011 557 50       1915 $nesting_depth = 0 if ( $nesting_depth < 0 );
6012 557         1691 $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
6013             }
6014              
6015 6687         10194 my $j = -1;
6016              
6017             # NOTE: coding efficiency is critical in this loop over all tokens
6018 6687         9724 foreach my $token ( @{$rtokens} ) {
  6687         12671  
6019              
6020             # NOTE: Do not clip the 'level' variable yet if it is negative. We
6021             # will do that later, in sub 'store_token_to_go'. The reason is
6022             # that in files with level errors, the logic in 'weld_cuddled_else'
6023             # uses a stack logic that will give bad welds if we clip levels
6024             # here. (A recent update will probably not even allow negative
6025             # levels to arrive here any longer).
6026              
6027 51444         72415 my $seqno = EMPTY_STRING;
6028              
6029             # Handle tokens with sequence numbers ...
6030             # note the ++ increment hidden here for efficiency
6031 51444 100       94179 if ( $rtype_sequence->[ ++$j ] ) {
6032 9150         15015 $seqno = $rtype_sequence->[$j];
6033 9150         13031 my $sign = 1;
6034 9150 100       26561 if ( $is_opening_token{$token} ) {
    100          
    100          
    50          
6035 4388         6585 $self->[_K_opening_container_]->{$seqno} = @{$rLL};
  4388         15742  
6036 4388         9408 $rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
6037 4388         6734 $nesting_depth++;
6038              
6039             # Save a sequenced block type at its opening token.
6040             # Note that unsequenced block types can occur in
6041             # unbalanced code with errors but are ignored here.
6042 4388 100       12369 $self->store_block_type( $rblock_type->[$j], $seqno )
6043             if ( $rblock_type->[$j] );
6044             }
6045             elsif ( $is_closing_token{$token} ) {
6046              
6047             # The opening depth should always be defined, and
6048             # it should equal $nesting_depth-1. To protect
6049             # against unforseen error conditions, however, we
6050             # will check this and fix things if necessary. For
6051             # a test case see issue c055.
6052 4388         8307 my $opening_depth = $rdepth_of_opening_seqno->[$seqno];
6053 4388 50       9507 if ( !defined($opening_depth) ) {
6054 0         0 $opening_depth = $nesting_depth - 1;
6055 0 0       0 $opening_depth = 0 if ( $opening_depth < 0 );
6056 0         0 $rdepth_of_opening_seqno->[$seqno] = $opening_depth;
6057              
6058             # This is not fatal but should not happen. The
6059             # tokenizer generates sequence numbers
6060             # incrementally upon encountering each new
6061             # opening token, so every positive sequence
6062             # number should correspond to an opening token.
6063 0         0 DEVEL_MODE && Fault(<<EOM);
6064             No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
6065             EOM
6066             }
6067 4388         6065 $self->[_K_closing_container_]->{$seqno} = @{$rLL};
  4388         10297  
6068 4388         7702 $nesting_depth = $opening_depth;
6069 4388         6931 $sign = -1;
6070             }
6071             elsif ( $token eq '?' ) {
6072 187         412 $self->[_K_opening_ternary_]->{$seqno} = @{$rLL};
  187         725  
6073             }
6074             elsif ( $token eq ':' ) {
6075 187         488 $sign = -1;
6076 187         364 $self->[_K_closing_ternary_]->{$seqno} = @{$rLL};
  187         551  
6077             }
6078              
6079             # The only sequenced types output by the tokenizer are
6080             # the opening & closing containers and the ternary
6081             # types. So we would only get here if the tokenizer has
6082             # been changed to mark some other tokens with sequence
6083             # numbers, or if an error has been introduced in a
6084             # hash such as %is_opening_container
6085             else {
6086 0         0 DEVEL_MODE && Fault(<<EOM);
6087             Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
6088             Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
6089             EOM
6090             }
6091              
6092 9150 100       16521 if ( $sign > 0 ) {
6093 4575         6246 $self->[_Iss_opening_]->[$seqno] = @{$rSS};
  4575         9182  
6094              
6095             # For efficiency, we find the maximum level of
6096             # opening tokens of any type. The actual maximum
6097             # level will be that of their contents which is 1
6098             # greater. That will be fixed in sub
6099             # 'finish_formatting'.
6100 4575         8074 my $level = $rlevels->[$j];
6101 4575 100       10907 if ( $level > $self->[_maximum_level_] ) {
6102 842         2037 $self->[_maximum_level_] = $level;
6103 842         2011 $self->[_maximum_level_at_line_] = $line_index + 1;
6104             }
6105             }
6106 4575         6066 else { $self->[_Iss_closing_]->[$seqno] = @{$rSS} }
  4575         8856  
6107 9150         12573 push @{$rSS}, $sign * $seqno;
  9150         16881  
6108              
6109             }
6110              
6111             # Here we are storing the first five variables per token. The
6112             # remaining token variables will be added later as follows:
6113             # _TOKEN_LENGTH_ is added by sub store_token
6114             # _CUMULATIVE_LENGTH_ is added by sub store_token
6115             # _KNEXT_SEQ_ITEM_ is added by sub respace_post_loop_ops
6116             # _CI_LEVEL_ is added by sub set_ci
6117             # So all token variables are available for use after sub set_ci.
6118              
6119 51444         64939 my @tokary;
6120              
6121 51444         85858 $tokary[_TOKEN_] = $token;
6122 51444         101583 $tokary[_TYPE_] = $rtoken_type->[$j];
6123 51444         82324 $tokary[_TYPE_SEQUENCE_] = $seqno;
6124 51444         78078 $tokary[_LEVEL_] = $rlevels->[$j];
6125 51444         71119 $tokary[_LINE_INDEX_] = $line_index;
6126              
6127 51444         64005 push @{$rLL}, \@tokary;
  51444         116073  
6128              
6129             } ## end token loop
6130              
6131             # Need to remember if we can trim the input line
6132 6687         19530 $line_of_tokens->{_ended_in_blank_token} = $rtoken_type->[$jmax] eq 'b';
6133              
6134             # Values needed by Logger
6135 6687         14592 $line_of_tokens->{_level_0} = $rlevels->[0];
6136 6687         12627 $line_of_tokens->{_ci_level_0} = 0; # sub set_ci will fix this
6137             $line_of_tokens->{_nesting_blocks_0} =
6138 6687         13864 $line_of_tokens_old->{_nesting_blocks_0};
6139             $line_of_tokens->{_nesting_tokens_0} =
6140 6687         13916 $line_of_tokens_old->{_nesting_tokens_0};
6141              
6142 6687         14114 return;
6143              
6144             } ## end sub write_line_inner_loop
6145              
6146             } ## end closure write_line
6147              
6148             #############################################
6149             # CODE SECTION 5: Pre-process the entire file
6150             #############################################
6151              
6152             sub finish_formatting {
6153              
6154 561     561 0 2001 my ( $self, $severe_error ) = @_;
6155              
6156             # The file has been tokenized and is ready to be formatted.
6157             # All of the relevant data is stored in $self, ready to go.
6158              
6159             # Returns:
6160             # true if input file was copied verbatim due to errors
6161             # false otherwise
6162              
6163             # Some of the code in sub break_lists is not robust enough to process code
6164             # with arbitrary brace errors. The simplest fix is to just return the file
6165             # verbatim if there are brace errors. This fixes issue c160.
6166 561   33     5254 $severe_error ||= get_saw_brace_error();
6167              
6168             # Check the maximum level. If it is extremely large we will give up and
6169             # output the file verbatim. Note that the actual maximum level is 1
6170             # greater than the saved value, so we fix that here.
6171 561         1780 $self->[_maximum_level_] += 1;
6172 561         1515 my $maximum_level = $self->[_maximum_level_];
6173 561         1989 my $maximum_table_index = $#maximum_line_length_at_level;
6174 561 50 33     3590 if ( !$severe_error && $maximum_level >= $maximum_table_index ) {
6175 0   0     0 $severe_error ||= 1;
6176 0         0 Warn(<<EOM);
6177             The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
6178             Something may be wrong; formatting will be skipped.
6179             EOM
6180             }
6181              
6182             # Dump any requested block summary data
6183 561 50       2323 if ( $rOpts->{'dump-block-summary'} ) {
6184 0 0       0 if ($severe_error) { Exit(1) }
  0         0  
6185 0         0 $self->dump_block_summary();
6186 0         0 Exit(0);
6187             }
6188              
6189             # output file verbatim if severe error or no formatting requested
6190 561 50 33     5157 if ( $severe_error || $rOpts->{notidy} ) {
6191 0         0 $self->dump_verbatim();
6192 0         0 $self->wrapup($severe_error);
6193 0         0 return 1;
6194             }
6195              
6196             # Update the 'save_logfile' flag based to include any tokenization errors.
6197             # We can save time by skipping logfile calls if it is not going to be saved.
6198 561         2953 my $logger_object = $self->[_logger_object_];
6199 561 100       2267 if ($logger_object) {
6200 559         2682 my $save_logfile = $logger_object->get_save_logfile();
6201 559         1609 $self->[_save_logfile_] = $save_logfile;
6202 559         1761 my $file_writer_object = $self->[_file_writer_object_];
6203 559         3368 $file_writer_object->set_save_logfile($save_logfile);
6204             }
6205              
6206             {
6207 561         1153 my $rix_side_comments = $self->set_CODE_type();
  561         3843  
6208              
6209 561         4189 $self->find_non_indenting_braces($rix_side_comments);
6210              
6211             # Handle any requested side comment deletions. It is easier to get
6212             # this done here rather than farther down the pipeline because IO
6213             # lines take a different route, and because lines with deleted HSC
6214             # become BL lines. We have already handled any tee requests in sub
6215             # getline, so it is safe to delete side comments now.
6216 561 100 100     3474 $self->delete_side_comments($rix_side_comments)
6217             if ( $rOpts_delete_side_comments
6218             || $rOpts_delete_closing_side_comments );
6219             }
6220              
6221             # Verify that the line hash does not have any unknown keys.
6222 561         1214 $self->check_line_hashes() if (DEVEL_MODE);
6223              
6224             {
6225             # Make a pass through all tokens, adding or deleting any whitespace as
6226             # required. Also make any other changes, such as adding semicolons.
6227             # All token changes must be made here so that the token data structure
6228             # remains fixed for the rest of this iteration.
6229 561         1098 my ( $error, $rqw_lines ) = $self->respace_tokens();
  561         3486  
6230 561 50       3778 if ($error) {
6231 0         0 $self->dump_verbatim();
6232 0         0 $self->wrapup();
6233 0         0 return 1;
6234             }
6235              
6236             # sub 'set_ci' is called after sub respace to allow use of type counts
6237             # Token variable _CI_LEVEL_ is only defined after this call
6238 561         3493 $self->set_ci();
6239              
6240 561         4684 $self->find_multiline_qw($rqw_lines);
6241             }
6242              
6243 561         3688 $self->examine_vertical_tightness_flags();
6244              
6245 561         3588 $self->set_excluded_lp_containers();
6246              
6247 561         3074 $self->keep_old_line_breaks();
6248              
6249             # Implement any welding needed for the -wn or -cb options
6250 561         2776 $self->weld_containers();
6251              
6252             # Collect info needed to implement the -xlp style
6253 561 100 100     2501 $self->xlp_collapsed_lengths()
6254             if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
6255              
6256             # Locate small nested blocks which should not be broken
6257 561         3548 $self->mark_short_nested_blocks();
6258              
6259 561         2838 $self->special_indentation_adjustments();
6260              
6261             # Verify that the main token array looks OK. If this ever causes a fault
6262             # then place similar checks before the sub calls above to localize the
6263             # problem.
6264 561         945 $self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE);
6265              
6266             # Finishes formatting and write the result to the line sink.
6267             # Eventually this call should just change the 'rlines' data according to the
6268             # new line breaks and then return so that we can do an internal iteration
6269             # before continuing with the next stages of formatting.
6270 561         3244 $self->process_all_lines();
6271              
6272             # A final routine to tie up any loose ends
6273 561         4774 $self->wrapup();
6274 561         2527 return;
6275             } ## end sub finish_formatting
6276              
6277             my %is_loop_type;
6278              
6279             BEGIN {
6280 39     39   260 my @q = qw( for foreach while do until );
6281 39         27075 @{is_loop_type}{@q} = (1) x scalar(@q);
6282             }
6283              
6284             sub find_level_info {
6285              
6286             # Find level ranges and total variations of all code blocks in this file.
6287              
6288             # Returns:
6289             # ref to hash with block info, with seqno as key (see below)
6290              
6291 0     0 0 0 my ($self) = @_;
6292              
6293             # The array _rSS_ has the complete container tree for this file.
6294 0         0 my $rSS = $self->[_rSS_];
6295              
6296             # We will be ignoring everything except code block containers
6297 0         0 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6298              
6299 0         0 my @stack;
6300             my %level_info;
6301              
6302             # TREE_LOOP:
6303 0         0 foreach my $sseq ( @{$rSS} ) {
  0         0  
6304 0         0 my $stack_depth = @stack;
6305 0 0       0 my $seq_next = $sseq > 0 ? $sseq : -$sseq;
6306              
6307 0 0       0 next if ( !$rblock_type_of_seqno->{$seq_next} );
6308 0 0       0 if ( $sseq > 0 ) {
6309              
6310             # STACK_LOOP:
6311 0         0 my $item;
6312 0         0 foreach my $seq (@stack) {
6313 0         0 $item = $level_info{$seq};
6314 0 0       0 if ( $item->{maximum_depth} < $stack_depth ) {
6315 0         0 $item->{maximum_depth} = $stack_depth;
6316             }
6317 0         0 $item->{block_count}++;
6318             } ## end STACK LOOP
6319              
6320 0         0 push @stack, $seq_next;
6321 0         0 my $block_type = $rblock_type_of_seqno->{$seq_next};
6322              
6323             # If this block is a loop nested within a loop, then we
6324             # will mark it as an 'inner_loop'. This is a useful
6325             # complexity measure.
6326 0         0 my $is_inner_loop = 0;
6327 0 0 0     0 if ( $is_loop_type{$block_type} && defined($item) ) {
6328 0         0 $is_inner_loop = $is_loop_type{ $item->{block_type} };
6329             }
6330              
6331 0         0 $level_info{$seq_next} = {
6332             starting_depth => $stack_depth,
6333             maximum_depth => $stack_depth,
6334             block_count => 1,
6335             block_type => $block_type,
6336             is_inner_loop => $is_inner_loop,
6337             };
6338             }
6339             else {
6340 0         0 my $seq_test = pop @stack;
6341              
6342             # error check
6343 0 0       0 if ( $seq_test != $seq_next ) {
6344              
6345             # Shouldn't happen - the $rSS array must have an error
6346 0         0 DEVEL_MODE && Fault("stack error finding total depths\n");
6347              
6348 0         0 %level_info = ();
6349 0         0 last;
6350             }
6351             }
6352             } ## end TREE_LOOP
6353 0         0 return \%level_info;
6354             } ## end sub find_level_info
6355              
6356             sub find_loop_label {
6357              
6358 0     0 0 0 my ( $self, $seqno ) = @_;
6359              
6360             # Given:
6361             # $seqno = sequence number of a block of code for a loop
6362             # Return:
6363             # $label = the loop label text, if any, or an empty string
6364              
6365 0         0 my $rLL = $self->[_rLL_];
6366 0         0 my $rlines = $self->[_rlines_];
6367 0         0 my $K_opening_container = $self->[_K_opening_container_];
6368              
6369 0         0 my $label = EMPTY_STRING;
6370 0         0 my $K_opening = $K_opening_container->{$seqno};
6371              
6372             # backup to the line with the opening paren, if any, in case the
6373             # keyword is on a different line
6374 0         0 my $Kp = $self->K_previous_code($K_opening);
6375 0 0       0 return $label unless ( defined($Kp) );
6376 0 0       0 if ( $rLL->[$Kp]->[_TOKEN_] eq ')' ) {
6377 0         0 $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
6378 0         0 $K_opening = $K_opening_container->{$seqno};
6379             }
6380              
6381 0 0       0 return $label unless ( defined($K_opening) );
6382 0         0 my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
6383              
6384             # look for a label within a few lines; allow a couple of blank lines
6385 0         0 foreach my $lx ( reverse( $lx_open - 3 .. $lx_open ) ) {
6386 0 0       0 last if ( $lx < 0 );
6387 0         0 my $line_of_tokens = $rlines->[$lx];
6388 0         0 my $line_type = $line_of_tokens->{_line_type};
6389              
6390             # stop search on a non-code line
6391 0 0       0 last if ( $line_type ne 'CODE' );
6392              
6393 0         0 my $rK_range = $line_of_tokens->{_rK_range};
6394 0         0 my ( $Kfirst, $Klast ) = @{$rK_range};
  0         0  
6395              
6396             # skip a blank line
6397 0 0       0 next if ( !defined($Kfirst) );
6398              
6399             # check for a lable
6400 0 0       0 if ( $rLL->[$Kfirst]->[_TYPE_] eq 'J' ) {
6401 0         0 $label = $rLL->[$Kfirst]->[_TOKEN_];
6402 0         0 last;
6403             }
6404              
6405             # quit the search if we are above the starting line
6406 0 0       0 last if ( $lx < $lx_open );
6407             }
6408              
6409 0         0 return $label;
6410             } ## end sub find_loop_label
6411              
6412             { ## closure find_mccabe_count
6413             my %is_mccabe_logic_keyword;
6414             my %is_mccabe_logic_operator;
6415              
6416             BEGIN {
6417 39     39   302 my @q = (qw( && || ||= &&= ? <<= >>= ));
6418 39         273 @is_mccabe_logic_operator{@q} = (1) x scalar(@q);
6419              
6420 39         222 @q = (qw( and or xor if else elsif unless until while for foreach ));
6421 39         101715 @is_mccabe_logic_keyword{@q} = (1) x scalar(@q);
6422             } ## end BEGIN
6423              
6424             sub find_mccabe_count {
6425 0     0 0 0 my ($self) = @_;
6426              
6427             # Find the cumulative mccabe count to each token
6428             # Return '$rmccabe_count_sum' = ref to array with cumulative
6429             # mccabe count to each token $K
6430              
6431             # NOTE: This sub currently follows the definitions in Perl::Critic
6432              
6433 0         0 my $rmccabe_count_sum;
6434 0         0 my $rLL = $self->[_rLL_];
6435 0         0 my $count = 0;
6436 0         0 my $Klimit = $self->[_Klimit_];
6437 0         0 foreach my $KK ( 0 .. $Klimit ) {
6438 0         0 $rmccabe_count_sum->{$KK} = $count;
6439 0         0 my $type = $rLL->[$KK]->[_TYPE_];
6440 0 0       0 if ( $type eq 'k' ) {
6441 0         0 my $token = $rLL->[$KK]->[_TOKEN_];
6442 0 0       0 if ( $is_mccabe_logic_keyword{$token} ) { $count++ }
  0         0  
6443             }
6444             else {
6445 0 0       0 if ( $is_mccabe_logic_operator{$type} ) {
6446 0         0 $count++;
6447             }
6448             }
6449             }
6450 0         0 $rmccabe_count_sum->{ $Klimit + 1 } = $count;
6451 0         0 return $rmccabe_count_sum;
6452             } ## end sub find_mccabe_count
6453             } ## end closure find_mccabe_count
6454              
6455             sub find_code_line_count {
6456 0     0 0 0 my ($self) = @_;
6457              
6458             # Find the cumulative number of lines of code, excluding blanks,
6459             # comments and pod.
6460             # Return '$rcode_line_count' = ref to array with cumulative
6461             # code line count for each input line number.
6462              
6463 0         0 my $rcode_line_count;
6464 0         0 my $rLL = $self->[_rLL_];
6465 0         0 my $rlines = $self->[_rlines_];
6466 0         0 my $ix_line = -1;
6467 0         0 my $code_line_count = 0;
6468              
6469             # loop over all lines
6470 0         0 foreach my $line_of_tokens ( @{$rlines} ) {
  0         0  
6471 0         0 $ix_line++;
6472              
6473             # what type of line?
6474 0         0 my $line_type = $line_of_tokens->{_line_type};
6475              
6476             # if 'CODE' it must be non-blank and non-comment
6477 0 0       0 if ( $line_type eq 'CODE' ) {
6478 0         0 my $rK_range = $line_of_tokens->{_rK_range};
6479 0         0 my ( $Kfirst, $Klast ) = @{$rK_range};
  0         0  
6480              
6481 0 0       0 if ( defined($Kfirst) ) {
6482              
6483             # it is non-blank
6484 0 0       0 my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
6485 0 0 0     0 if ( $jmax > 0 || $rLL->[$Klast]->[_TYPE_] ne '#' ) {
6486              
6487             # ok, it is a non-comment
6488 0         0 $code_line_count++;
6489             }
6490             }
6491             }
6492              
6493             # Count all other special line types except pod;
6494             # For a list of line types see sub 'process_all_lines'
6495             else {
6496 0 0       0 if ( $line_type !~ /^POD/ ) { $code_line_count++ }
  0         0  
6497             }
6498              
6499             # Store the cumulative count using the input line index
6500 0         0 $rcode_line_count->[$ix_line] = $code_line_count;
6501             }
6502 0         0 return $rcode_line_count;
6503             } ## end sub find_code_line_count
6504              
6505             sub find_selected_packages {
6506              
6507 0     0 0 0 my ( $self, $rdump_block_types ) = @_;
6508              
6509             # returns a list of all selected package statements in a file
6510 0         0 my @package_list;
6511              
6512 0 0 0     0 if ( !$rdump_block_types->{'*'}
      0        
6513             && !$rdump_block_types->{'package'}
6514             && !$rdump_block_types->{'class'} )
6515             {
6516 0         0 return \@package_list;
6517             }
6518              
6519 0         0 my $rLL = $self->[_rLL_];
6520 0         0 my $Klimit = $self->[_Klimit_];
6521 0         0 my $rlines = $self->[_rlines_];
6522              
6523 0         0 my $K_closing_container = $self->[_K_closing_container_];
6524 0         0 my @package_sweep;
6525 0         0 foreach my $KK ( 0 .. $Klimit ) {
6526 0         0 my $item = $rLL->[$KK];
6527 0         0 my $type = $item->[_TYPE_];
6528              
6529             # fix for c250: package type has changed from 'i' to 'P'
6530 0 0       0 next if ( $type ne 'P' );
6531              
6532 0         0 my $token = $item->[_TOKEN_];
6533 0 0 0     0 if ( substr( $token, 0, 7 ) eq 'package' && $token =~ /^package\s/
      0        
      0        
6534             || substr( $token, 0, 5 ) eq 'class' && $token =~ /^class\s/ )
6535             {
6536              
6537 0         0 $token =~ s/\s+/ /g;
6538 0         0 my ( $keyword, $name ) = split /\s+/, $token, 2;
6539              
6540 0         0 my $lx_start = $item->[_LINE_INDEX_];
6541 0         0 my $level = $item->[_LEVEL_];
6542 0         0 my $parent_seqno = $self->parent_seqno_by_K($KK);
6543              
6544             # Skip a class BLOCK because it will be handled as a block
6545 0 0       0 if ( $keyword eq 'class' ) {
6546 0         0 my $line_of_tokens = $rlines->[$lx_start];
6547 0         0 my $rK_range = $line_of_tokens->{_rK_range};
6548 0         0 my ( $K_first, $K_last ) = @{$rK_range};
  0         0  
6549 0 0       0 if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
6550 0         0 $K_last = $self->K_previous_code($K_last);
6551             }
6552 0 0       0 if ( defined($K_last) ) {
6553 0         0 my $seqno_class = $rLL->[$K_last]->[_TYPE_SEQUENCE_];
6554             my $block_type_next =
6555 0         0 $self->[_rblock_type_of_seqno_]->{$seqno_class};
6556              
6557             # these block types are currently marked 'package'
6558             # but may be 'class' in the future, so allow both.
6559 0 0 0     0 if ( defined($block_type_next)
6560             && $block_type_next =~ /^(class|package)\b/ )
6561             {
6562 0         0 next;
6563             }
6564             }
6565             }
6566              
6567 0         0 my $K_closing = $Klimit;
6568 0 0       0 if ( $parent_seqno != SEQ_ROOT ) {
6569 0         0 my $Kc = $K_closing_container->{$parent_seqno};
6570 0 0       0 if ( defined($Kc) ) {
6571 0         0 $K_closing = $Kc;
6572             }
6573             }
6574              
6575             # This package ends any previous package at this level
6576 0 0       0 if ( defined( my $ix = $package_sweep[$level] ) ) {
6577 0         0 my $rpk = $package_list[$ix];
6578 0         0 my $Kc = $rpk->{K_closing};
6579 0 0       0 if ( $Kc > $KK ) {
6580 0         0 $rpk->{K_closing} = $KK - 1;
6581             }
6582             }
6583 0         0 $package_sweep[$level] = @package_list;
6584              
6585             # max_change and block_count are not currently reported 'package'
6586 0         0 push @package_list,
6587             {
6588             line_start => $lx_start + 1,
6589             K_opening => $KK,
6590             K_closing => $Klimit,
6591             name => $name,
6592             type => $keyword,
6593             level => $level,
6594             max_change => 0,
6595             block_count => 0,
6596             };
6597             }
6598             }
6599              
6600 0         0 return \@package_list;
6601             } ## end sub find_selected_packages
6602              
6603             sub find_selected_blocks {
6604              
6605 0     0 0 0 my ( $self, $rdump_block_types ) = @_;
6606              
6607             # Find blocks needed for --dump-block-summary
6608             # Returns:
6609             # $rslected_blocks = ref to a list of information on the selected blocks
6610              
6611 0         0 my $rLL = $self->[_rLL_];
6612 0         0 my $rlines = $self->[_rlines_];
6613 0         0 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6614 0         0 my $K_opening_container = $self->[_K_opening_container_];
6615 0         0 my $K_closing_container = $self->[_K_closing_container_];
6616 0         0 my $ris_asub_block = $self->[_ris_asub_block_];
6617 0         0 my $ris_sub_block = $self->[_ris_sub_block_];
6618              
6619 0         0 my $dump_all_types = $rdump_block_types->{'*'};
6620              
6621             # Get level variation info for code blocks
6622 0         0 my $rlevel_info = $self->find_level_info();
6623              
6624 0         0 my @selected_blocks;
6625              
6626             #---------------------------------------------------
6627             # BEGIN loop over all blocks to find selected blocks
6628             #---------------------------------------------------
6629 0         0 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
  0         0  
6630              
6631 0         0 my $type;
6632 0         0 my $name = EMPTY_STRING;
6633 0         0 my $block_type = $rblock_type_of_seqno->{$seqno};
6634 0         0 my $K_opening = $K_opening_container->{$seqno};
6635 0         0 my $K_closing = $K_closing_container->{$seqno};
6636 0         0 my $level = $rLL->[$K_opening]->[_LEVEL_];
6637              
6638 0         0 my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
6639 0         0 my $line_of_tokens = $rlines->[$lx_open];
6640 0         0 my $rK_range = $line_of_tokens->{_rK_range};
6641 0         0 my ( $Kfirst, $Klast ) = @{$rK_range};
  0         0  
6642 0 0 0     0 if ( !defined($Kfirst) || !defined($Klast) || $Kfirst > $K_opening ) {
      0        
6643 0         0 my $line_type = $line_of_tokens->{_line_type};
6644              
6645             # shouldn't happen
6646 0         0 my $CODE_type = $line_of_tokens->{_code_type};
6647 0         0 DEVEL_MODE && Fault(<<EOM);
6648             unexpected line_type=$line_type at line $lx_open, code type=$CODE_type
6649             EOM
6650 0         0 next;
6651             }
6652              
6653 0         0 my ( $max_change, $block_count, $inner_loop_plus ) =
6654             ( 0, 0, EMPTY_STRING );
6655 0         0 my $item = $rlevel_info->{$seqno};
6656 0 0       0 if ( defined($item) ) {
6657 0         0 my $starting_depth = $item->{starting_depth};
6658 0         0 my $maximum_depth = $item->{maximum_depth};
6659 0         0 $block_count = $item->{block_count};
6660 0         0 $max_change = $maximum_depth - $starting_depth + 1;
6661              
6662             # this is a '+' character if this block is an inner loops
6663 0 0       0 $inner_loop_plus = $item->{is_inner_loop} ? '+' : EMPTY_STRING;
6664             }
6665              
6666             # Skip closures unless type 'closure' is explicitly requested
6667 0 0 0     0 if ( ( $block_type eq '}' || $block_type eq ';' )
    0 0        
    0 0        
    0 0        
    0 0        
    0 0        
      0        
      0        
      0        
      0        
      0        
6668             && $rdump_block_types->{'closure'} )
6669             {
6670 0         0 $type = 'closure';
6671             }
6672              
6673             # Both 'sub' and 'asub' select an anonymous sub.
6674             # This allows anonymous subs to be explicitely selected
6675             elsif (
6676             $ris_asub_block->{$seqno}
6677             && ( $dump_all_types
6678             || $rdump_block_types->{'sub'}
6679             || $rdump_block_types->{'asub'} )
6680             )
6681             {
6682 0         0 $type = 'asub';
6683              
6684             # Look back to try to find some kind of name, such as
6685             # my $var = sub { - var is type 'i'
6686             # var => sub { - var is type 'w'
6687             # -var => sub { - var is type 'w'
6688             # 'var' => sub { - var is type 'Q'
6689 0         0 my ( $saw_equals, $saw_fat_comma, $blank_count );
6690 0         0 foreach my $KK ( reverse( $Kfirst .. $K_opening - 1 ) ) {
6691 0         0 my $token_type = $rLL->[$KK]->[_TYPE_];
6692 0 0       0 if ( $token_type eq 'b' ) { $blank_count++; next }
  0         0  
  0         0  
6693 0 0       0 if ( $token_type eq '=>' ) { $saw_fat_comma++; next }
  0         0  
  0         0  
6694 0 0       0 if ( $token_type eq '=' ) { $saw_equals++; next }
  0         0  
  0         0  
6695 0 0 0     0 if ( $token_type eq 'i' && $saw_equals
      0        
      0        
      0        
6696             || ( $token_type eq 'w' || $token_type eq 'Q' )
6697             && $saw_fat_comma )
6698             {
6699 0         0 $name = $rLL->[$KK]->[_TOKEN_];
6700 0         0 last;
6701             }
6702             }
6703             }
6704             elsif ( $ris_sub_block->{$seqno}
6705             && ( $dump_all_types || $rdump_block_types->{'sub'} ) )
6706             {
6707 0         0 $type = 'sub';
6708              
6709             # what we want:
6710             # $block_type $name
6711             # 'sub setidentifier($)' => 'setidentifier'
6712             # 'method setidentifier($)' => 'setidentifier'
6713 0         0 my @parts = split /\s+/, $block_type;
6714 0         0 $name = $parts[1];
6715 0         0 $name =~ s/\(.*$//;
6716             }
6717             elsif (
6718             $block_type =~ /^(package|class)\b/
6719             && ( $dump_all_types
6720             || $rdump_block_types->{'package'}
6721             || $rdump_block_types->{'class'} )
6722             )
6723             {
6724 0         0 $type = 'class';
6725 0         0 my @parts = split /\s+/, $block_type;
6726 0         0 $name = $parts[1];
6727 0         0 $name =~ s/\(.*$//;
6728             }
6729             elsif (
6730             $is_loop_type{$block_type}
6731             && ( $dump_all_types
6732             || $rdump_block_types->{$block_type}
6733             || $rdump_block_types->{ $block_type . $inner_loop_plus }
6734             || $rdump_block_types->{$inner_loop_plus} )
6735             )
6736             {
6737 0         0 $type = $block_type . $inner_loop_plus;
6738             }
6739             elsif ( $dump_all_types || $rdump_block_types->{$block_type} ) {
6740 0 0       0 if ( $is_loop_type{$block_type} ) {
6741 0         0 $name = $self->find_loop_label($seqno);
6742             }
6743 0         0 $type = $block_type;
6744             }
6745             else {
6746 0         0 next;
6747             }
6748              
6749 0         0 push @selected_blocks,
6750             {
6751             K_opening => $K_opening,
6752             K_closing => $K_closing,
6753             line_start => $lx_open + 1,
6754             name => $name,
6755             type => $type,
6756             level => $level,
6757             max_change => $max_change,
6758             block_count => $block_count,
6759             };
6760             } ## END loop to get info for selected blocks
6761 0         0 return \@selected_blocks;
6762             } ## end sub find_selected_blocks
6763              
6764             sub dump_block_summary {
6765 0     0 0 0 my ($self) = @_;
6766              
6767             # Dump information about selected code blocks to STDOUT
6768             # This sub is called when
6769             # --dump-block-summary (-dbs) is set.
6770              
6771             # The following controls are available:
6772             # --dump-block-types=s (-dbt=s), where s is a list of block types
6773             # (if else elsif for foreach while do ... sub) ; default is 'sub'
6774             # --dump-block-minimum-lines=n (-dbml=n), where n is the minimum
6775             # number of lines for a block to be included; default is 20.
6776              
6777 0         0 my $rOpts_dump_block_types = $rOpts->{'dump-block-types'};
6778 0 0       0 if ( !defined($rOpts_dump_block_types) ) { $rOpts_dump_block_types = 'sub' }
  0         0  
6779 0         0 $rOpts_dump_block_types =~ s/^\s+//;
6780 0         0 $rOpts_dump_block_types =~ s/\s+$//;
6781 0         0 my @list = split /\s+/, $rOpts_dump_block_types;
6782 0         0 my %dump_block_types;
6783 0         0 @{dump_block_types}{@list} = (1) x scalar(@list);
6784              
6785             # Get block info
6786 0         0 my $rselected_blocks = $self->find_selected_blocks( \%dump_block_types );
6787              
6788             # Get package info
6789 0         0 my $rpackage_list = $self->find_selected_packages( \%dump_block_types );
6790              
6791 0 0 0     0 return if ( !@{$rselected_blocks} && !@{$rpackage_list} );
  0         0  
  0         0  
6792              
6793 0         0 my $input_stream_name = get_input_stream_name();
6794              
6795             # Get code line count
6796 0         0 my $rcode_line_count = $self->find_code_line_count();
6797              
6798             # Get mccabe count
6799 0         0 my $rmccabe_count_sum = $self->find_mccabe_count();
6800              
6801 0         0 my $rOpts_dump_block_minimum_lines = $rOpts->{'dump-block-minimum-lines'};
6802 0 0       0 if ( !defined($rOpts_dump_block_minimum_lines) ) {
6803 0         0 $rOpts_dump_block_minimum_lines = 20;
6804             }
6805              
6806 0         0 my $rLL = $self->[_rLL_];
6807              
6808             # merge blocks and packages, add various counts, filter and print to STDOUT
6809 0         0 my $routput_lines = [];
6810 0         0 foreach my $item ( @{$rselected_blocks}, @{$rpackage_list} ) {
  0         0  
  0         0  
6811              
6812 0         0 my $K_opening = $item->{K_opening};
6813 0         0 my $K_closing = $item->{K_closing};
6814              
6815             # define total number of lines
6816 0         0 my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
6817 0         0 my $lx_close = $rLL->[$K_closing]->[_LINE_INDEX_];
6818 0         0 my $line_count = $lx_close - $lx_open + 1;
6819              
6820             # define total number of lines of code excluding blanks, comments, pod
6821 0         0 my $code_lines_open = $rcode_line_count->[$lx_open];
6822 0         0 my $code_lines_close = $rcode_line_count->[$lx_close];
6823 0         0 my $code_lines = 0;
6824 0 0 0     0 if ( defined($code_lines_open) && defined($code_lines_close) ) {
6825 0         0 $code_lines = $code_lines_close - $code_lines_open + 1;
6826             }
6827              
6828             # filter out blocks below the selected code line limit
6829 0 0       0 if ( $code_lines < $rOpts_dump_block_minimum_lines ) {
6830 0         0 next;
6831             }
6832              
6833             # add mccabe_count for this block
6834 0         0 my $mccabe_closing = $rmccabe_count_sum->{ $K_closing + 1 };
6835 0         0 my $mccabe_opening = $rmccabe_count_sum->{$K_opening};
6836 0         0 my $mccabe_count = 1; # add 1 to match Perl::Critic
6837 0 0 0     0 if ( defined($mccabe_opening) && defined($mccabe_closing) ) {
6838 0         0 $mccabe_count += $mccabe_closing - $mccabe_opening;
6839             }
6840              
6841             # Store the final set of print variables
6842 0         0 push @{$routput_lines}, [
6843              
6844             $input_stream_name,
6845             $item->{line_start},
6846             $line_count,
6847             $code_lines,
6848             $item->{type},
6849             $item->{name},
6850             $item->{level},
6851             $item->{max_change},
6852             $item->{block_count},
6853 0         0 $mccabe_count,
6854              
6855             ];
6856             }
6857              
6858 0 0       0 return unless @{$routput_lines};
  0         0  
6859              
6860             # Sort blocks and packages on starting line number
6861 0         0 my @sorted_lines = sort { $a->[1] <=> $b->[1] } @{$routput_lines};
  0         0  
  0         0  
6862              
6863 0         0 print {*STDOUT}
  0         0  
6864             "file,line,line_count,code_lines,type,name,level,max_change,block_count,mccabe_count\n";
6865              
6866 0         0 foreach my $rline_vars (@sorted_lines) {
6867 0         0 my $line = join( ",", @{$rline_vars} ) . "\n";
  0         0  
6868 0         0 print {*STDOUT} $line;
  0         0  
6869             }
6870 0         0 return;
6871             } ## end sub dump_block_summary
6872              
6873             sub set_ci {
6874              
6875 561     561 0 1657 my ($self) = @_;
6876              
6877             # Set the basic continuation indentation (ci) for all tokens.
6878             # This is a replacement for the values previously computed in
6879             # sub Perl::Tidy::Tokenizer::tokenizer_wrapup. In most cases it
6880             # produces identical results, but in a few cases it is an improvement.
6881              
6882 39     39   420 use constant DEBUG_SET_CI => 0;
  39         97  
  39         3359  
6883              
6884             # This turns on an optional piece of logic which makes the new and
6885             # old computations of ci agree. It has almost no effect on actual
6886             # programs but is useful for testing.
6887 39     39   297 use constant SET_CI_OPTION_0 => 1;
  39         101  
  39         238358  
6888              
6889             # This is slightly different from the hash in in break_lists
6890             # with a similar name (removed '?' and ':' to fix t007 and others)
6891 561         1255 my %is_logical_container_for_ci;
6892 561         3647 my @q = qw# if elsif unless while and or err not && | || ! #;
6893 561         5632 @is_logical_container_for_ci{@q} = (1) x scalar(@q);
6894              
6895             # This is slightly different from a tokenizer hash with a similar name:
6896 561         1351 my %is_container_label_type_for_ci;
6897 561         2942 @q = qw# k && | || ? : ! #;
6898 561         2943 @is_container_label_type_for_ci{@q} = (1) x scalar(@q);
6899              
6900             # Undo ci of closing list paren followed by these binary operators:
6901             # - initially defined for issue t027, then
6902             # - added '=' for t015
6903             # - added '=~' for 'locale.in'
6904             # - added '<=>' for 'corelist.in'
6905             # Note:
6906             # See @value_requestor_type for more that might be included
6907             # See also @is_binary_type
6908 561         1171 my %bin_op_type;
6909 561         3954 @q = qw# . ** -> + - / * = != ^ < > % >= <= =~ !~ <=> x #;
6910 561         6061 @bin_op_type{@q} = (1) x scalar(@q);
6911              
6912 561         1470 my %is_list_end_type;
6913 561         2778 @q = qw( ; { } );
6914 561         1367 push @q, ',';
6915 561         2787 @is_list_end_type{@q} = (1) x scalar(@q);
6916              
6917 561         1716 my $rLL = $self->[_rLL_];
6918 561         1318 my $Klimit = $self->[_Klimit_];
6919 561 100       1965 return unless defined($Klimit);
6920              
6921 557         1289 my $token = ';';
6922 557         1125 my $type = ';';
6923 557         1235 my $last_token = $token;
6924 557         1188 my $last_type = $type;
6925 557         1085 my $ci_last = 0;
6926 557         1108 my $ci_next = 0;
6927 557         2804 my $ci_next_next = 1;
6928 557         1205 my $rstack = [];
6929              
6930 557         1478 my $seq_root = SEQ_ROOT;
6931 557         7123 my $rparent = {
6932             _seqno => $seq_root,
6933             _ci_open => 0,
6934             _ci_open_next => 0,
6935             _ci_close => 0,
6936             _ci_close_next => 0,
6937             _container_type => 'Block',
6938             _ci_next_next => $ci_next_next,
6939             _comma_count => 0,
6940             _semicolon_count => 0,
6941             _Kc => undef,
6942             };
6943              
6944             # Debug stuff
6945 557         1586 my @debug_lines;
6946             my %saw_ci_diff;
6947              
6948 557         1390 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
6949 557         1336 my $ris_sub_block = $self->[_ris_sub_block_];
6950 557         1280 my $ris_asub_block = $self->[_ris_asub_block_];
6951 557         1267 my $K_opening_container = $self->[_K_opening_container_];
6952 557         1194 my $K_closing_container = $self->[_K_closing_container_];
6953 557         1199 my $K_opening_ternary = $self->[_K_opening_ternary_];
6954 557         1297 my $K_closing_ternary = $self->[_K_closing_ternary_];
6955 557         1328 my $rlines = $self->[_rlines_];
6956 557         1243 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
6957              
6958 557         1674 my $want_break_before_comma = $want_break_before{','};
6959              
6960             my $map_block_follows = sub {
6961              
6962             # return true if a sort/map/etc block follows the closing brace
6963             # of container $seqno
6964 104     104   307 my ($seqno) = @_;
6965 104         215 my $Kc = $K_closing_container->{$seqno};
6966 104 50       328 return unless defined($Kc);
6967 104         436 my $Kcn = $self->K_next_code($Kc);
6968 104 50       384 return unless defined($Kcn);
6969 104         274 my $seqno_n = $rLL->[$Kcn]->[_TYPE_SEQUENCE_];
6970              
6971             #return if ( defined($seqno_n) );
6972 104 100       447 return if ($seqno_n);
6973 34         104 my $Knn = $self->K_next_code($Kcn);
6974 34 50       158 return unless defined($Knn);
6975 34         108 my $seqno_nn = $rLL->[$Knn]->[_TYPE_SEQUENCE_];
6976 34 100       196 return unless ($seqno_nn);
6977 25         68 my $K_nno = $K_opening_container->{$seqno_nn};
6978 25 100 66     164 return unless $K_nno && $K_nno == $Knn;
6979 13         28 my $block_type = $rblock_type_of_seqno->{$seqno_nn};
6980              
6981 13 100       55 if ($block_type) {
6982 6         25 return $is_block_with_ci{$block_type};
6983             }
6984 7         27 return;
6985 557         5152 };
6986              
6987             my $redo_preceding_comment_ci = sub {
6988              
6989             # We need to reset the ci of the previous comment(s)
6990 187     187   514 my ( $K, $ci ) = @_;
6991 187         689 my $Km = $self->K_previous_code($K);
6992 187 50       743 return if ( !defined($Km) );
6993 187         807 foreach my $Kt ( $Km + 1 .. $K - 1 ) {
6994 180 50       689 if ( $rLL->[$Kt]->[_TYPE_] eq '#' ) {
6995 0         0 $rLL->[$Kt]->[_CI_LEVEL_] = $ci;
6996             }
6997             }
6998 187         396 return;
6999 557         4973 };
7000              
7001             # Definitions of the sequence of ci_values being maintained:
7002             # $ci_last = the ci value of the previous non-blank, non-comment token
7003             # $ci_this = the ci value to be stored for this token at index $KK
7004             # $ci_next = the normal ci for the next token, set by the previous tok
7005             # $ci_next_next = the normal next value of $ci_next in this container
7006              
7007             #--------------------------
7008             # Main loop over all tokens
7009             #--------------------------
7010 557         3067 my $KK = -1;
7011 557         3001 foreach my $rtoken_K ( @{$rLL} ) {
  557         2052  
7012              
7013 58535         70998 $KK++;
7014 58535         83852 $type = $rtoken_K->[_TYPE_];
7015              
7016             #------------------
7017             # Section 1. Blanks
7018             #------------------
7019 58535 100       97385 if ( $type eq 'b' ) {
7020              
7021 22290         31550 $rtoken_K->[_CI_LEVEL_] = $ci_next;
7022              
7023             # 'next' to avoid saving last_ values for blanks and commas
7024 22290         33053 next;
7025             }
7026              
7027             #--------------------
7028             # Section 2. Comments
7029             #--------------------
7030 36245 100       60577 if ( $type eq '#' ) {
7031              
7032 1092         1887 my $ci_this = $ci_next;
7033              
7034             # If at '#' in ternary before a ? or :, use that level to make
7035             # the comment line up with the next ? or : line. (see c202/t052)
7036             # i.e. if a nested ? follows, we increase the '#' level by 1, and
7037             # if a nested : follows, we decrease the '#' level by 1.
7038             # This is the only place where this sub changes a _LEVEL_ value.
7039 1092         1627 my $Kn;
7040 1092         3708 my $parent_container_type = $rparent->{_container_type};
7041 1092 100       2559 if ( $parent_container_type eq 'Ternary' ) {
7042 4         15 $Kn = $self->K_next_code($KK);
7043 4 50       11 if ($Kn) {
7044 4         9 my $type_kn = $rLL->[$Kn]->[_TYPE_];
7045 4 50       12 if ( $is_ternary{$type_kn} ) {
7046 4         6 my $level_KK = $rLL->[$KK]->[_LEVEL_];
7047 4         9 my $level_Kn = $rLL->[$Kn]->[_LEVEL_];
7048 4         6 $rLL->[$KK]->[_LEVEL_] = $rLL->[$Kn]->[_LEVEL_];
7049              
7050             # and use the ci of a terminating ':'
7051 4 50       15 if ( $Kn == $rparent->{_Kc} ) {
7052 4         6 $ci_this = $rparent->{_ci_close};
7053             }
7054             }
7055             }
7056             }
7057              
7058             # Undo ci for a block comment followed by a closing token or , or ;
7059             # provided that the parent container:
7060             # - ends without ci, or
7061             # - starts ci=0 and is a comma list or this follows a closing type
7062             # - has a level jump
7063 1092 50 66     2769 if (
      66        
7064             $ci_this
7065             && (
7066             !$rparent->{_ci_close}
7067             || (
7068             !$rparent->{_ci_open_next}
7069             && ( ( $rparent->{_comma_count} || $last_type eq ',' )
7070             || $is_closing_type{$last_type} )
7071             )
7072             )
7073             )
7074             {
7075             # Be sure this is a block comment
7076 37         77 my $lx = $rtoken_K->[_LINE_INDEX_];
7077 37         102 my $rK_range = $rlines->[$lx]->{_rK_range};
7078 37         73 my $Kfirst;
7079 37 50       115 if ($rK_range) { $Kfirst = $rK_range->[0] }
  37         86  
7080 37 100 66     224 if ( defined($Kfirst) && $Kfirst == $KK ) {
7081              
7082             # Look for trailing closing token
7083             # [ and possibly ',' or ';' ]
7084 11 50       67 $Kn = $self->K_next_code($KK) if ( !$Kn );
7085 11         38 my $Kc = $rparent->{_Kc};
7086 11 0 66     119 if (
      33        
      66        
7087             $Kn
7088             && $Kc
7089             && (
7090             $Kn == $Kc
7091              
7092             # only look for comma if -wbb=',' is set
7093             # to minimize changes to existing formatting
7094             || ( $rLL->[$Kn]->[_TYPE_] eq ','
7095             && $want_break_before_comma
7096             && $parent_container_type eq 'List' )
7097              
7098             # do not look ahead for a bare ';' because
7099             # it changes old formatting with little benefit.
7100             ## || ( $rLL->[$Kn]->[_TYPE_] eq ';'
7101             ## && $parent_container_type eq 'Block' )
7102             )
7103             )
7104             {
7105              
7106             # Be sure container has a level jump
7107 0         0 my $level_KK = $rLL->[$KK]->[_LEVEL_];
7108 0         0 my $level_Kc = $rLL->[$Kc]->[_LEVEL_];
7109 0 0       0 if ( $level_Kc < $level_KK ) {
7110 0         0 $ci_this = 0;
7111             }
7112             }
7113             }
7114             }
7115              
7116 1092         1706 $ci_next = $ci_this;
7117 1092         1887 $rtoken_K->[_CI_LEVEL_] = $ci_this;
7118              
7119             # 'next' to avoid saving last_ values for blanks and commas
7120 1092         2179 next;
7121             }
7122              
7123             #------------------------------------------------------------
7124             # Section 3. Continuing with non-blank and non-comment tokens
7125             #------------------------------------------------------------
7126              
7127 35153         50574 $token = $rtoken_K->[_TOKEN_];
7128              
7129             # Set ci values appropriate for most tokens:
7130 35153         44567 my $ci_this = $ci_next;
7131 35153         44008 $ci_next = $ci_next_next;
7132              
7133             # Now change these ci values as necessary for special cases...
7134              
7135             #----------------------------
7136             # Section 4. Container tokens
7137             #----------------------------
7138 35153 100 100     143509 if ( $rtoken_K->[_TYPE_SEQUENCE_] ) {
    100 100        
    100 100        
    100          
    100          
7139              
7140 9150         14903 my $seqno = $rtoken_K->[_TYPE_SEQUENCE_];
7141              
7142             #-------------------------------------
7143             # Section 4.1 Opening container tokens
7144             #-------------------------------------
7145 9150 100       18561 if ( $is_opening_sequence_token{$token} ) {
7146              
7147 4575         7560 my $level = $rtoken_K->[_LEVEL_];
7148              
7149             # Default ci values for the closing token, to be modified
7150             # as necessary:
7151 4575         6400 my $ci_close = $ci_next;
7152 4575         6512 my $ci_close_next = $ci_next_next;
7153              
7154             my $Kc =
7155             $type eq '?'
7156             ? $K_closing_ternary->{$seqno}
7157 4575 100       10093 : $K_closing_container->{$seqno};
7158              
7159             # $Kn = $self->K_next_nonblank($KK);
7160 4575         6232 my $Kn;
7161 4575 50       8967 if ( $KK < $Klimit ) {
7162 4575         6443 $Kn = $KK + 1;
7163 4575 100 66     15541 if ( $rLL->[$Kn]->[_TYPE_] eq 'b' && $Kn < $Klimit ) {
7164 3132         4926 $Kn += 1;
7165             }
7166             }
7167              
7168             # $Kcn = $self->K_next_code($Kc);
7169 4575         6310 my $Kcn;
7170 4575 100 66     13962 if ( $Kc && $Kc < $Klimit ) {
7171 4452         6273 $Kcn = $Kc + 1;
7172 4452 100 100     15242 if ( $rLL->[$Kcn]->[_TYPE_] eq 'b' && $Kcn < $Klimit ) {
7173 2335         3647 $Kcn += 1;
7174             }
7175 4452 100       9505 if ( $rLL->[$Kcn]->[_TYPE_] eq '#' ) {
7176 90         544 $Kcn = $self->K_next_code($Kcn);
7177             }
7178             }
7179              
7180 4575 50       9950 my $opening_level_jump =
7181             $Kn ? $rLL->[$Kn]->[_LEVEL_] - $level : 0;
7182              
7183             # initialize ci_next_next to its standard value
7184 4575         6594 $ci_next_next = 1;
7185              
7186             # Default: ci of first item of list with level jump is same as
7187             # ci of first item of container
7188 4575 100       8890 if ( $opening_level_jump > 0 ) {
7189 3858         6409 $ci_next = $rparent->{_ci_open_next};
7190             }
7191              
7192 4575         6821 my ( $comma_count, $semicolon_count );
7193 4575         7648 my $rtype_count = $rtype_count_by_seqno->{$seqno};
7194 4575 100       8815 if ($rtype_count) {
7195 1892         3490 $comma_count = $rtype_count->{','};
7196 1892         3275 $semicolon_count = $rtype_count->{';'};
7197              
7198             # Do not include a terminal semicolon in the count (the
7199             # comma_count has already been corrected by respace_tokens)
7200             # We only need to know if there are semicolons or not, so
7201             # for speed we can just do this test if the count is 1.
7202 1892 100 100     7584 if ( $semicolon_count && $semicolon_count == 1 ) {
7203 400         1643 my $Kcm = $self->K_previous_code($Kc);
7204 400 100       1498 if ( $rLL->[$Kcm]->[_TYPE_] eq ';' ) {
7205 380         853 $semicolon_count--;
7206             }
7207             }
7208             }
7209              
7210 4575         6459 my $container_type;
7211              
7212             #-------------------------
7213             # Section 4.1.1 Code Block
7214             #-------------------------
7215 4575         7884 my $block_type = $rblock_type_of_seqno->{$seqno};
7216 4575 100       11064 if ($block_type) {
    100          
7217 971         1913 $container_type = 'Block';
7218              
7219             # set default depending on block type
7220 971         1570 $ci_close = 0;
7221              
7222             my $no_semicolon =
7223             $is_block_without_semicolon{$block_type}
7224 971   100     5280 || $ris_sub_block->{$seqno}
7225             || $last_type eq 'J';
7226              
7227 971 100       2451 if ( !$no_semicolon ) {
7228              
7229             # Optional fix for block types sort/map/etc which use
7230             # zero ci at terminal brace if previous keyword had
7231             # zero ci. This will cause sort/map/grep filter blocks
7232             # to line up. Note that sub 'undo_ci' will also try to
7233             # do this, so this is not a critical operation.
7234 538 100       1705 if ( $is_block_with_ci{$block_type} ) {
7235 347         782 my $parent_seqno = $rparent->{_seqno};
7236             my $rtype_count_p =
7237 347         734 $rtype_count_by_seqno->{$parent_seqno};
7238 347 100 100     2202 if (
      100        
      100        
7239              
7240             # only do this within containers
7241             $parent_seqno != SEQ_ROOT
7242              
7243             # only in containers without ',' and ';'
7244             && !$rparent->{_comma_count}
7245             && !$rparent->{_semicolon_count}
7246              
7247             && $map_block_follows->($seqno)
7248             )
7249             {
7250 6 50       18 if ($ci_last) {
7251 0         0 $ci_close = $ci_this;
7252             }
7253             }
7254             else {
7255 341         715 $ci_close = $ci_this;
7256             }
7257             }
7258              
7259             # keep ci if certain operators follow (fix c202/t024)
7260 538 100 100     1990 if ( !$ci_close && $Kcn ) {
7261 174         418 my $type_kcn = $rLL->[$Kcn]->[_TYPE_];
7262 174         340 my $token_kcn = $rLL->[$Kcn]->[_TOKEN_];
7263 174 100 100     1439 if ( $type_kcn =~ /^(\.|\&\&|\|\|)$/
      66        
7264             || $type_kcn eq 'k' && $is_and_or{$token_kcn} )
7265             {
7266 1         4 $ci_close = $ci_this;
7267             }
7268             }
7269             }
7270              
7271 971 100       2650 if ( $rparent->{_container_type} ne 'Ternary' ) {
7272 965         1668 $ci_this = 0;
7273             }
7274 971         1484 $ci_next = 0;
7275 971         1635 $ci_close_next = $ci_close;
7276             }
7277              
7278             #----------------------
7279             # Section 4.1.2 Ternary
7280             #----------------------
7281             elsif ( $type eq '?' ) {
7282 187         535 $container_type = 'Ternary';
7283 187 100 66     1077 if ( $rparent->{_container_type} eq 'List'
7284             && !$rparent->{_ci_open_next} )
7285             {
7286 52         115 $ci_this = 0;
7287 52         114 $ci_close = 0;
7288             }
7289              
7290             # redo ci of any preceding comments if necessary
7291             # at an outermost ? (which has no level jump)
7292 187 50       589 if ( !$opening_level_jump ) {
7293 187         534 $redo_preceding_comment_ci->( $KK, $ci_this );
7294             }
7295             }
7296              
7297             #-------------------------------
7298             # Section 4.1.3 Logical or List?
7299             #-------------------------------
7300             else {
7301             my $is_logical = $is_container_label_type_for_ci{$last_type}
7302 3417   100     16525 && $is_logical_container_for_ci{$last_token}
7303              
7304             # Part 1 of optional patch to get agreement with previous
7305             # ci This makes almost no difference in a typical program
7306             # because we will seldom break within an array index.
7307             || $type eq '[' && SET_CI_OPTION_0;
7308              
7309 3417 100 100     11152 if ( !$is_logical && $token eq '(' ) {
7310              
7311             # 'foreach' and 'for' paren contents are treated as
7312             # logical except for C-style 'for'
7313 1894 100 66     8113 if ( $last_type eq 'k' ) {
    100          
    100          
7314 433   66     2152 $is_logical ||= $last_token eq 'foreach';
7315              
7316             # C-style 'for' container will be type 'List'
7317 433 100       1386 if ( $last_token eq 'for' ) {
7318             $is_logical =
7319 28   100     273 !( $rtype_count && $rtype_count->{'f'} );
7320             }
7321             }
7322              
7323             # Check for 'for' and 'foreach' loops with iterators
7324             elsif ( $last_type eq 'i' && defined($Kcn) ) {
7325 576         1348 my $seqno_kcn = $rLL->[$Kcn]->[_TYPE_SEQUENCE_];
7326 576         1163 my $type_kcn = $rLL->[$Kcn]->[_TOKEN_];
7327 576 100 100     3088 if ( $seqno_kcn && $type_kcn eq '{' ) {
7328             my $block_type_kcn =
7329 34         87 $rblock_type_of_seqno->{$seqno_kcn};
7330 34   33     278 $is_logical ||= $block_type_kcn
      33        
7331             && ( $block_type_kcn eq 'for'
7332             || $block_type_kcn eq 'foreach' );
7333             }
7334              
7335             # Search backwards for 'for'/'foreach' with
7336             # iterator in case user is running from an editor
7337             # and did not include the block (fixes case
7338             # 'xci.in').
7339 576         1893 my $Km = $self->K_previous_code($KK);
7340 576         2098 foreach ( 0 .. 2 ) {
7341 588         1475 $Km = $self->K_previous_code($Km);
7342 588 50       2187 last unless defined($Km);
7343 588 100       2336 last unless $rLL->[$Km]->[_TYPE_] eq 'k';
7344 49         115 my $tok = $rLL->[$Km]->[_TOKEN_];
7345 49 100       169 next if $tok eq 'my';
7346 37   33     206 $is_logical ||=
      66        
7347             ( $tok eq 'for' || $tok eq 'foreach' );
7348 37         105 last;
7349             }
7350             }
7351             elsif ( $last_token eq '(' ) {
7352             $is_logical ||=
7353 61   66     385 $rparent->{_container_type} eq 'Logical';
7354             }
7355             else {
7356             ## ok - none of the above
7357             }
7358             }
7359              
7360             #------------------------
7361             # Section 4.1.3.1 Logical
7362             #------------------------
7363 3417 100       6470 if ($is_logical) {
7364 603         1176 $container_type = 'Logical';
7365              
7366             # Pass ci though an '!'
7367 603 100       1531 if ( $last_type eq '!' ) { $ci_this = $ci_last }
  6         14  
7368              
7369 603         1393 $ci_next_next = 0;
7370 603         992 $ci_close_next = $ci_this;
7371              
7372             # Part 2 of optional patch to get agreement with
7373             # previous ci
7374 603 100 100     2201 if ( $type eq '[' && SET_CI_OPTION_0 ) {
7375              
7376 307         569 $ci_next_next = $ci_this;
7377              
7378             # Undo ci at a chain of indexes or hash keys
7379 307 100       802 if ( $last_type eq '}' ) {
7380 7         15 $ci_this = $ci_last;
7381             }
7382             }
7383              
7384 603 100       1398 if ($opening_level_jump) {
7385 296         553 $ci_next = 0;
7386             }
7387             }
7388              
7389             #---------------------
7390             # Section 4.1.3.2 List
7391             #---------------------
7392             else {
7393              
7394             # Here 'List' is a catchall for none of the above types
7395 2814         4531 $container_type = 'List';
7396              
7397             # lists in blocks ...
7398 2814 100       6192 if ( $rparent->{_container_type} eq 'Block' ) {
7399              
7400             # undo ci if another closing token follows
7401 1657 100       3753 if ( defined($Kcn) ) {
7402 1656         3259 my $closing_level_jump =
7403             $rLL->[$Kcn]->[_LEVEL_] - $level;
7404 1656 100       3818 if ( $closing_level_jump < 0 ) {
7405 58         162 $ci_close = $ci_this;
7406             }
7407             }
7408             }
7409              
7410             # lists not in blocks ...
7411             else {
7412              
7413 1157 100       3238 if ( !$rparent->{_comma_count} ) {
7414              
7415 566         1060 $ci_close = $ci_this;
7416              
7417             # undo ci at binary op after right paren if no
7418             # commas in container; fixes t027, t028
7419 566 100 66     2341 if ( $ci_close_next != $ci_close
      100        
7420             && defined($Kcn)
7421             && $bin_op_type{ $rLL->[$Kcn]->[_TYPE_] } )
7422             {
7423 20         44 $ci_close_next = $ci_close;
7424             }
7425             }
7426              
7427 1157 100       2774 if ( $rparent->{_container_type} eq 'Ternary' ) {
7428 55         131 $ci_next = 0;
7429             }
7430             }
7431              
7432             # Undo ci at a chain of indexes or hash keys
7433 2814 50 66     8530 if ( $token ne '(' && $last_type eq '}' ) {
7434 0         0 $ci_this = $ci_close = $ci_last;
7435             }
7436             }
7437             }
7438              
7439             #---------------------------------------
7440             # Section 4.1.4 Store opening token info
7441             #---------------------------------------
7442              
7443             # Most closing tokens should align with their opening tokens.
7444 4575 100 100     19009 if (
      100        
      100        
7445             $type eq '{'
7446             && $token ne '('
7447             && $is_list_end_type{$last_type}
7448              
7449             # avoid asub blocks, which may have prototypes ending in '}'
7450             && !$ris_asub_block->{$seqno}
7451             )
7452             {
7453 729         1243 $ci_close = $ci_this;
7454             }
7455              
7456             # Closing ci must never be less than opening
7457 4575 50       8918 if ( $ci_close < $ci_this ) { $ci_close = $ci_this }
  0         0  
7458              
7459 4575         6614 push @{$rstack}, $rparent;
  4575         8521  
7460 4575         30840 $rparent = {
7461             _seqno => $seqno,
7462             _container_type => $container_type,
7463             _ci_next_next => $ci_next_next,
7464             _ci_open => $ci_this,
7465             _ci_open_next => $ci_next,
7466             _ci_close => $ci_close,
7467             _ci_close_next => $ci_close_next,
7468             _comma_count => $comma_count,
7469             _semicolon_count => $semicolon_count,
7470             _Kc => $Kc,
7471             };
7472             }
7473              
7474             #-------------------------------------
7475             # Section 4.2 Closing container tokens
7476             #-------------------------------------
7477             else {
7478              
7479 4575         8894 my $seqno_test = $rparent->{_seqno};
7480 4575 50       9449 if ( $seqno_test ne $seqno ) {
7481              
7482             # Shouldn't happen if we are processing balanced text.
7483             # (Unbalanced text should go out verbatim)
7484 0         0 DEVEL_MODE
7485             && Fault("stack error: $seqno_test != $seqno\n");
7486             }
7487              
7488             # Use ci_this, ci_next values set by the matching opening token:
7489 4575         6683 $ci_this = $rparent->{_ci_close};
7490 4575         6628 $ci_next = $rparent->{_ci_close_next};
7491 4575         6589 my $ci_open_old = $rparent->{_ci_open};
7492              
7493             # Then pop the stack and use the parent ci_next_next value:
7494 4575 50       5940 if ( @{$rstack} ) {
  4575         8642  
7495 4575         5876 $rparent = pop @{$rstack};
  4575         11963  
7496 4575         7652 $ci_next_next = $rparent->{_ci_next_next};
7497             }
7498             else {
7499              
7500             # Shouldn't happen if we are processing balanced text.
7501 0         0 DEVEL_MODE && Fault("empty stack - shouldn't happen\n");
7502             }
7503              
7504             # Fix: undo ci at a closing token followed by a closing token.
7505             # Goal is to keep formatting independent of the existence of a
7506             # trailing comma or semicolon.
7507 4575 100 100     16728 if ( $ci_this > 0 && !$ci_open_old && !$rparent->{_ci_close} ) {
      100        
7508 205         505 my $Kc = $rparent->{_Kc};
7509 205         974 my $Kn = $self->K_next_code($KK);
7510 205 100 66     991 if ( $Kc && $Kn && $Kc == $Kn ) {
      100        
7511 5         14 $ci_this = $ci_next = 0;
7512             }
7513             }
7514             }
7515             }
7516              
7517             #---------------------------------
7518             # Section 5. Semicolons and Labels
7519             #---------------------------------
7520             # The next token after a ';' and label (type 'J') starts a new stmt
7521             # The ci after a C-style for ';' (type 'f') is handled similarly.
7522             elsif ( $type eq ';' || $type eq 'J' || $type eq 'f' ) {
7523 2681         4826 $ci_next = 0;
7524 2681 100       6950 if ( $is_closing_type{$last_type} ) { $ci_this = $ci_last }
  1253         2217  
7525             }
7526              
7527             #--------------------
7528             # Section 6. Keywords
7529             #--------------------
7530             # Undo ci after a format statement
7531             elsif ( $type eq 'k' ) {
7532 2812 100       7574 if ( substr( $token, 0, 6 ) eq 'format' ) {
7533 1         2 $ci_next = 0;
7534             }
7535             }
7536              
7537             #------------------
7538             # Section 7. Commas
7539             #------------------
7540             # A comma and the subsequent item normally have ci undone
7541             # unless ci has been set at a lower level
7542             elsif ( $type eq ',' ) {
7543              
7544 3034 100       7785 if ( $rparent->{_container_type} eq 'List' ) {
7545 2815         4580 $ci_this = $ci_next = $rparent->{_ci_open_next};
7546             }
7547             }
7548              
7549             #---------------------------------
7550             # Section 8. Hanging side comments
7551             #---------------------------------
7552             # Treat hanging side comments like blanks
7553             elsif ( $type eq 'q' && $token eq EMPTY_STRING ) {
7554 54         121 $ci_next = $ci_this;
7555              
7556 54         109 $rtoken_K->[_CI_LEVEL_] = $ci_this;
7557              
7558             # 'next' to avoid saving last_ values for blanks and commas
7559 54         111 next;
7560             }
7561             else {
7562             ## ok - not a special type for ci
7563             }
7564              
7565             # Save debug info if requested
7566 35099         44329 DEBUG_SET_CI && do {
7567              
7568             my $seqno = $rtoken_K->[_TYPE_SEQUENCE_];
7569             my $level = $rtoken_K->[_LEVEL_];
7570             my $ci = $rtoken_K->[_CI_LEVEL_];
7571             if ( $ci > 1 ) { $ci = 1 }
7572              
7573             my $tok = $token;
7574             my $last_tok = $last_token;
7575             $tok =~ s/\t//g;
7576             $last_tok =~ s/\t//g;
7577             $tok = length($tok) > 3 ? substr( $tok, 0, 8 ) : $tok;
7578             $last_tok =
7579             length($last_tok) > 3 ? substr( $last_tok, 0, 8 ) : $last_tok;
7580             $tok =~ s/["']//g;
7581             $last_tok =~ s/["']//g;
7582             my $block_type;
7583             $block_type = $rblock_type_of_seqno->{$seqno} if ($seqno);
7584             $block_type = EMPTY_STRING unless ($block_type);
7585             my $ptype = $rparent->{_container_type};
7586             my $pname = $ptype;
7587              
7588             my $error =
7589             $ci_this == $ci ? EMPTY_STRING : $type eq 'b' ? "error" : "ERROR";
7590             if ($error) { $saw_ci_diff{$KK} = 1 }
7591              
7592             my $lno = $rtoken_K->[_LINE_INDEX_] + 1;
7593             $debug_lines[$KK] = <<EOM;
7594             $lno\t$ci\t$ci_this\t$ci_next\t$last_type\t$last_tok\t$type\t$tok\t$seqno\t$level\t$pname\t$block_type\t$error
7595             EOM
7596             };
7597              
7598             #----------------------------------
7599             # Store the ci value for this token
7600             #----------------------------------
7601 35099         48935 $rtoken_K->[_CI_LEVEL_] = $ci_this;
7602              
7603             # Remember last nonblank, non-comment token info for the next pass
7604 35099         43883 $ci_last = $ci_this;
7605 35099         45292 $last_token = $token;
7606 35099         52641 $last_type = $type;
7607              
7608             } ## End main loop over tokens
7609              
7610             #----------------------
7611             # Post-loop operations:
7612             #----------------------
7613              
7614             # if the logfile is saved, we need to save the leading ci of
7615             # each old line of code.
7616 557 100       3635 if ( $self->[_save_logfile_] ) {
7617 2         4 foreach my $line_of_tokens ( @{$rlines} ) {
  2         7  
7618 10         15 my $line_type = $line_of_tokens->{_line_type};
7619 10 100       40 next if ( $line_type ne 'CODE' );
7620 7         10 my ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
  7         25  
7621 7 100       19 next if ( !defined($Kfirst) );
7622 6         16 $line_of_tokens->{_ci_level_0} = $rLL->[$Kfirst]->[_CI_LEVEL_];
7623             }
7624             }
7625              
7626 557         1343 if (DEBUG_SET_CI) {
7627             my @output_lines;
7628             foreach my $KK ( 0 .. $Klimit ) {
7629             my $line = $debug_lines[$KK];
7630             if ($line) {
7631             my $Kp = $self->K_previous_code($KK);
7632             my $Kn = $self->K_next_code($KK);
7633             if ( DEBUG_SET_CI > 1
7634             || $Kp && $saw_ci_diff{$Kp}
7635             || $saw_ci_diff{$KK}
7636             || $Kn && $saw_ci_diff{$Kn} )
7637             {
7638             push @output_lines, $line;
7639             }
7640             }
7641             }
7642             if (@output_lines) {
7643             unshift @output_lines, <<EOM;
7644             lno\tci\tci_this\tci_next\tlast_type\tlast_tok\ttype\ttok\tseqno\tlevel\tpname\tblock_type\terror?
7645             EOM
7646             foreach my $line (@output_lines) {
7647             chomp $line;
7648             print {*STDOUT} $line, "\n";
7649             }
7650             }
7651             }
7652              
7653 557         14681 return;
7654             } ## end sub set_ci
7655              
7656             sub set_CODE_type {
7657 561     561 0 1736 my ($self) = @_;
7658              
7659             # Examine each line of code and set a flag '$CODE_type' to describe it.
7660             # Also return a list of lines with side comments.
7661              
7662 561         1650 my $rLL = $self->[_rLL_];
7663 561         1403 my $rlines = $self->[_rlines_];
7664              
7665 561         1458 my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
7666 561         1419 my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
7667             my $rOpts_static_block_comment_prefix =
7668 561         1661 $rOpts->{'static-block-comment-prefix'};
7669              
7670             # Remember indexes of lines with side comments
7671 561         1165 my @ix_side_comments;
7672              
7673 561         1359 my $In_format_skipping_section = 0;
7674 561         1507 my $Saw_VERSION_in_this_file = 0;
7675 561         1106 my $has_side_comment = 0;
7676 561         1105 my $last_line_had_side_comment = 0;
7677 561         1621 my ( $Kfirst, $Klast );
7678 561         0 my $CODE_type;
7679              
7680             # Loop to set CODE_type
7681              
7682             # Possible CODE_types
7683             # 'VB' = Verbatim - line goes out verbatim (a quote)
7684             # 'FS' = Format Skipping - line goes out verbatim
7685             # 'BL' = Blank Line
7686             # 'HSC' = Hanging Side Comment - fix this hanging side comment
7687             # 'SBCX'= Static Block Comment Without Leading Space
7688             # 'SBC' = Static Block Comment
7689             # 'BC' = Block Comment - an ordinary full line comment
7690             # 'IO' = Indent Only - line goes out unchanged except for indentation
7691             # 'NIN' = No Internal Newlines - line does not get broken
7692             # 'VER' = VERSION statement
7693             # '' = ordinary line of code with no restrictions
7694              
7695 561         1169 my $ix_line = -1;
7696 561         1170 foreach my $line_of_tokens ( @{$rlines} ) {
  561         1693  
7697 7666         9812 $ix_line++;
7698 7666         13753 my $line_type = $line_of_tokens->{_line_type};
7699              
7700 7666         10384 my $last_CODE_type = $CODE_type;
7701 7666         10521 $CODE_type = EMPTY_STRING;
7702              
7703 7666 100       14471 if ( $line_type ne 'CODE' ) {
7704 173         296 next;
7705             }
7706              
7707 7493         12663 my $input_line = $line_of_tokens->{_line_text};
7708              
7709 7493         9922 my $Klast_prev = $Klast;
7710 7493         9613 ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
  7493         15339  
7711 7493 100       13993 my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
7712              
7713 7493         10253 my $is_block_comment;
7714 7493 100 100     25572 if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
7715 1114 100       2856 if ( $jmax == 0 ) { $is_block_comment = 1; }
  786         1491  
7716 328         806 else { $has_side_comment = 1 }
7717             }
7718              
7719             # Write line verbatim if we are in a formatting skip section
7720 7493 100       13241 if ($In_format_skipping_section) {
7721              
7722             # Note: extra space appended to comment simplifies pattern matching
7723 57 100 66     430 if (
    50 66        
      33        
      0        
      33        
      0        
      0        
7724             $is_block_comment
7725              
7726             # optional fast pre-check
7727             && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
7728             || $rOpts_format_skipping_end )
7729              
7730             && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
7731             /$format_skipping_pattern_end/
7732             )
7733             {
7734 13         58 $In_format_skipping_section = 0;
7735 13         37 my $input_line_no = $line_of_tokens->{_line_number};
7736 13         56 write_logfile_entry(
7737             "Line $input_line_no: Exiting format-skipping section\n");
7738             }
7739             elsif (
7740             $is_block_comment
7741              
7742             # optional fast pre-check
7743             && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
7744             || $rOpts_format_skipping_begin )
7745              
7746             && $rOpts_format_skipping
7747             && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
7748             /$format_skipping_pattern_begin/
7749             )
7750             {
7751             # warn of duplicate starting comment lines, git #118
7752 0         0 my $input_line_no = $line_of_tokens->{_line_number};
7753 0         0 warning(
7754             "Already in format-skipping section which started at line $In_format_skipping_section\n",
7755             $input_line_no
7756             );
7757             }
7758             else {
7759             ## ok - not at a format skipping control line
7760             }
7761 57         88 $CODE_type = 'FS';
7762 57         91 next;
7763             }
7764              
7765             # Check for a continued quote..
7766 7436 100       13689 if ( $line_of_tokens->{_starting_in_quote} ) {
7767              
7768             # A line which is entirely a quote or pattern must go out
7769             # verbatim. Note: the \n is contained in $input_line.
7770 47 100       172 if ( $jmax <= 0 ) {
7771 28 50 33     83 if ( $self->[_save_logfile_] && $input_line =~ /\t/ ) {
7772 0         0 my $input_line_number = $line_of_tokens->{_line_number};
7773 0         0 $self->note_embedded_tab($input_line_number);
7774             }
7775 28         49 $CODE_type = 'VB';
7776 28         49 next;
7777             }
7778             }
7779              
7780             # See if we are entering a formatting skip section
7781 7408 100 100     16466 if (
      100        
      100        
      100        
7782             $is_block_comment
7783              
7784             # optional fast pre-check
7785             && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
7786             || $rOpts_format_skipping_begin )
7787              
7788             && $rOpts_format_skipping
7789             && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
7790             /$format_skipping_pattern_begin/
7791             )
7792             {
7793 13         56 my $input_line_no = $line_of_tokens->{_line_number};
7794 13         30 $In_format_skipping_section = $input_line_no;
7795 13         84 write_logfile_entry(
7796             "Line $input_line_no: Entering format-skipping section\n");
7797 13         27 $CODE_type = 'FS';
7798 13         42 next;
7799             }
7800              
7801             # ignore trailing blank tokens (they will get deleted later)
7802 7395 100 100     21266 if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
7803 145         349 $jmax--;
7804             }
7805              
7806             # blank line..
7807 7395 100       13170 if ( $jmax < 0 ) {
7808 801         1732 $CODE_type = 'BL';
7809 801         1430 next;
7810             }
7811              
7812             # Handle comments
7813 6594 100       11352 if ($is_block_comment) {
7814              
7815             # see if this is a static block comment (starts with ## by default)
7816 760         1287 my $is_static_block_comment = 0;
7817 760         1685 my $no_leading_space = substr( $input_line, 0, 1 ) eq '#';
7818 760 100 100     3949 if (
      100        
      100        
7819              
7820             # optional fast pre-check
7821             (
7822             substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
7823             || $rOpts_static_block_comment_prefix
7824             )
7825              
7826             && $rOpts_static_block_comments
7827             && $input_line =~ /$static_block_comment_pattern/
7828             )
7829             {
7830 21         45 $is_static_block_comment = 1;
7831             }
7832              
7833             # Check for comments which are line directives
7834             # Treat exactly as static block comments without leading space
7835             # reference: perlsyn, near end, section Plain Old Comments (Not!)
7836             # example: '# line 42 "new_filename.plx"'
7837 760 100 100     3728 if (
7838             $no_leading_space
7839             && $input_line =~ m{^\# \s*
7840             line \s+ (\d+) \s*
7841             (?:\s("?)([^"]+)\2)? \s*
7842             $}x
7843             )
7844             {
7845 2         6 $is_static_block_comment = 1;
7846             }
7847              
7848             # look for hanging side comment ...
7849 760 100 100     2520 if (
      66        
7850             $last_line_had_side_comment # this follows as side comment
7851             && !$no_leading_space # with some leading space, and
7852             && !$is_static_block_comment # this is not a static comment
7853             )
7854             {
7855              
7856             # continuing an existing HSC chain?
7857 61 100       204 if ( $last_CODE_type eq 'HSC' ) {
7858 26         57 $has_side_comment = 1;
7859 26         52 $CODE_type = 'HSC';
7860 26         47 next;
7861             }
7862              
7863             # starting a new HSC chain?
7864 35 50 33     406 if (
      66        
      33        
      33        
7865              
7866             $rOpts->{'hanging-side-comments'} # user is allowing
7867             # hanging side comments
7868             # like this
7869              
7870             && ( defined($Klast_prev) && $Klast_prev > 1 )
7871              
7872             # and the previous side comment was not static (issue c070)
7873             && !(
7874             $rOpts->{'static-side-comments'}
7875             && $rLL->[$Klast_prev]->[_TOKEN_] =~
7876             /$static_side_comment_pattern/
7877             )
7878              
7879             )
7880             {
7881              
7882             # and it is not a closing side comment (issue c070).
7883 33         81 my $K_penult = $Klast_prev - 1;
7884 33 100       135 $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
7885 33   66     251 my $follows_csc =
7886             ( $rLL->[$K_penult]->[_TOKEN_] eq '}'
7887             && $rLL->[$K_penult]->[_TYPE_] eq '}'
7888             && $rLL->[$Klast_prev]->[_TOKEN_] =~
7889             /$closing_side_comment_prefix_pattern/ );
7890              
7891 33 50       128 if ( !$follows_csc ) {
7892 33         66 $has_side_comment = 1;
7893 33         76 $CODE_type = 'HSC';
7894 33         83 next;
7895             }
7896             }
7897             }
7898              
7899 701 100 66     2780 if ($is_static_block_comment) {
    50 33        
7900 23 100       91 $CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
7901 23         57 next;
7902             }
7903             elsif ($last_line_had_side_comment
7904             && !$rOpts_maximum_consecutive_blank_lines
7905             && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
7906             {
7907             # Emergency fix to keep a block comment from becoming a hanging
7908             # side comment. This fix is for the case that blank lines
7909             # cannot be inserted. There is related code in sub
7910             # 'process_line_of_CODE'
7911 0         0 $CODE_type = 'SBCX';
7912 0         0 next;
7913             }
7914             else {
7915 678         1220 $CODE_type = 'BC';
7916 678         1342 next;
7917             }
7918             }
7919              
7920             # End of comments. Handle a line of normal code:
7921              
7922 5834 100       10550 if ($rOpts_indent_only) {
7923 12         25 $CODE_type = 'IO';
7924 12         20 next;
7925             }
7926              
7927 5822 100       10275 if ( !$rOpts_add_newlines ) {
7928 64         101 $CODE_type = 'NIN';
7929 64         105 next;
7930             }
7931              
7932             # Patch needed for MakeMaker. Do not break a statement
7933             # in which $VERSION may be calculated. See MakeMaker.pm;
7934             # this is based on the coding in it.
7935             # The first line of a file that matches this will be eval'd:
7936             # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
7937             # Examples:
7938             # *VERSION = \'1.01';
7939             # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
7940             # We will pass such a line straight through without breaking
7941             # it unless -npvl is used.
7942              
7943             # Patch for problem reported in RT #81866, where files
7944             # had been flattened into a single line and couldn't be
7945             # tidied without -npvl. There are two parts to this patch:
7946             # First, it is not done for a really long line (80 tokens for now).
7947             # Second, we will only allow up to one semicolon
7948             # before the VERSION. We need to allow at least one semicolon
7949             # for statements like this:
7950             # require Exporter; our $VERSION = $Exporter::VERSION;
7951             # where both statements must be on a single line for MakeMaker
7952              
7953 5758 100 66     27000 if ( !$Saw_VERSION_in_this_file
      100        
7954             && $jmax < 80
7955             && $input_line =~
7956             /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
7957             {
7958 4         28 $Saw_VERSION_in_this_file = 1;
7959 4         28 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
7960              
7961             # This code type has lower priority than others
7962 4         26 $CODE_type = 'VER';
7963 4         14 next;
7964             }
7965             }
7966             continue {
7967 7666         14342 $line_of_tokens->{_code_type} = $CODE_type;
7968              
7969 7666         10047 $last_line_had_side_comment = $has_side_comment;
7970 7666 100       16173 if ($has_side_comment) {
7971 387         1025 push @ix_side_comments, $ix_line;
7972 387         853 $has_side_comment = 0;
7973             }
7974             }
7975              
7976 561         3733 return \@ix_side_comments;
7977             } ## end sub set_CODE_type
7978              
7979             sub find_non_indenting_braces {
7980              
7981 561     561 0 1944 my ( $self, $rix_side_comments ) = @_;
7982              
7983             # Find and mark all non-indenting braces in this file.
7984              
7985             # Given:
7986             # $rix_side_comments = index of lines which have side comments
7987             # Find and save the line indexes of these special side comments in:
7988             # $self->[_rseqno_non_indenting_brace_by_ix_];
7989              
7990             # Non-indenting braces are opening braces of the form
7991             # { #<<< ...
7992             # which do not cause an increase in indentation level.
7993             # They are enabled with the --non-indenting-braces, or -nib, flag.
7994              
7995 561 100       2250 return unless ( $rOpts->{'non-indenting-braces'} );
7996 560         1445 my $rLL = $self->[_rLL_];
7997 560 100 66     2204 return unless ( defined($rLL) && @{$rLL} );
  560         2068  
7998 556         1740 my $rlines = $self->[_rlines_];
7999 556         1684 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
8000 556         1403 my $rseqno_non_indenting_brace_by_ix =
8001             $self->[_rseqno_non_indenting_brace_by_ix_];
8002              
8003 556         1180 foreach my $ix ( @{$rix_side_comments} ) {
  556         1764  
8004 381         706 my $line_of_tokens = $rlines->[$ix];
8005 381         748 my $line_type = $line_of_tokens->{_line_type};
8006 381 50       900 if ( $line_type ne 'CODE' ) {
8007              
8008             # shouldn't happen
8009 0         0 DEVEL_MODE && Fault("unexpected line_type=$line_type\n");
8010 0         0 next;
8011             }
8012 381         662 my $rK_range = $line_of_tokens->{_rK_range};
8013 381         617 my ( $Kfirst, $Klast ) = @{$rK_range};
  381         784  
8014 381 50 33     1771 if ( !defined($Kfirst) || $rLL->[$Klast]->[_TYPE_] ne '#' ) {
8015              
8016             # shouldn't happen
8017 0         0 DEVEL_MODE && Fault("did not get a comment\n");
8018 0         0 next;
8019             }
8020 381 100       993 next if ( $Klast <= $Kfirst ); # maybe HSC
8021 322         637 my $token_sc = $rLL->[$Klast]->[_TOKEN_];
8022 322         732 my $K_m = $Klast - 1;
8023 322         650 my $type_m = $rLL->[$K_m]->[_TYPE_];
8024 322 100 66     1330 if ( $type_m eq 'b' && $K_m > $Kfirst ) {
8025 316         590 $K_m--;
8026 316         704 $type_m = $rLL->[$K_m]->[_TYPE_];
8027             }
8028 322         631 my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
8029 322 100       903 if ($seqno_m) {
8030 111         277 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
8031              
8032             # The pattern ends in \s but we have removed the newline, so
8033             # we added it back for the match. That way we require an exact
8034             # match to the special string and also allow additional text.
8035 111         245 $token_sc .= "\n";
8036 111 100 100     912 if ( $block_type_m
      100        
8037             && $is_opening_type{$type_m}
8038             && $token_sc =~ /$non_indenting_brace_pattern/ )
8039             {
8040 6         30 $rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m;
8041             }
8042             }
8043             }
8044 556         1493 return;
8045             } ## end sub find_non_indenting_braces
8046              
8047             sub delete_side_comments {
8048 10     10 0 53 my ( $self, $rix_side_comments ) = @_;
8049              
8050             # Given a list of indexes of lines with side comments, handle any
8051             # requested side comment deletions.
8052              
8053 10         32 my $rLL = $self->[_rLL_];
8054 10         27 my $rlines = $self->[_rlines_];
8055 10         28 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
8056 10         27 my $rseqno_non_indenting_brace_by_ix =
8057             $self->[_rseqno_non_indenting_brace_by_ix_];
8058              
8059 10         33 foreach my $ix ( @{$rix_side_comments} ) {
  10         38  
8060 23         40 my $line_of_tokens = $rlines->[$ix];
8061 23         45 my $line_type = $line_of_tokens->{_line_type};
8062              
8063             # This fault shouldn't happen because we only saved CODE lines with
8064             # side comments in the TASK 1 loop above.
8065 23 50       58 if ( $line_type ne 'CODE' ) {
8066 0         0 if (DEVEL_MODE) {
8067             my $lno = $ix + 1;
8068             Fault(<<EOM);
8069             Hit unexpected line_type = '$line_type' near line $lno while deleting side comments, should be 'CODE'
8070             EOM
8071             }
8072 0         0 next;
8073             }
8074              
8075 23         44 my $CODE_type = $line_of_tokens->{_code_type};
8076 23         46 my $rK_range = $line_of_tokens->{_rK_range};
8077 23         40 my ( $Kfirst, $Klast ) = @{$rK_range};
  23         46  
8078              
8079 23 50 33     115 if ( !defined($Kfirst) || $rLL->[$Klast]->[_TYPE_] ne '#' ) {
8080 0         0 if (DEVEL_MODE) {
8081             my $lno = $ix + 1;
8082             Fault(<<EOM);
8083             Did not find side comment near line $lno while deleting side comments
8084             EOM
8085             }
8086 0         0 next;
8087             }
8088              
8089 23   33     179 my $delete_side_comment =
8090             $rOpts_delete_side_comments
8091             && ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
8092             && (!$CODE_type
8093             || $CODE_type eq 'HSC'
8094             || $CODE_type eq 'IO'
8095             || $CODE_type eq 'NIN' );
8096              
8097             # Do not delete special control side comments
8098 23 50       67 if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) {
8099 0         0 $delete_side_comment = 0;
8100             }
8101              
8102 23 0 66     109 if (
      66        
      0        
      33        
8103             $rOpts_delete_closing_side_comments
8104             && !$delete_side_comment
8105             && $Klast > $Kfirst
8106             && ( !$CODE_type
8107             || $CODE_type eq 'HSC'
8108             || $CODE_type eq 'IO'
8109             || $CODE_type eq 'NIN' )
8110             )
8111             {
8112 3         13 my $token = $rLL->[$Klast]->[_TOKEN_];
8113 3         10 my $K_m = $Klast - 1;
8114 3         9 my $type_m = $rLL->[$K_m]->[_TYPE_];
8115 3 50 33     27 if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
  3         6  
8116 3         13 my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
8117 3 100       11 if ($seqno_m) {
8118 2         4 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
8119 2 50 33     126 if ( $block_type_m
      33        
8120             && $token =~ /$closing_side_comment_prefix_pattern/
8121             && $block_type_m =~ /$closing_side_comment_list_pattern/ )
8122             {
8123 2         10 $delete_side_comment = 1;
8124             }
8125             }
8126             } ## end if ( $rOpts_delete_closing_side_comments...)
8127              
8128 23 100       56 if ($delete_side_comment) {
8129              
8130             # We are actually just changing the side comment to a blank.
8131             # This may produce multiple blanks in a row, but sub respace_tokens
8132             # will check for this and fix it.
8133 22         44 $rLL->[$Klast]->[_TYPE_] = 'b';
8134 22         40 $rLL->[$Klast]->[_TOKEN_] = SPACE;
8135              
8136             # The -io option outputs the line text, so we have to update
8137             # the line text so that the comment does not reappear.
8138 22 100       66 if ( $CODE_type eq 'IO' ) {
8139 2         4 my $line = EMPTY_STRING;
8140 2         6 foreach my $KK ( $Kfirst .. $Klast - 1 ) {
8141 18         29 $line .= $rLL->[$KK]->[_TOKEN_];
8142             }
8143 2         12 $line =~ s/\s+$//;
8144 2         7 $line_of_tokens->{_line_text} = $line . "\n";
8145             }
8146              
8147             # If we delete a hanging side comment the line becomes blank.
8148 22 100       84 if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
  5         23  
8149             }
8150             }
8151 10         48 return;
8152             } ## end sub delete_side_comments
8153              
8154             sub dump_verbatim {
8155 0     0 0 0 my $self = shift;
8156              
8157             # Dump the input file to the output verbatim. This is called when
8158             # there is a severe error and formatted output cannot be made.
8159 0         0 my $rlines = $self->[_rlines_];
8160 0         0 foreach my $line ( @{$rlines} ) {
  0         0  
8161 0         0 my $input_line = $line->{_line_text};
8162 0         0 $self->write_unindented_line($input_line);
8163             }
8164 0         0 return;
8165             } ## end sub dump_verbatim
8166              
8167             my %wU;
8168             my %wiq;
8169             my %is_witPS;
8170             my %is_sigil;
8171             my %is_nonlist_keyword;
8172             my %is_nonlist_type;
8173             my %is_s_y_m_slash;
8174             my %is_unexpected_equals;
8175             my %is_ascii_type;
8176              
8177             BEGIN {
8178              
8179             # added 'U' to fix cases b1125 b1126 b1127
8180 39     39   274 my @q = qw(w U);
8181 39         183 @{wU}{@q} = (1) x scalar(@q);
8182              
8183 39         154 @q = qw(w i q Q G C Z);
8184 39         288 @{wiq}{@q} = (1) x scalar(@q);
8185              
8186 39         205 @q = qw(w i t P S); # Fix for c250: added new types 'P', 'S', formerly 'i'
8187 39         324 @{is_witPS}{@q} = (1) x scalar(@q);
8188              
8189 39         159 @q = qw($ & % * @);
8190 39         267 @{is_sigil}{@q} = (1) x scalar(@q);
8191              
8192             # Parens following these keywords will not be marked as lists. Note that
8193             # 'for' is not included and is handled separately, by including 'f' in the
8194             # hash %is_counted_type, since it may or may not be a c-style for loop.
8195 39         173 @q = qw( if elsif unless and or );
8196 39         149 @is_nonlist_keyword{@q} = (1) x scalar(@q);
8197              
8198             # Parens following these types will not be marked as lists
8199 39         91 @q = qw( && || );
8200 39         110 @is_nonlist_type{@q} = (1) x scalar(@q);
8201              
8202 39         95 @q = qw( s y m / );
8203 39         175 @is_s_y_m_slash{@q} = (1) x scalar(@q);
8204              
8205 39         123 @q = qw( = == != );
8206 39         149 @is_unexpected_equals{@q} = (1) x scalar(@q);
8207              
8208             # We can always skip expensive length_function->() calls for these
8209             # ascii token types
8210 39         615 @q = qw#
8211             b k L R ; ( { [ ? : ] } ) f t n v F p m pp mm
8212             .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
8213             ( ) <= >= == =~ !~ != ++ -- /= x=
8214             ... **= <<= >>= &&= ||= //= <=>
8215             + - / * | % ! x ~ = \ ? : . < > ^ &
8216             #;
8217 39         124 push @q, ',';
8218 39         554764 @is_ascii_type{@q} = (1) x scalar(@q);
8219              
8220             } ## end BEGIN
8221              
8222             { #<<< begin closure respace_tokens
8223              
8224             my $rLL_new; # This will be the new array of tokens
8225              
8226             # These are variables in $self
8227             my $rLL;
8228             my $length_function;
8229              
8230             my $K_closing_ternary;
8231             my $K_opening_ternary;
8232             my $rchildren_of_seqno;
8233             my $rhas_broken_code_block;
8234             my $rhas_broken_list;
8235             my $rhas_broken_list_with_lec;
8236             my $rhas_code_block;
8237             my $rhas_list;
8238             my $rhas_ternary;
8239             my $ris_assigned_structure;
8240             my $ris_broken_container;
8241             my $ris_excluded_lp_container;
8242             my $ris_list_by_seqno;
8243             my $ris_permanently_broken;
8244             my $rlec_count_by_seqno;
8245             my $roverride_cab3;
8246             my $rparent_of_seqno;
8247             my $rtype_count_by_seqno;
8248             my $rblock_type_of_seqno;
8249              
8250             my $K_opening_container;
8251             my $K_closing_container;
8252              
8253             my %K_first_here_doc_by_seqno;
8254              
8255             my $last_nonblank_code_type;
8256             my $last_nonblank_code_token;
8257             my $last_nonblank_block_type;
8258             my $last_last_nonblank_code_type;
8259             my $last_last_nonblank_code_token;
8260              
8261             my %seqno_stack;
8262             my %K_old_opening_by_seqno;
8263             my $depth_next;
8264             my $depth_next_max;
8265              
8266             my $cumulative_length;
8267              
8268             # Variables holding the current line info
8269             my $Ktoken_vars;
8270             my $Kfirst_old;
8271             my $Klast_old;
8272             my $Klast_old_code;
8273             my $CODE_type;
8274              
8275             my $rwhitespace_flags;
8276              
8277             sub initialize_respace_tokens_closure {
8278              
8279 558     558 0 1602 my ($self) = @_;
8280              
8281 558         1657 $rLL_new = []; # This is the new array
8282              
8283 558         7563 $rLL = $self->[_rLL_];
8284              
8285 558         1593 $length_function = $self->[_length_function_];
8286 558         1618 $K_closing_ternary = $self->[_K_closing_ternary_];
8287 558         1717 $K_opening_ternary = $self->[_K_opening_ternary_];
8288 558         4004 $rchildren_of_seqno = $self->[_rchildren_of_seqno_];
8289 558         1402 $rhas_broken_code_block = $self->[_rhas_broken_code_block_];
8290 558         1487 $rhas_broken_list = $self->[_rhas_broken_list_];
8291 558         1429 $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
8292 558         1321 $rhas_code_block = $self->[_rhas_code_block_];
8293 558         1424 $rhas_list = $self->[_rhas_list_];
8294 558         1614 $rhas_ternary = $self->[_rhas_ternary_];
8295 558         1658 $ris_assigned_structure = $self->[_ris_assigned_structure_];
8296 558         2180 $ris_broken_container = $self->[_ris_broken_container_];
8297 558         1390 $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
8298 558         1680 $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
8299 558         1779 $ris_permanently_broken = $self->[_ris_permanently_broken_];
8300 558         1528 $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
8301 558         1341 $roverride_cab3 = $self->[_roverride_cab3_];
8302 558         1344 $rparent_of_seqno = $self->[_rparent_of_seqno_];
8303 558         2646 $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
8304 558         1276 $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
8305              
8306 558         1452 %K_first_here_doc_by_seqno = ();
8307              
8308 558         1508 $last_nonblank_code_type = ';';
8309 558         1218 $last_nonblank_code_token = ';';
8310 558         1275 $last_nonblank_block_type = EMPTY_STRING;
8311 558         1328 $last_last_nonblank_code_type = ';';
8312 558         1185 $last_last_nonblank_code_token = ';';
8313              
8314 558         1819 %seqno_stack = ();
8315 558         2069 %K_old_opening_by_seqno = (); # Note: old K index
8316 558         1174 $depth_next = 0;
8317 558         1165 $depth_next_max = 0;
8318              
8319             # we will be setting token lengths as we go
8320 558         1172 $cumulative_length = 0;
8321              
8322 558         1273 $Ktoken_vars = undef; # the old K value of $rtoken_vars
8323 558         1114 $Kfirst_old = undef; # min K of old line
8324 558         1178 $Klast_old = undef; # max K of old line
8325 558         1068 $Klast_old_code = undef; # K of last token if side comment
8326 558         1183 $CODE_type = EMPTY_STRING;
8327              
8328             # Set the whitespace flags, which indicate the token spacing preference.
8329 558         3139 $rwhitespace_flags = $self->set_whitespace_flags();
8330              
8331             # Note that $K_opening_container and $K_closing_container have values
8332             # defined in sub get_line() for the previous K indexes. They were needed
8333             # in case option 'indent-only' was set, and we didn't get here. We no
8334             # longer need those and will eliminate them now to avoid any possible
8335             # mixing of old and new values. This must be done AFTER the call to
8336             # set_whitespace_flags, which needs these.
8337 558         3596 $K_opening_container = $self->[_K_opening_container_] = {};
8338 558         3103 $K_closing_container = $self->[_K_closing_container_] = {};
8339              
8340 558         1475 return;
8341              
8342             } ## end sub initialize_respace_tokens_closure
8343              
8344             sub respace_tokens {
8345              
8346 561     561 0 1593 my $self = shift;
8347              
8348             #--------------------------------------------------------------------------
8349             # This routine is called once per file to do as much formatting as possible
8350             # before new line breaks are set.
8351             #--------------------------------------------------------------------------
8352              
8353             # Return parameters:
8354             # Set $severe_error=true if processing must terminate immediately
8355 561         1427 my ( $severe_error, $rqw_lines );
8356              
8357             # We change any spaces in --indent-only mode
8358 561 100       2315 if ( $rOpts->{'indent-only'} ) {
8359              
8360             # We need to define lengths for -indent-only to avoid undefs, even
8361             # though these values are not actually needed for option --indent-only.
8362              
8363 3         21 $rLL = $self->[_rLL_];
8364 3         9 $cumulative_length = 0;
8365              
8366 3         8 foreach my $item ( @{$rLL} ) {
  3         12  
8367 122         177 my $token = $item->[_TOKEN_];
8368 122 50       192 my $token_length =
8369             $length_function ? $length_function->($token) : length($token);
8370 122         145 $cumulative_length += $token_length;
8371 122         151 $item->[_TOKEN_LENGTH_] = $token_length;
8372 122         177 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
8373             }
8374              
8375 3         21 return ( $severe_error, $rqw_lines );
8376             }
8377              
8378             # This routine makes all necessary and possible changes to the tokenization
8379             # after the initial tokenization of the file. This is a tedious routine,
8380             # but basically it consists of inserting and deleting whitespace between
8381             # nonblank tokens according to the selected parameters. In a few cases
8382             # non-space characters are added, deleted or modified.
8383              
8384             # The goal of this routine is to create a new token array which only needs
8385             # the definition of new line breaks and padding to complete formatting. In
8386             # a few cases we have to cheat a little to achieve this goal. In
8387             # particular, we may not know if a semicolon will be needed, because it
8388             # depends on how the line breaks go. To handle this, we include the
8389             # semicolon as a 'phantom' which can be displayed as normal or as an empty
8390             # string.
8391              
8392             # Method: The old tokens are copied one-by-one, with changes, from the old
8393             # linear storage array $rLL to a new array $rLL_new.
8394              
8395             # (re-)initialize closure variables for this problem
8396 558         2918 $self->initialize_respace_tokens_closure();
8397              
8398             #--------------------------------
8399             # Main over all lines of the file
8400             #--------------------------------
8401 558         1787 my $rlines = $self->[_rlines_];
8402 558         1629 my $line_type = EMPTY_STRING;
8403 558         1246 my $last_K_out;
8404              
8405 558         1356 foreach my $line_of_tokens ( @{$rlines} ) {
  558         1968  
8406              
8407 7647         14568 my $input_line_number = $line_of_tokens->{_line_number};
8408 7647         11583 my $last_line_type = $line_type;
8409 7647         12866 $line_type = $line_of_tokens->{_line_type};
8410 7647 100       16183 next unless ( $line_type eq 'CODE' );
8411 7474         13028 $CODE_type = $line_of_tokens->{_code_type};
8412              
8413 7474 100       14612 if ( $CODE_type eq 'BL' ) {
8414 803         2050 my $seqno = $seqno_stack{ $depth_next - 1 };
8415 803 100       2506 if ( defined($seqno) ) {
8416 79         223 $self->[_rblank_and_comment_count_]->{$seqno} += 1;
8417             $self->set_permanently_broken($seqno)
8418 79 100 66     532 if (!$ris_permanently_broken->{$seqno}
8419             && $rOpts_maximum_consecutive_blank_lines );
8420             }
8421             }
8422              
8423 7474         12322 my $rK_range = $line_of_tokens->{_rK_range};
8424 7474         10315 my ( $Kfirst, $Klast ) = @{$rK_range};
  7474         14786  
8425 7474 100       15658 next unless defined($Kfirst);
8426 6671         12595 ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
8427 6671         9605 $Klast_old_code = $Klast_old;
8428              
8429             # Be sure an old K value is defined for sub store_token
8430 6671         9194 $Ktoken_vars = $Kfirst;
8431              
8432             # Check for correct sequence of token indexes...
8433             # An error here means that sub write_line() did not correctly
8434             # package the tokenized lines as it received them. If we
8435             # get a fault here it has not output a continuous sequence
8436             # of K values. Or a line of CODE may have been mis-marked as
8437             # something else. There is no good way to continue after such an
8438             # error.
8439 6671 100       12166 if ( defined($last_K_out) ) {
8440 6117 50       13815 if ( $Kfirst != $last_K_out + 1 ) {
8441 0         0 Fault_Warn(
8442             "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
8443             );
8444 0         0 $severe_error = 1;
8445 0         0 return ( $severe_error, $rqw_lines );
8446             }
8447             }
8448             else {
8449              
8450             # The first token should always have been given index 0 by sub
8451             # write_line()
8452 554 50       1903 if ( $Kfirst != 0 ) {
8453 0         0 Fault("Program Bug: first K is $Kfirst but should be 0");
8454             }
8455             }
8456 6671         11221 $last_K_out = $Klast;
8457              
8458             # Handle special lines of code
8459 6671 100 100     17787 if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
      100        
8460              
8461             # CODE_types are as follows.
8462             # 'BL' = Blank Line
8463             # 'VB' = Verbatim - line goes out verbatim
8464             # 'FS' = Format Skipping - line goes out verbatim, no blanks
8465             # 'IO' = Indent Only - only indentation may be changed
8466             # 'NIN' = No Internal Newlines - line does not get broken
8467             # 'HSC'=Hanging Side Comment - fix this hanging side comment
8468             # 'BC'=Block Comment - an ordinary full line comment
8469             # 'SBC'=Static Block Comment - a block comment which does not get
8470             # indented
8471             # 'SBCX'=Static Block Comment Without Leading Space
8472             # 'VER'=VERSION statement
8473             # '' or (undefined) - no restrictions
8474              
8475             # For a hanging side comment we insert an empty quote before
8476             # the comment so that it becomes a normal side comment and
8477             # will be aligned by the vertical aligner
8478 849 100       2471 if ( $CODE_type eq 'HSC' ) {
8479              
8480             # Safety Check: This must be a line with one token (a comment)
8481 54         181 my $rvars_Kfirst = $rLL->[$Kfirst];
8482 54 50 33     319 if ( $Kfirst == $Klast && $rvars_Kfirst->[_TYPE_] eq '#' ) {
8483              
8484             # Note that even if the flag 'noadd-whitespace' is set, we
8485             # will make an exception here and allow a blank to be
8486             # inserted to push the comment to the right. We can think
8487             # of this as an adjustment of indentation rather than
8488             # whitespace between tokens. This will also prevent the
8489             # hanging side comment from getting converted to a block
8490             # comment if whitespace gets deleted, as for example with
8491             # the -extrude and -mangle options.
8492 54         201 my $rcopy =
8493             copy_token_as_type( $rvars_Kfirst, 'q', EMPTY_STRING );
8494 54         223 $self->store_token($rcopy);
8495 54         235 $rcopy = copy_token_as_type( $rvars_Kfirst, 'b', SPACE );
8496 54         192 $self->store_token($rcopy);
8497 54         230 $self->store_token($rvars_Kfirst);
8498 54         166 next;
8499             }
8500             else {
8501              
8502             # This line was mis-marked by sub scan_comment. Catch in
8503             # DEVEL_MODE, otherwise try to repair and keep going.
8504 0         0 Fault(
8505             "Program bug. A hanging side comment has been mismarked"
8506             ) if (DEVEL_MODE);
8507              
8508 0         0 $CODE_type = EMPTY_STRING;
8509 0         0 $line_of_tokens->{_code_type} = $CODE_type;
8510             }
8511             }
8512              
8513             # Copy tokens unchanged
8514 795         2192 foreach my $KK ( $Kfirst .. $Klast ) {
8515 1249         1893 $Ktoken_vars = $KK;
8516 1249         3630 $self->store_token( $rLL->[$KK] );
8517             }
8518 795         1746 next;
8519             }
8520              
8521             # Handle normal line..
8522              
8523             # Define index of last token before any side comment for comma counts
8524 5822         11548 my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
8525 5822 100 100     22631 if ( ( $type_end eq '#' || $type_end eq 'b' )
      66        
8526             && $Klast_old_code > $Kfirst_old )
8527             {
8528 470         873 $Klast_old_code--;
8529 470 100 66     2146 if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
8530             && $Klast_old_code > $Kfirst_old )
8531             {
8532 319         640 $Klast_old_code--;
8533             }
8534             }
8535              
8536             # Insert any essential whitespace between lines
8537             # if last line was normal CODE.
8538             # Patch for rt #125012: use K_previous_code rather than '_nonblank'
8539             # because comments may disappear.
8540             # Note that we must do this even if --noadd-whitespace is set
8541 5822 100       13917 if ( $last_line_type eq 'CODE' ) {
8542 5510         9977 my $type_next = $rLL->[$Kfirst]->[_TYPE_];
8543 5510         9065 my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
8544 5510 100       15266 if (
8545             is_essential_whitespace(
8546             $last_last_nonblank_code_token,
8547             $last_last_nonblank_code_type,
8548             $last_nonblank_code_token,
8549             $last_nonblank_code_type,
8550             $token_next,
8551             $type_next,
8552             )
8553             )
8554             {
8555 127         436 $self->store_token();
8556             }
8557             }
8558              
8559             #-----------------------------------------------
8560             # Inner loop to respace tokens on a line of code
8561             #-----------------------------------------------
8562              
8563             # The inner loop is in a separate sub for clarity
8564 5822         14691 $self->respace_tokens_inner_loop( $Kfirst, $Klast, $input_line_number );
8565              
8566             } # End line loop
8567              
8568             # finalize data structures
8569 558         4970 $self->respace_post_loop_ops();
8570              
8571             # Reset memory to be the new array
8572 558         1355 $self->[_rLL_] = $rLL_new;
8573 558         1069 my $Klimit;
8574 558 100       1101 if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
  558         1941  
  554         1095  
  554         1274  
8575 558         1273 $self->[_Klimit_] = $Klimit;
8576              
8577             # During development, verify that the new array still looks okay.
8578 558         951 DEVEL_MODE && $self->check_token_array();
8579              
8580             # update the token limits of each line
8581 558         3472 ( $severe_error, $rqw_lines ) = $self->resync_lines_and_tokens();
8582              
8583 558         2276 return ( $severe_error, $rqw_lines );
8584             } ## end sub respace_tokens
8585              
8586             sub respace_tokens_inner_loop {
8587              
8588 5822     5822 0 11804 my ( $self, $Kfirst, $Klast, $input_line_number ) = @_;
8589              
8590             #-----------------------------------------------------------------
8591             # Loop to copy all tokens on one line, making any spacing changes,
8592             # while also collecting information needed by later subs.
8593             #-----------------------------------------------------------------
8594 5822         12877 foreach my $KK ( $Kfirst .. $Klast ) {
8595              
8596             # TODO: consider eliminating this closure var by passing directly to
8597             # store_token following pattern of store_token_to_go.
8598 50019         65998 $Ktoken_vars = $KK;
8599              
8600 50019         70164 my $rtoken_vars = $rLL->[$KK];
8601 50019         73744 my $type = $rtoken_vars->[_TYPE_];
8602              
8603             # Handle a blank space ...
8604 50019 100       87983 if ( $type eq 'b' ) {
8605              
8606             # Delete it if not wanted by whitespace rules
8607             # or we are deleting all whitespace
8608             # Note that whitespace flag is a flag indicating whether a
8609             # white space BEFORE the token is needed
8610 15160 100       28700 next if ( $KK >= $Klast ); # skip terminal blank
8611 15000         21800 my $Knext = $KK + 1;
8612              
8613 15000 50       25201 if ($rOpts_freeze_whitespace) {
8614 0         0 $self->store_token($rtoken_vars);
8615 0         0 next;
8616             }
8617              
8618 15000         23173 my $ws = $rwhitespace_flags->[$Knext];
8619 15000 100 100     46295 if ( $ws == -1
8620             || $rOpts_delete_old_whitespace )
8621             {
8622              
8623 752         1728 my $token_next = $rLL->[$Knext]->[_TOKEN_];
8624 752         1334 my $type_next = $rLL->[$Knext]->[_TYPE_];
8625              
8626 752         1719 my $do_not_delete = is_essential_whitespace(
8627             $last_last_nonblank_code_token,
8628             $last_last_nonblank_code_type,
8629             $last_nonblank_code_token,
8630             $last_nonblank_code_type,
8631             $token_next,
8632             $type_next,
8633             );
8634              
8635             # Note that repeated blanks will get filtered out here
8636 752 100       2091 next unless ($do_not_delete);
8637             }
8638              
8639             # make it just one character
8640 14361         26195 $rtoken_vars->[_TOKEN_] = SPACE;
8641 14361         31318 $self->store_token($rtoken_vars);
8642 14361         25335 next;
8643             }
8644              
8645 34859         53139 my $token = $rtoken_vars->[_TOKEN_];
8646              
8647             # Handle a sequenced token ... i.e. one of ( ) { } [ ] ? :
8648 34859 100       114111 if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
    100          
    100          
    100          
    100          
    100          
    100          
    100          
8649              
8650             # One of ) ] } ...
8651 9096 100       21004 if ( $is_closing_token{$token} ) {
8652              
8653 4362         8120 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8654 4362         7519 my $block_type = $rblock_type_of_seqno->{$type_sequence};
8655              
8656             #---------------------------------------------
8657             # check for semicolon addition in a code block
8658             #---------------------------------------------
8659 4362 100       8331 if ($block_type) {
8660              
8661             # if not preceded by a ';' ..
8662 972 100       3347 if ( $last_nonblank_code_type ne ';' ) {
8663              
8664             # tentatively insert a semicolon if appropriate
8665             $self->add_phantom_semicolon($KK)
8666 542 100       3037 if $rOpts->{'add-semicolons'};
8667             }
8668             }
8669              
8670             #----------------------------------------------------------
8671             # check for addition/deletion of a trailing comma in a list
8672             #----------------------------------------------------------
8673             else {
8674              
8675             # if this is a list ..
8676 3390         5595 my $rtype_count = $rtype_count_by_seqno->{$type_sequence};
8677 3390 100 100     13875 if ( $rtype_count
      100        
      100        
8678             && $rtype_count->{','}
8679             && !$rtype_count->{';'}
8680             && !$rtype_count->{'f'} )
8681             {
8682              
8683             # if NOT preceded by a comma..
8684 1020 100       2740 if ( $last_nonblank_code_type ne ',' ) {
8685              
8686             # insert a comma if requested
8687 735 100 66     2385 if ( $rOpts_add_trailing_commas
8688             && %trailing_comma_rules )
8689             {
8690             $self->add_trailing_comma( $KK, $Kfirst,
8691 24         88 $trailing_comma_rules{$token} );
8692             }
8693             }
8694              
8695             # if preceded by a comma ..
8696             else {
8697              
8698             # delete a trailing comma if requested
8699 285         553 my $deleted;
8700 285 100 66     1172 if ( $rOpts_delete_trailing_commas
8701             && %trailing_comma_rules )
8702             {
8703             $deleted =
8704             $self->delete_trailing_comma( $KK, $Kfirst,
8705 60         186 $trailing_comma_rules{$token} );
8706             }
8707              
8708             # delete a weld-interfering comma if requested
8709 285 50 100     3209 if ( !$deleted
      66        
8710             && $rOpts_delete_weld_interfering_commas
8711             && $is_closing_type{
8712             $last_last_nonblank_code_type} )
8713             {
8714 1         20 $self->delete_weld_interfering_comma($KK);
8715             }
8716             }
8717             }
8718             }
8719             }
8720             }
8721              
8722             # Modify certain tokens here for whitespace
8723             # The following is not yet done, but could be:
8724             # sub (x x x)
8725             # ( $type =~ /^[witPS]$/ )
8726             elsif ( $is_witPS{$type} ) {
8727              
8728             # index() is several times faster than a regex test with \s here
8729             ## $token =~ /\s/
8730 7115 100 66     30870 if ( index( $token, SPACE ) > 0 || index( $token, "\t" ) > 0 ) {
8731              
8732             # change '$ var' to '$var' etc
8733             # change '@ ' to '@'
8734             # Examples: <<snippets/space1.in>>
8735 161         487 my $ord = ord( substr( $token, 1, 1 ) );
8736 161 100 66     1180 if (
      33        
8737              
8738             # quick test for possible blank at second char
8739             $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
8740             || $ord > ORD_PRINTABLE_MAX )
8741             )
8742             {
8743 6         63 my ( $sigil, $word ) = split /\s+/, $token, 2;
8744              
8745             # $sigil =~ /^[\$\&\%\*\@]$/ )
8746 6 100       25 if ( $is_sigil{$sigil} ) {
8747 5         11 $token = $sigil;
8748 5 50       18 $token .= $word if ( defined($word) ); # fix c104
8749 5         16 $rtoken_vars->[_TOKEN_] = $token;
8750             }
8751             }
8752              
8753             # trim identifiers of trailing blanks which can occur
8754             # under some unusual circumstances, such as if the
8755             # identifier 'witch' has trailing blanks on input here:
8756             #
8757             # sub
8758             # witch
8759             # () # prototype may be on new line ...
8760             # ...
8761 161         451 my $ord_ch = ord( substr( $token, -1, 1 ) );
8762 161 50 33     2612 if (
      33        
8763              
8764             # quick check for possible ending space
8765             $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
8766             || $ord_ch > ORD_PRINTABLE_MAX )
8767             )
8768             {
8769 0         0 $token =~ s/\s+$//g;
8770 0         0 $rtoken_vars->[_TOKEN_] = $token;
8771             }
8772              
8773             # Fixed for c250 to use 'S' for sub definitions
8774 161 100       565 if ( $type eq 'S' ) {
    100          
8775              
8776             # -spp = 0 : no space before opening prototype paren
8777             # -spp = 1 : stable (follow input spacing)
8778             # -spp = 2 : always space before opening prototype paren
8779 131 100 66     777 if ( !defined($rOpts_space_prototype_paren)
    100          
    50          
8780             || $rOpts_space_prototype_paren == 1 )
8781             {
8782             ## default: stable
8783             }
8784             elsif ( $rOpts_space_prototype_paren == 0 ) {
8785 5         30 $token =~ s/\s+\(/\(/;
8786             }
8787             elsif ( $rOpts_space_prototype_paren == 2 ) {
8788 5         23 $token =~ s/\(/ (/;
8789             }
8790             else {
8791             # bad n value for -spp=n
8792             # just use the default
8793             }
8794              
8795             # one space max, and no tabs
8796 131         993 $token =~ s/\s+/ /g;
8797 131         450 $rtoken_vars->[_TOKEN_] = $token;
8798              
8799 131         561 $self->[_ris_special_identifier_token_]->{$token} = 'sub';
8800             }
8801              
8802             # and trim spaces in package statements (added for c250)
8803             elsif ( $type eq 'P' ) {
8804              
8805             # clean up spaces in package identifiers, like
8806             # "package Bob::Dog;"
8807 25 50       196 if ( $token =~ s/\s+/ /g ) {
8808 25         64 $rtoken_vars->[_TOKEN_] = $token;
8809 25         99 $self->[_ris_special_identifier_token_]->{$token} =
8810             'package';
8811             }
8812             }
8813             else {
8814             # it is rare to arrive here (identifier with spaces)
8815             }
8816             }
8817             }
8818              
8819             # handle semicolons
8820             elsif ( $type eq ';' ) {
8821              
8822             # Remove unnecessary semicolons, but not after bare
8823             # blocks, where it could be unsafe if the brace is
8824             # mis-tokenized.
8825 2390 100 100     20230 if (
      100        
8826             $rOpts->{'delete-semicolons'}
8827             && (
8828             (
8829             $last_nonblank_block_type
8830             && $last_nonblank_code_type eq '}'
8831             && (
8832             $is_block_without_semicolon{
8833             $last_nonblank_block_type}
8834             || $last_nonblank_block_type =~ /$SUB_PATTERN/
8835             || $last_nonblank_block_type =~ /^\w+:$/
8836             )
8837             )
8838             || $last_nonblank_code_type eq ';'
8839             )
8840             )
8841             {
8842              
8843             # This looks like a deletable semicolon, but even if a
8844             # semicolon can be deleted it is not necessarily best to do
8845             # so. We apply these additional rules for deletion:
8846             # - Always ok to delete a ';' at the end of a line
8847             # - Never delete a ';' before a '#' because it would
8848             # promote it to a block comment.
8849             # - If a semicolon is not at the end of line, then only
8850             # delete if it is followed by another semicolon or closing
8851             # token. This includes the comment rule. It may take
8852             # two passes to get to a final state, but it is a little
8853             # safer. For example, keep the first semicolon here:
8854             # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
8855             # It is not required but adds some clarity.
8856 16         28 my $ok_to_delete = 1;
8857 16 100       36 if ( $KK < $Klast ) {
8858 15         41 my $Kn = $self->K_next_nonblank($KK);
8859 15 100 66     68 if ( defined($Kn) && $Kn <= $Klast ) {
8860 14         28 my $next_nonblank_token_type = $rLL->[$Kn]->[_TYPE_];
8861 14   66     63 $ok_to_delete = $next_nonblank_token_type eq ';'
8862             || $next_nonblank_token_type eq '}';
8863             }
8864             }
8865              
8866             # do not delete only nonblank token in a file
8867             else {
8868 1         17 my $Kp = $self->K_previous_code( undef, $rLL_new );
8869 1         4 my $Kn = $self->K_next_nonblank($KK);
8870 1   33     15 $ok_to_delete = defined($Kn) || defined($Kp);
8871             }
8872              
8873 16 100       32 if ($ok_to_delete) {
8874 13         41 $self->note_deleted_semicolon($input_line_number);
8875 13         28 next;
8876             }
8877             else {
8878 3         11 write_logfile_entry("Extra ';'\n");
8879             }
8880             }
8881             }
8882              
8883             # Old patch to add space to something like "x10".
8884             # Note: This is now done in the Tokenizer, but this code remains
8885             # for reference.
8886             elsif ( $type eq 'n' ) {
8887 1861 50 33     7215 if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
8888 0         0 $token =~ s/x/x /;
8889 0         0 $rtoken_vars->[_TOKEN_] = $token;
8890 0         0 if (DEVEL_MODE) {
8891             Fault(<<EOM);
8892             Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
8893             EOM
8894             }
8895             }
8896             }
8897              
8898             # check for a qw quote
8899             elsif ( $type eq 'q' ) {
8900              
8901             # trim blanks from right of qw quotes
8902             # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
8903             # this)
8904 274         2065 $token =~ s/\s*$//;
8905 274         694 $rtoken_vars->[_TOKEN_] = $token;
8906 274 50 66     881 if ( $self->[_save_logfile_] && $token =~ /\t/ ) {
8907 0         0 $self->note_embedded_tab($input_line_number);
8908             }
8909 274 100 66     854 if ( $rwhitespace_flags->[$KK] == WS_YES
      100        
      100        
8910 257         1582 && @{$rLL_new}
8911             && $rLL_new->[-1]->[_TYPE_] ne 'b'
8912             && $rOpts_add_whitespace )
8913             {
8914 66         206 $self->store_token();
8915             }
8916 274         1018 $self->store_token($rtoken_vars);
8917 274         753 next;
8918             } ## end if ( $type eq 'q' )
8919              
8920             # delete repeated commas if requested
8921             elsif ( $type eq ',' ) {
8922 2957 100 100     7021 if ( $last_nonblank_code_type eq ','
8923             && $rOpts->{'delete-repeated-commas'} )
8924             {
8925             # Could note this deletion as a possible future update:
8926             ## $self->note_deleted_comma($input_line_number);
8927 3         6 next;
8928             }
8929              
8930             # remember input line index of first comma if -wtc is used
8931 2954 100       6093 if (%trailing_comma_rules) {
8932 259         491 my $seqno = $seqno_stack{ $depth_next - 1 };
8933 259 100 66     1060 if ( defined($seqno)
8934             && !defined( $self->[_rfirst_comma_line_index_]->{$seqno} )
8935             )
8936             {
8937 112         271 $self->[_rfirst_comma_line_index_]->{$seqno} =
8938             $rtoken_vars->[_LINE_INDEX_];
8939             }
8940             }
8941             }
8942              
8943             # change 'LABEL :' to 'LABEL:'
8944             elsif ( $type eq 'J' ) {
8945 79         264 $token =~ s/\s+//g;
8946 79         149 $rtoken_vars->[_TOKEN_] = $token;
8947             }
8948              
8949             # check a quote for problems
8950             elsif ( $type eq 'Q' ) {
8951 2463 100       6777 $self->check_Q( $KK, $Kfirst, $input_line_number )
8952             if ( $self->[_save_logfile_] );
8953             }
8954             else {
8955             ## ok - no special processing for this token type
8956             }
8957              
8958             # Store this token with possible previous blank
8959 34569 100 100     69524 if ( $rwhitespace_flags->[$KK] == WS_YES
      100        
      100        
8960 22297         92064 && @{$rLL_new}
8961             && $rLL_new->[-1]->[_TYPE_] ne 'b'
8962             && $rOpts_add_whitespace )
8963             {
8964 7481         14822 $self->store_token();
8965             }
8966 34569         63238 $self->store_token($rtoken_vars);
8967              
8968             } # End token loop
8969              
8970 5822         13865 return;
8971             } ## end sub respace_tokens_inner_loop
8972              
8973             sub respace_post_loop_ops {
8974              
8975 558     558 0 1874 my ($self) = @_;
8976              
8977             # Walk backwards through the tokens, making forward links to sequence items.
8978 558 100       1085 if ( @{$rLL_new} ) {
  558         2160  
8979 554         1119 my $KNEXT;
8980 554         1665 foreach my $KK ( reverse( 0 .. @{$rLL_new} - 1 ) ) {
  554         5743  
8981 58413         82883 $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
8982 58413 100       100379 if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
  9144         13102  
8983             }
8984 554         4121 $self->[_K_first_seq_item_] = $KNEXT;
8985             }
8986              
8987             # Find and remember lists by sequence number
8988 558         1605 foreach my $seqno ( keys %{$K_opening_container} ) {
  558         4051  
8989 4385         7165 my $K_opening = $K_opening_container->{$seqno};
8990 4385 50       8162 next unless defined($K_opening);
8991              
8992             # code errors may leave undefined closing tokens
8993 4385         6646 my $K_closing = $K_closing_container->{$seqno};
8994 4385 50       7706 next unless defined($K_closing);
8995              
8996 4385         7204 my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_];
8997 4385         6662 my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_];
8998 4385         7954 my $line_diff = $lx_close - $lx_open;
8999 4385         7581 $ris_broken_container->{$seqno} = $line_diff;
9000              
9001             # See if this is a list
9002 4385         5697 my $is_list;
9003 4385         6483 my $rtype_count = $rtype_count_by_seqno->{$seqno};
9004 4385 100       8302 if ($rtype_count) {
9005 1892         3474 my $comma_count = $rtype_count->{','};
9006 1892         3174 my $fat_comma_count = $rtype_count->{'=>'};
9007 1892         3089 my $semicolon_count = $rtype_count->{';'};
9008 1892 100       4024 if ( $rtype_count->{'f'} ) {
9009 17         69 $semicolon_count += $rtype_count->{'f'};
9010             }
9011              
9012             # We will define a list to be a container with one or more commas
9013             # and no semicolons. Note that we have included the semicolons
9014             # in a 'for' container in the semicolon count to keep c-style for
9015             # statements from being formatted as lists.
9016 1892 100 100     7942 if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
      100        
9017 1218         2012 $is_list = 1;
9018              
9019             # We need to do one more check for a parenthesized list:
9020             # At an opening paren following certain tokens, such as 'if',
9021             # we do not want to format the contents as a list.
9022 1218 100       3280 if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
9023 731         2555 my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
9024 731 100       3371 if ( defined($Kp) ) {
9025 730         1393 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
9026 730         1426 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
9027             $is_list =
9028             $type_p eq 'k'
9029             ? !$is_nonlist_keyword{$token_p}
9030 730 100       2547 : !$is_nonlist_type{$type_p};
9031             }
9032             }
9033             }
9034             }
9035              
9036             # Look for a block brace marked as uncertain. If the tokenizer thinks
9037             # its guess is uncertain for the type of a brace following an unknown
9038             # bareword then it adds a trailing space as a signal. We can fix the
9039             # type here now that we have had a better look at the contents of the
9040             # container. This fixes case b1085. To find the corresponding code in
9041             # Tokenizer.pm search for 'b1085' with an editor.
9042 4385         6953 my $block_type = $rblock_type_of_seqno->{$seqno};
9043 4385 100 100     11015 if ( $block_type && substr( $block_type, -1, 1 ) eq SPACE ) {
9044              
9045             # Always remove the trailing space
9046 18         162 $block_type =~ s/\s+$//;
9047              
9048             # Try to filter out parenless sub calls
9049 18         115 my $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
9050 18         41 my $Knn2;
9051 18 50       62 if ( defined($Knn1) ) {
9052 18         51 $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new );
9053             }
9054 18 50       112 my $type_nn1 = defined($Knn1) ? $rLL_new->[$Knn1]->[_TYPE_] : 'b';
9055 18 50       79 my $type_nn2 = defined($Knn2) ? $rLL_new->[$Knn2]->[_TYPE_] : 'b';
9056              
9057             # if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
9058 18 100 100     134 if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
9059 6         15 $is_list = 0;
9060             }
9061              
9062             # Convert to a hash brace if it looks like it holds a list
9063 18 100       51 if ($is_list) {
9064 1         13 $block_type = EMPTY_STRING;
9065             }
9066              
9067 18         55 $rblock_type_of_seqno->{$seqno} = $block_type;
9068             }
9069              
9070             # Handle a list container
9071 4385 100 100     15751 if ( $is_list && !$block_type ) {
    100 100        
9072 1202         2668 $ris_list_by_seqno->{$seqno} = $seqno;
9073 1202         2168 my $seqno_parent = $rparent_of_seqno->{$seqno};
9074 1202         1791 my $depth = 0;
9075 1202   66     4987 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
9076 1209         1830 $depth++;
9077              
9078             # for $rhas_list we need to save the minimum depth
9079 1209 100 100     3899 if ( !$rhas_list->{$seqno_parent}
9080             || $rhas_list->{$seqno_parent} > $depth )
9081             {
9082 640         1308 $rhas_list->{$seqno_parent} = $depth;
9083             }
9084              
9085 1209 100       2429 if ($line_diff) {
9086 391         712 $rhas_broken_list->{$seqno_parent} = 1;
9087              
9088             # Patch1: We need to mark broken lists with non-terminal
9089             # line-ending commas for the -bbx=2 parameter. This insures
9090             # that the list will stay broken. Otherwise the flag
9091             # -bbx=2 can be unstable. This fixes case b789 and b938.
9092              
9093             # Patch2: Updated to also require either one fat comma or
9094             # one more line-ending comma. Fixes cases b1069 b1070
9095             # b1072 b1076.
9096 391 100 100     1669 if (
      100        
9097             $rlec_count_by_seqno->{$seqno}
9098             && ( $rlec_count_by_seqno->{$seqno} > 1
9099             || $rtype_count_by_seqno->{$seqno}->{'=>'} )
9100             )
9101             {
9102 177         381 $rhas_broken_list_with_lec->{$seqno_parent} = 1;
9103             }
9104             }
9105 1209         4011 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
9106             }
9107             }
9108              
9109             # Handle code blocks ...
9110             # The -lp option needs to know if a container holds a code block
9111             elsif ( $block_type && $rOpts_line_up_parentheses ) {
9112 43         99 my $seqno_parent = $rparent_of_seqno->{$seqno};
9113 43   66     186 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
9114 71         121 $rhas_code_block->{$seqno_parent} = 1;
9115 71         106 $rhas_broken_code_block->{$seqno_parent} = $line_diff;
9116 71         234 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
9117             }
9118             }
9119             else {
9120             ## ok - none of the above
9121             }
9122             }
9123              
9124             # Find containers with ternaries, needed for -lp formatting.
9125 558         2417 foreach my $seqno ( keys %{$K_opening_ternary} ) {
  558         2700  
9126 187         495 my $seqno_parent = $rparent_of_seqno->{$seqno};
9127 187   66     999 while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
9128 153         297 $rhas_ternary->{$seqno_parent} = 1;
9129 153         582 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
9130             }
9131             }
9132              
9133             # Turn off -lp for containers with here-docs with text within a container,
9134             # since they have their own fixed indentation. Fixes case b1081.
9135 558 100       3916 if ($rOpts_line_up_parentheses) {
9136 31         126 foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
9137 1         4 my $Kh = $K_first_here_doc_by_seqno{$seqno};
9138 1         4 my $Kc = $K_closing_container->{$seqno};
9139 1         3 my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
9140 1         4 my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
9141 1 50       6 next if ( $line_Kh == $line_Kc );
9142 0         0 $ris_excluded_lp_container->{$seqno} = 1;
9143             }
9144             }
9145              
9146             # Set a flag to turn off -cab=3 in complex structures. Otherwise,
9147             # instability can occur. When it is overridden the behavior of the closest
9148             # match, -cab=2, will be used instead. This fixes cases b1096 b1113.
9149 558 50       2301 if ( $rOpts_comma_arrow_breakpoints == 3 ) {
9150 0         0 foreach my $seqno ( keys %{$K_opening_container} ) {
  0         0  
9151              
9152 0         0 my $rtype_count = $rtype_count_by_seqno->{$seqno};
9153 0 0 0     0 next unless ( $rtype_count && $rtype_count->{'=>'} );
9154              
9155             # override -cab=3 if this contains a sub-list
9156 0 0       0 if ( !defined( $roverride_cab3->{$seqno} ) ) {
9157 0 0       0 if ( $rhas_list->{$seqno} ) {
9158 0         0 $roverride_cab3->{$seqno} = 2;
9159             }
9160              
9161             # or if this is a sub-list of its parent container
9162             else {
9163 0         0 my $seqno_parent = $rparent_of_seqno->{$seqno};
9164 0 0 0     0 if ( defined($seqno_parent)
9165             && $ris_list_by_seqno->{$seqno_parent} )
9166             {
9167 0         0 $roverride_cab3->{$seqno} = 2;
9168             }
9169             }
9170             }
9171             }
9172             }
9173              
9174 558         1262 return;
9175             } ## end sub respace_post_loop_ops
9176              
9177             sub set_permanently_broken {
9178 164     164 0 501 my ( $self, $seqno ) = @_;
9179              
9180             # Mark this container, and all of its parent containers, as being
9181             # permanently broken (for example, by containing a blank line). This
9182             # is needed for certain list formatting operations.
9183 164         535 while ( defined($seqno) ) {
9184 407         779 $ris_permanently_broken->{$seqno} = 1;
9185 407         1051 $seqno = $rparent_of_seqno->{$seqno};
9186             }
9187 164         421 return;
9188             } ## end sub set_permanently_broken
9189              
9190             sub store_token {
9191              
9192 58467     58467 0 92346 my ( $self, $item ) = @_;
9193              
9194             #------------------------------------------
9195             # Store one token during respace operations
9196             #------------------------------------------
9197              
9198             # Input parameter:
9199             # if defined => reference to a token
9200             # if undef => make and store a blank space
9201              
9202             # NOTE: called once per token so coding efficiency is critical.
9203              
9204             # If no arg, then make and store a blank space
9205 58467 100       99802 if ( !$item ) {
9206              
9207             # - Never start the array with a space, and
9208             # - Never store two consecutive spaces
9209 7674 50 33     10379 if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] ne 'b' ) {
  7674         25422  
9210              
9211             # Note that the level and ci_level of newly created spaces should
9212             # be the same as the previous token. Otherwise the coding for the
9213             # -lp option can create a blinking state in some rare cases.
9214             # (see b1109, b1110).
9215 7674         15211 $item = [];
9216 7674         16085 $item->[_TYPE_] = 'b';
9217 7674         14016 $item->[_TOKEN_] = SPACE;
9218 7674         16537 $item->[_TYPE_SEQUENCE_] = EMPTY_STRING;
9219 7674         12509 $item->[_LINE_INDEX_] = $rLL_new->[-1]->[_LINE_INDEX_];
9220 7674         14220 $item->[_LEVEL_] = $rLL_new->[-1]->[_LEVEL_];
9221             }
9222 0         0 else { return }
9223             }
9224              
9225             # The next multiple assignment statements are significantly faster than
9226             # doing them one-by-one.
9227             my (
9228              
9229             $type,
9230             $token,
9231             $type_sequence,
9232              
9233 58467         77952 ) = @{$item}[
  58467         114602  
9234              
9235             _TYPE_,
9236             _TOKEN_,
9237             _TYPE_SEQUENCE_,
9238              
9239             ];
9240              
9241             # Set the token length. Later it may be adjusted again if phantom or
9242             # ignoring side comment lengths. It is always okay to calculate the length
9243             # with $length_function->() if it is defined, but it is extremely slow so
9244             # we avoid it and use the builtin length() for printable ascii tokens.
9245             # Note: non-printable ascii characters (like tab) may get different lengths
9246             # by the two methods, so we have to use $length_function for them.
9247             my $token_length =
9248             ( $length_function
9249 58467 50 33     132034 && !$is_ascii_type{$type}
9250             && $token =~ /[[:^ascii:][:^print:]]/ )
9251             ? $length_function->($token)
9252             : length($token);
9253              
9254             # handle blanks
9255 58467 100       104884 if ( $type eq 'b' ) {
    100          
9256              
9257             # Do not output consecutive blanks. This situation should have been
9258             # prevented earlier, but it is worth checking because later routines
9259             # make this assumption.
9260 22378 100 66     28438 if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
  22378         70075  
9261 5         12 return;
9262             }
9263             }
9264              
9265             # handle comments
9266             elsif ( $type eq '#' ) {
9267              
9268             # trim comments if necessary
9269 1091         2841 my $ord = ord( substr( $token, -1, 1 ) );
9270 1091 100 66     8392 if (
      66        
      66        
9271             $ord > 0
9272             && ( $ord < ORD_PRINTABLE_MIN
9273             || $ord > ORD_PRINTABLE_MAX )
9274             && $token =~ s/\s+$//
9275             )
9276             {
9277 20 50       97 $token_length =
9278             $length_function ? $length_function->($token) : length($token);
9279 20         84 $item->[_TOKEN_] = $token;
9280             }
9281              
9282 1091         1978 my $ignore_sc_length = $rOpts_ignore_side_comment_lengths;
9283              
9284             # Ignore length of '## no critic' comments even if -iscl is not set
9285 1091 100 100     8382 if ( !$ignore_sc_length
      100        
      100        
      100        
9286             && !$rOpts_ignore_perlcritic_comments
9287             && $token_length > 10
9288             && substr( $token, 1, 1 ) eq '#'
9289             && $token =~ /^##\s*no\s+critic\b/ )
9290             {
9291              
9292             # Is it a side comment or a block comment?
9293 7 100       35 if ( $Ktoken_vars > $Kfirst_old ) {
9294              
9295             # This is a side comment. If we do not ignore its length, and
9296             # -iscl has not been set, then the line could be broken and
9297             # perlcritic will complain. So this is essential:
9298 3   50     35 $ignore_sc_length ||= 1;
9299              
9300             # It would be a good idea to also make this behave like a
9301             # static side comment, but this is not essential and would
9302             # change existing formatting. So we will leave it to the user
9303             # to set -ssc if desired.
9304             }
9305             else {
9306              
9307             # This is a full-line (block) comment.
9308             # It would be a good idea to make this behave like a static
9309             # block comment, but this is not essential and would change
9310             # existing formatting. So we will leave it to the user to
9311             # set -sbc if desired
9312             }
9313             }
9314              
9315             # Set length of ignored side comments as just 1
9316 1091 100 100     2831 if ( $ignore_sc_length && ( !$CODE_type || $CODE_type eq 'HSC' ) ) {
      100        
9317 17         27 $token_length = 1;
9318             }
9319              
9320 1091         2907 my $seqno = $seqno_stack{ $depth_next - 1 };
9321 1091 100       2755 if ( defined($seqno) ) {
9322 296 100       859 $self->[_rblank_and_comment_count_]->{$seqno} += 1
9323             if ( $CODE_type eq 'BC' );
9324             $self->set_permanently_broken($seqno)
9325 296 100       1108 if !$ris_permanently_broken->{$seqno};
9326             }
9327             }
9328              
9329             # handle non-blanks and non-comments
9330             else {
9331              
9332 34998         44954 my $block_type;
9333              
9334             # check for a sequenced item (i.e., container or ?/:)
9335 34998 100       59020 if ($type_sequence) {
9336              
9337             # This will be the index of this item in the new array
9338 9144         12545 my $KK_new = @{$rLL_new};
  9144         13969  
9339              
9340 9144 100       22612 if ( $is_opening_token{$token} ) {
    100          
9341              
9342 4385         10259 $K_opening_container->{$type_sequence} = $KK_new;
9343 4385         7355 $block_type = $rblock_type_of_seqno->{$type_sequence};
9344              
9345             # Fix for case b1100: Count a line ending in ', [' as having
9346             # a line-ending comma. Otherwise, these commas can be hidden
9347             # with something like --opening-square-bracket-right
9348 4385 100 100     11025 if ( $last_nonblank_code_type eq ','
      100        
9349             && $Ktoken_vars == $Klast_old_code
9350             && $Ktoken_vars > $Kfirst_old )
9351             {
9352 5         17 $rlec_count_by_seqno->{$type_sequence}++;
9353             }
9354              
9355 4385 100 100     14705 if ( $last_nonblank_code_type eq '='
9356             || $last_nonblank_code_type eq '=>' )
9357             {
9358 394         1126 $ris_assigned_structure->{$type_sequence} =
9359             $last_nonblank_code_type;
9360             }
9361              
9362 4385         8986 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
9363 4385 100       9334 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
9364 4385         6205 push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
  4385         12599  
9365 4385         9923 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
9366 4385         8184 $seqno_stack{$depth_next} = $type_sequence;
9367 4385         8038 $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
9368 4385         6630 $depth_next++;
9369              
9370 4385 100       9774 if ( $depth_next > $depth_next_max ) {
9371 1239         2365 $depth_next_max = $depth_next;
9372             }
9373             }
9374             elsif ( $is_closing_token{$token} ) {
9375              
9376 4385         9904 $K_closing_container->{$type_sequence} = $KK_new;
9377 4385         7467 $block_type = $rblock_type_of_seqno->{$type_sequence};
9378              
9379             # Do not include terminal commas in counts
9380 4385 100 66     15526 if ( $last_nonblank_code_type eq ','
9381             || $last_nonblank_code_type eq '=>' )
9382             {
9383             $rtype_count_by_seqno->{$type_sequence}
9384 300         760 ->{$last_nonblank_code_type}--;
9385              
9386 300 50 66     2071 if ( $Ktoken_vars == $Kfirst_old
      66        
9387             && $last_nonblank_code_type eq ','
9388             && $rlec_count_by_seqno->{$type_sequence} )
9389             {
9390 165         417 $rlec_count_by_seqno->{$type_sequence}--;
9391             }
9392             }
9393              
9394             # Update the stack...
9395 4385         6823 $depth_next--;
9396             }
9397             else {
9398              
9399             # For ternary, note parent but do not include as child
9400 374         1097 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
9401 374 100       1187 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
9402 374         1944 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
9403              
9404             # These are not yet used but could be useful
9405 374 100       1495 if ( $token eq '?' ) {
    50          
9406 187         476 $K_opening_ternary->{$type_sequence} = $KK_new;
9407             }
9408             elsif ( $token eq ':' ) {
9409 187         544 $K_closing_ternary->{$type_sequence} = $KK_new;
9410             }
9411             else {
9412              
9413             # We really shouldn't arrive here, just being cautious:
9414             # The only sequenced types output by the tokenizer are the
9415             # opening & closing containers and the ternary types. Each
9416             # of those was checked above. So we would only get here
9417             # if the tokenizer has been changed to mark some other
9418             # tokens with sequence numbers.
9419 0         0 if (DEVEL_MODE) {
9420             Fault(
9421             "Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
9422             );
9423             }
9424             }
9425             }
9426             }
9427              
9428             # Remember the most recent two non-blank, non-comment tokens.
9429             # NOTE: the phantom semicolon code may change the output stack
9430             # without updating these values. Phantom semicolons are considered
9431             # the same as blanks for now, but future needs might change that.
9432             # See the related note in sub 'add_phantom_semicolon'.
9433 34998         47855 $last_last_nonblank_code_type = $last_nonblank_code_type;
9434 34998         46471 $last_last_nonblank_code_token = $last_nonblank_code_token;
9435              
9436 34998         45041 $last_nonblank_code_type = $type;
9437 34998         45547 $last_nonblank_code_token = $token;
9438 34998         44560 $last_nonblank_block_type = $block_type;
9439              
9440             # count selected types
9441 34998 100       67947 if ( $is_counted_type{$type} ) {
9442 6567         14410 my $seqno = $seqno_stack{ $depth_next - 1 };
9443 6567 100       13791 if ( defined($seqno) ) {
9444 4871         10996 $rtype_count_by_seqno->{$seqno}->{$type}++;
9445              
9446             # Count line-ending commas for -bbx
9447 4871 100 100     15621 if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
9448 978         2159 $rlec_count_by_seqno->{$seqno}++;
9449             }
9450              
9451             # Remember index of first here doc target
9452 4871 100 66     11418 if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
9453 6         20 my $KK_new = @{$rLL_new};
  6         18  
9454 6         26 $K_first_here_doc_by_seqno{$seqno} = $KK_new;
9455             }
9456             }
9457             }
9458             }
9459              
9460             # cumulative length is the length sum including this token
9461 58462         79465 $cumulative_length += $token_length;
9462              
9463 58462         80723 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
9464 58462         78483 $item->[_TOKEN_LENGTH_] = $token_length;
9465              
9466             # For reference, here is how to get the parent sequence number.
9467             # This is not used because it is slower than finding it on the fly
9468             # in sub parent_seqno_by_K:
9469              
9470             # my $seqno_parent =
9471             # $type_sequence && $is_opening_token{$token}
9472             # ? $seqno_stack{ $depth_next - 2 }
9473             # : $seqno_stack{ $depth_next - 1 };
9474             # my $KK = @{$rLL_new};
9475             # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
9476              
9477             # and finally, add this item to the new array
9478 58462         72904 push @{$rLL_new}, $item;
  58462         98381  
9479 58462         103447 return;
9480             } ## end sub store_token
9481              
9482             sub add_phantom_semicolon {
9483              
9484 535     535 0 1299 my ( $self, $KK ) = @_;
9485              
9486             # The token at old index $KK is a closing block brace, and not preceded
9487             # by a semicolon. Before we push it onto the new token list, we may
9488             # want to add a phantom semicolon which can be activated if the the
9489             # block is broken on output.
9490              
9491             # We are only adding semicolons for certain block types
9492 535         1189 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9493 535 50       1430 return unless ($type_sequence);
9494 535         1145 my $block_type = $rblock_type_of_seqno->{$type_sequence};
9495 535 50       1346 return unless ($block_type);
9496             return
9497 535 100 100     4557 unless ( $ok_to_add_semicolon_for_block_type{$block_type}
      100        
9498             || $block_type =~ /^(sub|package)/
9499             || $block_type =~ /^\w+\:$/ );
9500              
9501             # Find the most recent token in the new token list
9502 309         1408 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
9503 309 50       927 return unless ( defined($Kp) ); # shouldn't happen except for bad input
9504              
9505 309         688 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
9506 309         704 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
9507 309         661 my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
9508              
9509             # Do not add a semicolon if...
9510             return
9511             if (
9512              
9513             # it would follow a comment (and be isolated)
9514             $type_p eq '#'
9515              
9516             # it follows a code block ( because they are not always wanted
9517             # there and may add clutter)
9518 309 50 100     3096 || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
      100        
      66        
      66        
      66        
9519              
9520             # it would follow a label
9521             || $type_p eq 'J'
9522              
9523             # it would be inside a 'format' statement (and cause syntax error)
9524             || ( $type_p eq 'k'
9525             && $token_p =~ /format/ )
9526              
9527             );
9528              
9529             # Do not add a semicolon if it would impede a weld with an immediately
9530             # following closing token...like this
9531             # { ( some code ) }
9532             # ^--No semicolon can go here
9533              
9534             # look at the previous token... note use of the _NEW rLL array here,
9535             # but sequence numbers are invariant.
9536 175         423 my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
9537              
9538             # If it is also a CLOSING token we have to look closer...
9539 175 100 66     804 if (
      33        
      66        
9540             $seqno_inner
9541             && $is_closing_token{$token_p}
9542              
9543             # we only need to look if there is just one inner container..
9544             && defined( $rchildren_of_seqno->{$type_sequence} )
9545 43         185 && @{ $rchildren_of_seqno->{$type_sequence} } == 1
9546             )
9547             {
9548              
9549             # Go back and see if the corresponding two OPENING tokens are also
9550             # together. Note that we are using the OLD K indexing here:
9551 38         121 my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
9552 38 50       134 if ( defined($K_outer_opening) ) {
9553 38         181 my $K_nxt = $self->K_next_nonblank($K_outer_opening);
9554 38 50       151 if ( defined($K_nxt) ) {
9555 38         101 my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
9556              
9557             # Is the next token after the outer opening the same as
9558             # our inner closing (i.e. same sequence number)?
9559             # If so, do not insert a semicolon here.
9560 38 100 66     200 return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
9561             }
9562             }
9563             }
9564              
9565             # We will insert an empty semicolon here as a placeholder. Later, if
9566             # it becomes the last token on a line, we will bring it to life. The
9567             # advantage of doing this is that (1) we just have to check line
9568             # endings, and (2) the phantom semicolon has zero width and therefore
9569             # won't cause needless breaks of one-line blocks.
9570 167         363 my $Ktop = -1;
9571 167 100 100     898 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
9572             && $want_left_space{';'} == WS_NO )
9573             {
9574              
9575             # convert the blank into a semicolon..
9576             # be careful: we are working on the new stack top
9577             # on a token which has been stored.
9578 127         598 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
9579              
9580             # Convert the existing blank to:
9581             # a phantom semicolon for one_line_block option = 0 or 1
9582             # a real semicolon for one_line_block option = 2
9583 127         286 my $tok = EMPTY_STRING;
9584 127         264 my $len_tok = 0;
9585 127 100       441 if ( $rOpts_one_line_block_semicolons == 2 ) {
9586 3         6 $tok = ';';
9587 3         5 $len_tok = 1;
9588             }
9589              
9590 127         316 $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
9591 127         250 $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
9592 127         328 $rLL_new->[$Ktop]->[_TYPE_] = ';';
9593              
9594 127         431 $self->[_rtype_count_by_seqno_]->{$type_sequence}->{';'}++;
9595              
9596             # NOTE: we are changing the output stack without updating variables
9597             # $last_nonblank_code_type, etc. Future needs might require that
9598             # those variables be updated here. For now, it seems ok to skip
9599             # this.
9600              
9601             # Then store a new blank
9602 127         408 $self->store_token($rcopy);
9603             }
9604             else {
9605              
9606             # Patch for issue c078: keep line indexes in order. If the top
9607             # token is a space that we are keeping (due to '-wls=';') then
9608             # we have to check that old line indexes stay in order.
9609             # In very rare
9610             # instances in which side comments have been deleted and converted
9611             # into blanks, we may have filtered down multiple blanks into just
9612             # one. In that case the top blank may have a higher line number
9613             # than the previous nonblank token. Although the line indexes of
9614             # blanks are not really significant, we need to keep them in order
9615             # in order to pass error checks.
9616 40 100       148 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
9617 1         6 my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
9618 1         3 my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
9619 1 50       6 if ( $new_top_ix < $old_top_ix ) {
9620 0         0 $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
9621             }
9622             }
9623              
9624 40         185 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING );
9625 40         122 $self->store_token($rcopy);
9626             }
9627 167         684 return;
9628             } ## end sub add_phantom_semicolon
9629              
9630             sub add_trailing_comma {
9631              
9632             # Implement the --add-trailing-commas flag to the line end before index $KK:
9633              
9634 24     24 0 59 my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_;
9635              
9636             # Input parameter:
9637             # $KK = index of closing token in old ($rLL) token list
9638             # which starts a new line and is not preceded by a comma
9639             # $Kfirst = index of first token on the current line of input tokens
9640             # $add_flags = user control flags
9641              
9642             # For example, we might want to add a comma here:
9643              
9644             # bless {
9645             # _name => $name,
9646             # _price => $price,
9647             # _rebate => $rebate <------ location of possible bare comma
9648             # }, $pkg;
9649             # ^-------------------closing token at index $KK on new line
9650              
9651             # Do not add a comma if it would follow a comment
9652 24         77 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
9653 24 50       78 return unless ( defined($Kp) );
9654 24         49 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
9655 24 50       65 return if ( $type_p eq '#' );
9656              
9657             # see if the user wants a trailing comma here
9658 24         74 my $match =
9659             $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
9660             $trailing_comma_rule, 1 );
9661              
9662             # b1458 fix method 1: do not add if this would excess line length.
9663             # This is more general than fix method 2, below, but the logic is not
9664             # as clean. So this fix is currently deactivated.
9665 24         39 if ( 0 && $match && $rOpts_delete_trailing_commas && $KK > 0 ) {
9666             my $line_index = $rLL->[ $KK - 1 ]->[_LINE_INDEX_];
9667             my $rlines = $self->[_rlines_];
9668             my $line_of_tokens = $rlines->[$line_index];
9669             my $input_line = $line_of_tokens->{_line_text};
9670             my $len =
9671             $length_function
9672             ? $length_function->($input_line) - 1
9673             : length($input_line) - 1;
9674             my $level = $rLL->[$Kfirst]->[_LEVEL_];
9675             my $max_len = $maximum_line_length_at_level[$level];
9676              
9677             if ( $len >= $max_len ) {
9678             $match = 0;
9679             }
9680             }
9681              
9682             # if so, add a comma
9683 24 100       68 if ($match) {
9684 11         75 my $Knew = $self->store_new_token( ',', ',', $Kp );
9685             }
9686              
9687 24         59 return;
9688              
9689             } ## end sub add_trailing_comma
9690              
9691             sub delete_trailing_comma {
9692              
9693 60     60 0 150 my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_;
9694              
9695             # Apply the --delete-trailing-commas flag to the comma before index $KK
9696              
9697             # Input parameter:
9698             # $KK = index of a closing token in OLD ($rLL) token list
9699             # which is preceded by a comma on the same line.
9700             # $Kfirst = index of first token on the current line of input tokens
9701             # $delete_option = user control flag
9702              
9703             # Returns true if the comma was deleted
9704              
9705             # For example, we might want to delete this comma:
9706             # my @asset = ("FASMX", "FASGX", "FASIX",);
9707             # | |^--------token at index $KK
9708             # | ^------comma of interest
9709             # ^-------------token at $Kfirst
9710              
9711             # Verify that the previous token is a comma. Note that we are working in
9712             # the new token list $rLL_new.
9713 60         167 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
9714 60 50       206 return unless ( defined($Kp) );
9715 60 50       147 if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
9716              
9717             # there must be a '#' between the ',' and closing token; give up.
9718 0         0 return;
9719             }
9720              
9721             # Do not delete commas when formatting under stress to avoid instability.
9722             # This fixes b1389, b1390, b1391, b1392. The $high_stress_level has
9723             # been found to work well for trailing commas.
9724 60 50       156 if ( $rLL_new->[$Kp]->[_LEVEL_] >= $high_stress_level ) {
9725 0         0 return;
9726             }
9727              
9728             # See if the user wants this trailing comma
9729 60         168 my $match =
9730             $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
9731             $trailing_comma_rule, 0 );
9732              
9733             # Patch: the --noadd-whitespace flag can cause instability in complex
9734             # structures. In this case do not delete the comma. Fixes b1409.
9735 60 50 66     197 if ( !$match && !$rOpts_add_whitespace ) {
9736 0         0 my $Kn = $self->K_next_nonblank($KK);
9737 0 0       0 if ( defined($Kn) ) {
9738 0         0 my $type_n = $rLL->[$Kn]->[_TYPE_];
9739 0 0 0     0 if ( $type_n ne ';' && $type_n ne '#' ) { return }
  0         0  
9740             }
9741             }
9742              
9743             # b1458 fix method 2: do not remove a comma after a leading brace type 'R'
9744             # since it is under stress and could become unstable. This is a more
9745             # specific fix but the logic is cleaner than method 1.
9746 60 50 100     279 if ( !$match
      66        
9747             && $rOpts_add_trailing_commas
9748             && $rLL->[$Kfirst]->[_TYPE_] eq 'R' )
9749             {
9750              
9751             # previous old token should be the comma..
9752 0         0 my $Kp_old = $self->K_previous_nonblank( $KK, $rLL );
9753 0 0 0     0 if ( defined($Kp_old)
      0        
9754             && $Kp_old > $Kfirst
9755             && $rLL->[$Kp_old]->[_TYPE_] eq ',' )
9756             {
9757              
9758             # if the comma follows the first token of the line ..
9759 0         0 my $Kpp_old = $self->K_previous_nonblank( $Kp_old, $rLL );
9760 0 0 0     0 if ( defined($Kpp_old) && $Kpp_old eq $Kfirst ) {
9761              
9762             # do not delete it
9763 0         0 $match = 1;
9764             }
9765             }
9766             }
9767              
9768             # If no match, delete it
9769 60 100       110 if ( !$match ) {
9770              
9771 48         111 return $self->unstore_last_nonblank_token(',');
9772             }
9773 12         30 return;
9774              
9775             } ## end sub delete_trailing_comma
9776              
9777             sub delete_weld_interfering_comma {
9778              
9779 1     1 0 5 my ( $self, $KK ) = @_;
9780              
9781             # Apply the flag '--delete-weld-interfering-commas' to the comma
9782             # before index $KK
9783              
9784             # Input parameter:
9785             # $KK = index of a closing token in OLD ($rLL) token list
9786             # which is preceded by a comma on the same line.
9787              
9788             # Returns true if the comma was deleted
9789              
9790             # For example, we might want to delete this comma:
9791              
9792             # my $tmpl = { foo => {no_override => 1, default => 42}, };
9793             # || ^------$KK
9794             # |^---$Kp
9795             # $Kpp---^
9796             #
9797             # Note that:
9798             # index $KK is in the old $rLL array, but
9799             # indexes $Kp and $Kpp are in the new $rLL_new array.
9800              
9801 1         3 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9802 1 50       4 return unless ($type_sequence);
9803              
9804             # Find the previous token and verify that it is a comma.
9805 1         7 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
9806 1 50       4 return unless ( defined($Kp) );
9807 1 50       4 if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
9808              
9809             # it is not a comma, so give up ( it is probably a '#' )
9810 0         0 return;
9811             }
9812              
9813             # This must be the only comma in this list
9814 1         3 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
9815             return
9816             unless ( defined($rtype_count)
9817             && $rtype_count->{','}
9818 1 50 33     10 && $rtype_count->{','} == 1 );
      33        
9819              
9820             # Back up to the previous closing token
9821 1         5 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
9822 1 50       4 return unless ( defined($Kpp) );
9823 1         2 my $seqno_pp = $rLL_new->[$Kpp]->[_TYPE_SEQUENCE_];
9824 1         4 my $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
9825              
9826             # The containers must be nesting (i.e., sequence numbers must differ by 1 )
9827 1 50 33     6 if ( $seqno_pp && $is_closing_type{$type_pp} ) {
9828 1 50       5 if ( $seqno_pp == $type_sequence + 1 ) {
9829              
9830             # remove the ',' from the top of the new token list
9831 1         4 return $self->unstore_last_nonblank_token(',');
9832             }
9833             }
9834 0         0 return;
9835              
9836             } ## end sub delete_weld_interfering_comma
9837              
9838             sub unstore_last_nonblank_token {
9839              
9840 49     49 0 106 my ( $self, $type ) = @_;
9841              
9842             # remove the most recent nonblank token from the new token list
9843             # Input parameter:
9844             # $type = type to be removed (for safety check)
9845              
9846             # Returns true if success
9847             # false if error
9848              
9849             # This was written and is used for removing commas, but might
9850             # be useful for other tokens. If it is ever used for other tokens
9851             # then the issue of what to do about the other variables, such
9852             # as token counts and the '$last...' vars needs to be considered.
9853              
9854             # Safety check, shouldn't happen
9855 49 50       74 if ( @{$rLL_new} < 3 ) {
  49         140  
9856 0         0 DEVEL_MODE && Fault("not enough tokens on stack to remove '$type'\n");
9857 0         0 return;
9858             }
9859              
9860 49         86 my ( $rcomma, $rblank );
9861              
9862             # case 1: pop comma from top of stack
9863 49 100 33     228 if ( $rLL_new->[-1]->[_TYPE_] eq $type ) {
    50          
9864 6         26 $rcomma = pop @{$rLL_new};
  6         22  
9865             }
9866              
9867             # case 2: pop blank and then comma from top of stack
9868             elsif ($rLL_new->[-1]->[_TYPE_] eq 'b'
9869             && $rLL_new->[-2]->[_TYPE_] eq $type )
9870             {
9871 43         59 $rblank = pop @{$rLL_new};
  43         73  
9872 43         60 $rcomma = pop @{$rLL_new};
  43         61  
9873             }
9874              
9875             # case 3: error, shouldn't happen unless bad call
9876             else {
9877 0         0 DEVEL_MODE && Fault("Could not find token type '$type' to remove\n");
9878 0         0 return;
9879             }
9880              
9881             # A note on updating vars set by sub store_token for this comma: If we
9882             # reduce the comma count by 1 then we also have to change the variable
9883             # $last_nonblank_code_type to be $last_last_nonblank_code_type because
9884             # otherwise sub store_token is going to ALSO reduce the comma count.
9885             # Alternatively, we can leave the count alone and the
9886             # $last_nonblank_code_type alone. Then sub store_token will produce
9887             # the correct result. This is simpler and is done here.
9888              
9889             # Now add a blank space after the comma if appropriate.
9890             # Some unusual spacing controls might need another iteration to
9891             # reach a final state.
9892 49 50       170 if ( $rLL_new->[-1]->[_TYPE_] ne 'b' ) {
9893 49 100       112 if ( defined($rblank) ) {
9894 43         56 $rblank->[_CUMULATIVE_LENGTH_] -= 1; # fix for deleted comma
9895 43         75 push @{$rLL_new}, $rblank;
  43         70  
9896             }
9897             }
9898 49         126 return 1;
9899             } ## end sub unstore_last_nonblank_token
9900              
9901             sub match_trailing_comma_rule {
9902              
9903 84     84 0 193 my ( $self, $KK, $Kfirst, $Kp, $trailing_comma_rule, $if_add ) = @_;
9904              
9905             # Decide if a trailing comma rule is matched.
9906              
9907             # Input parameter:
9908             # $KK = index of closing token in old ($rLL) token list which follows
9909             # the location of a possible trailing comma. See diagram below.
9910             # $Kfirst = (old) index of first token on the current line of input tokens
9911             # $Kp = index of previous nonblank token in new ($rLL_new) array
9912             # $trailing_comma_rule = packed user control flags
9913             # $if_add = true if adding comma, false if deleting comma
9914              
9915             # Returns:
9916             # false if no match
9917             # true if match
9918              
9919             # For example, we might be checking for addition of a comma here:
9920              
9921             # bless {
9922             # _name => $name,
9923             # _price => $price,
9924             # _rebate => $rebate <------ location of possible trailing comma
9925             # }, $pkg;
9926             # ^-------------------closing token at index $KK
9927              
9928 84 50       198 return unless ($trailing_comma_rule);
9929 84         128 my ( $trailing_comma_style, $paren_flag ) = @{$trailing_comma_rule};
  84         202  
9930              
9931             # List of $trailing_comma_style values:
9932             # undef stable: do not change
9933             # '0' : no list should have a trailing comma
9934             # '1' or '*' : every list should have a trailing comma
9935             # 'm' a multi-line list should have a trailing commas
9936             # 'b' trailing commas should be 'bare' (comma followed by newline)
9937             # 'h' lists of key=>value pairs with a bare trailing comma
9938             # 'i' same as s=h but also include any list with no more than about one
9939             # comma per line
9940             # ' ' or -wtc not defined : leave trailing commas unchanged [DEFAULT].
9941              
9942             # Note: an interesting generalization would be to let an upper case
9943             # letter denote the negation of styles 'm', 'b', 'h', 'i'. This might
9944             # be useful for undoing operations. It would be implemented as a wrapper
9945             # around this routine.
9946              
9947             #-----------------------------------------
9948             # No style defined : do not add or delete
9949             #-----------------------------------------
9950 84 50       221 if ( !defined($trailing_comma_style) ) { return !$if_add }
  0         0  
9951              
9952             #----------------------------------------
9953             # Set some flags describing this location
9954             #----------------------------------------
9955 84         153 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9956 84 50       176 return unless ($type_sequence);
9957 84         146 my $closing_token = $rLL->[$KK]->[_TOKEN_];
9958 84         152 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
9959 84 50 33     340 return unless ( defined($rtype_count) && $rtype_count->{','} );
9960             my $is_permanently_broken =
9961 84         166 $self->[_ris_permanently_broken_]->{$type_sequence};
9962              
9963             # Note that _ris_broken_container_ also stores the line diff
9964             # but it is not available at this early stage.
9965 84         158 my $K_opening = $self->[_K_opening_container_]->{$type_sequence};
9966 84 50       193 return if ( !defined($K_opening) );
9967              
9968             # multiline definition 1: opening and closing tokens on different lines
9969 84         141 my $iline_o = $rLL_new->[$K_opening]->[_LINE_INDEX_];
9970 84         193 my $iline_c = $rLL->[$KK]->[_LINE_INDEX_];
9971 84         140 my $line_diff_containers = $iline_c - $iline_o;
9972 84         137 my $has_multiline_containers = $line_diff_containers > 0;
9973              
9974             # multiline definition 2: first and last commas on different lines
9975 84         149 my $iline_first = $self->[_rfirst_comma_line_index_]->{$type_sequence};
9976 84         126 my $iline_last = $rLL_new->[$Kp]->[_LINE_INDEX_];
9977 84         124 my $has_multiline_commas;
9978 84         130 my $line_diff_commas = 0;
9979 84 50       159 if ( !defined($iline_first) ) {
9980              
9981             # shouldn't happen if caller checked comma count
9982 0         0 my $type_kp = $rLL_new->[$Kp]->[_TYPE_];
9983 0         0 Fault(
9984             "at line $iline_last but line of first comma not defined, at Kp=$Kp, type=$type_kp\n"
9985             ) if (DEVEL_MODE);
9986             }
9987             else {
9988 84         113 $line_diff_commas = $iline_last - $iline_first;
9989 84         129 $has_multiline_commas = $line_diff_commas > 0;
9990             }
9991              
9992             # To avoid instability in edge cases, when adding commas we uses the
9993             # multiline_commas definition, but when deleting we use multiline
9994             # containers. This fixes b1384, b1396, b1397, b1398, b1400.
9995 84 100       164 my $is_multiline =
9996             $if_add ? $has_multiline_commas : $has_multiline_containers;
9997              
9998 84   100     237 my $is_bare_multiline_comma = $is_multiline && $KK == $Kfirst;
9999              
10000 84         122 my $match;
10001              
10002             #----------------------------
10003             # 0 : does not match any list
10004             #----------------------------
10005 84 100 66     462 if ( $trailing_comma_style eq '0' ) {
    100 66        
    100          
    100          
    50          
10006 12         21 $match = 0;
10007             }
10008              
10009             #------------------------------
10010             # '*' or '1' : matches any list
10011             #------------------------------
10012             elsif ( $trailing_comma_style eq '*' || $trailing_comma_style eq '1' ) {
10013 4         6 $match = 1;
10014             }
10015              
10016             #-----------------------------
10017             # 'm' matches a Multiline list
10018             #-----------------------------
10019             elsif ( $trailing_comma_style eq 'm' ) {
10020 20         39 $match = $is_multiline;
10021             }
10022              
10023             #----------------------------------
10024             # 'b' matches a Bare trailing comma
10025             #----------------------------------
10026             elsif ( $trailing_comma_style eq 'b' ) {
10027 16         31 $match = $is_bare_multiline_comma;
10028             }
10029              
10030             #--------------------------------------------------------------------------
10031             # 'h' matches a bare hash list with about 1 comma and 1 fat comma per line.
10032             # 'i' matches a bare stable list with about 1 comma per line.
10033             #--------------------------------------------------------------------------
10034             elsif ( $trailing_comma_style eq 'h' || $trailing_comma_style eq 'i' ) {
10035              
10036             # We can treat these together because they are similar.
10037             # The set of 'i' matches includes the set of 'h' matches.
10038              
10039             # the trailing comma must be bare for both 'h' and 'i'
10040 32 100       115 return if ( !$is_bare_multiline_comma );
10041              
10042             # There must be no more than one comma per line for both 'h' and 'i'
10043             # The new_comma_count here will include the trailing comma.
10044 10         23 my $new_comma_count = $rtype_count->{','};
10045 10 100       29 $new_comma_count += 1 if ($if_add);
10046 10         19 my $excess_commas = $new_comma_count - $line_diff_commas - 1;
10047 10 100       26 if ( $excess_commas > 0 ) {
10048              
10049             # Exception for a special edge case for option 'i': if the trailing
10050             # comma is followed by a blank line or comment, then it cannot be
10051             # covered. Then we can safely accept a small list to avoid
10052             # instability (issue b1443).
10053 2 50 66     42 if ( $trailing_comma_style eq 'i'
    50 33        
      66        
      33        
      33        
      0        
10054             && $iline_c - $rLL_new->[$Kp]->[_LINE_INDEX_] > 1
10055             && $new_comma_count <= 2 )
10056             {
10057 0         0 $match = 1;
10058             }
10059              
10060             # Patch for instability issue b1456: -boc can trick this test; so
10061             # skip it when deleting commas to avoid possible instability
10062             # with option 'h' in combination with -atc -dtc -boc;
10063             elsif (
10064             $trailing_comma_style eq 'h'
10065              
10066             # this is a deletion (due to -dtc)
10067             && !$if_add
10068              
10069             # -atc is also set
10070             && $rOpts_add_trailing_commas
10071              
10072             # -boc is set and active
10073             && $rOpts_break_at_old_comma_breakpoints
10074             && !$rOpts_ignore_old_breakpoints
10075             )
10076             {
10077             # ignore this test
10078             }
10079              
10080             else {
10081 2         6 return;
10082             }
10083             }
10084              
10085             # a list of key=>value pairs with at least 2 fat commas is a match
10086             # for both 'h' and 'i'
10087 8         14 my $fat_comma_count = $rtype_count->{'=>'};
10088 8 100 66     69 if ( !$match && $fat_comma_count && $fat_comma_count >= 2 ) {
      66        
10089              
10090             # comma count (including trailer) and fat comma count must differ by
10091             # by no more than 1. This allows for some small variations.
10092 4         11 my $comma_diff = $new_comma_count - $fat_comma_count;
10093 4   33     19 $match = ( $comma_diff >= -1 && $comma_diff <= 1 );
10094             }
10095              
10096             # For 'i' only, a list that can be shown to be stable is a match
10097 8 100 100     35 if ( !$match && $trailing_comma_style eq 'i' ) {
10098 2   66     10 $match = (
10099             $is_permanently_broken
10100             || ( $rOpts_break_at_old_comma_breakpoints
10101             && !$rOpts_ignore_old_breakpoints )
10102             );
10103             }
10104             }
10105              
10106             #-------------------------------------------------------------------------
10107             # Unrecognized parameter. This should have been caught in the input check.
10108             #-------------------------------------------------------------------------
10109             else {
10110              
10111 0         0 DEVEL_MODE && Fault("Unrecognized parameter '$trailing_comma_style'\n");
10112              
10113             # do not add or delete
10114 0         0 return !$if_add;
10115             }
10116              
10117             # Now do any special paren check
10118 60 0 66     229 if ( $match
      33        
      33        
      0        
10119             && $paren_flag
10120             && $paren_flag ne '1'
10121             && $paren_flag ne '*'
10122             && $closing_token eq ')' )
10123             {
10124 0   0     0 $match &&=
10125             $self->match_paren_control_flag( $type_sequence, $paren_flag,
10126             $rLL_new );
10127             }
10128              
10129             # Fix for b1379, b1380, b1381, b1382, b1384 part 1. Mark trailing commas
10130             # for use by -vtc logic to avoid instability when -dtc and -atc are both
10131             # active.
10132 60 100       127 if ($match) {
10133 23 100 100     180 if ( $if_add && $rOpts_delete_trailing_commas
      66        
      66        
10134             || !$if_add && $rOpts_add_trailing_commas )
10135             {
10136 17         38 $self->[_ris_bare_trailing_comma_by_seqno_]->{$type_sequence} = 1;
10137              
10138             # The combination of -atc and -dtc and -cab=3 can be unstable
10139             # (b1394). So we deactivate -cab=3 in this case.
10140             # A value of '0' or '4' is required for stability of case b1451.
10141 17 50       45 if ( $rOpts_comma_arrow_breakpoints == 3 ) {
10142 0         0 $self->[_roverride_cab3_]->{$type_sequence} = 0;
10143             }
10144             }
10145             }
10146 60         178 return $match;
10147             } ## end sub match_trailing_comma_rule
10148              
10149             sub store_new_token {
10150              
10151 11     11 0 64 my ( $self, $type, $token, $Kp ) = @_;
10152              
10153             # Create and insert a completely new token into the output stream
10154              
10155             # Input parameters:
10156             # $type = the token type
10157             # $token = the token text
10158             # $Kp = index of the previous token in the new list, $rLL_new
10159              
10160             # Returns:
10161             # $Knew = index in $rLL_new of the new token
10162              
10163             # This operation is a little tricky because we are creating a new token and
10164             # we have to take care to follow the requested whitespace rules.
10165              
10166 11         17 my $Ktop = @{$rLL_new} - 1;
  11         27  
10167 11   66     51 my $top_is_space = $Ktop >= 0 && $rLL_new->[$Ktop]->[_TYPE_] eq 'b';
10168 11         21 my $Knew;
10169 11 100 66     49 if ( $top_is_space && $want_left_space{$type} == WS_NO ) {
10170              
10171             #----------------------------------------------------
10172             # Method 1: Convert the top blank into the new token.
10173             #----------------------------------------------------
10174              
10175             # Be Careful: we are working on the top of the new stack, on a token
10176             # which has been stored.
10177              
10178 2         10 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
10179              
10180 2         3 $Knew = $Ktop;
10181 2         6 $rLL_new->[$Knew]->[_TOKEN_] = $token;
10182 2         6 $rLL_new->[$Knew]->[_TOKEN_LENGTH_] = length($token);
10183 2         5 $rLL_new->[$Knew]->[_TYPE_] = $type;
10184              
10185             # NOTE: we are changing the output stack without updating variables
10186             # $last_nonblank_code_type, etc. Future needs might require that
10187             # those variables be updated here. For now, we just update the
10188             # type counts as necessary.
10189              
10190 2 50       6 if ( $is_counted_type{$type} ) {
10191 2         8 my $seqno = $seqno_stack{ $depth_next - 1 };
10192 2 50       10 if ($seqno) {
10193 2         4 $self->[_rtype_count_by_seqno_]->{$seqno}->{$type}++;
10194             }
10195             }
10196              
10197             # Then store a new blank
10198 2         6 $self->store_token($rcopy);
10199             }
10200             else {
10201              
10202             #----------------------------------------
10203             # Method 2: Use the normal storage method
10204             #----------------------------------------
10205              
10206             # Patch for issue c078: keep line indexes in order. If the top
10207             # token is a space that we are keeping (due to '-wls=...) then
10208             # we have to check that old line indexes stay in order.
10209             # In very rare
10210             # instances in which side comments have been deleted and converted
10211             # into blanks, we may have filtered down multiple blanks into just
10212             # one. In that case the top blank may have a higher line number
10213             # than the previous nonblank token. Although the line indexes of
10214             # blanks are not really significant, we need to keep them in order
10215             # in order to pass error checks.
10216 9 50       27 if ($top_is_space) {
10217 0         0 my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
10218 0         0 my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
10219 0 0       0 if ( $new_top_ix < $old_top_ix ) {
10220 0         0 $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
10221             }
10222             }
10223              
10224 9         42 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], $type, $token );
10225 9         33 $self->store_token($rcopy);
10226 9         31 $Knew = @{$rLL_new} - 1;
  9         26  
10227             }
10228 11         27 return $Knew;
10229             } ## end sub store_new_token
10230              
10231             sub check_Q {
10232              
10233             # Check that a quote looks okay, and report possible problems
10234             # to the logfile.
10235              
10236 1     1 0 4 my ( $self, $KK, $Kfirst, $line_number ) = @_;
10237 1         3 my $token = $rLL->[$KK]->[_TOKEN_];
10238 1 50       5 if ( $token =~ /\t/ ) {
10239 0         0 $self->note_embedded_tab($line_number);
10240             }
10241              
10242             # The remainder of this routine looks for something like
10243             # '$var = s/xxx/yyy/;'
10244             # in case it should have been '$var =~ s/xxx/yyy/;'
10245              
10246             # Start by looking for a token beginning with one of: s y m / tr
10247             return
10248 1 50 33     17 unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
10249             || substr( $token, 0, 2 ) eq 'tr' );
10250              
10251             # ... and preceded by one of: = == !=
10252 0         0 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
10253 0 0       0 return unless ( defined($Kp) );
10254 0         0 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
10255 0 0       0 return unless ( $is_unexpected_equals{$previous_nonblank_type} );
10256 0         0 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
10257              
10258 0         0 my $previous_nonblank_type_2 = 'b';
10259 0         0 my $previous_nonblank_token_2 = EMPTY_STRING;
10260 0         0 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
10261 0 0       0 if ( defined($Kpp) ) {
10262 0         0 $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
10263 0         0 $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
10264             }
10265              
10266 0         0 my $next_nonblank_token = EMPTY_STRING;
10267 0         0 my $Kn = $KK + 1;
10268 0         0 my $Kmax = @{$rLL} - 1;
  0         0  
10269 0 0 0     0 if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
  0         0  
10270 0 0       0 if ( $Kn <= $Kmax ) {
10271 0         0 $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
10272             }
10273              
10274 0         0 my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
10275 0         0 my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
10276              
10277 0 0 0     0 if (
      0        
      0        
      0        
10278              
10279             # preceded by simple scalar
10280             $previous_nonblank_type_2 eq 'i'
10281             && $previous_nonblank_token_2 =~ /^\$/
10282              
10283             # followed by some kind of termination
10284             # (but give complaint if we can not see far enough ahead)
10285             && $next_nonblank_token =~ /^[; \)\}]$/
10286              
10287             # scalar is not declared
10288             ## =~ /^(my|our|local)$/
10289             && !( $type_0 eq 'k' && $is_my_our_local{$token_0} )
10290             )
10291             {
10292 0         0 my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
10293 0         0 my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
10294 0         0 complain(
10295             "Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
10296             );
10297             }
10298 0         0 return;
10299             } ## end sub check_Q
10300              
10301             } ## end closure respace_tokens
10302              
10303             sub copy_token_as_type {
10304              
10305             # This provides a quick way to create a new token by
10306             # slightly modifying an existing token.
10307 298     298 0 807 my ( $rold_token, $type, $token ) = @_;
10308 298 50       797 if ( !defined($token) ) {
10309 0 0       0 if ( $type eq 'b' ) {
    0          
    0          
    0          
    0          
10310 0         0 $token = SPACE;
10311             }
10312             elsif ( $type eq 'q' ) {
10313 0         0 $token = EMPTY_STRING;
10314             }
10315             elsif ( $type eq '->' ) {
10316 0         0 $token = '->';
10317             }
10318             elsif ( $type eq ';' ) {
10319 0         0 $token = ';';
10320             }
10321             elsif ( $type eq ',' ) {
10322 0         0 $token = ',';
10323             }
10324             else {
10325              
10326             # Unexpected type ... this sub will work as long as both $token and
10327             # $type are defined, but we should catch any unexpected types during
10328             # development.
10329 0         0 if (DEVEL_MODE) {
10330             Fault(<<EOM);
10331             sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
10332             EOM
10333             }
10334              
10335             # Shouldn't get here
10336 0         0 $token = $type;
10337             }
10338             }
10339              
10340 298         573 my @rnew_token = @{$rold_token};
  298         1230  
10341 298         650 $rnew_token[_TYPE_] = $type;
10342 298         572 $rnew_token[_TOKEN_] = $token;
10343 298         554 $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING;
10344 298         809 return \@rnew_token;
10345             } ## end sub copy_token_as_type
10346              
10347             sub K_next_code {
10348 532     532 0 1419 my ( $self, $KK, $rLL ) = @_;
10349              
10350             # return the index K of the next nonblank, non-comment token
10351 532 50       1426 return if ( !defined($KK) );
10352 532 50       1334 return if ( $KK < 0 );
10353              
10354             # use the standard array unless given otherwise
10355 532 50       1558 $rLL = $self->[_rLL_] if ( !defined($rLL) );
10356 532         794 my $Num = @{$rLL};
  532         1020  
10357 532         1098 my $Knnb = $KK + 1;
10358 532         1453 while ( $Knnb < $Num ) {
10359 904 50       2078 if ( !defined( $rLL->[$Knnb] ) ) {
10360              
10361             # We seem to have encountered a gap in our array.
10362             # This shouldn't happen because sub write_line() pushed
10363             # items into the $rLL array.
10364 0         0 Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
10365 0         0 return;
10366             }
10367 904 100 100     3374 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
10368             && $rLL->[$Knnb]->[_TYPE_] ne '#' )
10369             {
10370 515         1242 return $Knnb;
10371             }
10372 389         1697 $Knnb++;
10373             }
10374 17         69 return;
10375             } ## end sub K_next_code
10376              
10377             sub K_next_nonblank {
10378 545     545 0 1254 my ( $self, $KK, $rLL ) = @_;
10379              
10380             # return the index K of the next nonblank token, or
10381             # return undef if none
10382 545 50       1289 return if ( !defined($KK) );
10383 545 50       1328 return if ( $KK < 0 );
10384              
10385             # The third arg allows this routine to be used on any array. This is
10386             # useful in sub respace_tokens when we are copying tokens from an old $rLL
10387             # to a new $rLL array. But usually the third arg will not be given and we
10388             # will just use the $rLL array in $self.
10389 545 100       1380 $rLL = $self->[_rLL_] if ( !defined($rLL) );
10390 545         797 my $Num = @{$rLL};
  545         955  
10391 545         1070 my $Knnb = $KK + 1;
10392 545 100       1286 return if ( $Knnb >= $Num );
10393 544 100       1584 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
10394 458 50       1433 return if ( ++$Knnb >= $Num );
10395 458 50       3184 return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
10396              
10397             # Backup loop. Very unlikely to get here; it means we have neighboring
10398             # blanks in the token stream.
10399 0         0 $Knnb++;
10400 0         0 while ( $Knnb < $Num ) {
10401              
10402             # Safety check, this fault shouldn't happen: The $rLL array is the
10403             # main array of tokens, so all entries should be used. It is
10404             # initialized in sub write_line, and then re-initialized by sub
10405             # store_token() within sub respace_tokens. Tokens are pushed on
10406             # so there shouldn't be any gaps.
10407 0 0       0 if ( !defined( $rLL->[$Knnb] ) ) {
10408 0         0 Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
10409 0         0 return;
10410             }
10411 0 0       0 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
  0         0  
10412 0         0 $Knnb++;
10413             }
10414 0         0 return;
10415             } ## end sub K_next_nonblank
10416              
10417             sub K_previous_code {
10418              
10419             # return the index K of the previous nonblank, non-comment token
10420             # Call with $KK=undef to start search at the top of the array
10421 2483     2483 0 5261 my ( $self, $KK, $rLL ) = @_;
10422              
10423             # use the standard array unless given otherwise
10424 2483 100       5840 $rLL = $self->[_rLL_] unless ( defined($rLL) );
10425 2483         3490 my $Num = @{$rLL};
  2483         4241  
10426 2483 100       5277 if ( !defined($KK) ) { $KK = $Num }
  1         3  
10427              
10428 2483 50       5206 if ( $KK > $Num ) {
10429              
10430             # This fault can be caused by a programming error in which a bad $KK is
10431             # given. The caller should make the first call with KK_new=undef to
10432             # avoid this error.
10433 0         0 Fault(
10434             "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
10435             ) if (DEVEL_MODE);
10436 0         0 return;
10437             }
10438 2483         4049 my $Kpnb = $KK - 1;
10439 2483         5265 while ( $Kpnb >= 0 ) {
10440 3520 100 100     11646 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
10441             && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
10442             {
10443 2482         5367 return $Kpnb;
10444             }
10445 1038         2099 $Kpnb--;
10446             }
10447 1         3 return;
10448             } ## end sub K_previous_code
10449              
10450             sub K_previous_nonblank {
10451              
10452             # return index of previous nonblank token before item K;
10453             # Call with $KK=undef to start search at the top of the array
10454 780     780 0 1851 my ( $self, $KK, $rLL ) = @_;
10455              
10456             # use the standard array unless given otherwise
10457 780 100       2301 $rLL = $self->[_rLL_] unless ( defined($rLL) );
10458 780         1265 my $Num = @{$rLL};
  780         1448  
10459 780 100       2143 if ( !defined($KK) ) { $KK = $Num }
  394         712  
10460 780 50       1886 if ( $KK > $Num ) {
10461              
10462             # This fault can be caused by a programming error in which a bad $KK is
10463             # given. The caller should make the first call with KK_new=undef to
10464             # avoid this error.
10465 0         0 Fault(
10466             "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
10467             ) if (DEVEL_MODE);
10468 0         0 return;
10469             }
10470 780         1482 my $Kpnb = $KK - 1;
10471 780 100       1759 return if ( $Kpnb < 0 );
10472 771 100       2336 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
10473 538 50       1796 return if ( --$Kpnb < 0 );
10474 538 50       1847 return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
10475              
10476             # Backup loop. We should not get here unless some routine
10477             # slipped repeated blanks into the token stream.
10478 0 0       0 return if ( --$Kpnb < 0 );
10479 0         0 while ( $Kpnb >= 0 ) {
10480 0 0       0 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
  0         0  
10481 0         0 $Kpnb--;
10482             }
10483 0         0 return;
10484             } ## end sub K_previous_nonblank
10485              
10486             sub parent_seqno_by_K {
10487              
10488             # Return the sequence number of the parent container of token K, if any.
10489              
10490 208     208 0 357 my ( $self, $KK ) = @_;
10491 208         315 my $rLL = $self->[_rLL_];
10492              
10493             # The task is to jump forward to the next container token
10494             # and use the sequence number of either it or its parent.
10495              
10496             # For example, consider the following with seqno=5 of the '[' and ']'
10497             # being called with index K of the first token of each line:
10498              
10499             # # result
10500             # push @tests, # -
10501             # [ # -
10502             # sub { 99 }, 'do {&{%s} for 1,2}', # 5
10503             # '(&{})(&{})', undef, # 5
10504             # [ 2, 2, 0 ], 0 # 5
10505             # ]; # -
10506              
10507             # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
10508             # unbalanced files, last sequence number will either be undefined or it may
10509             # be at a deeper level. In either case we will just return SEQ_ROOT to
10510             # have a defined value and allow formatting to proceed.
10511 208         302 my $parent_seqno = SEQ_ROOT;
10512 208         336 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
10513 208 100       390 if ($type_sequence) {
10514 63         144 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
10515             }
10516             else {
10517 145         248 my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
10518 145 100       259 if ( defined($Kt) ) {
10519 122         190 $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
10520 122         175 my $type = $rLL->[$Kt]->[_TYPE_];
10521              
10522             # if next container token is closing, it is the parent seqno
10523 122 100       230 if ( $is_closing_type{$type} ) {
10524 19         29 $parent_seqno = $type_sequence;
10525             }
10526              
10527             # otherwise we want its parent container
10528             else {
10529 103         199 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
10530             }
10531             }
10532             }
10533 208 50       418 $parent_seqno = SEQ_ROOT if ( !defined($parent_seqno) );
10534 208         464 return $parent_seqno;
10535             } ## end sub parent_seqno_by_K
10536              
10537             sub is_in_block_by_i {
10538 316     316 0 910 my ( $self, $i ) = @_;
10539              
10540             # returns true if
10541             # token at i is contained in a BLOCK
10542             # or is at root level
10543             # or there is some kind of error (i.e. unbalanced file)
10544             # returns false otherwise
10545              
10546 316 50       1020 if ( $i < 0 ) {
10547 0         0 DEVEL_MODE && Fault("Bad call, i='$i'\n");
10548 0         0 return 1;
10549             }
10550              
10551 316         766 my $seqno = $parent_seqno_to_go[$i];
10552 316 100 66     2442 return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
10553 141 100       713 return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
10554 107         436 return;
10555             } ## end sub is_in_block_by_i
10556              
10557             sub is_in_list_by_i {
10558 1769     1769 0 4008 my ( $self, $i ) = @_;
10559              
10560             # returns true if token at i is contained in a LIST
10561             # returns false otherwise
10562 1769         3443 my $seqno = $parent_seqno_to_go[$i];
10563 1769 50       3886 return if ( !$seqno );
10564 1769 100       6972 return if ( $seqno eq SEQ_ROOT );
10565 591 100       1853 if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
10566 157         637 return 1;
10567             }
10568 434         1639 return;
10569             } ## end sub is_in_list_by_i
10570              
10571             sub is_list_by_K {
10572              
10573             # Return true if token K is in a list
10574 165     165 0 262 my ( $self, $KK ) = @_;
10575              
10576 165         290 my $parent_seqno = $self->parent_seqno_by_K($KK);
10577 165 50       296 return unless defined($parent_seqno);
10578 165         351 return $self->[_ris_list_by_seqno_]->{$parent_seqno};
10579             } ## end sub is_list_by_K
10580              
10581             sub is_list_by_seqno {
10582              
10583             # Return true if the immediate contents of a container appears to be a
10584             # list.
10585 46     46 0 99 my ( $self, $seqno ) = @_;
10586 46 50       96 return unless defined($seqno);
10587 46         96 return $self->[_ris_list_by_seqno_]->{$seqno};
10588             } ## end sub is_list_by_seqno
10589              
10590             sub resync_lines_and_tokens {
10591              
10592 558     558 0 1479 my $self = shift;
10593              
10594             # Re-construct the arrays of tokens associated with the original input
10595             # lines since they have probably changed due to inserting and deleting
10596             # blanks and a few other tokens.
10597              
10598             # Return parameters:
10599             # set severe_error = true if processing needs to terminate
10600 558         1123 my $severe_error;
10601 558         1443 my $rqw_lines = [];
10602              
10603 558         1589 my $rLL = $self->[_rLL_];
10604 558         1350 my $Klimit = $self->[_Klimit_];
10605 558         1284 my $rlines = $self->[_rlines_];
10606 558         1251 my @Krange_code_without_comments;
10607             my @Klast_valign_code;
10608              
10609             # This is the next token and its line index:
10610 558         1283 my $Knext = 0;
10611 558 100       1902 my $Kmax = defined($Klimit) ? $Klimit : -1;
10612              
10613             # Verify that old line indexes are in still order. If this error occurs,
10614             # check locations where sub 'respace_tokens' creates new tokens (like
10615             # blank spaces). It must have set a bad old line index.
10616 558         1039 if ( DEVEL_MODE && defined($Klimit) ) {
10617             my $iline = $rLL->[0]->[_LINE_INDEX_];
10618             foreach my $KK ( 1 .. $Klimit ) {
10619             my $iline_last = $iline;
10620             $iline = $rLL->[$KK]->[_LINE_INDEX_];
10621             if ( $iline < $iline_last ) {
10622             my $KK_m = $KK - 1;
10623             my $token_m = $rLL->[$KK_m]->[_TOKEN_];
10624             my $token = $rLL->[$KK]->[_TOKEN_];
10625             my $type_m = $rLL->[$KK_m]->[_TYPE_];
10626             my $type = $rLL->[$KK]->[_TYPE_];
10627             Fault(<<EOM);
10628             Line indexes out of order at index K=$KK:
10629             at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
10630             at KK =$KK: old line=$iline, type='$type', token='$token',
10631             EOM
10632             }
10633             }
10634             }
10635              
10636 558         1456 my $iline = -1;
10637 558         1245 foreach my $line_of_tokens ( @{$rlines} ) {
  558         1808  
10638 7647         10619 $iline++;
10639 7647         12585 my $line_type = $line_of_tokens->{_line_type};
10640 7647 100       14486 if ( $line_type eq 'CODE' ) {
10641              
10642             # Get the old number of tokens on this line
10643 7474         11336 my $rK_range_old = $line_of_tokens->{_rK_range};
10644 7474         9495 my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
  7474         13556  
10645 7474         10416 my $Kdiff_old = 0;
10646 7474 100       13200 if ( defined($Kfirst_old) ) {
10647 6671         9181 $Kdiff_old = $Klast_old - $Kfirst_old;
10648             }
10649              
10650             # Find the range of NEW K indexes for the line:
10651             # $Kfirst = index of first token on line
10652             # $Klast = index of last token on line
10653 7474         10345 my ( $Kfirst, $Klast );
10654              
10655 7474         9989 my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
10656              
10657             # Optimization: Although the actual K indexes may be completely
10658             # changed after respacing, the number of tokens on any given line
10659             # will often be nearly unchanged. So we will see if we can start
10660             # our search by guessing that the new line has the same number
10661             # of tokens as the old line.
10662 7474         10421 my $Knext_guess = $Knext + $Kdiff_old;
10663 7474 100 100     26847 if ( $Knext_guess > $Knext
      100        
10664             && $Knext_guess < $Kmax
10665             && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
10666             {
10667              
10668             # the guess is good, so we can start our search here
10669 4550         6553 $Knext = $Knext_guess + 1;
10670             }
10671              
10672 7474   100     22204 while ($Knext <= $Kmax
10673             && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
10674             {
10675 16007         43203 $Knext++;
10676             }
10677              
10678 7474 100       14139 if ( $Knext > $Knext_beg ) {
10679              
10680 6665         9011 $Klast = $Knext - 1;
10681              
10682             # Delete any terminal blank token
10683 6665 100       13336 if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
  5226         7354  
10684              
10685 6665 50       11253 if ( $Klast < $Knext_beg ) {
10686 0         0 $Klast = undef;
10687             }
10688             else {
10689              
10690 6665         8903 $Kfirst = $Knext_beg;
10691              
10692             # Save ranges of non-comment code. This will be used by
10693             # sub keep_old_line_breaks.
10694 6665 100       13194 if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
10695 5939         14322 push @Krange_code_without_comments, [ $Kfirst, $Klast ];
10696             }
10697              
10698             # Only save ending K indexes of code types which are blank
10699             # or 'VER'. These will be used for a convergence check.
10700             # See related code in sub 'convey_batch_to_vertical_aligner'
10701 6665         11799 my $CODE_type = $line_of_tokens->{_code_type};
10702 6665 100 100     17398 if ( !$CODE_type
10703             || $CODE_type eq 'VER' )
10704             {
10705 5757         9513 push @Klast_valign_code, $Klast;
10706             }
10707             }
10708             }
10709              
10710             # It is only safe to trim the actual line text if the input
10711             # line had a terminal blank token. Otherwise, we may be
10712             # in a quote.
10713 7474 100       15226 if ( $line_of_tokens->{_ended_in_blank_token} ) {
10714 145         991 $line_of_tokens->{_line_text} =~ s/\s+$//;
10715             }
10716 7474         15906 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
10717              
10718             # Deleting semicolons can create new empty code lines
10719             # which should be marked as blank
10720 7474 100       15189 if ( !defined($Kfirst) ) {
10721 809         1834 my $CODE_type = $line_of_tokens->{_code_type};
10722 809 100       2730 if ( !$CODE_type ) {
10723 1         4 $line_of_tokens->{_code_type} = 'BL';
10724             }
10725             }
10726             else {
10727              
10728             #---------------------------------------------------
10729             # save indexes of all lines with a 'q' at either end
10730             # for later use by sub find_multiline_qw
10731             #---------------------------------------------------
10732 6665 100 100     26636 if ( $rLL->[$Kfirst]->[_TYPE_] eq 'q'
10733             || $rLL->[$Klast]->[_TYPE_] eq 'q' )
10734             {
10735 227         438 push @{$rqw_lines}, $iline;
  227         633  
10736             }
10737             }
10738             }
10739             }
10740              
10741             # There shouldn't be any nodes beyond the last one. This routine is
10742             # relinking lines and tokens after the tokens have been respaced. A fault
10743             # here indicates some kind of bug has been introduced into the above loops.
10744             # There is not good way to keep going; we better stop here.
10745 558 50       3578 if ( $Knext <= $Kmax ) {
10746 0         0 Fault_Warn(
10747             "unexpected tokens at end of file when reconstructing lines");
10748 0         0 $severe_error = 1;
10749 0         0 return ( $severe_error, $rqw_lines );
10750             }
10751 558         1902 $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
10752              
10753             # Setup the convergence test in the FileWriter based on line-ending indexes
10754 558         1437 my $file_writer_object = $self->[_file_writer_object_];
10755 558         4199 $file_writer_object->setup_convergence_test( \@Klast_valign_code );
10756              
10757 558         2310 return ( $severe_error, $rqw_lines );
10758              
10759             } ## end sub resync_lines_and_tokens
10760              
10761             sub check_for_old_break {
10762 32     32 0 52 my ( $self, $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
10763              
10764             # This sub is called to help implement flags:
10765             # --keep-old-breakpoints-before and --keep-old-breakpoints-after
10766             # Given:
10767             # $KK = index of a token,
10768             # $rkeep_break_hash = user control for --keep-old-...
10769             # $rbreak_hash = hash of tokens where breaks are requested
10770             # Set $rbreak_hash as follows if a user break is requested:
10771             # = 1 make a hard break (flush the current batch)
10772             # best for something like leading commas (-kbb=',')
10773             # = 2 make a soft break (keep building current batch)
10774             # best for something like leading ->
10775              
10776 32         45 my $rLL = $self->[_rLL_];
10777              
10778 32         50 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
10779              
10780             # non-container tokens use the type as the key
10781 32 100       55 if ( !$seqno ) {
10782 25         38 my $type = $rLL->[$KK]->[_TYPE_];
10783 25 100       47 if ( $rkeep_break_hash->{$type} ) {
10784 7 50       52 $rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1;
10785             }
10786             }
10787              
10788             # container tokens use the token as the key
10789             else {
10790 7         11 my $token = $rLL->[$KK]->[_TOKEN_];
10791 7         11 my $flag = $rkeep_break_hash->{$token};
10792 7 50       16 if ($flag) {
10793              
10794 0   0     0 my $match = $flag eq '1' || $flag eq '*';
10795              
10796             # check for special matching codes
10797 0 0       0 if ( !$match ) {
10798 0 0 0     0 if ( $token eq '(' || $token eq ')' ) {
    0 0        
10799 0         0 $match = $self->match_paren_control_flag( $seqno, $flag );
10800             }
10801             elsif ( $token eq '{' || $token eq '}' ) {
10802              
10803             # These tentative codes 'b' and 'B' for brace types are
10804             # placeholders for possible future brace types. They
10805             # are not documented and may be changed.
10806 0         0 my $block_type = $self->[_rblock_type_of_seqno_]->{$seqno};
10807 0 0       0 if ( $flag eq 'b' ) { $match = $block_type }
  0 0       0  
10808 0         0 elsif ( $flag eq 'B' ) { $match = !$block_type }
10809             else {
10810             # unknown code - no match
10811             }
10812             }
10813             else {
10814             ## ok: none of the above
10815             }
10816             }
10817 0 0       0 if ($match) {
10818 0         0 my $type = $rLL->[$KK]->[_TYPE_];
10819 0 0       0 $rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1;
10820             }
10821             }
10822             }
10823 32         58 return;
10824             } ## end sub check_for_old_break
10825              
10826             sub keep_old_line_breaks {
10827              
10828             # Called once per file to find and mark any old line breaks which
10829             # should be kept. We will be translating the input hashes into
10830             # token indexes.
10831              
10832             # A flag is set as follows:
10833             # = 1 make a hard break (flush the current batch)
10834             # best for something like leading commas (-kbb=',')
10835             # = 2 make a soft break (keep building current batch)
10836             # best for something like leading ->
10837              
10838 561     561 0 1523 my ($self) = @_;
10839              
10840 561         1471 my $rLL = $self->[_rLL_];
10841 561         1279 my $rKrange_code_without_comments =
10842             $self->[_rKrange_code_without_comments_];
10843 561         1369 my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
10844 561         1320 my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
10845 561         1399 my $rbreak_container = $self->[_rbreak_container_];
10846              
10847             #----------------------------------------
10848             # Apply --break-at-old-method-breakpoints
10849             #----------------------------------------
10850              
10851             # This code moved here from sub break_lists to fix b1120
10852 561 100       2696 if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
10853 2         5 foreach my $item ( @{$rKrange_code_without_comments} ) {
  2         7  
10854 16         26 my ( $Kfirst, $Klast ) = @{$item};
  16         27  
10855 16         33 my $type = $rLL->[$Kfirst]->[_TYPE_];
10856 16         31 my $token = $rLL->[$Kfirst]->[_TOKEN_];
10857              
10858             # leading '->' use a value of 2 which causes a soft
10859             # break rather than a hard break
10860 16 100       40 if ( $type eq '->' ) {
    100          
10861 4         11 $rbreak_before_Kfirst->{$Kfirst} = 2;
10862             }
10863              
10864             # leading ')->' use a special flag to insure that both
10865             # opening and closing parens get opened
10866             # Fix for b1120: only for parens, not braces
10867             elsif ( $token eq ')' ) {
10868 2         13 my $Kn = $self->K_next_nonblank($Kfirst);
10869 2 50       6 next if ( !defined($Kn) );
10870 2 50       6 next if ( $Kn > $Klast );
10871 2 50       6 next if ( $rLL->[$Kn]->[_TYPE_] ne '->' );
10872 2         4 my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
10873 2 50       6 next if ( !$seqno );
10874              
10875             # Note: in previous versions there was a fix here to avoid
10876             # instability between conflicting -bom and -pvt or -pvtc flags.
10877             # The fix skipped -bom for a small line difference. But this
10878             # was troublesome, and instead the fix has been moved to
10879             # sub set_vertical_tightness_flags where priority is given to
10880             # the -bom flag over -pvt and -pvtc flags. Both opening and
10881             # closing paren flags are involved because even though -bom only
10882             # requests breaking before the closing paren, automated logic
10883             # opens the opening paren when the closing paren opens.
10884             # Relevant cases are b977, b1215, b1270, b1303
10885              
10886 2         6 $rbreak_container->{$seqno} = 1;
10887             }
10888             else {
10889             ## ok: not a special case
10890             }
10891             }
10892             }
10893              
10894             #---------------------------------------------------------------------
10895             # Apply --keep-old-breakpoints-before and --keep-old-breakpoints-after
10896             #---------------------------------------------------------------------
10897              
10898 561 100 66     3491 return unless ( %keep_break_before_type || %keep_break_after_type );
10899              
10900 1         3 foreach my $item ( @{$rKrange_code_without_comments} ) {
  1         5  
10901 16         21 my ( $Kfirst, $Klast ) = @{$item};
  16         30  
10902 16         39 $self->check_for_old_break( $Kfirst, \%keep_break_before_type,
10903             $rbreak_before_Kfirst );
10904 16         29 $self->check_for_old_break( $Klast, \%keep_break_after_type,
10905             $rbreak_after_Klast );
10906             }
10907 1         4 return;
10908             } ## end sub keep_old_line_breaks
10909              
10910             sub weld_containers {
10911              
10912             # Called once per file to do any welding operations requested by --weld*
10913             # flags.
10914 561     561 0 1580 my ($self) = @_;
10915              
10916             # This count is used to eliminate needless calls for weld checks elsewhere
10917 561         1284 $total_weld_count = 0;
10918              
10919 561 100       1919 return if ( $rOpts->{'indent-only'} );
10920 558 100       1954 return unless ($rOpts_add_newlines);
10921              
10922             # Important: sub 'weld_cuddled_blocks' must be called before
10923             # sub 'weld_nested_containers'. This is because the cuddled option needs to
10924             # use the original _LEVEL_ values of containers, but the weld nested
10925             # containers changes _LEVEL_ of welded containers.
10926              
10927             # Here is a good test case to be sure that both cuddling and welding
10928             # are working and not interfering with each other: <<snippets/ce_wn1.in>>
10929              
10930             # perltidy -wn -ce
10931              
10932             # if ($BOLD_MATH) { (
10933             # $labels, $comment,
10934             # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
10935             # ) } else { (
10936             # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
10937             # $after
10938             # ) }
10939              
10940 552 100       1071 $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
  552         2038  
10941              
10942 552 100       3942 if ( $rOpts->{'weld-nested-containers'} ) {
10943              
10944 23         160 $self->weld_nested_containers();
10945              
10946 23         164 $self->weld_nested_quotes();
10947             }
10948              
10949             #-------------------------------------------------------------
10950             # All welding is done. Finish setting up weld data structures.
10951             #-------------------------------------------------------------
10952              
10953 552         1406 my $rLL = $self->[_rLL_];
10954 552         1388 my $rK_weld_left = $self->[_rK_weld_left_];
10955 552         1260 my $rK_weld_right = $self->[_rK_weld_right_];
10956 552         1263 my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
10957              
10958 552         1040 my @K_multi_weld;
10959 552         1060 my @keys = keys %{$rK_weld_right};
  552         1832  
10960 552         1348 $total_weld_count = @keys;
10961              
10962             # First pass to process binary welds.
10963             # This loop is processed in unsorted order for efficiency.
10964 552         1965 foreach my $Kstart (@keys) {
10965 110         217 my $Kend = $rK_weld_right->{$Kstart};
10966              
10967             # An error here would be due to an incorrect initialization introduced
10968             # in one of the above weld routines, like sub weld_nested.
10969 110 50       324 if ( $Kend <= $Kstart ) {
10970 0         0 Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
10971             if (DEVEL_MODE);
10972 0         0 next;
10973             }
10974              
10975             # Set weld values for all tokens this welded pair
10976 110         285 foreach ( $Kstart + 1 .. $Kend ) {
10977 265         795 $rK_weld_left->{$_} = $Kstart;
10978             }
10979 110         313 foreach my $Kx ( $Kstart .. $Kend - 1 ) {
10980 265         2020 $rK_weld_right->{$Kx} = $Kend;
10981 265         676 $rweld_len_right_at_K->{$Kx} =
10982             $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
10983             $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
10984             }
10985              
10986             # Remember the leftmost index of welds which continue to the right
10987 110 100 100     500 if ( defined( $rK_weld_right->{$Kend} )
10988             && !defined( $rK_weld_left->{$Kstart} ) )
10989             {
10990 17         49 push @K_multi_weld, $Kstart;
10991             }
10992             }
10993              
10994             # Second pass to process chains of welds (these are rare).
10995             # This has to be processed in sorted order.
10996 552 100       2238 if (@K_multi_weld) {
10997 9         26 my $Kend = -1;
10998 9         67 foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
  8         41  
10999              
11000             # Skip any interior K which was originally missing a left link
11001 17 50       57 next if ( $Kstart <= $Kend );
11002              
11003             # Find the end of this chain
11004 17         41 $Kend = $rK_weld_right->{$Kstart};
11005 17         39 my $Knext = $rK_weld_right->{$Kend};
11006 17         58 while ( defined($Knext) ) {
11007 19         37 $Kend = $Knext;
11008 19         51 $Knext = $rK_weld_right->{$Kend};
11009             }
11010              
11011             # Set weld values this chain
11012 17         51 foreach ( $Kstart + 1 .. $Kend ) {
11013 79         160 $rK_weld_left->{$_} = $Kstart;
11014             }
11015 17         58 foreach my $Kx ( $Kstart .. $Kend - 1 ) {
11016 79         123 $rK_weld_right->{$Kx} = $Kend;
11017 79         215 $rweld_len_right_at_K->{$Kx} =
11018             $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
11019             $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
11020             }
11021             }
11022             }
11023              
11024 552         1434 return;
11025             } ## end sub weld_containers
11026              
11027             sub cumulative_length_before_K {
11028 59     59 0 150 my ( $self, $KK ) = @_;
11029              
11030             # Returns the cumulative character length from the first token to
11031             # token before the token at index $KK.
11032 59         107 my $rLL = $self->[_rLL_];
11033 59 50       249 return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
11034             }
11035              
11036             sub weld_cuddled_blocks {
11037 12     12 0 42 my ($self) = @_;
11038              
11039             # Called once per file to handle cuddled formatting
11040              
11041 12         43 my $rK_weld_left = $self->[_rK_weld_left_];
11042 12         32 my $rK_weld_right = $self->[_rK_weld_right_];
11043 12         42 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11044              
11045             # This routine implements the -cb flag by finding the appropriate
11046             # closing and opening block braces and welding them together.
11047 12 50       26 return unless ( %{$rcuddled_block_types} );
  12         71  
11048              
11049 12         36 my $rLL = $self->[_rLL_];
11050 12 50 33     67 return unless ( defined($rLL) && @{$rLL} );
  12         62  
11051              
11052 12         37 my $rbreak_container = $self->[_rbreak_container_];
11053 12         48 my $ris_broken_container = $self->[_ris_broken_container_];
11054 12         36 my $ris_cuddled_closing_brace = $self->[_ris_cuddled_closing_brace_];
11055 12         29 my $K_closing_container = $self->[_K_closing_container_];
11056              
11057             # A stack to remember open chains at all levels: This is a hash rather than
11058             # an array for safety because negative levels can occur in files with
11059             # errors. This allows us to keep processing with negative levels.
11060             # $in_chain{$level} = [$chain_type, $type_sequence];
11061 12         29 my %in_chain;
11062 12         55 my $CBO = $rOpts->{'cuddled-break-option'};
11063              
11064             # loop over structure items to find cuddled pairs
11065 12         29 my $level = 0;
11066 12         35 my $KNEXT = $self->[_K_first_seq_item_];
11067 12         54 while ( defined($KNEXT) ) {
11068 394         516 my $KK = $KNEXT;
11069 394         564 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
11070 394         507 my $rtoken_vars = $rLL->[$KK];
11071 394         588 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
11072 394 50       666 if ( !$type_sequence ) {
11073 0 0       0 next if ( $KK == 0 ); # first token in file may not be container
11074              
11075             # A fault here implies that an error was made in the little loop at
11076             # the bottom of sub 'respace_tokens' which set the values of
11077             # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
11078             # loop control lines above.
11079 0         0 Fault("sequence = $type_sequence not defined at K=$KK")
11080             if (DEVEL_MODE);
11081 0         0 next;
11082             }
11083              
11084             # NOTE: we must use the original levels here. They can get changed
11085             # by sub 'weld_nested_containers', so this routine must be called
11086             # before sub 'weld_nested_containers'.
11087 394         527 my $last_level = $level;
11088 394         542 $level = $rtoken_vars->[_LEVEL_];
11089              
11090 394 100       785 if ( $level < $last_level ) { $in_chain{$last_level} = undef }
  72 100       173  
11091 72         187 elsif ( $level > $last_level ) { $in_chain{$level} = undef }
11092             else {
11093             ## ok - ($level == $last_level)
11094             }
11095              
11096             # We are only looking at code blocks
11097 394         562 my $token = $rtoken_vars->[_TOKEN_];
11098 394         547 my $type = $rtoken_vars->[_TYPE_];
11099 394 100       803 next unless ( $type eq $token );
11100              
11101 218 100       554 if ( $token eq '{' ) {
    100          
11102              
11103 65         154 my $block_type = $rblock_type_of_seqno->{$type_sequence};
11104 65 50       157 if ( !$block_type ) {
11105              
11106             # patch for unrecognized block types which may not be labeled
11107 0         0 my $Kp = $self->K_previous_nonblank($KK);
11108 0   0     0 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
11109 0         0 $Kp = $self->K_previous_nonblank($Kp);
11110             }
11111 0 0       0 next unless $Kp;
11112 0         0 $block_type = $rLL->[$Kp]->[_TOKEN_];
11113             }
11114 65 100       150 if ( $in_chain{$level} ) {
11115              
11116             # we are in a chain and are at an opening block brace.
11117             # See if we are welding this opening brace with the previous
11118             # block brace. Get their identification numbers:
11119 18         102 my $closing_seqno = $in_chain{$level}->[1];
11120 18         56 my $opening_seqno = $type_sequence;
11121              
11122             # The preceding block must be on multiple lines so that its
11123             # closing brace will start a new line.
11124 18 0 33     62 if ( !$ris_broken_container->{$closing_seqno}
11125             && !$rbreak_container->{$closing_seqno} )
11126             {
11127 0 0       0 next unless ( $CBO == 2 );
11128 0         0 $rbreak_container->{$closing_seqno} = 1;
11129             }
11130              
11131             # We can weld the closing brace to its following word ..
11132 18         49 my $Ko = $K_closing_container->{$closing_seqno};
11133 18         33 my $Kon;
11134 18 50       60 if ( defined($Ko) ) {
11135 18         68 $Kon = $self->K_next_nonblank($Ko);
11136             }
11137              
11138             # ..unless it is a comment
11139 18 50 33     145 if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
11140              
11141             # OK to weld these two tokens...
11142 18         86 $rK_weld_right->{$Ko} = $Kon;
11143 18         66 $rK_weld_left->{$Kon} = $Ko;
11144              
11145             # Set flag that we want to break the next container
11146             # so that the cuddled line is balanced.
11147 18 50       83 $rbreak_container->{$opening_seqno} = 1
11148             if ($CBO);
11149              
11150             # Remember which braces are cuddled.
11151             # The closing brace is used to set adjusted indentations.
11152             # The opening brace is not yet used but might eventually
11153             # be needed in setting adjusted indentation.
11154 18         64 $ris_cuddled_closing_brace->{$closing_seqno} = 1;
11155              
11156             }
11157              
11158             }
11159             else {
11160              
11161             # We are not in a chain. Start a new chain if we see the
11162             # starting block type.
11163 47 50       108 if ( $rcuddled_block_types->{$block_type} ) {
11164 0         0 $in_chain{$level} = [ $block_type, $type_sequence ];
11165             }
11166             else {
11167 47         79 $block_type = '*';
11168 47         197 $in_chain{$level} = [ $block_type, $type_sequence ];
11169             }
11170             }
11171             }
11172             elsif ( $token eq '}' ) {
11173 65 50       205 if ( $in_chain{$level} ) {
11174              
11175             # We are in a chain at a closing brace. See if this chain
11176             # continues..
11177 65         198 my $Knn = $self->K_next_code($KK);
11178 65 100       191 next unless $Knn;
11179              
11180 57         122 my $chain_type = $in_chain{$level}->[0];
11181 57         114 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
11182 57 100       169 if (
11183             $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
11184             )
11185             {
11186              
11187             # Note that we do not weld yet because we must wait until
11188             # we we are sure that an opening brace for this follows.
11189 18         56 $in_chain{$level}->[1] = $type_sequence;
11190             }
11191 39         130 else { $in_chain{$level} = undef }
11192             }
11193             }
11194             else {
11195             ## ok - not a curly brace
11196             }
11197             }
11198 12         52 return;
11199             } ## end sub weld_cuddled_blocks
11200              
11201             sub find_nested_pairs {
11202 23     23 0 67 my $self = shift;
11203              
11204             # This routine is called once per file to do preliminary work needed for
11205             # the --weld-nested option. This information is also needed for adding
11206             # semicolons.
11207              
11208 23         1026 my $rLL = $self->[_rLL_];
11209 23 50 33     141 return unless ( defined($rLL) && @{$rLL} );
  23         105  
11210 23         73 my $Num = @{$rLL};
  23         79  
11211              
11212 23         75 my $K_opening_container = $self->[_K_opening_container_];
11213 23         78 my $K_closing_container = $self->[_K_closing_container_];
11214 23         62 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11215              
11216             # We define an array of pairs of nested containers
11217 23         49 my @nested_pairs;
11218              
11219             # Names of calling routines can either be marked as 'i' or 'w',
11220             # and they may invoke a sub call with an '->'. We will consider
11221             # any consecutive string of such types as a single unit when making
11222             # weld decisions. We also allow a leading !
11223 23         150 my $is_name_type = {
11224             'i' => 1,
11225             'w' => 1,
11226             'U' => 1,
11227             '->' => 1,
11228             '!' => 1,
11229             };
11230              
11231             # Loop over all closing container tokens
11232 23         63 foreach my $inner_seqno ( keys %{$K_closing_container} ) {
  23         163  
11233 248         433 my $K_inner_closing = $K_closing_container->{$inner_seqno};
11234              
11235             # See if it is immediately followed by another, outer closing token
11236 248         379 my $K_outer_closing = $K_inner_closing + 1;
11237 248 100 100     863 $K_outer_closing += 1
11238             if ( $K_outer_closing < $Num
11239             && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
11240              
11241 248 100       467 next if ( $K_outer_closing >= $Num );
11242 244         404 my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
11243 244 100       520 next if ( !$outer_seqno );
11244 99         222 my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
11245 99 100       277 next if ( !$is_closing_token{$token_outer_closing} );
11246              
11247             # Simple filter: No commas or semicolons in the outer container
11248 77         170 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$outer_seqno};
11249 77 100       197 if ($rtype_count) {
11250 11 100 100     83 next if ( $rtype_count->{','} || $rtype_count->{';'} );
11251             }
11252              
11253             # Now we have to check the opening tokens.
11254 69         186 my $K_outer_opening = $K_opening_container->{$outer_seqno};
11255 69         140 my $K_inner_opening = $K_opening_container->{$inner_seqno};
11256 69 50       179 next if ( !defined($K_outer_opening) );
11257 69 50       162 next if ( !defined($K_inner_opening) );
11258              
11259 69         130 my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
11260 69         132 my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
11261              
11262             # Verify that the inner opening token is the next container after the
11263             # outer opening token.
11264 69         133 my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
11265 69 50       180 next unless defined($K_io_check);
11266 69 100       199 if ( $K_io_check != $K_inner_opening ) {
11267              
11268             # The inner opening container does not immediately follow the outer
11269             # opening container, but we may still allow a weld if they are
11270             # separated by a sub signature. For example, we may have something
11271             # like this, where $K_io_check may be at the first 'x' instead of
11272             # 'io'. So we need to hop over the signature and see if we arrive
11273             # at 'io'.
11274              
11275             # oo io
11276             # | x x |
11277             # $obj->then( sub ( $code ) {
11278             # ...
11279             # return $c->render(text => '', status => $code);
11280             # } );
11281             # | |
11282             # ic oc
11283              
11284 8 100 100     63 next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
11285 2 50       13 next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
11286 2         8 my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
11287 2 50       8 next unless defined($seqno_signature);
11288 2         6 my $K_signature_closing = $K_closing_container->{$seqno_signature};
11289 2 50       10 next unless defined($K_signature_closing);
11290 2         5 my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
11291             next
11292 2 50 33     20 unless ( defined($K_test) && $K_test == $K_inner_opening );
11293              
11294             # OK, we have arrived at 'io' in the above diagram. We should put
11295             # a limit on the length or complexity of the signature here. There
11296             # is no perfect way to do this, one way is to put a limit on token
11297             # count. For consistency with older versions, we should allow a
11298             # signature with a single variable to weld, but not with
11299             # multiple variables. A single variable as in 'sub ($code) {' can
11300             # have a $Kdiff of 2 to 4, depending on spacing.
11301              
11302             # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
11303             # 7, depending on spacing. So to keep formatting consistent with
11304             # previous versions, we will also avoid welding if there is a comma
11305             # in the signature.
11306              
11307 2         8 my $Kdiff = $K_signature_closing - $K_io_check;
11308 2 50       11 next if ( $Kdiff > 4 );
11309              
11310             # backup comma count test; but we cannot get here with Kdiff<=4
11311 2         6 my $rtc = $self->[_rtype_count_by_seqno_]->{$seqno_signature};
11312 2 0 33     10 next if ( $rtc && $rtc->{','} );
11313             }
11314              
11315             # Yes .. this is a possible nesting pair.
11316             # They can be separated by a small amount.
11317 63         1127 my $K_diff = $K_inner_opening - $K_outer_opening;
11318              
11319             # Count the number of nonblank characters separating them.
11320             # Note: the $nonblank_count includes the inner opening container
11321             # but not the outer opening container, so it will be >= 1.
11322 63 50       160 if ( $K_diff < 0 ) { next } # Shouldn't happen
  0         0  
11323 63         110 my $nonblank_count = 0;
11324 63         118 my $type;
11325             my $is_name;
11326              
11327             # Here is an example of a long identifier chain which counts as a
11328             # single nonblank here (this spans about 10 K indexes):
11329             # if ( !Boucherot::SetOfConnections->new->handler->execute(
11330             # ^--K_o_o ^--K_i_o
11331             # @array) )
11332 63         104 my $Kn_first = $K_outer_opening;
11333 63         110 my $Kn_last_nonblank;
11334             my $saw_comment;
11335              
11336 63         173 foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) {
11337 198 100       475 next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
11338 117 100       257 if ( !$nonblank_count ) { $Kn_first = $Kn }
  64         114  
11339 117 100       324 if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
  60         116  
  60         131  
11340 57         95 $Kn_last_nonblank = $Kn;
11341              
11342             # skip chain of identifier tokens
11343 57         123 my $last_type = $type;
11344 57         88 my $last_is_name = $is_name;
11345 57         97 $type = $rLL->[$Kn]->[_TYPE_];
11346 57 50       158 if ( $type eq '#' ) { $saw_comment = 1; last }
  0         0  
  0         0  
11347 57         102 $is_name = $is_name_type->{$type};
11348 57 100 100     199 next if ( $is_name && $last_is_name );
11349              
11350             # do not count a possible leading - of bareword hash key
11351 48 100 66     154 next if ( $type eq 'm' && !$last_type );
11352              
11353 47         87 $nonblank_count++;
11354 47 100       139 last if ( $nonblank_count > 2 );
11355             }
11356              
11357             # Do not weld across a comment .. fix for c058.
11358 63 50       179 next if ($saw_comment);
11359              
11360             # Patch for b1104: do not weld to a paren preceded by sort/map/grep
11361             # because the special line break rules may cause a blinking state
11362 63 100 100     373 if ( defined($Kn_last_nonblank)
      100        
11363             && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
11364             && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
11365             {
11366 2         9 my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
11367              
11368             # Turn off welding at sort/map/grep (
11369 2 50       21 if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
  0         0  
11370             }
11371              
11372 63         138 my $token_oo = $rLL->[$K_outer_opening]->[_TOKEN_];
11373              
11374 63 50 100     562 if (
      66        
      66        
      100        
      100        
      100        
      66        
      66        
      33        
      100        
11375              
11376             # 1: adjacent opening containers, like: do {{
11377             $nonblank_count == 1
11378              
11379             # 2. anonymous sub + prototype or sig: )->then( sub ($code) {
11380             # ... but it seems best not to stack two structural blocks, like
11381             # this
11382             # sub make_anon_with_my_sub { sub {
11383             # because it probably hides the structure a little too much.
11384             || ( $inner_blocktype
11385             && $inner_blocktype eq 'sub'
11386             && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
11387             && !$outer_blocktype )
11388              
11389             # 3. short item following opening paren, like: fun( yyy (
11390             || $nonblank_count == 2 && $token_oo eq '('
11391              
11392             # 4. weld around fat commas, if requested (git #108), such as
11393             # elf->call_method( method_name_foo => {
11394             || ( $type eq '=>'
11395             && $nonblank_count <= 3
11396             && %weld_fat_comma_rules
11397             && $weld_fat_comma_rules{$token_oo} )
11398             )
11399             {
11400 57         194 push @nested_pairs,
11401             [ $inner_seqno, $outer_seqno, $K_inner_closing ];
11402             }
11403 63         142 next;
11404             }
11405              
11406             # The weld routine expects the pairs in order in the form
11407             # [$seqno_inner, $seqno_outer]
11408             # And they must be in the same order as the inner closing tokens
11409             # (otherwise, welds of three or more adjacent tokens will not work). The K
11410             # value of this inner closing token has temporarily been stored for
11411             # sorting.
11412             @nested_pairs =
11413              
11414             # Drop the K index after sorting (it would cause trouble downstream)
11415 57         232 map { [ $_->[0], $_->[1] ] }
11416              
11417             # Sort on the K values
11418 23         319 sort { $a->[2] <=> $b->[2] } @nested_pairs;
  48         164  
11419              
11420 23         120 return \@nested_pairs;
11421             } ## end sub find_nested_pairs
11422              
11423             sub match_paren_control_flag {
11424              
11425             # Decide if this paren is excluded by user request:
11426             # undef matches no parens
11427             # '*' matches all parens
11428             # 'k' matches only if the previous nonblank token is a perl builtin
11429             # keyword (such as 'if', 'while'),
11430             # 'K' matches if 'k' does not, meaning if the previous token is not a
11431             # keyword.
11432             # 'f' matches if the previous token is a function other than a keyword.
11433             # 'F' matches if 'f' does not.
11434             # 'w' matches if either 'k' or 'f' match.
11435             # 'W' matches if 'w' does not.
11436 6     6 0 16 my ( $self, $seqno, $flag, $rLL ) = @_;
11437              
11438             # Input parameters:
11439             # $seqno = sequence number of the container (should be paren)
11440             # $flag = the flag which defines what matches
11441             # $rLL = an optional alternate token list needed for respace operations
11442 6 50       18 $rLL = $self->[_rLL_] unless ( defined($rLL) );
11443              
11444 6 50       16 return 0 unless ( defined($flag) );
11445 6 50       15 return 0 if $flag eq '0';
11446 6 50       16 return 1 if $flag eq '1';
11447 6 50       20 return 1 if $flag eq '*';
11448 6 50       15 return 0 unless ($seqno);
11449 6         11 my $K_opening = $self->[_K_opening_container_]->{$seqno};
11450 6 50       18 return unless ( defined($K_opening) );
11451              
11452 6         11 my ( $is_f, $is_k, $is_w );
11453 6         19 my $Kp = $self->K_previous_nonblank( $K_opening, $rLL );
11454 6 50       23 if ( defined($Kp) ) {
11455 6         15 my $type_p = $rLL->[$Kp]->[_TYPE_];
11456              
11457             # keyword?
11458 6         13 $is_k = $type_p eq 'k';
11459              
11460             # function call?
11461 6         12 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
11462              
11463             # either keyword or function call?
11464 6   100     25 $is_w = $is_k || $is_f;
11465             }
11466 6         10 my $match;
11467 6 50       23 if ( $flag eq 'k' ) { $match = $is_k }
  0 50       0  
    0          
    0          
    0          
    0          
11468 6         11 elsif ( $flag eq 'K' ) { $match = !$is_k }
11469 0         0 elsif ( $flag eq 'f' ) { $match = $is_f }
11470 0         0 elsif ( $flag eq 'F' ) { $match = !$is_f }
11471 0         0 elsif ( $flag eq 'w' ) { $match = $is_w }
11472 0         0 elsif ( $flag eq 'W' ) { $match = !$is_w }
11473             else {
11474             ## no match
11475             }
11476 6         31 return $match;
11477             } ## end sub match_paren_control_flag
11478              
11479             sub is_excluded_weld {
11480              
11481             # decide if this weld is excluded by user request
11482 35     35 0 76 my ( $self, $KK, $is_leading ) = @_;
11483 35         60 my $rLL = $self->[_rLL_];
11484 35         58 my $rtoken_vars = $rLL->[$KK];
11485 35         63 my $token = $rtoken_vars->[_TOKEN_];
11486 35         63 my $rflags = $weld_nested_exclusion_rules{$token};
11487 35 100       147 return 0 unless ( defined($rflags) );
11488 14 100       46 my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
11489 14 100       52 return 0 unless ( defined($flag) );
11490 10 100       36 return 1 if $flag eq '*';
11491 6         12 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
11492 6         21 return $self->match_paren_control_flag( $seqno, $flag );
11493             } ## end sub is_excluded_weld
11494              
11495             # hashes to simplify welding logic
11496             my %type_ok_after_bareword;
11497             my %has_tight_paren;
11498              
11499             BEGIN {
11500              
11501             # types needed for welding RULE 6
11502 39     39   344 my @q = qw# => -> { ( [ #;
11503 39         354 @type_ok_after_bareword{@q} = (1) x scalar(@q);
11504              
11505             # these types do not 'like' to be separated from a following paren
11506 39         219 @q = qw(w i q Q G C Z U);
11507 39         1992 @{has_tight_paren}{@q} = (1) x scalar(@q);
11508             } ## end BEGIN
11509              
11510 39     39   491 use constant DEBUG_WELD => 0;
  39         141  
  39         266868  
11511              
11512             sub setup_new_weld_measurements {
11513              
11514             # Define quantities to check for excess line lengths when welded.
11515             # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
11516              
11517 55     55 0 140 my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
11518              
11519             # Given indexes of outer and inner opening containers to be welded:
11520             # $Kouter_opening, $Kinner_opening
11521              
11522             # Returns these variables:
11523             # $new_weld_ok = true (new weld ok) or false (do not start new weld)
11524             # $starting_indent = starting indentation
11525             # $starting_lentot = starting cumulative length
11526             # $msg = diagnostic message for debugging
11527              
11528 55         113 my $rLL = $self->[_rLL_];
11529 55         142 my $rlines = $self->[_rlines_];
11530              
11531 55         215 my $starting_level;
11532             my $starting_ci;
11533 55         0 my $starting_lentot;
11534 55         0 my $maximum_text_length;
11535 55         115 my $msg = EMPTY_STRING;
11536              
11537 55         109 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
11538 55         139 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
11539 55         99 my ( $Kfirst, $Klast ) = @{$rK_range};
  55         139  
11540              
11541             #-------------------------------------------------------------------------
11542             # We now define a reference index, '$Kref', from which to start measuring
11543             # This choice turns out to be critical for keeping welds stable during
11544             # iterations, so we go through a number of STEPS...
11545             #-------------------------------------------------------------------------
11546              
11547             # STEP 1: Our starting guess is to use measure from the first token of the
11548             # current line. This is usually a good guess.
11549 55         97 my $Kref = $Kfirst;
11550              
11551             # STEP 2: See if we should go back a little farther
11552 55         154 my $Kprev = $self->K_previous_nonblank($Kfirst);
11553 55 100       182 if ( defined($Kprev) ) {
11554              
11555             # Avoid measuring from between an opening paren and a previous token
11556             # which should stay close to it ... fixes b1185
11557 46         111 my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_];
11558 46         115 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
11559 46 100 100     418 if ( $Kouter_opening == $Kfirst
    50 66        
      33        
11560             && $token_oo eq '('
11561             && $has_tight_paren{$type_prev} )
11562             {
11563 1         3 $Kref = $Kprev;
11564             }
11565              
11566             # Back up and count length from a token like '=' or '=>' if -lp
11567             # is used (this fixes b520)
11568             # ...or if a break is wanted before there
11569             elsif ($rOpts_line_up_parentheses
11570             || $want_break_before{$type_prev} )
11571             {
11572              
11573             # If there are other sequence items between the start of this line
11574             # and the opening token in question, then do not include tokens on
11575             # the previous line in length calculations. This check added to
11576             # fix case b1174 which had a '?' on the line
11577 0   0     0 my $no_previous_seq_item = $Kref == $Kouter_opening
11578             || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
11579              
11580 0 0 0     0 if ( $no_previous_seq_item
11581             && substr( $type_prev, 0, 1 ) eq '=' )
11582             {
11583 0         0 $Kref = $Kprev;
11584              
11585             # Fix for b1144 and b1112: backup to the first nonblank
11586             # character before the =>, or to the start of its line.
11587 0 0       0 if ( $type_prev eq '=>' ) {
11588 0         0 my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
11589 0         0 my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range};
11590 0         0 my ( $Kfirst_prev, $Klast_prev ) = @{$rK_range_prev};
  0         0  
11591 0         0 foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) {
11592 0 0       0 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
11593 0         0 $Kref = $KK;
11594 0         0 last;
11595             }
11596             }
11597             }
11598             }
11599             else {
11600             ## ok
11601             }
11602             }
11603              
11604             # STEP 3: Now look ahead for a ternary and, if found, use it.
11605             # This fixes case b1182.
11606             # Also look for a ')' at the same level and, if found, use it.
11607             # This fixes case b1224.
11608 55 100       184 if ( $Kref < $Kouter_opening ) {
11609 49         120 my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
11610 49         98 my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
11611 49         166 while ( $Knext < $Kouter_opening ) {
11612 14 100       41 if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
11613 8 100 66     54 if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
11614             || $rLL->[$Knext]->[_TOKEN_] eq ')' )
11615             {
11616 4         22 $Kref = $Knext;
11617 4         10 last;
11618             }
11619             }
11620 10         20 $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
11621             }
11622             }
11623              
11624             # Define the starting measurements we will need
11625             $starting_lentot =
11626 55 100       226 $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
11627 55         113 $starting_level = $rLL->[$Kref]->[_LEVEL_];
11628 55         97 $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
11629              
11630 55         131 $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
11631             $starting_ci * $rOpts_continuation_indentation;
11632              
11633             # STEP 4: Switch to using the outer opening token as the reference
11634             # point if a line break before it would make a longer line.
11635             # Fixes case b1055 and is also an alternate fix for b1065.
11636 55         100 my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
11637 55 100       156 if ( $Kref < $Kouter_opening ) {
11638 49         100 my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
11639 49         138 my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
11640 49         104 my $maximum_text_length_oo =
11641             $maximum_text_length_at_level[$starting_level_oo] -
11642             $starting_ci_oo * $rOpts_continuation_indentation;
11643              
11644             # The excess length to any cumulative length K = lenK is either
11645             # $excess = $lenk - ($lentot + $maximum_text_length), or
11646             # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
11647             # so the worst case (maximum excess) corresponds to the configuration
11648             # with minimum value of the sum: $lentot + $maximum_text_length
11649 49 100       140 if ( $lentot_oo + $maximum_text_length_oo <
11650             $starting_lentot + $maximum_text_length )
11651             {
11652 1         1 $Kref = $Kouter_opening;
11653 1         3 $starting_level = $starting_level_oo;
11654 1         2 $starting_ci = $starting_ci_oo;
11655 1         3 $starting_lentot = $lentot_oo;
11656 1         3 $maximum_text_length = $maximum_text_length_oo;
11657             }
11658             }
11659              
11660 55         105 my $new_weld_ok = 1;
11661              
11662             # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The
11663             # combination -wn -lp -dws -naws does not work well and can cause blinkers.
11664             # It will probably only occur in stress testing. For this situation we
11665             # will only start a new weld if we start at a 'good' location.
11666             # - Added 'if' to fix case b1032.
11667             # - Require blank before certain previous characters to fix b1111.
11668             # - Add ';' to fix case b1139
11669             # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
11670             # - relaxed constraints for b1227
11671             # - added skip if type is 'q' for b1349 and b1350 b1351 b1352 b1353
11672             # - added skip if type is 'Q' for b1447
11673 55 0 66     245 if ( $starting_ci
      33        
      33        
      0        
      0        
      0        
11674             && $rOpts_line_up_parentheses
11675             && $rOpts_delete_old_whitespace
11676             && !$rOpts_add_whitespace
11677             && $rLL->[$Kinner_opening]->[_TYPE_] ne 'q'
11678             && $rLL->[$Kinner_opening]->[_TYPE_] ne 'Q'
11679             && defined($Kprev) )
11680             {
11681 0         0 my $type_first = $rLL->[$Kfirst]->[_TYPE_];
11682 0         0 my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
11683 0         0 my $type_prev = $rLL->[$Kprev]->[_TYPE_];
11684 0         0 my $type_pp = 'b';
11685 0 0       0 if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
  0         0  
11686              
11687 0   0     0 my $is_good_location =
11688              
11689             $type_prev =~ /^[\,\.\;]/
11690             || ( $type_prev =~ /^[=\{\[\(\L]/
11691             && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' ) )
11692             || $type_first =~ /^[=\,\.\;\{\[\(\L]/
11693             || $type_first eq '||'
11694             || (
11695             $type_first eq 'k'
11696             && ( $token_first eq 'if'
11697             || $token_first eq 'or' )
11698             );
11699              
11700 0 0       0 if ( !$is_good_location ) {
11701 0         0 $msg =
11702             "Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
11703 0         0 $new_weld_ok = 0;
11704             }
11705             }
11706 55         216 return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
11707             } ## end sub setup_new_weld_measurements
11708              
11709             sub excess_line_length_for_Krange {
11710 10     10 0 31 my ( $self, $Kfirst, $Klast ) = @_;
11711              
11712             # returns $excess_length =
11713             # by how many characters a line composed of tokens $Kfirst .. $Klast will
11714             # exceed the allowed line length
11715              
11716 10         29 my $rLL = $self->[_rLL_];
11717 10 50       56 my $length_before_Kfirst =
11718             $Kfirst <= 0
11719             ? 0
11720             : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
11721              
11722             # backup before a side comment if necessary
11723 10         25 my $Kend = $Klast;
11724 10 50 33     50 if ( $rOpts_ignore_side_comment_lengths
11725             && $rLL->[$Klast]->[_TYPE_] eq '#' )
11726             {
11727 0         0 my $Kprev = $self->K_previous_nonblank($Klast);
11728 0 0 0     0 if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
  0         0  
11729             }
11730              
11731             # get the length of the text
11732 10         27 my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
11733              
11734             # get the size of the text window
11735 10         24 my $level = $rLL->[$Kfirst]->[_LEVEL_];
11736 10         25 my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
11737 10         34 my $max_text_length = $maximum_text_length_at_level[$level] -
11738             $ci_level * $rOpts_continuation_indentation;
11739              
11740 10         21 my $excess_length = $length - $max_text_length;
11741              
11742 10         24 DEBUG_WELD
11743             && print
11744             "Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
11745 10         26 return ($excess_length);
11746             } ## end sub excess_line_length_for_Krange
11747              
11748             sub weld_nested_containers {
11749 23     23 0 86 my ($self) = @_;
11750              
11751             # Called once per file for option '--weld-nested-containers'
11752              
11753 23         87 my $rK_weld_left = $self->[_rK_weld_left_];
11754 23         69 my $rK_weld_right = $self->[_rK_weld_right_];
11755              
11756             # This routine implements the -wn flag by "welding together"
11757             # the nested closing and opening tokens which were previously
11758             # identified by sub 'find_nested_pairs'. "welding" simply
11759             # involves setting certain hash values which will be checked
11760             # later during formatting.
11761              
11762 23         63 my $rLL = $self->[_rLL_];
11763 23         64 my $rlines = $self->[_rlines_];
11764 23         77 my $K_opening_container = $self->[_K_opening_container_];
11765 23         66 my $K_closing_container = $self->[_K_closing_container_];
11766 23         64 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
11767 23         62 my $ris_asub_block = $self->[_ris_asub_block_];
11768 23         59 my $rmax_vertical_tightness = $self->[_rmax_vertical_tightness_];
11769              
11770 23         61 my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
11771              
11772             # Find nested pairs of container tokens for any welding.
11773 23         132 my $rnested_pairs = $self->find_nested_pairs();
11774              
11775             # Return unless there are nested pairs to weld
11776 23 100 66     111 return unless defined($rnested_pairs) && @{$rnested_pairs};
  23         113  
11777              
11778             # NOTE: It would be nice to apply RULE 5 right here by deleting unwanted
11779             # pairs. But it isn't clear if this is possible because we don't know
11780             # which sequences might actually start a weld.
11781              
11782             my $rOpts_break_at_old_method_breakpoints =
11783 22         76 $rOpts->{'break-at-old-method-breakpoints'};
11784              
11785             # This array will hold the sequence numbers of the tokens to be welded.
11786 22         93 my @welds;
11787              
11788             # Variables needed for estimating line lengths
11789             my $maximum_text_length; # maximum spaces available for text
11790 22         0 my $starting_lentot; # cumulative text to start of current line
11791              
11792 22         70 my $iline_outer_opening = -1;
11793 22         55 my $weld_count_this_start = 0;
11794 22         52 my $weld_starts_in_block = 0;
11795              
11796             # OLD: $single_line_tol added to fix cases b1180 b1181
11797             # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
11798             # NEW: $single_line_tol=0 fixes b1212; and b1180-1181 work ok now
11799             # =1 for -vmll and -lp; fixes b1452, b1453, b1454
11800             # NOTE: the combination -vmll and -lp can be unstable, especially when
11801             # also combined with -wn. It may eventually be necessary to turn off -vmll
11802             # if -lp is set. For now, this works. The value '1' is a minimum which
11803             # works but can be increased if necessary.
11804 22 50 33     117 my $single_line_tol =
11805             $rOpts_variable_maximum_line_length && $rOpts_line_up_parentheses
11806             ? 1
11807             : 0;
11808              
11809 22         132 my $multiline_tol = $single_line_tol + 1 +
11810             max( $rOpts_indent_columns, $rOpts_continuation_indentation );
11811              
11812             # Define a welding cutoff level: do not start a weld if the inside
11813             # container level equals or exceeds this level.
11814              
11815             # We use the minimum of two criteria, either of which may be more
11816             # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and
11817             # the 'beta' value is more restrictive in other cases (b1243).
11818             # Reduced beta term from beta+3 to beta+2 to fix b1401. Previously:
11819             # my $weld_cutoff_level = min($stress_level_alpha, $stress_level_beta + 2);
11820             # This is now '$high_stress_level'.
11821              
11822             # The vertical tightness flags can throw off line length calculations.
11823             # This patch was added to fix instability issue b1284.
11824             # It works to always use a tol of 1 for 1 line block length tests, but
11825             # this restricted value keeps test case wn6.wn working as before.
11826             # It may be necessary to include '[' and '{' here in the future.
11827 22 50       88 my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
11828              
11829             # Abbreviations:
11830             # _oo=outer opening, i.e. first of { {
11831             # _io=inner opening, i.e. second of { {
11832             # _oc=outer closing, i.e. second of } {
11833             # _ic=inner closing, i.e. first of } }
11834              
11835 22         55 my $previous_pair;
11836              
11837             # Main loop over nested pairs...
11838             # We are working from outermost to innermost pairs so that
11839             # level changes will be complete when we arrive at the inner pairs.
11840 22         68 while ( my $item = pop( @{$rnested_pairs} ) ) {
  79         300  
11841 57         100 my ( $inner_seqno, $outer_seqno ) = @{$item};
  57         153  
11842              
11843 57         126 my $Kouter_opening = $K_opening_container->{$outer_seqno};
11844 57         116 my $Kinner_opening = $K_opening_container->{$inner_seqno};
11845 57         116 my $Kouter_closing = $K_closing_container->{$outer_seqno};
11846 57         139 my $Kinner_closing = $K_closing_container->{$inner_seqno};
11847              
11848             # RULE: do not weld if inner container has <= 3 tokens unless the next
11849             # token is a heredoc (so we know there will be multiple lines)
11850 57 100       172 if ( $Kinner_closing - $Kinner_opening <= 4 ) {
11851 4         24 my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
11852 4 50       22 next unless defined($Knext_nonblank);
11853 4         16 my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
11854 4 50       21 next unless ( $type eq 'h' );
11855             }
11856              
11857 53         135 my $outer_opening = $rLL->[$Kouter_opening];
11858 53         102 my $inner_opening = $rLL->[$Kinner_opening];
11859 53         103 my $outer_closing = $rLL->[$Kouter_closing];
11860 53         97 my $inner_closing = $rLL->[$Kinner_closing];
11861              
11862             # RULE: do not weld to a hash brace. The reason is that it has a very
11863             # strong bond strength to the next token, so a line break after it
11864             # may not work. Previously we allowed welding to something like @{
11865             # but that caused blinking states (cases b751, b779).
11866 53 100       166 if ( $inner_opening->[_TYPE_] eq 'L' ) {
11867 1         5 next;
11868             }
11869              
11870             # RULE: do not weld to a square bracket which does not contain commas
11871 52 50       153 if ( $inner_opening->[_TYPE_] eq '[' ) {
11872 0         0 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
11873 0 0 0     0 next unless ( $rtype_count && $rtype_count->{','} );
11874              
11875             # Do not weld if there is text before a '[' such as here:
11876             # curr_opt ( @beg [2,5] )
11877             # It will not break into the desired sandwich structure.
11878             # This fixes case b109, 110.
11879 0         0 my $Kdiff = $Kinner_opening - $Kouter_opening;
11880 0 0       0 next if ( $Kdiff > 2 );
11881             next
11882 0 0 0     0 if ( $Kdiff == 2
11883             && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
11884              
11885             }
11886              
11887             # RULE: Avoid welding under stress. The idea is that we need to have a
11888             # little space* within a welded container to avoid instability. Note
11889             # that after each weld the level values are reduced, so long multiple
11890             # welds can still be made. This rule will seldom be a limiting factor
11891             # in actual working code. Fixes b1206, b1243.
11892 52         116 my $inner_level = $inner_opening->[_LEVEL_];
11893 52 50       156 if ( $inner_level >= $high_stress_level ) { next }
  0         0  
11894              
11895             # Set flag saying if this pair starts a new weld
11896 52   100     273 my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
11897              
11898             # Set flag saying if this pair is adjacent to the previous nesting pair
11899             # (even if previous pair was rejected as a weld)
11900 52   100     233 my $touch_previous_pair =
11901             defined($previous_pair) && $outer_seqno == $previous_pair->[0];
11902 52         120 $previous_pair = $item;
11903              
11904 52         95 my $do_not_weld_rule = 0;
11905 52         96 my $Msg = EMPTY_STRING;
11906 52         87 my $is_one_line_weld;
11907              
11908 52         103 my $iline_oo = $outer_opening->[_LINE_INDEX_];
11909 52         105 my $iline_io = $inner_opening->[_LINE_INDEX_];
11910 52         102 my $iline_ic = $inner_closing->[_LINE_INDEX_];
11911 52         102 my $iline_oc = $outer_closing->[_LINE_INDEX_];
11912 52         122 my $token_oo = $outer_opening->[_TOKEN_];
11913 52         106 my $token_io = $inner_opening->[_TOKEN_];
11914              
11915             # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
11916             # Added for case b973. Moved here from below to fix b1423.
11917 52 50 66     338 if ( !$do_not_weld_rule
      66        
11918             && $rOpts_break_at_old_method_breakpoints
11919             && $iline_io > $iline_oo )
11920             {
11921              
11922 0         0 foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
11923 0         0 my $rK_range = $rlines->[$iline]->{_rK_range};
11924 0 0       0 next unless defined($rK_range);
11925 0         0 my ( $Kfirst, $Klast ) = @{$rK_range};
  0         0  
11926 0 0       0 next unless defined($Kfirst);
11927 0 0       0 if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
11928 0         0 $do_not_weld_rule = 7;
11929 0         0 last;
11930             }
11931             }
11932             }
11933 52 50       142 next if ($do_not_weld_rule);
11934              
11935             # Turn off vertical tightness at possible one-line welds. Fixes b1402,
11936             # b1419, b1421, b1424, b1425. This also fixes issues b1338, b1339,
11937             # b1340, b1341, b1342, b1343, which previously used a separate fix.
11938             # Issue c161 is the latest and simplest check, using
11939             # $iline_ic==$iline_io as the test.
11940 52 50 66     268 if ( %opening_vertical_tightness
      66        
11941             && $iline_ic == $iline_io
11942             && $opening_vertical_tightness{$token_oo} )
11943             {
11944 0         0 $rmax_vertical_tightness->{$outer_seqno} = 0;
11945             }
11946              
11947 52   100     281 my $is_multiline_weld =
11948             $iline_oo == $iline_io
11949             && $iline_ic == $iline_oc
11950             && $iline_io != $iline_ic;
11951              
11952 52         82 if (DEBUG_WELD) {
11953             my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
11954             my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
11955             $Msg .= <<EOM;
11956             Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
11957             Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
11958             tokens '$token_oo' .. '$token_io'
11959             EOM
11960             }
11961              
11962             # DO-NOT-WELD RULE 0:
11963             # Avoid a new paren-paren weld if inner parens are 'sheared' (separated
11964             # by one line). This can produce instabilities (fixes b1250 b1251
11965             # 1256).
11966 52 0 66     261 if ( !$is_multiline_weld
      33        
      33        
11967             && $iline_ic == $iline_io + 1
11968             && $token_oo eq '('
11969             && $token_io eq '(' )
11970             {
11971 0         0 if (DEBUG_WELD) {
11972             $Msg .= "RULE 0: Not welding due to sheared inner parens\n";
11973             print {*STDOUT} $Msg;
11974             }
11975 0         0 next;
11976             }
11977              
11978             # If this pair is not adjacent to the previous pair (skipped or not),
11979             # then measure lengths from the start of line of oo.
11980 52 100 33     207 if (
      66        
11981             !$touch_previous_pair
11982              
11983             # Also do this if restarting at a new line; fixes case b965, s001
11984             || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
11985             )
11986             {
11987              
11988             # Remember the line we are using as a reference
11989 48         98 $iline_outer_opening = $iline_oo;
11990 48         85 $weld_count_this_start = 0;
11991 48         93 $weld_starts_in_block = 0;
11992              
11993 48         212 ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
11994             = $self->setup_new_weld_measurements( $Kouter_opening,
11995             $Kinner_opening );
11996              
11997 48 0 0     152 if (
      33        
11998             !$new_weld_ok
11999             && ( $iline_oo != $iline_io
12000             || $iline_ic != $iline_oc )
12001             )
12002             {
12003 0         0 if (DEBUG_WELD) { print {*STDOUT} $msg }
12004 0         0 next;
12005             }
12006              
12007 48         119 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
12008 48         87 my ( $Kfirst, $Klast ) = @{$rK_range};
  48         120  
12009              
12010             # An existing one-line weld is a line in which
12011             # (1) the containers are all on one line, and
12012             # (2) the line does not exceed the allowable length
12013 48 100       153 if ( $iline_oo == $iline_oc ) {
12014              
12015             # All the tokens are on one line, now check their length.
12016             # Start with the full line index range. We will reduce this
12017             # in the coding below in some cases.
12018 4         15 my $Kstart = $Kfirst;
12019 4         18 my $Kstop = $Klast;
12020              
12021             # Note that the following minimal choice for measuring will
12022             # work and will not cause any instabilities because it is
12023             # invariant:
12024              
12025             ## my $Kstart = $Kouter_opening;
12026             ## my $Kstop = $Kouter_closing;
12027              
12028             # But that can lead to some undesirable welds. So a little
12029             # more complicated method has been developed.
12030              
12031             # We are trying to avoid creating bad two-line welds when we are
12032             # working on long, previously un-welded input text, such as
12033              
12034             # INPUT (example of a long input line weld candidate):
12035             ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
12036              
12037             # GOOD two-line break: (not welded; result marked too long):
12038             ## $mutation->transpos(
12039             ## $self->RNA->position($mutation->label, $atg_label));
12040              
12041             # BAD two-line break: (welded; result if we weld):
12042             ## $mutation->transpos($self->RNA->position(
12043             ## $mutation->label, $atg_label));
12044              
12045             # We can only get an approximate estimate of the final length,
12046             # since the line breaks may change, and for -lp mode because
12047             # even the indentation is not yet known.
12048              
12049 4         9 my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
12050 4         11 my $level_last = $rLL->[$Klast]->[_LEVEL_];
12051 4         10 my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
12052 4         12 my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_];
12053              
12054             # - measure to the end of the original line if balanced
12055             # - measure to the closing container if unbalanced (fixes b1230)
12056             #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
12057 4 100       14 if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
  1         3  
12058              
12059             # - measure from the start of the original line if balanced
12060             # - measure from the most previous token with same level
12061             # if unbalanced (b1232)
12062 4 100 100     38 if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
12063 1         3 $Kstart = $Kouter_opening;
12064              
12065 1         6 foreach
12066             my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) )
12067             {
12068 1 50       5 next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
12069 1 50       4 last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
12070 0         0 $Kstart = $KK;
12071             }
12072             }
12073              
12074 4         22 my $excess =
12075             $self->excess_line_length_for_Krange( $Kstart, $Kstop );
12076              
12077             # Coding simplified here for case b1219.
12078             # Increased tol from 0 to 1 when pvt>0 to fix b1284.
12079 4         10 $is_one_line_weld = $excess <= $one_line_tol;
12080             }
12081              
12082             # DO-NOT-WELD RULE 1:
12083             # Do not weld something that looks like the start of a two-line
12084             # function call, like this: <<snippets/wn6.in>>
12085             # $trans->add_transformation(
12086             # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
12087             # We will look for a semicolon after the closing paren.
12088              
12089             # We want to weld something complex, like this though
12090             # my $compass = uc( opposite_direction( line_to_canvas_direction(
12091             # @{ $coords[0] }, @{ $coords[1] } ) ) );
12092             # Otherwise we will get a 'blinker'. For example, the following
12093             # would become a blinker without this rule:
12094             # $Self->_Add( $SortOrderDisplay{ $Field
12095             # ->GenerateFieldForSelectSQL() } );
12096             # But it is okay to weld a two-line statement if it looks like
12097             # it was already welded, meaning that the two opening containers are
12098             # on a different line that the two closing containers. This is
12099             # necessary to prevent blinking of something like this with
12100             # perltidy -wn -pbp (starting indentation two levels deep):
12101              
12102             # $top_label->set_text( gettext(
12103             # "Unable to create personal directory - check permissions.") );
12104 48 100 100     217 if ( $iline_oc == $iline_oo + 1
      66        
12105             && $iline_io == $iline_ic
12106             && $token_oo eq '(' )
12107             {
12108              
12109             # Look for following semicolon...
12110 1         6 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
12111 1 50       6 my $next_nonblank_type =
12112             defined($Knext_nonblank)
12113             ? $rLL->[$Knext_nonblank]->[_TYPE_]
12114             : 'b';
12115 1 50       4 if ( $next_nonblank_type eq ';' ) {
12116              
12117             # Then do not weld if no other containers between inner
12118             # opening and closing.
12119 1         2 my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
12120 1 50       5 if ( $Knext_seq_item == $Kinner_closing ) {
12121 0         0 $do_not_weld_rule = 1;
12122             }
12123             }
12124             }
12125             } ## end starting new weld sequence
12126              
12127             else {
12128              
12129             # set the 1-line flag if continuing a weld sequence; fixes b1239
12130 4         10 $is_one_line_weld = ( $iline_oo == $iline_oc );
12131             }
12132              
12133             # DO-NOT-WELD RULE 2:
12134             # Do not weld an opening paren to an inner one line brace block
12135             # We will just use old line numbers for this test and require
12136             # iterations if necessary for convergence
12137              
12138             # For example, otherwise we could cause the opening paren
12139             # in the following example to separate from the caller name
12140             # as here:
12141              
12142             # $_[0]->code_handler
12143             # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
12144              
12145             # Here is another example where we do not want to weld:
12146             # $wrapped->add_around_modifier(
12147             # sub { push @tracelog => 'around 1'; $_[0]->(); } );
12148              
12149             # If the one line sub block gets broken due to length or by the
12150             # user, then we can weld. The result will then be:
12151             # $wrapped->add_around_modifier( sub {
12152             # push @tracelog => 'around 1';
12153             # $_[0]->();
12154             # } );
12155              
12156             # Updated to fix cases b1082 b1102 b1106 b1115:
12157             # Also, do not weld to an intact inner block if the outer opening token
12158             # is on a different line. For example, this prevents oscillation
12159             # between these two states in case b1106:
12160              
12161             # return map{
12162             # ($_,[$self->$_(@_[1..$#_])])
12163             # }@every;
12164              
12165             # return map { (
12166             # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
12167             # ) } @every;
12168              
12169             # The effect of this change on typical code is very minimal. Sometimes
12170             # it may take a second iteration to converge, but this gives protection
12171             # against blinking.
12172 52 100 66     365 if ( !$do_not_weld_rule
      100        
12173             && !$is_one_line_weld
12174             && $iline_ic == $iline_io )
12175             {
12176 6 50 66     35 $do_not_weld_rule = 2
12177             if ( $token_oo eq '(' || $iline_oo != $iline_io );
12178             }
12179              
12180             # DO-NOT-WELD RULE 2A:
12181             # Do not weld an opening asub brace in -lp mode if -asbl is set. This
12182             # helps avoid instabilities in one-line block formation, and fixes
12183             # b1241. Previously, the '$is_one_line_weld' flag was tested here
12184             # instead of -asbl, and this fixed most cases. But it turns out that
12185             # the real problem was the -asbl flag, and switching to this was
12186             # necessary to fixe b1268. This also fixes b1269, b1277, b1278.
12187 52 0 66     242 if ( !$do_not_weld_rule
      33        
      33        
12188             && $rOpts_line_up_parentheses
12189             && $rOpts_asbl
12190             && $ris_asub_block->{$outer_seqno} )
12191             {
12192 0         0 $do_not_weld_rule = '2A';
12193             }
12194              
12195             # DO-NOT-WELD RULE 3:
12196             # Do not weld if this makes our line too long.
12197             # Use a tolerance which depends on if the old tokens were welded
12198             # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
12199 52 100       144 if ( !$do_not_weld_rule ) {
12200              
12201             # Measure to a little beyond the inner opening token if it is
12202             # followed by a bare word, which may have unusual line break rules.
12203              
12204             # NOTE: Originally this was OLD RULE 6: do not weld to a container
12205             # which is followed on the same line by an unknown bareword token.
12206             # This can cause blinkers (cases b626, b611). But OK to weld one
12207             # line welds to fix cases b1057 b1064. For generality, OLD RULE 6
12208             # has been merged into RULE 3 here to also fix cases b1078 b1091.
12209              
12210 46         81 my $K_for_length = $Kinner_opening;
12211 46         154 my $Knext_io = $self->K_next_nonblank($Kinner_opening);
12212 46 50       148 next unless ( defined($Knext_io) ); # shouldn't happen
12213 46         135 my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
12214              
12215             # Note: may need to eventually also include other types here,
12216             # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
12217 46 100       138 if ( $type_io_next eq 'w' ) {
12218 7         29 my $Knext_io2 = $self->K_next_nonblank($Knext_io);
12219 7 50       43 next unless ( defined($Knext_io2) );
12220 7         30 my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
12221 7 50       34 if ( !$type_ok_after_bareword{$type_io_next2} ) {
12222 0         0 $K_for_length = $Knext_io2;
12223             }
12224             }
12225              
12226             # Use a tolerance for welds over multiple lines to avoid blinkers.
12227             # We can use zero tolerance if it looks like we are working on an
12228             # existing weld.
12229 46 100 100     200 my $tol =
12230             $is_one_line_weld || $is_multiline_weld
12231             ? $single_line_tol
12232             : $multiline_tol;
12233              
12234             # By how many characters does this exceed the text window?
12235 46         155 my $excess =
12236             $self->cumulative_length_before_K($K_for_length) -
12237             $starting_lentot + 1 + $tol -
12238             $maximum_text_length;
12239              
12240             # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
12241             # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
12242             # Revised patch: New tolerance definition allows going back to '> 0'
12243             # here. This fixes case b1124. See also cases b1087 and b1087a.
12244 46 50       144 if ( $excess > 0 ) { $do_not_weld_rule = 3 }
  0         0  
12245              
12246 46         75 if (DEBUG_WELD) {
12247             $Msg .=
12248             "RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
12249             }
12250             }
12251              
12252             # DO-NOT-WELD RULE 4; implemented for git#10:
12253             # Do not weld an opening -ce brace if the next container is on a single
12254             # line, different from the opening brace. (This is very rare). For
12255             # example, given the following with -ce, we will avoid joining the {
12256             # and [
12257              
12258             # } else {
12259             # [ $_, length($_) ]
12260             # }
12261              
12262             # because this would produce a terminal one-line block:
12263              
12264             # } else { [ $_, length($_) ] }
12265              
12266             # which may not be what is desired. But given this input:
12267              
12268             # } else { [ $_, length($_) ] }
12269              
12270             # then we will do the weld and retain the one-line block
12271 52 100 100     284 if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
12272 2         5 my $block_type = $rblock_type_of_seqno->{$outer_seqno};
12273 2 100 66     10 if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
12274 1         2 my $io_line = $inner_opening->[_LINE_INDEX_];
12275 1         2 my $ic_line = $inner_closing->[_LINE_INDEX_];
12276 1         3 my $oo_line = $outer_opening->[_LINE_INDEX_];
12277 1 50 33     5 if ( $oo_line < $io_line && $ic_line == $io_line ) {
12278 0         0 $do_not_weld_rule = 4;
12279             }
12280             }
12281             }
12282              
12283             # DO-NOT-WELD RULE 5: do not include welds excluded by user
12284 52 100 100     303 if (
      100        
      100        
12285             !$do_not_weld_rule
12286             && %weld_nested_exclusion_rules
12287             && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
12288             || $self->is_excluded_weld( $Kinner_opening, 0 ) )
12289             )
12290             {
12291 6         13 $do_not_weld_rule = 5;
12292             }
12293              
12294             # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
12295              
12296 52 100       208 if ($do_not_weld_rule) {
    100          
12297              
12298             # After neglecting a pair, we start measuring from start of point
12299             # io ... but not if previous type does not like to be separated
12300             # from its container (fixes case b1184)
12301 12         40 my $Kprev = $self->K_previous_nonblank($Kinner_opening);
12302 12 50       62 my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
12303 12 100       54 if ( !$has_tight_paren{$type_prev} ) {
12304 11         23 my $starting_level = $inner_opening->[_LEVEL_];
12305 11         24 my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
12306 11         40 $starting_lentot =
12307             $self->cumulative_length_before_K($Kinner_opening);
12308 11         31 $maximum_text_length =
12309             $maximum_text_length_at_level[$starting_level] -
12310             $starting_ci_level * $rOpts_continuation_indentation;
12311             }
12312              
12313 12         23 if (DEBUG_WELD) {
12314             $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
12315             print {*STDOUT} $Msg;
12316             }
12317              
12318             # Normally, a broken pair should not decrease indentation of
12319             # intermediate tokens:
12320             ## if ( $last_pair_broken ) { next }
12321             # However, for long strings of welded tokens, such as '{{{{{{...'
12322             # we will allow broken pairs to also remove indentation.
12323             # This will keep very long strings of opening and closing
12324             # braces from marching off to the right. We will do this if the
12325             # number of tokens in a weld before the broken weld is 4 or more.
12326             # This rule will mainly be needed for test scripts, since typical
12327             # welds have fewer than about 4 welded tokens.
12328 12 50 66     43 if ( !@welds || @{ $welds[-1] } < 4 ) { next }
  7         24  
  12         39  
12329             }
12330              
12331             # otherwise start new weld ...
12332             elsif ($starting_new_weld) {
12333 36         65 $weld_count_this_start++;
12334 36         68 if (DEBUG_WELD) {
12335             $Msg .= "Starting new weld\n";
12336             print {*STDOUT} $Msg;
12337             }
12338 36         85 push @welds, $item;
12339              
12340 36         127 my $parent_seqno = $self->parent_seqno_by_K($Kouter_closing);
12341             $weld_starts_in_block = $parent_seqno == SEQ_ROOT
12342 36   100     142 || $rblock_type_of_seqno->{$parent_seqno};
12343              
12344 36         128 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
12345 36         142 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
12346              
12347 36         123 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
12348 36         130 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
12349             }
12350              
12351             # ... or extend current weld
12352             else {
12353 4         11 $weld_count_this_start++;
12354 4         11 if (DEBUG_WELD) {
12355             $Msg .= "Extending current weld\n";
12356             print {*STDOUT} $Msg;
12357             }
12358 4         16 unshift @{ $welds[-1] }, $inner_seqno;
  4         18  
12359 4         15 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
12360 4         31 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
12361              
12362 4         12 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
12363 4         9 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
12364              
12365             # Keep a broken container broken at multiple welds. This might
12366             # also be useful for simple welds, but for now it is restricted
12367             # to multiple welds to minimize changes to existing coding. This
12368             # fixes b1429, b1430. Updated for issue c198: but allow a
12369             # line differences of 1 (simple shear) so that a simple shear
12370             # can remain or become a single line.
12371 4 100       21 if ( $iline_ic - $iline_io > 1 ) {
12372              
12373             # Only set this break if it is the last possible weld in this
12374             # chain. This will keep some extreme test cases unchanged.
12375 3   100     6 my $is_chain_end = !@{$rnested_pairs}
12376             || $rnested_pairs->[-1]->[1] != $inner_seqno;
12377 3 100       10 if ($is_chain_end) {
12378 2         8 $self->[_rbreak_container_]->{$inner_seqno} = 1;
12379             }
12380             }
12381             }
12382              
12383             # After welding, reduce the indentation level if all intermediate tokens
12384 40         101 my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
12385 40 50       122 if ( $dlevel != 0 ) {
12386 40         81 my $Kstart = $Kinner_opening;
12387 40         79 my $Kstop = $Kinner_closing;
12388 40         114 foreach my $KK ( $Kstart .. $Kstop ) {
12389 1143         1745 $rLL->[$KK]->[_LEVEL_] += $dlevel;
12390             }
12391              
12392             # Copy opening ci level to help break at = for -lp mode (case b1124)
12393 40         186 $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
12394             $rLL->[$Kouter_opening]->[_CI_LEVEL_];
12395              
12396             # But only copy the closing ci level if the outer container is
12397             # in a block; otherwise poor results can be produced.
12398 40 100       196 if ($weld_starts_in_block) {
12399 39         147 $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
12400             $rLL->[$Kouter_closing]->[_CI_LEVEL_];
12401             }
12402             }
12403             }
12404              
12405 22         110 return;
12406             } ## end sub weld_nested_containers
12407              
12408             sub weld_nested_quotes {
12409              
12410             # Called once per file for option '--weld-nested-containers'. This
12411             # does welding on qw quotes.
12412              
12413 23     23 0 67 my $self = shift;
12414              
12415             # See if quotes are excluded from welding
12416 23         77 my $rflags = $weld_nested_exclusion_rules{'q'};
12417 23 100 66     119 return if ( defined($rflags) && defined( $rflags->[1] ) );
12418              
12419 22         71 my $rK_weld_left = $self->[_rK_weld_left_];
12420 22         62 my $rK_weld_right = $self->[_rK_weld_right_];
12421              
12422 22         77 my $rLL = $self->[_rLL_];
12423 22 50 33     115 return unless ( defined($rLL) && @{$rLL} );
  22         101  
12424 22         81 my $Num = @{$rLL};
  22         73  
12425              
12426 22         68 my $K_opening_container = $self->[_K_opening_container_];
12427 22         54 my $K_closing_container = $self->[_K_closing_container_];
12428 22         56 my $rlines = $self->[_rlines_];
12429              
12430 22         56 my $starting_lentot;
12431             my $maximum_text_length;
12432              
12433             my $is_single_quote = sub {
12434 7     7   26 my ( $Kbeg, $Kend, $quote_type ) = @_;
12435 7         26 foreach my $K ( $Kbeg .. $Kend ) {
12436 71         116 my $test_type = $rLL->[$K]->[_TYPE_];
12437 71 100       157 next if ( $test_type eq 'b' );
12438 32 50       79 return if ( $test_type ne $quote_type );
12439             }
12440 7         44 return 1;
12441 22         204 };
12442              
12443             # Length tolerance - same as previously used for sub weld_nested
12444 22         126 my $multiline_tol =
12445             1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
12446              
12447             # look for single qw quotes nested in containers
12448 22         85 my $KNEXT = $self->[_K_first_seq_item_];
12449 22         102 while ( defined($KNEXT) ) {
12450 468         606 my $KK = $KNEXT;
12451 468         700 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
12452 468         599 my $rtoken_vars = $rLL->[$KK];
12453 468         653 my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
12454 468 50       823 if ( !$outer_seqno ) {
12455 0 0       0 next if ( $KK == 0 ); # first token in file may not be container
12456              
12457             # A fault here implies that an error was made in the little loop at
12458             # the bottom of sub 'respace_tokens' which set the values of
12459             # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
12460             # loop control lines above.
12461 0         0 Fault("sequence = $outer_seqno not defined at K=$KK")
12462             if (DEVEL_MODE);
12463 0         0 next;
12464             }
12465              
12466 468         652 my $token = $rtoken_vars->[_TOKEN_];
12467 468 100       1033 if ( $is_opening_token{$token} ) {
12468              
12469             # see if the next token is a quote of some type
12470 230         331 my $Kn = $KK + 1;
12471 230 100 66     826 $Kn += 1
12472             if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
12473 230 50       433 next if ( $Kn >= $Num );
12474              
12475 230         370 my $next_token = $rLL->[$Kn]->[_TOKEN_];
12476 230         349 my $next_type = $rLL->[$Kn]->[_TYPE_];
12477             next
12478 230 100 100     1055 unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
      100        
12479             && substr( $next_token, 0, 1 ) eq 'q' );
12480              
12481             # The token before the closing container must also be a quote
12482 7         38 my $Kouter_closing = $K_closing_container->{$outer_seqno};
12483 7         43 my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
12484 7 50       59 next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
12485              
12486             # This is an inner opening container
12487 7         19 my $Kinner_opening = $Kn;
12488              
12489             # Do not weld to single-line quotes. Nothing is gained, and it may
12490             # look bad.
12491 7 50       32 next if ( $Kinner_closing == $Kinner_opening );
12492              
12493             # Only weld to quotes delimited with container tokens. This is
12494             # because welding to arbitrary quote delimiters can produce code
12495             # which is less readable than without welding.
12496 7         27 my $closing_delimiter =
12497             substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
12498             next
12499 7 50 33     40 unless ( $is_closing_token{$closing_delimiter}
12500             || $closing_delimiter eq '>' );
12501              
12502             # Now make sure that there is just a single quote in the container
12503             next
12504             unless (
12505 7 50       33 $is_single_quote->(
12506             $Kinner_opening + 1,
12507             $Kinner_closing - 1,
12508             $next_type
12509             )
12510             );
12511              
12512             # OK: This is a candidate for welding
12513 7         21 my $Msg = EMPTY_STRING;
12514 7         22 my $do_not_weld;
12515              
12516 7         22 my $Kouter_opening = $K_opening_container->{$outer_seqno};
12517 7         21 my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
12518 7         61 my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
12519 7         23 my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
12520 7         21 my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
12521 7   66     38 my $is_old_weld =
12522             ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
12523              
12524             # Fix for case b1189. If quote is marked as type 'Q' then only weld
12525             # if the two closing tokens are on the same input line. Otherwise,
12526             # the closing line will be output earlier in the pipeline than
12527             # other CODE lines and welding will not actually occur. This will
12528             # leave a half-welded structure with potential formatting
12529             # instability. This might be fixed by adding a check for a weld on
12530             # a closing Q token and sending it down the normal channel, but it
12531             # would complicate the code and is potentially risky.
12532             next
12533 7 50 66     48 if (!$is_old_weld
      33        
12534             && $next_type eq 'Q'
12535             && $iline_ic != $iline_oc );
12536              
12537             # If welded, the line must not exceed allowed line length
12538 7         30 ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
12539             = $self->setup_new_weld_measurements( $Kouter_opening,
12540             $Kinner_opening );
12541 7 50       48 if ( !$ok_to_weld ) {
12542 0         0 if (DEBUG_WELD) { print {*STDOUT} $msg }
12543 0         0 next;
12544             }
12545              
12546 7         30 my $length =
12547             $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
12548 7         22 my $excess = $length + $multiline_tol - $maximum_text_length;
12549              
12550 7 100       34 my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
12551 7 50       26 if ( $excess >= $excess_max ) {
12552 0         0 $do_not_weld = 1;
12553             }
12554              
12555 7         19 if (DEBUG_WELD) {
12556             if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING }
12557             $Msg .=
12558             "excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
12559             }
12560              
12561             # Check weld exclusion rules for outer container
12562 7 50       30 if ( !$do_not_weld ) {
12563 7         25 my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
12564 7 100       55 if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
12565 1         3 if (DEBUG_WELD) {
12566             $Msg .=
12567             "No qw weld due to weld exclusion rules for outer container\n";
12568             }
12569 1         3 $do_not_weld = 1;
12570             }
12571             }
12572              
12573             # Check the length of the last line (fixes case b1039)
12574 7 100       38 if ( !$do_not_weld ) {
12575 6         30 my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
12576 6         16 my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
  6         22  
12577 6         29 my $excess_ic =
12578             $self->excess_line_length_for_Krange( $Kfirst_ic,
12579             $Kouter_closing );
12580              
12581             # Allow extra space for additional welded closing container(s)
12582             # and a space and comma or semicolon.
12583             # NOTE: weld len has not been computed yet. Use 2 spaces
12584             # for now, correct for a single weld. This estimate could
12585             # be made more accurate if necessary.
12586             my $weld_len =
12587 6 100       37 defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
12588 6 50       33 if ( $excess_ic + $weld_len + 2 > 0 ) {
12589 0         0 if (DEBUG_WELD) {
12590             $Msg .=
12591             "No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
12592             }
12593 0         0 $do_not_weld = 1;
12594             }
12595             }
12596              
12597 7 100       31 if ($do_not_weld) {
12598 1         2 if (DEBUG_WELD) {
12599             $Msg .= "Not Welding QW\n";
12600             print {*STDOUT} $Msg;
12601             }
12602 1         4 next;
12603             }
12604              
12605             # OK to weld
12606 6         13 if (DEBUG_WELD) {
12607             $Msg .= "Welding QW\n";
12608             print {*STDOUT} $Msg;
12609             }
12610              
12611 6         20 $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
12612 6         22 $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
12613              
12614 6         25 $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
12615 6         21 $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
12616              
12617             # Undo one indentation level if an extra level was added to this
12618             # multiline quote
12619             my $qw_seqno =
12620 6         18 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
12621 6 50 33     54 if ( $qw_seqno
12622             && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
12623             {
12624 0         0 foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
12625 0         0 $rLL->[$K]->[_LEVEL_] -= 1;
12626             }
12627 0         0 $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
12628 0         0 $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
12629             }
12630              
12631             # undo CI for other welded quotes
12632             else {
12633              
12634 6         26 foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
12635 74         118 $rLL->[$K]->[_CI_LEVEL_] = 0;
12636             }
12637             }
12638              
12639             # Change the level of a closing qw token to be that of the outer
12640             # containing token. This will allow -lp indentation to function
12641             # correctly in the vertical aligner.
12642             # Patch to fix c002: but not if it contains text
12643 6 50       90 if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
12644 6         37 $rLL->[$Kinner_closing]->[_LEVEL_] =
12645             $rLL->[$Kouter_closing]->[_LEVEL_];
12646             }
12647             }
12648             }
12649 22         332 return;
12650             } ## end sub weld_nested_quotes
12651              
12652             sub is_welded_at_seqno {
12653              
12654 83     83 0 213 my ( $self, $seqno ) = @_;
12655              
12656             # given a sequence number:
12657             # return true if it is welded either left or right
12658             # return false otherwise
12659 83 50 33     427 return unless ( $total_weld_count && defined($seqno) );
12660 83         215 my $KK_o = $self->[_K_opening_container_]->{$seqno};
12661 83 50       221 return unless defined($KK_o);
12662             return defined( $self->[_rK_weld_left_]->{$KK_o} )
12663 83   100     567 || defined( $self->[_rK_weld_right_]->{$KK_o} );
12664             } ## end sub is_welded_at_seqno
12665              
12666             sub mark_short_nested_blocks {
12667              
12668             # This routine looks at the entire file and marks any short nested blocks
12669             # which should not be broken. The results are stored in the hash
12670             # $rshort_nested->{$type_sequence}
12671             # which will be true if the container should remain intact.
12672             #
12673             # For example, consider the following line:
12674              
12675             # sub cxt_two { sort { $a <=> $b } test_if_list() }
12676              
12677             # The 'sort' block is short and nested within an outer sub block.
12678             # Normally, the existence of the 'sort' block will force the sub block to
12679             # break open, but this is not always desirable. Here we will set a flag for
12680             # the sort block to prevent this. To give the user control, we will
12681             # follow the input file formatting. If either of the blocks is broken in
12682             # the input file then we will allow it to remain broken. Otherwise we will
12683             # set a flag to keep it together in later formatting steps.
12684              
12685             # The flag which is set here will be checked in two places:
12686             # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
12687              
12688 561     561 0 1339 my $self = shift;
12689 561 100       2202 return if $rOpts->{'indent-only'};
12690              
12691 558         1441 my $rLL = $self->[_rLL_];
12692 558 100 66     2253 return unless ( defined($rLL) && @{$rLL} );
  558         2026  
12693              
12694 554 100       2316 return unless ( $rOpts->{'one-line-block-nesting'} );
12695              
12696 1         3 my $K_opening_container = $self->[_K_opening_container_];
12697 1         2 my $K_closing_container = $self->[_K_closing_container_];
12698 1         3 my $rbreak_container = $self->[_rbreak_container_];
12699 1         2 my $ris_broken_container = $self->[_ris_broken_container_];
12700 1         2 my $rshort_nested = $self->[_rshort_nested_];
12701 1         2 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
12702              
12703             # Variables needed for estimating line lengths
12704 1         3 my $maximum_text_length;
12705             my $starting_lentot;
12706 1         4 my $length_tol = 1;
12707              
12708             my $excess_length_to_K = sub {
12709 2     2   5 my ($K) = @_;
12710              
12711             # Estimate the length from the line start to a given token
12712 2         8 my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
12713 2         6 my $excess_length = $length + $length_tol - $maximum_text_length;
12714 2         10 return ($excess_length);
12715 1         7 };
12716              
12717             # loop over all containers
12718 1         2 my @open_block_stack;
12719 1         2 my $iline = -1;
12720 1         3 my $KNEXT = $self->[_K_first_seq_item_];
12721 1         6 while ( defined($KNEXT) ) {
12722 4         7 my $KK = $KNEXT;
12723 4         13 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
12724 4         8 my $rtoken_vars = $rLL->[$KK];
12725 4         8 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
12726 4 50       10 if ( !$type_sequence ) {
12727 0 0       0 next if ( $KK == 0 ); # first token in file may not be container
12728              
12729             # A fault here implies that an error was made in the little loop at
12730             # the bottom of sub 'respace_tokens' which set the values of
12731             # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
12732             # loop control lines above.
12733 0         0 Fault("sequence = $type_sequence not defined at K=$KK")
12734             if (DEVEL_MODE);
12735 0         0 next;
12736             }
12737              
12738             # Patch: do not mark short blocks with welds.
12739             # In some cases blinkers can form (case b690).
12740 4 50 33     15 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
12741 0         0 next;
12742             }
12743              
12744             # We are just looking at code blocks
12745 4         8 my $token = $rtoken_vars->[_TOKEN_];
12746 4         7 my $type = $rtoken_vars->[_TYPE_];
12747 4 50       11 next unless ( $type eq $token );
12748 4 50       10 next unless ( $rblock_type_of_seqno->{$type_sequence} );
12749              
12750             # Keep a stack of all acceptable block braces seen.
12751             # Only consider blocks entirely on one line so dump the stack when line
12752             # changes.
12753 4         5 my $iline_last = $iline;
12754 4         9 $iline = $rLL->[$KK]->[_LINE_INDEX_];
12755 4 100       11 if ( $iline != $iline_last ) { @open_block_stack = () }
  1         3  
12756              
12757 4 100       7 if ( $token eq '}' ) {
12758 2 50       7 if (@open_block_stack) { pop @open_block_stack }
  2         4  
12759             }
12760 4 100       13 next unless ( $token eq '{' );
12761              
12762             # block must be balanced (bad scripts may be unbalanced)
12763 2         9 my $K_opening = $K_opening_container->{$type_sequence};
12764 2         6 my $K_closing = $K_closing_container->{$type_sequence};
12765 2 50 33     14 next unless ( defined($K_opening) && defined($K_closing) );
12766              
12767             # require that this block be entirely on one line
12768             next
12769             if ( $ris_broken_container->{$type_sequence}
12770 2 50 33     13 || $rbreak_container->{$type_sequence} );
12771              
12772             # See if this block fits on one line of allowed length (which may
12773             # be different from the input script)
12774 2 50       10 $starting_lentot =
12775             $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
12776 2         4 my $level = $rLL->[$KK]->[_LEVEL_];
12777 2         6 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
12778 2         4 $maximum_text_length =
12779             $maximum_text_length_at_level[$level] -
12780             $ci_level * $rOpts_continuation_indentation;
12781              
12782             # Dump the stack if block is too long and skip this block
12783 2 50       6 if ( $excess_length_to_K->($K_closing) > 0 ) {
12784 0         0 @open_block_stack = ();
12785 0         0 next;
12786             }
12787              
12788             # OK, Block passes tests, remember it
12789 2         4 push @open_block_stack, $type_sequence;
12790              
12791             # We are only marking nested code blocks,
12792             # so check for a previous block on the stack
12793 2 100       10 next if ( @open_block_stack <= 1 );
12794              
12795             # Looks OK, mark this as a short nested block
12796 1         4 $rshort_nested->{$type_sequence} = 1;
12797              
12798             }
12799 1         6 return;
12800             } ## end sub mark_short_nested_blocks
12801              
12802             sub special_indentation_adjustments {
12803              
12804 561     561 0 1740 my ($self) = @_;
12805              
12806             # Called once per file to define the levels to be used for computing
12807             # actual indentation. These levels are initialized to be the structural
12808             # levels and then are adjusted if necessary for special purposes.
12809             # The adjustments are made either by changing _CI_LEVEL_ directly or
12810             # by setting modified levels in the array $self->[_radjusted_levels_].
12811              
12812             # NOTE: This routine is called after the weld routines, which may have
12813             # already adjusted the initial values of _LEVEL_, so we are making
12814             # adjustments on top of those levels. It would be nicer to have the
12815             # weld routines also use this adjustment, but that gets complicated
12816             # when we combine -gnu -wn and also have some welded quotes.
12817 561         1608 my $Klimit = $self->[_Klimit_];
12818 561         1391 my $rLL = $self->[_rLL_];
12819 561         1254 my $radjusted_levels = $self->[_radjusted_levels_];
12820              
12821 561 100       1806 return unless ( defined($Klimit) );
12822              
12823             # Initialize the adjusted levels to be the structural levels
12824 557         2002 foreach my $KK ( 0 .. $Klimit ) {
12825 58535         103772 $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
12826             }
12827              
12828             # First set adjusted levels for any non-indenting braces.
12829 557         5895 $self->do_non_indenting_braces();
12830              
12831             # Adjust breaks and indentation list containers
12832 557         3505 $self->break_before_list_opening_containers();
12833              
12834             # Set adjusted levels for the whitespace cycle option.
12835 557         2841 $self->whitespace_cycle_adjustment();
12836              
12837 557         3397 $self->braces_left_setup();
12838              
12839             # Adjust continuation indentation if -bli is set
12840 557         2735 $self->bli_adjustment();
12841              
12842 557 100       1817 $self->extended_ci()
12843             if ($rOpts_extended_continuation_indentation);
12844              
12845             # Now clip any adjusted levels to be non-negative
12846 557         2483 $self->clip_adjusted_levels();
12847              
12848 557         1134 return;
12849             } ## end sub special_indentation_adjustments
12850              
12851             sub clip_adjusted_levels {
12852              
12853             # Replace any negative adjusted levels with zero.
12854             # Negative levels can occur in files with brace errors.
12855 557     557 0 1671 my ($self) = @_;
12856 557         1332 my $radjusted_levels = $self->[_radjusted_levels_];
12857 557 50 33     2253 return unless defined($radjusted_levels) && @{$radjusted_levels};
  557         1999  
12858 557         1349 my $min = min( @{$radjusted_levels} ); # fast check for min
  557         3352  
12859 557 50       2190 if ( $min < 0 ) {
12860              
12861             # slow loop, but rarely needed
12862 0 0       0 foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
  0         0  
  0         0  
12863             }
12864 557         1098 return;
12865             } ## end sub clip_adjusted_levels
12866              
12867             sub do_non_indenting_braces {
12868              
12869             # Called once per file to handle the --non-indenting-braces parameter.
12870             # Remove indentation within marked braces if requested
12871 557     557 0 1725 my ($self) = @_;
12872              
12873             # Any non-indenting braces have been found by sub find_non_indenting_braces
12874             # and are defined by the following hash:
12875 557         1739 my $rseqno_non_indenting_brace_by_ix =
12876             $self->[_rseqno_non_indenting_brace_by_ix_];
12877 557 100       1141 return unless ( %{$rseqno_non_indenting_brace_by_ix} );
  557         4127  
12878              
12879 2         6 my $rlines = $self->[_rlines_];
12880 2         5 my $K_opening_container = $self->[_K_opening_container_];
12881 2         4 my $K_closing_container = $self->[_K_closing_container_];
12882 2         14 my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
12883 2         9 my $radjusted_levels = $self->[_radjusted_levels_];
12884              
12885             # First locate all of the marked blocks
12886 2         6 my @K_stack;
12887 2         7 foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) {
  2         10  
12888 6         14 my $seqno = $rseqno_non_indenting_brace_by_ix->{$ix};
12889 6         11 my $KK = $K_opening_container->{$seqno};
12890 6         13 my $line_of_tokens = $rlines->[$ix];
12891 6         11 my $rK_range = $line_of_tokens->{_rK_range};
12892 6         9 my ( $Kfirst, $Klast ) = @{$rK_range};
  6         15  
12893 6         14 $rspecial_side_comment_type->{$Klast} = 'NIB';
12894 6         11 push @K_stack, [ $KK, 1 ];
12895 6         11 my $Kc = $K_closing_container->{$seqno};
12896 6 50       20 push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
12897             }
12898 2 50       8 return unless (@K_stack);
12899 2         23 @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
  19         38  
12900              
12901             # Then loop to remove indentation within marked blocks
12902 2         4 my $KK_last = 0;
12903 2         5 my $ndeep = 0;
12904 2         5 foreach my $item (@K_stack) {
12905 12         16 my ( $KK, $inc ) = @{$item};
  12         21  
12906 12 100       23 if ( $ndeep > 0 ) {
12907              
12908 8         18 foreach ( $KK_last + 1 .. $KK ) {
12909 52         74 $radjusted_levels->[$_] -= $ndeep;
12910             }
12911              
12912             # We just subtracted the old $ndeep value, which only applies to a
12913             # '{'. The new $ndeep applies to a '}', so we undo the error.
12914 8 100       18 if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
  6         15  
12915             }
12916              
12917 12         15 $ndeep += $inc;
12918 12         21 $KK_last = $KK;
12919             }
12920 2         9 return;
12921             } ## end sub do_non_indenting_braces
12922              
12923             sub whitespace_cycle_adjustment {
12924              
12925 557     557 0 1298 my $self = shift;
12926              
12927             # Called once per file to implement the --whitespace-cycle option
12928 557         1400 my $rLL = $self->[_rLL_];
12929 557 50 33     2317 return unless ( defined($rLL) && @{$rLL} );
  557         2201  
12930 557         1629 my $radjusted_levels = $self->[_radjusted_levels_];
12931 557         1511 my $maximum_level = $self->[_maximum_level_];
12932              
12933 557 50 66     2467 if ( $rOpts_whitespace_cycle
      66        
12934             && $rOpts_whitespace_cycle > 0
12935             && $rOpts_whitespace_cycle < $maximum_level )
12936             {
12937              
12938 2         5 my $Kmax = @{$rLL} - 1;
  2         8  
12939              
12940 2         4 my $whitespace_last_level = -1;
12941 2         4 my @whitespace_level_stack = ();
12942 2         10 my $last_nonblank_type = 'b';
12943 2         5 my $last_nonblank_token = EMPTY_STRING;
12944 2         8 foreach my $KK ( 0 .. $Kmax ) {
12945 234         306 my $level_abs = $radjusted_levels->[$KK];
12946 234         283 my $level = $level_abs;
12947 234 100       380 if ( $level_abs < $whitespace_last_level ) {
12948 26         41 pop(@whitespace_level_stack);
12949             }
12950 234 100       368 if ( !@whitespace_level_stack ) {
12951 2         7 push @whitespace_level_stack, $level_abs;
12952             }
12953             else {
12954 232 100       382 if ( $level_abs > $whitespace_last_level ) {
12955 26         40 $level = $whitespace_level_stack[-1] +
12956             ( $level_abs - $whitespace_last_level );
12957              
12958 26 50 100     172 if (
      66        
      33        
      66        
      33        
12959             # 1 Try to break at a block brace
12960             (
12961             $level > $rOpts_whitespace_cycle
12962             && $last_nonblank_type eq '{'
12963             && $last_nonblank_token eq '{'
12964             )
12965              
12966             # 2 Then either a brace or bracket
12967             || ( $level > $rOpts_whitespace_cycle + 1
12968             && $last_nonblank_token =~ /^[\{\[]$/ )
12969              
12970             # 3 Then a paren too
12971             || $level > $rOpts_whitespace_cycle + 2
12972             )
12973             {
12974 1         6 $level = 1;
12975             }
12976 26         46 push @whitespace_level_stack, $level;
12977             }
12978             }
12979 234         331 $level = $whitespace_level_stack[-1];
12980 234         291 $radjusted_levels->[$KK] = $level;
12981              
12982 234         292 $whitespace_last_level = $level_abs;
12983 234         358 my $type = $rLL->[$KK]->[_TYPE_];
12984 234         324 my $token = $rLL->[$KK]->[_TOKEN_];
12985 234 100       431 if ( $type ne 'b' ) {
12986 150         193 $last_nonblank_type = $type;
12987 150         251 $last_nonblank_token = $token;
12988             }
12989             }
12990             }
12991 557         1179 return;
12992             } ## end sub whitespace_cycle_adjustment
12993              
12994 39     39   444 use constant DEBUG_BBX => 0;
  39         116  
  39         64818  
12995              
12996             sub break_before_list_opening_containers {
12997              
12998 557     557 0 1591 my ($self) = @_;
12999              
13000             # This routine is called once per batch to implement parameters
13001             # --break-before-hash-brace=n and similar -bbx=n flags
13002             # and their associated indentation flags:
13003             # --break-before-hash-brace-and-indent and similar -bbxi=n
13004              
13005             # Nothing to do if none of the -bbx=n parameters has been set
13006 557 100       1751 return unless %break_before_container_types;
13007              
13008 7         28 my $rLL = $self->[_rLL_];
13009 7 50 33     31 return unless ( defined($rLL) && @{$rLL} );
  7         27  
13010              
13011             # Loop over all opening container tokens
13012 7         29 my $K_opening_container = $self->[_K_opening_container_];
13013 7         19 my $K_closing_container = $self->[_K_closing_container_];
13014 7         17 my $ris_broken_container = $self->[_ris_broken_container_];
13015 7         16 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
13016 7         15 my $rhas_list = $self->[_rhas_list_];
13017 7         22 my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
13018 7         15 my $radjusted_levels = $self->[_radjusted_levels_];
13019 7         15 my $rparent_of_seqno = $self->[_rparent_of_seqno_];
13020 7         17 my $rlines = $self->[_rlines_];
13021 7         21 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
13022 7         16 my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
13023 7         16 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
13024 7         15 my $rK_weld_right = $self->[_rK_weld_right_];
13025 7         25 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13026              
13027 7         42 my $length_tol =
13028             max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
13029 7 50       19 if ($rOpts_ignore_old_breakpoints) {
13030              
13031             # Patch suggested by b1231; the old tol was excessive.
13032             ## $length_tol += $rOpts_maximum_line_length;
13033 0         0 $length_tol *= 2;
13034             }
13035              
13036 7         17 my $rbreak_before_container_by_seqno = {};
13037 7         19 my $rwant_reduced_ci = {};
13038 7         13 foreach my $seqno ( keys %{$K_opening_container} ) {
  7         37  
13039              
13040             #----------------------------------------------------------------
13041             # Part 1: Examine any -bbx=n flags
13042             #----------------------------------------------------------------
13043              
13044 47 100       103 next if ( $rblock_type_of_seqno->{$seqno} );
13045 45         97 my $KK = $K_opening_container->{$seqno};
13046              
13047             # This must be a list or contain a list.
13048             # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
13049             # Note2: 'has_list' holds the depth to the sub-list. We will require
13050             # a depth of just 1
13051 45         101 my $is_list = $self->is_list_by_seqno($seqno);
13052 45         72 my $has_list = $rhas_list->{$seqno};
13053              
13054             # Fix for b1173: if welded opening container, use flag of innermost
13055             # seqno. Otherwise, the restriction $has_list==1 prevents triple and
13056             # higher welds from following the -BBX parameters.
13057 45 50       89 if ($total_weld_count) {
13058 0         0 my $KK_test = $rK_weld_right->{$KK};
13059 0 0       0 if ( defined($KK_test) ) {
13060 0         0 my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
13061 0   0     0 $is_list ||= $self->is_list_by_seqno($seqno_inner);
13062 0         0 $has_list = $rhas_list->{$seqno_inner};
13063             }
13064             }
13065              
13066 45 100 66     129 next unless ( $is_list || $has_list && $has_list == 1 );
      66        
13067              
13068 41         79 my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
13069              
13070             # Only for types of container tokens with a non-default break option
13071 41         75 my $token = $rLL->[$KK]->[_TOKEN_];
13072 41         76 my $break_option = $break_before_container_types{$token};
13073 41 100       95 next unless ($break_option);
13074              
13075             # Do not use -bbx under stress for stability ... fixes b1300
13076             # TODO: review this; do we also need to look at stress_level_lalpha?
13077 16         28 my $level = $rLL->[$KK]->[_LEVEL_];
13078 16 50       44 if ( $level >= $stress_level_beta ) {
13079 0         0 DEBUG_BBX
13080             && print
13081             "BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
13082 0         0 next;
13083             }
13084              
13085             # Require previous nonblank to be '=' or '=>'
13086 16         46 my $Kprev = $KK - 1;
13087 16 50       43 next if ( $Kprev < 0 );
13088 16         36 my $prev_type = $rLL->[$Kprev]->[_TYPE_];
13089 16 50       40 if ( $prev_type eq 'b' ) {
13090 16         24 $Kprev--;
13091 16 50       37 next if ( $Kprev < 0 );
13092 16         35 $prev_type = $rLL->[$Kprev]->[_TYPE_];
13093             }
13094 16 100       48 next unless ( $is_equal_or_fat_comma{$prev_type} );
13095              
13096 14         35 my $ci = $rLL->[$KK]->[_CI_LEVEL_];
13097              
13098             #--------------------------------------------
13099             # New coding for option 2 (break if complex).
13100             #--------------------------------------------
13101             # This new coding uses clues which are invariant under formatting to
13102             # decide if a list is complex. For now it is only applied when -lp
13103             # and -vmll are used, but eventually it may become the standard method.
13104             # Fixes b1274, b1275, and others, including b1099.
13105 14 100       35 if ( $break_option == 2 ) {
13106              
13107 2 50 33     17 if ( $rOpts_line_up_parentheses
13108             || $rOpts_variable_maximum_line_length )
13109             {
13110              
13111             # Start with the basic definition of a complex list...
13112 0   0     0 my $is_complex = $is_list && $has_list;
13113              
13114             # and it is also complex if the parent is a list
13115 0 0       0 if ( !$is_complex ) {
13116 0         0 my $parent = $rparent_of_seqno->{$seqno};
13117 0 0       0 if ( $self->is_list_by_seqno($parent) ) {
13118 0         0 $is_complex = 1;
13119             }
13120             }
13121              
13122             # finally, we will call it complex if there are inner opening
13123             # and closing container tokens, not parens, within the outer
13124             # container tokens.
13125 0 0       0 if ( !$is_complex ) {
13126 0         0 my $Kp = $self->K_next_nonblank($KK);
13127 0 0       0 my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
13128 0 0 0     0 if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
13129              
13130 0         0 my $Kc = $K_closing_container->{$seqno};
13131 0         0 my $Km = $self->K_previous_nonblank($Kc);
13132 0 0       0 my $token_m =
13133             defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
13134              
13135             # ignore any optional ending comma
13136 0 0       0 if ( $token_m eq ',' ) {
13137 0         0 $Km = $self->K_previous_nonblank($Km);
13138 0 0       0 $token_m =
13139             defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
13140             }
13141              
13142             $is_complex ||=
13143 0   0     0 $is_closing_token{$token_m} && $token_m ne ')';
      0        
13144             }
13145             }
13146              
13147             # Convert to option 3 (always break) if complex
13148 0 0       0 next unless ($is_complex);
13149 0         0 $break_option = 3;
13150             }
13151             }
13152              
13153             # Fix for b1231: the has_list_with_lec does not cover all cases.
13154             # A broken container containing a list and with line-ending commas
13155             # will stay broken, so can be treated as if it had a list with lec.
13156             $has_list_with_lec ||=
13157             $has_list
13158             && $ris_broken_container->{$seqno}
13159 14   66     86 && $rlec_count_by_seqno->{$seqno};
      100        
13160              
13161             DEBUG_BBX
13162 14         21 && print {*STDOUT}
13163             "BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
13164              
13165             # -bbx=1 = stable, try to follow input
13166 14 50       60 if ( $break_option == 1 ) {
    100          
    50          
13167              
13168 0         0 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
13169 0         0 my $rK_range = $rlines->[$iline]->{_rK_range};
13170 0         0 my ( $Kfirst, $Klast ) = @{$rK_range};
  0         0  
13171 0 0       0 next unless ( $KK == $Kfirst );
13172             }
13173              
13174             # -bbx=2 => apply this style only for a 'complex' list
13175             elsif ( $break_option == 2 ) {
13176              
13177             # break if this list contains a broken list with line-ending comma
13178 2         3 my $ok_to_break;
13179 2         4 my $Msg = EMPTY_STRING;
13180 2 100       5 if ($has_list_with_lec) {
13181 1         1 $ok_to_break = 1;
13182 1         2 DEBUG_BBX && do { $Msg = "has list with lec;" };
13183             }
13184              
13185 2 100       25 if ( !$ok_to_break ) {
13186              
13187             # Turn off -xci if -bbx=2 and this container has a sublist but
13188             # not a broken sublist. This avoids creating blinkers. The
13189             # problem is that -xci can cause one-line lists to break open,
13190             # and thereby creating formatting instability.
13191             # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
13192             # b1045 b1046 b1047 b1051 b1052 b1061.
13193 1 50       3 if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
  0         0  
13194              
13195 1         5 my $parent = $rparent_of_seqno->{$seqno};
13196 1 50       4 if ( $self->is_list_by_seqno($parent) ) {
13197 1         2 DEBUG_BBX && do { $Msg = "parent is list" };
13198 1         2 $ok_to_break = 1;
13199             }
13200             }
13201              
13202 2 50       16 if ( !$ok_to_break ) {
13203             DEBUG_BBX
13204 0         0 && print {*STDOUT} "Not breaking at seqno=$seqno: $Msg\n";
13205 0         0 next;
13206             }
13207              
13208             DEBUG_BBX
13209 2         3 && print {*STDOUT} "OK to break at seqno=$seqno: $Msg\n";
13210              
13211             # Patch: turn off -xci if -bbx=2 and -lp
13212             # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
13213 2 50       5 $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
13214             }
13215              
13216             # -bbx=3 = always break
13217             elsif ( $break_option == 3 ) {
13218              
13219             # ok to break
13220             }
13221              
13222             # Shouldn't happen! Bad flag, but make behavior same as 3
13223             else {
13224             # ok to break
13225             }
13226              
13227             # Set a flag for actual implementation later in
13228             # sub insert_breaks_before_list_opening_containers
13229 14         32 $rbreak_before_container_by_seqno->{$seqno} = 1;
13230             DEBUG_BBX
13231 14         20 && print {*STDOUT} "BBX: ok to break at seqno=$seqno\n";
13232              
13233             # -bbxi=0: Nothing more to do if the ci value remains unchanged
13234 14         24 my $ci_flag = $container_indentation_options{$token};
13235 14 100       43 next unless ($ci_flag);
13236              
13237             # -bbxi=1: This option removes ci and is handled in
13238             # later sub get_final_indentation
13239 4 100       14 if ( $ci_flag == 1 ) {
13240 2         3 $rwant_reduced_ci->{$seqno} = 1;
13241 2         6 next;
13242             }
13243              
13244             # -bbxi=2: This option changes the level ...
13245             # This option can conflict with -xci in some cases. We can turn off
13246             # -xci for this container to avoid blinking. For now, only do this if
13247             # -vmll is set. ( fixes b1335, b1336 )
13248 2 50       7 if ($rOpts_variable_maximum_line_length) {
13249 0         0 $rno_xci_by_seqno->{$seqno} = 1;
13250             }
13251              
13252             #----------------------------------------------------------------
13253             # Part 2: Perform tests before committing to changing ci and level
13254             #----------------------------------------------------------------
13255              
13256             # Before changing the ci level of the opening container, we need
13257             # to be sure that the container will be broken in the later stages of
13258             # formatting. We have to do this because we are working early in the
13259             # formatting pipeline. A problem can occur if we change the ci or
13260             # level of the opening token but do not actually break the container
13261             # open as expected. In most cases it wouldn't make any difference if
13262             # we changed ci or not, but there are some edge cases where this
13263             # can cause blinking states, so we need to try to only change ci if
13264             # the container will really be broken.
13265              
13266             # Only consider containers already broken
13267 2 50       6 next if ( !$ris_broken_container->{$seqno} );
13268              
13269             # Patch to fix issue b1305: the combination of -naws and ci>i appears
13270             # to cause an instability. It should almost never occur in practice.
13271             next
13272 2 50 33     8 if (!$rOpts_add_whitespace
13273             && $rOpts_continuation_indentation > $rOpts_indent_columns );
13274              
13275             # Always ok to change ci for permanently broken containers
13276 2 50       7 if ( $ris_permanently_broken->{$seqno} ) { }
    100          
13277              
13278             # Always OK if this list contains a broken sub-container with
13279             # a non-terminal line-ending comma
13280             elsif ($has_list_with_lec) { }
13281              
13282             # Otherwise, we are considering a single container...
13283             else {
13284              
13285             # A single container must have at least 1 line-ending comma:
13286 1 50       7 next unless ( $rlec_count_by_seqno->{$seqno} );
13287              
13288 1         2 my $OK;
13289              
13290             # Since it has a line-ending comma, it will stay broken if the
13291             # -boc flag is set
13292 1 50       5 if ($rOpts_break_at_old_comma_breakpoints) { $OK = 1 }
  0         0  
13293              
13294             # OK if the container contains multiple fat commas
13295             # Better: multiple lines with fat commas
13296 1 50 33     7 if ( !$OK && !$rOpts_ignore_old_breakpoints ) {
13297 1         5 my $rtype_count = $rtype_count_by_seqno->{$seqno};
13298 1 50       3 next unless ($rtype_count);
13299 1         3 my $fat_comma_count = $rtype_count->{'=>'};
13300             DEBUG_BBX
13301 1         2 && print {*STDOUT} "BBX: fat comma count=$fat_comma_count\n";
13302 1 50 33     7 if ( $fat_comma_count && $fat_comma_count >= 2 ) { $OK = 1 }
  1         4  
13303             }
13304              
13305             # The last check we can make is to see if this container could
13306             # fit on a single line. Use the least possible indentation
13307             # estimate, ci=0, so we are not subtracting $ci *
13308             # $rOpts_continuation_indentation from tabulated
13309             # $maximum_text_length value.
13310 1 50       4 if ( !$OK ) {
13311 0         0 my $maximum_text_length = $maximum_text_length_at_level[$level];
13312 0         0 my $K_closing = $K_closing_container->{$seqno};
13313 0         0 my $length = $self->cumulative_length_before_K($K_closing) -
13314             $self->cumulative_length_before_K($KK);
13315 0         0 my $excess_length = $length - $maximum_text_length;
13316             DEBUG_BBX
13317 0         0 && print {*STDOUT}
13318             "BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
13319              
13320             # OK if the net container definitely breaks on length
13321 0 0       0 if ( $excess_length > $length_tol ) {
13322 0         0 $OK = 1;
13323             DEBUG_BBX
13324 0         0 && print {*STDOUT} "BBX: excess_length=$excess_length\n";
13325             }
13326              
13327             # Otherwise skip it
13328 0         0 else { next }
13329             }
13330             }
13331              
13332             #------------------------------------------------------------
13333             # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
13334             #------------------------------------------------------------
13335              
13336 2         3 DEBUG_BBX && print {*STDOUT} "BBX: OK to break\n";
13337              
13338             # -bbhbi=n
13339             # -bbsbi=n
13340             # -bbpi=n
13341              
13342             # where:
13343              
13344             # n=0 default indentation (usually one ci)
13345             # n=1 outdent one ci
13346             # n=2 indent one level (minus one ci)
13347             # n=3 indent one extra ci [This may be dropped]
13348              
13349             # NOTE: We are adjusting indentation of the opening container. The
13350             # closing container will normally follow the indentation of the opening
13351             # container automatically, so this is not currently done.
13352 2 50       8 next unless ($ci);
13353              
13354             # option 1: outdent
13355 2 50       8 if ( $ci_flag == 1 ) {
    50          
13356 0         0 $ci -= 1;
13357             }
13358              
13359             # option 2: indent one level
13360             elsif ( $ci_flag == 2 ) {
13361 2         5 $ci -= 1;
13362 2         4 $radjusted_levels->[$KK] += 1;
13363             }
13364              
13365             # unknown option
13366             else {
13367             # Shouldn't happen - leave ci unchanged
13368             }
13369              
13370 2 50       8 $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
13371             }
13372              
13373 7         36 $self->[_rbreak_before_container_by_seqno_] =
13374             $rbreak_before_container_by_seqno;
13375 7         16 $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
13376 7         22 return;
13377             } ## end sub break_before_list_opening_containers
13378              
13379 39     39   400 use constant DEBUG_XCI => 0;
  39         105  
  39         95114  
13380              
13381             sub extended_ci {
13382              
13383             # This routine implements the -xci (--extended-continuation-indentation)
13384             # flag. We add CI to interior tokens of a container which itself has CI but
13385             # only if a token does not already have CI.
13386              
13387             # To do this, we will locate opening tokens which themselves have
13388             # continuation indentation (CI). We track them with their sequence
13389             # numbers. These sequence numbers are called 'controlling sequence
13390             # numbers'. They apply continuation indentation to the tokens that they
13391             # contain. These inner tokens remember their controlling sequence numbers.
13392             # Later, when these inner tokens are output, they have to see if the output
13393             # lines with their controlling tokens were output with CI or not. If not,
13394             # then they must remove their CI too.
13395              
13396             # The controlling CI concept works hierarchically. But CI itself is not
13397             # hierarchical; it is either on or off. There are some rare instances where
13398             # it would be best to have hierarchical CI too, but not enough to be worth
13399             # the programming effort.
13400              
13401             # The operations to remove unwanted CI are done in sub 'undo_ci'.
13402              
13403 6     6 0 21 my ($self) = @_;
13404              
13405 6         18 my $rLL = $self->[_rLL_];
13406 6 50 33     36 return unless ( defined($rLL) && @{$rLL} );
  6         24  
13407              
13408 6         19 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
13409 6         16 my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
13410 6         18 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
13411 6         19 my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
13412 6         15 my $ris_bli_container = $self->[_ris_bli_container_];
13413 6         17 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13414              
13415 6         12 my %available_space;
13416              
13417             # Loop over all opening container tokens
13418 6         19 my $K_opening_container = $self->[_K_opening_container_];
13419 6         17 my $K_closing_container = $self->[_K_closing_container_];
13420 6         21 my @seqno_stack;
13421             my $seqno_top;
13422 6         0 my $KLAST;
13423 6         17 my $KNEXT = $self->[_K_first_seq_item_];
13424              
13425             # The following variable can be used to allow a little extra space to
13426             # avoid blinkers. A value $len_tol = 20 fixed the following
13427             # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
13428             # It turned out that the real problem was mis-parsing a list brace as
13429             # a code block in a 'use' statement when the line length was extremely
13430             # small. A value of 0 works now, but a slightly larger value can
13431             # be used to minimize the chance of a blinker.
13432 6         17 my $len_tol = 0;
13433              
13434 6         26 while ( defined($KNEXT) ) {
13435              
13436             # Fix all tokens up to the next sequence item if we are changing CI
13437 204 100       357 if ($seqno_top) {
13438              
13439 150         213 my $is_list = $ris_list_by_seqno->{$seqno_top};
13440 150         218 my $space = $available_space{$seqno_top};
13441 150         188 my $count = 0;
13442 150         287 foreach my $Kt ( $KLAST + 1 .. $KNEXT - 1 ) {
13443              
13444 626 100       1145 next if ( $rLL->[$Kt]->[_CI_LEVEL_] );
13445              
13446             # But do not include tokens which might exceed the line length
13447             # and are not in a list.
13448             # ... This fixes case b1031
13449 304 50 66     803 if ( $is_list
      33        
13450             || $rLL->[$Kt]->[_TOKEN_LENGTH_] < $space
13451             || $rLL->[$Kt]->[_TYPE_] eq '#' )
13452             {
13453 304         489 $rLL->[$Kt]->[_CI_LEVEL_] = 1;
13454 304         990 $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
13455 304         493 $count++;
13456             }
13457             }
13458 150         295 $ris_seqno_controlling_ci->{$seqno_top} += $count;
13459             }
13460              
13461 204         286 $KLAST = $KNEXT;
13462 204         262 my $KK = $KNEXT;
13463 204         301 $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
13464              
13465 204         292 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
13466              
13467             # see if we have reached the end of the current controlling container
13468 204 100 100     553 if ( $seqno_top && $seqno == $seqno_top ) {
13469 62         102 $seqno_top = pop @seqno_stack;
13470             }
13471              
13472             # Patch to fix some block types...
13473             # Certain block types arrive from the tokenizer without CI but should
13474             # have it for this option. These include anonymous subs and
13475             # do sort map grep eval
13476 204         303 my $block_type = $rblock_type_of_seqno->{$seqno};
13477 204 100 100     478 if ( $block_type && $is_block_with_ci{$block_type} ) {
13478 24         42 $rLL->[$KK]->[_CI_LEVEL_] = 1;
13479 24 100       57 if ($seqno_top) {
13480 16         56 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
13481 16         33 $ris_seqno_controlling_ci->{$seqno_top}++;
13482             }
13483             }
13484              
13485             # If this does not have ci, update ci if necessary and continue looking
13486             else {
13487 180 100       322 if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
13488 66 100       128 if ($seqno_top) {
13489 50         73 $rLL->[$KK]->[_CI_LEVEL_] = 1;
13490 50         175 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
13491 50         75 $ris_seqno_controlling_ci->{$seqno_top}++;
13492             }
13493 66         127 next;
13494             }
13495             }
13496              
13497             # We are looking for opening container tokens with ci
13498 138         230 my $K_opening = $K_opening_container->{$seqno};
13499 138 100 100     442 next unless ( defined($K_opening) && $KK == $K_opening );
13500              
13501             # Make sure there is a corresponding closing container
13502             # (could be missing if the script has a brace error)
13503 62         106 my $K_closing = $K_closing_container->{$seqno};
13504 62 50       111 next unless defined($K_closing);
13505              
13506             # Skip if requested by -bbx to avoid blinkers
13507 62 50       127 next if ( $rno_xci_by_seqno->{$seqno} );
13508              
13509             # Skip if this is a -bli container (this fixes case b1065) Note: case
13510             # b1065 is also fixed by the update for b1055, so this update is not
13511             # essential now. But there does not seem to be a good reason to add
13512             # xci and bli together, so the update is retained.
13513 62 50       130 next if ( $ris_bli_container->{$seqno} );
13514              
13515             # Require different input lines. This will filter out a large number
13516             # of small hash braces and array brackets. If we accidentally filter
13517             # out an important container, it will get fixed on the next pass.
13518 62 50 66     210 if (
13519             $rLL->[$K_opening]->[_LINE_INDEX_] ==
13520             $rLL->[$K_closing]->[_LINE_INDEX_]
13521             && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
13522             $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
13523             $rOpts_maximum_line_length )
13524             )
13525             {
13526 0         0 DEBUG_XCI
13527             && print "XCI: Skipping seqno=$seqno, require different lines\n";
13528 0         0 next;
13529             }
13530              
13531             # Do not apply -xci if adding extra ci will put the container contents
13532             # beyond the line length limit (fixes cases b899 b935)
13533 62         94 my $level = $rLL->[$K_opening]->[_LEVEL_];
13534 62         89 my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
13535 62         109 my $maximum_text_length =
13536             $maximum_text_length_at_level[$level] -
13537             $ci_level * $rOpts_continuation_indentation;
13538              
13539             # Fix for b1197 b1198 b1199 b1200 b1201 b1202
13540             # Do not apply -xci if we are running out of space
13541             # TODO: review this; do we also need to look at stress_level_alpha?
13542 62 50       115 if ( $level >= $stress_level_beta ) {
13543 0         0 DEBUG_XCI
13544             && print
13545             "XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
13546 0         0 next;
13547             }
13548              
13549             # remember how much space is available for patch b1031 above
13550 62         93 my $space =
13551             $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
13552              
13553 62 50       1044 if ( $space < 0 ) {
13554 0         0 DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
13555 0         0 next;
13556             }
13557 62         86 DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
13558              
13559 62         121 $available_space{$seqno} = $space;
13560              
13561             # This becomes the next controlling container
13562 62 100       142 push @seqno_stack, $seqno_top if ($seqno_top);
13563 62         129 $seqno_top = $seqno;
13564             }
13565 6         28 return;
13566             } ## end sub extended_ci
13567              
13568             sub braces_left_setup {
13569              
13570             # Called once per file to mark all -bl, -sbl, and -asbl containers
13571 557     557 0 1390 my $self = shift;
13572              
13573 557         1568 my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'};
13574 557         1490 my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'};
13575 557         1406 my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
13576 557 100 100     4630 return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
      66        
13577              
13578 23         66 my $rLL = $self->[_rLL_];
13579 23 50 33     147 return unless ( defined($rLL) && @{$rLL} );
  23         117  
13580              
13581             # We will turn on this hash for braces controlled by these flags:
13582 23         70 my $rbrace_left = $self->[_rbrace_left_];
13583              
13584 23         71 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13585 23         63 my $ris_asub_block = $self->[_ris_asub_block_];
13586 23         58 my $ris_sub_block = $self->[_ris_sub_block_];
13587 23         51 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
  23         135  
13588              
13589 62         114 my $block_type = $rblock_type_of_seqno->{$seqno};
13590              
13591             # use -asbl flag for an anonymous sub block
13592 62 100       156 if ( $ris_asub_block->{$seqno} ) {
    100          
13593 14 100       54 if ($rOpts_asbl) {
13594 10         41 $rbrace_left->{$seqno} = 1;
13595             }
13596             }
13597              
13598             # use -sbl flag for a named sub
13599             elsif ( $ris_sub_block->{$seqno} ) {
13600 4 50       19 if ($rOpts_sbl) {
13601 4         25 $rbrace_left->{$seqno} = 1;
13602             }
13603             }
13604              
13605             # use -bl flag if not a sub block of any type
13606             else {
13607 44 100 100     634 if ( $rOpts_bl
      100        
13608             && $block_type =~ /$bl_pattern/
13609             && $block_type !~ /$bl_exclusion_pattern/ )
13610             {
13611 21         73 $rbrace_left->{$seqno} = 1;
13612             }
13613             }
13614             }
13615 23         72 return;
13616             } ## end sub braces_left_setup
13617              
13618             sub bli_adjustment {
13619              
13620             # Called once per file to implement the --brace-left-and-indent option.
13621             # If -bli is set, adds one continuation indentation for certain braces
13622 557     557 0 1278 my $self = shift;
13623 557 100       1876 return unless ( $rOpts->{'brace-left-and-indent'} );
13624 6         17 my $rLL = $self->[_rLL_];
13625 6 50 33     37 return unless ( defined($rLL) && @{$rLL} );
  6         28  
13626              
13627 6         18 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13628 6         16 my $ris_bli_container = $self->[_ris_bli_container_];
13629 6         18 my $rbrace_left = $self->[_rbrace_left_];
13630 6         14 my $K_opening_container = $self->[_K_opening_container_];
13631 6         17 my $K_closing_container = $self->[_K_closing_container_];
13632              
13633 6         14 foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
  6         35  
13634 49         101 my $block_type = $rblock_type_of_seqno->{$seqno};
13635 49 100 66     622 if ( $block_type
      100        
13636             && $block_type =~ /$bli_pattern/
13637             && $block_type !~ /$bli_exclusion_pattern/ )
13638             {
13639 25         82 $ris_bli_container->{$seqno} = 1;
13640 25         48 $rbrace_left->{$seqno} = 1;
13641 25         42 my $Ko = $K_opening_container->{$seqno};
13642 25         41 my $Kc = $K_closing_container->{$seqno};
13643 25 50 33     110 if ( defined($Ko) && defined($Kc) ) {
13644 25         78 $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
13645             }
13646             }
13647             }
13648 6         30 return;
13649             } ## end sub bli_adjustment
13650              
13651             sub find_multiline_qw {
13652              
13653 561     561 0 2017 my ( $self, $rqw_lines ) = @_;
13654              
13655             # Multiline qw quotes are not sequenced items like containers { [ (
13656             # but behave in some respects in a similar way. So this routine finds them
13657             # and creates a separate sequence number system for later use.
13658              
13659             # This is straightforward because they always begin at the end of one line
13660             # and end at the beginning of a later line. This is true no matter how we
13661             # finally make our line breaks, so we can find them before deciding on new
13662             # line breaks.
13663              
13664             # Input parameter:
13665             # if $rqw_lines is defined it is a ref to array of all line index numbers
13666             # for which there is a type 'q' qw quote at either end of the line. This
13667             # was defined by sub resync_lines_and_tokens for efficiency.
13668             #
13669              
13670 561         1717 my $rlines = $self->[_rlines_];
13671              
13672             # if $rqw_lines is not defined (this will occur with -io option) then we
13673             # will have to scan all lines.
13674 561 100       1863 if ( !defined($rqw_lines) ) {
13675 3         9 $rqw_lines = [ 0 .. @{$rlines} - 1 ];
  3         15  
13676             }
13677              
13678             # if $rqw_lines is defined but empty, just return because there are no
13679             # multiline qw's
13680             else {
13681 558 100       1115 if ( !@{$rqw_lines} ) { return }
  558         2179  
  509         1390  
13682             }
13683              
13684 52         187 my $rstarting_multiline_qw_seqno_by_K = {};
13685 52         148 my $rending_multiline_qw_seqno_by_K = {};
13686 52         133 my $rKrange_multiline_qw_by_seqno = {};
13687 52         140 my $rmultiline_qw_has_extra_level = {};
13688              
13689 52         136 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
13690              
13691 52         167 my $rLL = $self->[_rLL_];
13692 52         104 my $qw_seqno;
13693 52         132 my $num_qw_seqno = 0;
13694 52         142 my $K_start_multiline_qw;
13695              
13696             # For reference, here is the old loop, before $rqw_lines became available:
13697             ## foreach my $line_of_tokens ( @{$rlines} ) {
13698 52         141 foreach my $iline ( @{$rqw_lines} ) {
  52         219  
13699 246         442 my $line_of_tokens = $rlines->[$iline];
13700              
13701             # Note that these first checks are required in case we have to scan
13702             # all lines, not just lines with type 'q' at the ends.
13703 246         501 my $line_type = $line_of_tokens->{_line_type};
13704 246 50       556 next unless ( $line_type eq 'CODE' );
13705 246         417 my $rK_range = $line_of_tokens->{_rK_range};
13706 246         371 my ( $Kfirst, $Klast ) = @{$rK_range};
  246         479  
13707 246 100 66     946 next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
13708              
13709             # Continuing a sequence of qw lines ...
13710 243 100       632 if ( defined($K_start_multiline_qw) ) {
13711 137         298 my $type = $rLL->[$Kfirst]->[_TYPE_];
13712              
13713             # shouldn't happen
13714 137 50       358 if ( $type ne 'q' ) {
13715 0         0 DEVEL_MODE && print {*STDERR} <<EOM;
13716             STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
13717             EOM
13718 0         0 $K_start_multiline_qw = undef;
13719 0         0 next;
13720             }
13721 137         295 my $Kprev = $self->K_previous_nonblank($Kfirst);
13722 137         344 my $Knext = $self->K_next_nonblank($Kfirst);
13723 137 50       389 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
13724 137 50       297 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
13725 137 100 66     571 if ( $type_m eq 'q' && $type_p ne 'q' ) {
13726 32         190 $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
13727 32         189 $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
13728             [ $K_start_multiline_qw, $Kfirst ];
13729 32         93 $K_start_multiline_qw = undef;
13730 32         76 $qw_seqno = undef;
13731             }
13732             }
13733              
13734             # Starting a new a sequence of qw lines ?
13735 243 100 100     1057 if ( !defined($K_start_multiline_qw)
13736             && $rLL->[$Klast]->[_TYPE_] eq 'q' )
13737             {
13738 41         199 my $Kprev = $self->K_previous_nonblank($Klast);
13739 41         181 my $Knext = $self->K_next_nonblank($Klast);
13740 41 50       182 my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
13741 41 50       157 my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
13742 41 100 100     302 if ( $type_m ne 'q' && $type_p eq 'q' ) {
13743 32         84 $num_qw_seqno++;
13744 32         109 $qw_seqno = 'q' . $num_qw_seqno;
13745 32         82 $K_start_multiline_qw = $Klast;
13746 32         139 $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
13747             }
13748             }
13749             }
13750              
13751             # Give multiline qw lists extra indentation instead of CI. This option
13752             # works well but is currently only activated when the -xci flag is set.
13753             # The reason is to avoid unexpected changes in formatting.
13754 52 100       338 if ($rOpts_extended_continuation_indentation) {
13755 1         7 while ( my ( $qw_seqno_x, $rKrange ) =
13756 2         14 each %{$rKrange_multiline_qw_by_seqno} )
13757             {
13758 1         3 my ( $Kbeg, $Kend ) = @{$rKrange};
  1         3  
13759              
13760             # require isolated closing token
13761 1         4 my $token_end = $rLL->[$Kend]->[_TOKEN_];
13762             next
13763             unless ( length($token_end) == 1
13764 1 50 33     10 && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
      33        
13765              
13766             # require isolated opening token
13767 1         3 my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
13768              
13769             # allow space(s) after the qw
13770 1 50 33     6 if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
13771             {
13772 0         0 $token_beg =~ s/\s+//;
13773             }
13774              
13775 1 50       4 next unless ( length($token_beg) == 3 );
13776              
13777 1         5 foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
13778 5         9 $rLL->[$KK]->[_LEVEL_]++;
13779 5         12 $rLL->[$KK]->[_CI_LEVEL_] = 0;
13780             }
13781              
13782             # set flag for -wn option, which will remove the level
13783 1         6 $rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1;
13784             }
13785             }
13786              
13787             # For the -lp option we need to mark all parent containers of
13788             # multiline quotes
13789 52 100 66     319 if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
13790              
13791 1         3 while ( my ( $qw_seqno_x, $rKrange ) =
13792 1         8 each %{$rKrange_multiline_qw_by_seqno} )
13793             {
13794 0         0 my ( $Kbeg, $Kend ) = @{$rKrange};
  0         0  
13795 0         0 my $parent_seqno = $self->parent_seqno_by_K($Kend);
13796 0 0       0 next unless ($parent_seqno);
13797              
13798             # If the parent container exactly surrounds this qw, then -lp
13799             # formatting seems to work so we will not mark it.
13800 0         0 my $is_tightly_contained;
13801 0         0 my $Kn = $self->K_next_nonblank($Kend);
13802 0 0       0 my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
13803 0 0 0     0 if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
13804              
13805 0         0 my $Kp = $self->K_previous_nonblank($Kbeg);
13806 0 0       0 my $seqno_p =
13807             defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
13808 0 0 0     0 if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
13809 0         0 $is_tightly_contained = 1;
13810             }
13811             }
13812              
13813 0 0       0 $ris_excluded_lp_container->{$parent_seqno} = 1
13814             unless ($is_tightly_contained);
13815              
13816             # continue up the tree marking parent containers
13817 0         0 while (1) {
13818 0         0 $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
13819 0 0       0 last if ( !defined($parent_seqno) );
13820 0 0       0 last if ( $parent_seqno eq SEQ_ROOT );
13821 0         0 $ris_excluded_lp_container->{$parent_seqno} = 1;
13822             }
13823             }
13824             }
13825              
13826 52         183 $self->[_rstarting_multiline_qw_seqno_by_K_] =
13827             $rstarting_multiline_qw_seqno_by_K;
13828 52         152 $self->[_rending_multiline_qw_seqno_by_K_] =
13829             $rending_multiline_qw_seqno_by_K;
13830 52         135 $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
13831 52         144 $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
13832              
13833 52         190 return;
13834             } ## end sub find_multiline_qw
13835              
13836 39     39   399 use constant DEBUG_COLLAPSED_LENGTHS => 0;
  39         138  
  39         2781  
13837              
13838             # Minimum space reserved for contents of a code block. A value of 40 has given
13839             # reasonable results. With a large line length, say -l=120, this will not
13840             # normally be noticeable but it will prevent making a mess in some edge cases.
13841 39     39   306 use constant MIN_BLOCK_LEN => 40;
  39         152  
  39         5746  
13842              
13843             my %is_handle_type;
13844              
13845 0         0 BEGIN {
13846 39     39   272 my @q = qw( w C U G i k => );
13847 39         302 @is_handle_type{@q} = (1) x scalar(@q);
13848              
13849 39         281615 my $i = 0;
13850             use constant {
13851 39         4503 _max_prong_len_ => $i++,
13852             _handle_len_ => $i++,
13853             _seqno_o_ => $i++,
13854             _iline_o_ => $i++,
13855             _K_o_ => $i++,
13856             _K_c_ => $i++,
13857             _interrupted_list_rule_ => $i++,
13858 39     39   359 };
  39         157  
13859             } ## end BEGIN
13860              
13861             sub is_fragile_block_type {
13862 0     0 0 0 my ( $self, $block_type, $seqno ) = @_;
13863              
13864             # Given:
13865             # $block_type = the block type of a token, and
13866             # $seqno = its sequence number
13867              
13868             # Return:
13869             # true if this block type stays broken after being broken,
13870             # false otherwise
13871              
13872             # This sub has been added to isolate a tricky decision needed
13873             # to fix issue b1428.
13874              
13875             # The coding here needs to agree with:
13876             # - sub process_line where variable '$rbrace_follower' is set
13877             # - sub process_line_inner_loop where variable '$is_opening_BLOCK' is set,
13878              
13879 0 0 0     0 if ( $is_sort_map_grep_eval{$block_type}
      0        
13880             || $block_type eq 't'
13881             || $self->[_rshort_nested_]->{$seqno} )
13882             {
13883 0         0 return 0;
13884             }
13885              
13886 0         0 return 1;
13887              
13888             } ## end sub is_fragile_block_type
13889              
13890             { ## closure xlp_collapsed_lengths
13891              
13892             my $max_prong_len;
13893             my $len;
13894             my $last_nonblank_type;
13895             my @stack;
13896              
13897             sub xlp_collapsed_lengths_initialize {
13898              
13899 4     4 0 11 $max_prong_len = 0;
13900 4         10 $len = 0;
13901 4         12 $last_nonblank_type = 'b';
13902 4         12 @stack = ();
13903              
13904 4         18 push @stack, [
13905             0, # $max_prong_len,
13906             0, # $handle_len,
13907             SEQ_ROOT, # $seqno,
13908             undef, # $iline,
13909             undef, # $KK,
13910             undef, # $K_c,
13911             undef, # $interrupted_list_rule
13912             ];
13913              
13914 4         8 return;
13915             } ## end sub xlp_collapsed_lengths_initialize
13916              
13917             sub cumulative_length_to_comma {
13918 24     24 0 39 my ( $self, $KK, $K_comma, $K_closing ) = @_;
13919              
13920             # Given:
13921             # $KK = index of starting token, or blank before start
13922             # $K_comma = index of line-ending comma
13923             # $K_closing = index of the container closing token
13924              
13925             # Return:
13926             # $length = cumulative length of the term
13927              
13928 24         44 my $rLL = $self->[_rLL_];
13929 24 50       47 if ( $rLL->[$KK]->[_TYPE_] eq 'b' ) { $KK++ }
  0         0  
13930 24         37 my $length = 0;
13931 24 100 33     207 if (
      66        
      66        
      66        
13932             $KK < $K_comma
13933             && $rLL->[$K_comma]->[_TYPE_] eq ',' # should be true
13934              
13935             # Ignore if terminal comma, causes instability (b1297,
13936             # b1330)
13937             && (
13938             $K_closing - $K_comma > 2
13939             || ( $K_closing - $K_comma == 2
13940             && $rLL->[ $K_comma + 1 ]->[_TYPE_] ne 'b' )
13941             )
13942              
13943             # The comma should be in this container
13944             && ( $rLL->[$K_comma]->[_LEVEL_] - 1 ==
13945             $rLL->[$K_closing]->[_LEVEL_] )
13946             )
13947             {
13948              
13949             # An additional check: if line ends in ), and the ) has vtc then
13950             # skip this estimate. Otherwise, vtc can give oscillating results.
13951             # Fixes b1448. For example, this could be unstable:
13952              
13953             # ( $os ne 'win' ? ( -selectcolor => "red" ) : () ),
13954             # | |^--K_comma
13955             # | ^-- K_prev
13956             # ^--- KK
13957              
13958             # An alternative, possibly better strategy would be to try to turn
13959             # off -vtc locally, but it turns out to be difficult to locate the
13960             # appropriate closing token when it is not on the same line as its
13961             # opening token.
13962              
13963 18         49 my $K_prev = $self->K_previous_nonblank($K_comma);
13964 18 50 33     94 if ( defined($K_prev)
      33        
13965             && $K_prev >= $KK
13966             && $rLL->[$K_prev]->[_TYPE_SEQUENCE_] )
13967             {
13968 0         0 my $token = $rLL->[$K_prev]->[_TOKEN_];
13969 0         0 my $type = $rLL->[$K_prev]->[_TYPE_];
13970 0 0 0     0 if ( $closing_vertical_tightness{$token} && $type ne 'R' ) {
13971             ## type 'R' does not normally get broken, so ignore
13972             ## skip length calculation
13973 0         0 return 0;
13974             }
13975             }
13976 18 50       50 my $starting_len =
13977             $KK >= 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
13978 18         31 $length = $rLL->[$K_comma]->[_CUMULATIVE_LENGTH_] - $starting_len;
13979             }
13980 24         43 return $length;
13981             } ## end sub cumulative_length_to_comma
13982              
13983             sub xlp_collapsed_lengths {
13984              
13985 4     4 0 11 my $self = shift;
13986              
13987             #----------------------------------------------------------------
13988             # Define the collapsed lengths of containers for -xlp indentation
13989             #----------------------------------------------------------------
13990              
13991             # We need an estimate of the minimum required line length starting at
13992             # any opening container for the -xlp style. This is needed to avoid
13993             # using too much indentation space for lower level containers and
13994             # thereby running out of space for outer container tokens due to the
13995             # maximum line length limit.
13996              
13997             # The basic idea is that at each node in the tree we imagine that we
13998             # have a fork with a handle and collapsible prongs:
13999             #
14000             # |------------
14001             # |--------
14002             # ------------|-------
14003             # handle |------------
14004             # |--------
14005             # prongs
14006             #
14007             # Each prong has a minimum collapsed length. The collapsed length at a
14008             # node is the maximum of these minimum lengths, plus the handle length.
14009             # Each of the prongs may itself be a tree node.
14010              
14011             # This is just a rough calculation to get an approximate starting point
14012             # for indentation. Later routines will be more precise. It is
14013             # important that these estimates be independent of the line breaks of
14014             # the input stream in order to avoid instabilities.
14015              
14016 4         11 my $rLL = $self->[_rLL_];
14017 4         12 my $rlines = $self->[_rlines_];
14018 4         11 my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
14019 4         11 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
14020              
14021 4         9 my $K_start_multiline_qw;
14022 4         9 my $level_start_multiline_qw = 0;
14023              
14024 4         22 xlp_collapsed_lengths_initialize();
14025              
14026             #--------------------------------
14027             # Loop over all lines in the file
14028             #--------------------------------
14029 4         8 my $iline = -1;
14030 4         9 my $skip_next_line;
14031 4         11 foreach my $line_of_tokens ( @{$rlines} ) {
  4         14  
14032 172         219 $iline++;
14033 172 50       271 if ($skip_next_line) {
14034 0         0 $skip_next_line = 0;
14035 0         0 next;
14036             }
14037 172         279 my $line_type = $line_of_tokens->{_line_type};
14038 172 100       309 next if ( $line_type ne 'CODE' );
14039 170         227 my $CODE_type = $line_of_tokens->{_code_type};
14040              
14041             # Always skip blank lines
14042 170 100       315 next if ( $CODE_type eq 'BL' );
14043              
14044             # Note on other line types:
14045             # 'FS' (Format Skipping) lines may contain opening/closing tokens so
14046             # we have to process them to keep the stack correctly sequenced
14047             # 'VB' (Verbatim) lines could be skipped, but testing shows that
14048             # results look better if we include their lengths.
14049              
14050             # Also note that we could exclude -xlp formatting of containers with
14051             # 'FS' and 'VB' lines, but in testing that was not really beneficial
14052              
14053             # So we process tokens in 'FS' and 'VB' lines like all the rest...
14054              
14055 133         190 my $rK_range = $line_of_tokens->{_rK_range};
14056 133         170 my ( $K_first, $K_last ) = @{$rK_range};
  133         233  
14057 133 50 33     434 next unless ( defined($K_first) && defined($K_last) );
14058              
14059 133         241 my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
14060              
14061             # Always ignore block comments
14062 133 100 100     246 next if ( $has_comment && $K_first == $K_last );
14063              
14064             # Handle an intermediate line of a multiline qw quote. These may
14065             # require including some -ci or -i spaces. See cases c098/x063.
14066             # Updated to check all lines (not just $K_first==$K_last) to fix
14067             # b1316
14068 126         257 my $K_begin_loop = $K_first;
14069 126 50       231 if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
14070              
14071 0         0 my $KK = $K_first;
14072 0         0 my $level = $rLL->[$KK]->[_LEVEL_];
14073 0         0 my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
14074              
14075             # remember the level of the start
14076 0 0       0 if ( !defined($K_start_multiline_qw) ) {
14077 0         0 $K_start_multiline_qw = $K_first;
14078 0         0 $level_start_multiline_qw = $level;
14079             my $seqno_qw =
14080             $self->[_rstarting_multiline_qw_seqno_by_K_]
14081 0         0 ->{$K_start_multiline_qw};
14082 0 0       0 if ( !$seqno_qw ) {
14083 0         0 my $Kp = $self->K_previous_nonblank($K_first);
14084 0 0 0     0 if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
14085              
14086 0         0 $K_start_multiline_qw = $Kp;
14087 0         0 $level_start_multiline_qw =
14088             $rLL->[$K_start_multiline_qw]->[_LEVEL_];
14089             }
14090             else {
14091              
14092             # Fix for b1319, b1320
14093 0         0 $K_start_multiline_qw = undef;
14094             }
14095             }
14096             }
14097              
14098 0 0       0 if ( defined($K_start_multiline_qw) ) {
14099 0         0 $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
14100             $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
14101              
14102             # We may have to add the spaces of one level or ci level
14103             # ... it depends depends on the -xci flag, the -wn flag,
14104             # and if the qw uses a container token as the quote
14105             # delimiter.
14106              
14107             # First rule: add ci if there is a $ci_level
14108 0 0       0 if ($ci_level) {
14109 0         0 $len += $rOpts_continuation_indentation;
14110             }
14111              
14112             # Second rule: otherwise, look for an extra indentation
14113             # level from the start and add one indentation level if
14114             # found.
14115             else {
14116 0 0       0 if ( $level > $level_start_multiline_qw ) {
14117 0         0 $len += $rOpts_indent_columns;
14118             }
14119             }
14120              
14121 0 0       0 if ( $len > $max_prong_len ) { $max_prong_len = $len }
  0         0  
14122              
14123 0         0 $last_nonblank_type = 'q';
14124              
14125 0         0 $K_begin_loop = $K_first + 1;
14126              
14127             # We can skip to the next line if more tokens
14128 0 0       0 next if ( $K_begin_loop > $K_last );
14129             }
14130             }
14131              
14132 126         169 $K_start_multiline_qw = undef;
14133              
14134             # Find the terminal token, before any side comment
14135 126         167 my $K_terminal = $K_last;
14136 126 100       240 if ($has_comment) {
14137 5         12 $K_terminal -= 1;
14138 5 50 33     42 $K_terminal -= 1
14139             if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
14140             && $K_terminal > $K_first );
14141             }
14142              
14143             # Use length to terminal comma if interrupted list rule applies
14144 126 100 66     378 if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
14145 50         91 my $K_c = $stack[-1]->[_K_c_];
14146 50 50       90 if ( defined($K_c) ) {
14147              
14148             #----------------------------------------------------------
14149             # BEGIN patch for issue b1408: If this line ends in an
14150             # opening token, look for the closing token and comma at
14151             # the end of the next line. If so, combine the two lines to
14152             # get the correct sums. This problem seems to require -xlp
14153             # -vtc=2 and blank lines to occur. Use %is_opening_type to
14154             # fix b1431.
14155             #----------------------------------------------------------
14156 50 100 66     120 if ( $is_opening_type{ $rLL->[$K_terminal]->[_TYPE_] }
14157             && !$has_comment )
14158             {
14159 2         6 my $seqno_end = $rLL->[$K_terminal]->[_TYPE_SEQUENCE_];
14160 2         4 my $Kc_test = $rLL->[$K_terminal]->[_KNEXT_SEQ_ITEM_];
14161              
14162             # We are looking for a short broken remnant on the next
14163             # line; something like the third line here (b1408):
14164              
14165             # parent =>
14166             # Moose::Util::TypeConstraints::find_type_constraint(
14167             # 'RefXX' ),
14168             # or this
14169             #
14170             # Help::WorkSubmitter->_filter_chores_and_maybe_warn_user(
14171             # $story_set_all_chores),
14172             # or this (b1431):
14173             # $issue->{
14174             # 'borrowernumber'}, # borrowernumber
14175 2 50 66     19 if ( defined($Kc_test)
      66        
14176             && $seqno_end == $rLL->[$Kc_test]->[_TYPE_SEQUENCE_]
14177             && $rLL->[$Kc_test]->[_LINE_INDEX_] == $iline + 1 )
14178             {
14179 0         0 my $line_of_tokens_next = $rlines->[ $iline + 1 ];
14180             my $rtype_count =
14181 0         0 $rtype_count_by_seqno->{$seqno_end};
14182             my ( $K_first_next, $K_terminal_next ) =
14183 0         0 @{ $line_of_tokens_next->{_rK_range} };
  0         0  
14184              
14185             # backup at a side comment
14186 0 0 0     0 if ( defined($K_terminal_next)
14187             && $rLL->[$K_terminal_next]->[_TYPE_] eq '#' )
14188             {
14189 0         0 my $Kprev =
14190             $self->K_previous_nonblank($K_terminal_next);
14191 0 0 0     0 if ( defined($Kprev)
14192             && $Kprev >= $K_first_next )
14193             {
14194 0         0 $K_terminal_next = $Kprev;
14195             }
14196             }
14197              
14198 0 0 0     0 if (
      0        
      0        
      0        
      0        
      0        
14199             defined($K_terminal_next)
14200              
14201             # next line ends with a comma
14202             && $rLL->[$K_terminal_next]->[_TYPE_] eq ','
14203              
14204             # which follows the closing container token
14205             && (
14206             $K_terminal_next - $Kc_test == 1
14207             || ( $K_terminal_next - $Kc_test == 2
14208             && $rLL->[ $K_terminal_next - 1 ]
14209             ->[_TYPE_] eq 'b' )
14210             )
14211              
14212             # no commas in the container
14213             && ( !defined($rtype_count)
14214             || !$rtype_count->{','} )
14215              
14216             # for now, restrict this to a container with
14217             # just 1 or two tokens
14218             && $K_terminal_next - $K_terminal <= 5
14219              
14220             )
14221             {
14222              
14223             # combine the next line with the current line
14224 0         0 $K_terminal = $K_terminal_next;
14225 0         0 $skip_next_line = 1;
14226 0         0 if (DEBUG_COLLAPSED_LENGTHS) {
14227             print "Combining lines at line $iline\n";
14228             }
14229             }
14230             }
14231             }
14232              
14233             #--------------------------
14234             # END patch for issue b1408
14235             #--------------------------
14236 50 100       104 if ( $rLL->[$K_terminal]->[_TYPE_] eq ',' ) {
14237              
14238 24         55 my $length =
14239             $self->cumulative_length_to_comma( $K_first,
14240             $K_terminal, $K_c );
14241              
14242             # Fix for b1331: at a broken => item, include the
14243             # length of the previous half of the item plus one for
14244             # the missing space
14245 24 50       44 if ( $last_nonblank_type eq '=>' ) {
14246 0         0 $length += $len + 1;
14247             }
14248 24 100       56 if ( $length > $max_prong_len ) {
14249 17         31 $max_prong_len = $length;
14250             }
14251             }
14252             }
14253             }
14254              
14255             #----------------------------------
14256             # Loop over all tokens on this line
14257             #----------------------------------
14258 126         350 $self->xlp_collapse_lengths_inner_loop( $iline, $K_begin_loop,
14259             $K_terminal, $K_last );
14260              
14261             # Now take care of any side comment;
14262 126 100       287 if ($has_comment) {
14263 5 50       15 if ($rOpts_ignore_side_comment_lengths) {
14264 0         0 $len = 0;
14265             }
14266             else {
14267              
14268             # For a side comment when -iscl is not set, measure length from
14269             # the start of the previous nonblank token
14270 5 50       26 my $len0 =
14271             $K_terminal > 0
14272             ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
14273             : 0;
14274 5         7 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
14275 5 100       15 if ( $len > $max_prong_len ) { $max_prong_len = $len }
  2         22  
14276             }
14277             }
14278              
14279             } ## end loop over lines
14280              
14281 4         19 if (DEBUG_COLLAPSED_LENGTHS) {
14282             print "\nCollapsed lengths--\n";
14283             foreach
14284             my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
14285             {
14286             my $clen = $rcollapsed_length_by_seqno->{$key};
14287             print "$key -> $clen\n";
14288             }
14289             }
14290              
14291 4         21 return;
14292             } ## end sub xlp_collapsed_lengths
14293              
14294             sub xlp_collapse_lengths_inner_loop {
14295              
14296 126     126 0 231 my ( $self, $iline, $K_begin_loop, $K_terminal, $K_last ) = @_;
14297              
14298 126         183 my $rLL = $self->[_rLL_];
14299 126         174 my $K_closing_container = $self->[_K_closing_container_];
14300              
14301 126         177 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
14302 126         154 my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
14303 126         172 my $ris_permanently_broken = $self->[_ris_permanently_broken_];
14304 126         176 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
14305 126         174 my $rhas_broken_list = $self->[_rhas_broken_list_];
14306 126         161 my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
14307              
14308             #----------------------------------
14309             # Loop over tokens on this line ...
14310             #----------------------------------
14311 126         243 foreach my $KK ( $K_begin_loop .. $K_terminal ) {
14312              
14313 665         964 my $type = $rLL->[$KK]->[_TYPE_];
14314 665 100       1148 next if ( $type eq 'b' );
14315              
14316             #------------------------
14317             # Handle sequenced tokens
14318             #------------------------
14319 471         637 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
14320 471 100       741 if ($seqno) {
14321              
14322 120         188 my $token = $rLL->[$KK]->[_TOKEN_];
14323              
14324             #----------------------------
14325             # Entering a new container...
14326             #----------------------------
14327 120 100 66     437 if ( $is_opening_token{$token}
    50 33        
14328             && defined( $K_closing_container->{$seqno} ) )
14329             {
14330              
14331             # save current prong length
14332 60         95 $stack[-1]->[_max_prong_len_] = $max_prong_len;
14333 60         84 $max_prong_len = 0;
14334              
14335             # Start new prong one level deeper
14336 60         79 my $handle_len = 0;
14337 60 100       115 if ( $rblock_type_of_seqno->{$seqno} ) {
14338              
14339             # code blocks do not use -lp indentation, but behave as
14340             # if they had a handle of one indentation length
14341 10         18 $handle_len = $rOpts_indent_columns;
14342              
14343             }
14344             else {
14345 50 100       111 if ( $is_handle_type{$last_nonblank_type} ) {
14346 40         52 $handle_len = $len;
14347 40 100 66     146 $handle_len += 1
14348             if ( $KK > 0
14349             && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
14350             }
14351             }
14352              
14353             # Set a flag if the 'Interrupted List Rule' will be applied
14354             # (see sub copy_old_breakpoints).
14355             # - Added check on has_broken_list to fix issue b1298
14356              
14357             my $interrupted_list_rule =
14358             $ris_permanently_broken->{$seqno}
14359             && $ris_list_by_seqno->{$seqno}
14360 60   66     207 && !$rhas_broken_list->{$seqno}
14361             && !$rOpts_ignore_old_breakpoints;
14362              
14363             # NOTES: Since we are looking at old line numbers we have
14364             # to be very careful not to introduce an instability.
14365              
14366             # This following causes instability (b1288-b1296):
14367             # $interrupted_list_rule ||=
14368             # $rOpts_break_at_old_comma_breakpoints;
14369              
14370             # - We could turn off the interrupted list rule if there is
14371             # a broken sublist, to follow 'Compound List Rule 1'.
14372             # - We could use the _rhas_broken_list_ flag for this.
14373             # - But it seems safer not to do this, to avoid
14374             # instability, since the broken sublist could be
14375             # temporary. It seems better to let the formatting
14376             # stabilize by itself after one or two iterations.
14377             # - So, not doing this for now
14378              
14379             # Turn off the interrupted list rule if -vmll is set and a
14380             # list has '=>' characters. This avoids instabilities due
14381             # to dependence on old line breaks; issue b1325.
14382 60 50 66     166 if ( $interrupted_list_rule
14383             && $rOpts_variable_maximum_line_length )
14384             {
14385 0         0 my $rtype_count = $rtype_count_by_seqno->{$seqno};
14386 0 0 0     0 if ( $rtype_count && $rtype_count->{'=>'} ) {
14387 0         0 $interrupted_list_rule = 0;
14388             }
14389             }
14390              
14391 60         93 my $K_c = $K_closing_container->{$seqno};
14392              
14393             # Add length of any terminal list item if interrupted
14394             # so that the result is the same as if the term is
14395             # in the next line (b1446).
14396              
14397 60 50 66     139 if (
      33        
14398             $interrupted_list_rule
14399             && $KK < $K_terminal
14400              
14401             # The line should end in a comma
14402             # NOTE: this currently assumes break after comma.
14403             # As long as the other call to cumulative_length..
14404             # makes the same assumption we should remain stable.
14405             && $rLL->[$K_terminal]->[_TYPE_] eq ','
14406              
14407             )
14408             {
14409 0         0 $max_prong_len =
14410             $self->cumulative_length_to_comma( $KK + 1,
14411             $K_terminal, $K_c );
14412             }
14413              
14414 60         215 push @stack, [
14415              
14416             $max_prong_len,
14417             $handle_len,
14418             $seqno,
14419             $iline,
14420             $KK,
14421             $K_c,
14422             $interrupted_list_rule
14423             ];
14424              
14425             }
14426              
14427             #--------------------
14428             # Exiting a container
14429             #--------------------
14430             elsif ( $is_closing_token{$token} && @stack ) {
14431              
14432             # The current prong ends - get its handle
14433 60         95 my $item = pop @stack;
14434 60         86 my $handle_len = $item->[_handle_len_];
14435 60         87 my $seqno_o = $item->[_seqno_o_];
14436 60         85 my $iline_o = $item->[_iline_o_];
14437 60         78 my $K_o = $item->[_K_o_];
14438 60         83 my $K_c_expect = $item->[_K_c_];
14439 60         80 my $collapsed_len = $max_prong_len;
14440              
14441 60 50       115 if ( $seqno_o ne $seqno ) {
14442              
14443             # This can happen if input file has brace errors.
14444             # Otherwise it shouldn't happen. Not fatal but -lp
14445             # formatting could get messed up.
14446 0         0 if ( DEVEL_MODE && !get_saw_brace_error() ) {
14447             Fault(<<EOM);
14448             sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
14449             EOM
14450             }
14451             }
14452              
14453             #------------------------------------------
14454             # Rules to avoid scrunching code blocks ...
14455             #------------------------------------------
14456             # Some test cases:
14457             # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
14458 60         95 my $block_type = $rblock_type_of_seqno->{$seqno};
14459 60 100       113 if ($block_type) {
14460              
14461 10         21 my $K_c = $KK;
14462 10         20 my $block_length = MIN_BLOCK_LEN;
14463 10         24 my $is_one_line_block;
14464 10         20 my $level = $rLL->[$K_o]->[_LEVEL_];
14465 10 50 33     56 if ( defined($K_o) && defined($K_c) ) {
14466              
14467             # note: fixed 3 May 2022 (removed 'my')
14468 10         28 $block_length =
14469             $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
14470             $rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
14471 10         21 $is_one_line_block = $iline == $iline_o;
14472             }
14473              
14474             # Code block rule 1: Use the total block length if
14475             # it is less than the minimum.
14476 10 100 33     74 if ( $block_length < MIN_BLOCK_LEN ) {
    50 33        
    50          
14477 6         13 $collapsed_len = $block_length;
14478             }
14479              
14480             # Code block rule 2: Use the full length of a
14481             # one-line block to avoid breaking it, unless
14482             # extremely long. We do not need to do a precise
14483             # check here, because if it breaks then it will
14484             # stay broken on later iterations.
14485             elsif (
14486             $is_one_line_block
14487             && $block_length <
14488             $maximum_line_length_at_level[$level]
14489              
14490             # But skip this for blocks types which can reform,
14491             # like sort/map/grep/eval blocks, to avoid
14492             # instability (b1345, b1428)
14493             && $self->is_fragile_block_type( $block_type,
14494             $seqno )
14495             )
14496             {
14497 0         0 $collapsed_len = $block_length;
14498             }
14499              
14500             # Code block rule 3: Otherwise the length should be
14501             # at least MIN_BLOCK_LEN to avoid scrunching code
14502             # blocks.
14503             elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
14504 0         0 $collapsed_len = MIN_BLOCK_LEN;
14505             }
14506             else {
14507             ## ok
14508             }
14509             }
14510              
14511             # Store the result. Some extra space, '2', allows for
14512             # length of an opening token, inside space, comma, ...
14513             # This constant has been tuned to give good overall
14514             # results.
14515 60         90 $collapsed_len += 2;
14516 60         102 $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
14517              
14518             # Restart scanning the lower level prong
14519 60 50       107 if (@stack) {
14520 60         86 $max_prong_len = $stack[-1]->[_max_prong_len_];
14521 60         90 $collapsed_len += $handle_len;
14522 60 100       143 if ( $collapsed_len > $max_prong_len ) {
14523 33         65 $max_prong_len = $collapsed_len;
14524             }
14525             }
14526             }
14527              
14528             # it is a ternary - no special processing for these yet
14529             else {
14530              
14531             }
14532              
14533 120         181 $len = 0;
14534 120         158 $last_nonblank_type = $type;
14535 120         206 next;
14536             }
14537              
14538             #----------------------------
14539             # Handle non-container tokens
14540             #----------------------------
14541 351         450 my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
14542              
14543             # Count lengths of things like 'xx => yy' as a single item
14544 351 100       747 if ( $type eq '=>' ) {
    100          
    100          
14545 11         18 $len += $token_length + 1;
14546              
14547             # fix $len for -naws, issue b1457
14548 11 50       28 if ( !$rOpts_add_whitespace ) {
14549 0 0 0     0 if ( defined( $rLL->[ $KK + 1 ] )
14550             && $rLL->[ $KK + 1 ]->[_TYPE_] ne 'b' )
14551             {
14552 0         0 $len -= 1;
14553             }
14554             }
14555              
14556 11 100       49 if ( $len > $max_prong_len ) { $max_prong_len = $len }
  3         7  
14557             }
14558             elsif ( $last_nonblank_type eq '=>' ) {
14559 9         33 $len += $token_length;
14560 9 100       31 if ( $len > $max_prong_len ) { $max_prong_len = $len }
  1         3  
14561              
14562             # but only include one => per item
14563 9         12 $len = $token_length;
14564             }
14565              
14566             # include everything to end of line after a here target
14567             elsif ( $type eq 'h' ) {
14568 1         14 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
14569             $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
14570 1 50       6 if ( $len > $max_prong_len ) { $max_prong_len = $len }
  1         2  
14571             }
14572              
14573             # for everything else just use the token length
14574             else {
14575 330         433 $len = $token_length;
14576 330 100       556 if ( $len > $max_prong_len ) { $max_prong_len = $len }
  58         72  
14577             }
14578 351         513 $last_nonblank_type = $type;
14579              
14580             } ## end loop over tokens on this line
14581              
14582 126         233 return;
14583              
14584             } ## end sub xlp_collapse_lengths_inner_loop
14585              
14586             } ## end closure xlp_collapsed_lengths
14587              
14588             sub is_excluded_lp {
14589              
14590             # Decide if this container is excluded by user request:
14591             # returns true if this token is excluded (i.e., may not use -lp)
14592             # returns false otherwise
14593              
14594             # The control hash can either describe:
14595             # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or
14596             # what to include: $line_up_parentheses_control_is_lxpl = 0
14597              
14598             # Input parameter:
14599             # $KK = index of the container opening token
14600              
14601 320     320 0 535 my ( $self, $KK ) = @_;
14602 320         487 my $rLL = $self->[_rLL_];
14603 320         465 my $rtoken_vars = $rLL->[$KK];
14604 320         480 my $token = $rtoken_vars->[_TOKEN_];
14605 320         489 my $rflags = $line_up_parentheses_control_hash{$token};
14606              
14607             #-----------------------------------------------
14608             # TEST #1: check match to listed container types
14609             #-----------------------------------------------
14610 320 100       577 if ( !defined($rflags) ) {
14611              
14612             # There is no entry for this container, so we are done
14613 241         687 return !$line_up_parentheses_control_is_lxpl;
14614             }
14615              
14616 79         105 my ( $flag1, $flag2 ) = @{$rflags};
  79         157  
14617              
14618             #-----------------------------------------------------------
14619             # TEST #2: check match to flag1, the preceding nonblank word
14620             #-----------------------------------------------------------
14621 79   66     237 my $match_flag1 = !defined($flag1) || $flag1 eq '*';
14622 79 100       149 if ( !$match_flag1 ) {
14623              
14624             # Find the previous token
14625 39         58 my ( $is_f, $is_k, $is_w );
14626 39         81 my $Kp = $self->K_previous_nonblank($KK);
14627 39 50       76 if ( defined($Kp) ) {
14628 39         59 my $type_p = $rLL->[$Kp]->[_TYPE_];
14629 39         59 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
14630              
14631             # keyword?
14632 39         64 $is_k = $type_p eq 'k';
14633              
14634             # function call?
14635 39         62 $is_f = $self->[_ris_function_call_paren_]->{$seqno};
14636              
14637             # either keyword or function call?
14638 39   100     110 $is_w = $is_k || $is_f;
14639             }
14640              
14641             # Check for match based on flag1 and the previous token:
14642 39 50       131 if ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
  0 50       0  
    100          
    100          
    50          
    50          
14643 0         0 elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
14644 13         20 elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
14645 13         25 elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
14646 0         0 elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
14647 13         19 elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
14648             else {
14649             ## no match
14650             }
14651             }
14652              
14653             # See if we can exclude this based on the flag1 test...
14654 79 100       128 if ($line_up_parentheses_control_is_lxpl) {
14655 66 100       153 return 1 if ($match_flag1);
14656             }
14657             else {
14658 13 100       40 return 1 if ( !$match_flag1 );
14659             }
14660              
14661             #-------------------------------------------------------------
14662             # TEST #3: exclusion based on flag2 and the container contents
14663             #-------------------------------------------------------------
14664              
14665             # Note that this is an exclusion test for both -lpxl or -lpil input methods
14666             # The options are:
14667             # 0 or blank: ignore container contents
14668             # 1 exclude non-lists or lists with sublists
14669             # 2 same as 1 but also exclude lists with code blocks
14670              
14671 30         43 my $match_flag2;
14672 30 50       52 if ($flag2) {
14673              
14674 30         46 my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
14675              
14676 30         53 my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
14677 30         49 my $has_list = $self->[_rhas_list_]->{$seqno};
14678 30         45 my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
14679 30         47 my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
14680              
14681 30 100 100     230 if ( !$is_list
      100        
      100        
      100        
14682             || $has_list
14683             || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
14684             {
14685 13         23 $match_flag2 = 1;
14686             }
14687             }
14688 30         85 return $match_flag2;
14689             } ## end sub is_excluded_lp
14690              
14691             sub set_excluded_lp_containers {
14692              
14693 561     561 0 1715 my ($self) = @_;
14694 561 100       1915 return unless ($rOpts_line_up_parentheses);
14695 31         102 my $rLL = $self->[_rLL_];
14696 31 50 33     186 return unless ( defined($rLL) && @{$rLL} );
  31         134  
14697              
14698 31         113 my $K_opening_container = $self->[_K_opening_container_];
14699 31         82 my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
14700 31         106 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
14701              
14702 31         73 foreach my $seqno ( keys %{$K_opening_container} ) {
  31         180  
14703              
14704             # code blocks are always excluded by the -lp coding so we can skip them
14705 363 100       771 next if ( $rblock_type_of_seqno->{$seqno} );
14706              
14707 320         488 my $KK = $K_opening_container->{$seqno};
14708 320 50       581 next unless defined($KK);
14709              
14710             # see if a user exclusion rule turns off -lp for this container
14711 320 100       627 if ( $self->is_excluded_lp($KK) ) {
14712 71         151 $ris_excluded_lp_container->{$seqno} = 1;
14713             }
14714             }
14715 31         174 return;
14716             } ## end sub set_excluded_lp_containers
14717              
14718             ######################################
14719             # CODE SECTION 6: Process line-by-line
14720             ######################################
14721              
14722             sub process_all_lines {
14723              
14724             #----------------------------------------------------------
14725             # Main loop to format all lines of a file according to type
14726             #----------------------------------------------------------
14727              
14728 561     561 0 1347 my $self = shift;
14729 561         2962 my $rlines = $self->[_rlines_];
14730 561         1560 my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
14731 561         1388 my $file_writer_object = $self->[_file_writer_object_];
14732 561         1309 my $logger_object = $self->[_logger_object_];
14733 561         1370 my $vertical_aligner_object = $self->[_vertical_aligner_object_];
14734 561         1361 my $save_logfile = $self->[_save_logfile_];
14735              
14736             # Flag to prevent blank lines when POD occurs in a format skipping sect.
14737 561         1128 my $in_format_skipping_section;
14738              
14739             # set locations for blanks around long runs of keywords
14740 561         2909 my $rwant_blank_line_after = $self->keyword_group_scan();
14741              
14742 561         1333 my $line_type = EMPTY_STRING;
14743 561         1159 my $i_last_POD_END = -10;
14744 561         1261 my $i = -1;
14745 561         1161 foreach my $line_of_tokens ( @{$rlines} ) {
  561         1793  
14746              
14747             # insert blank lines requested for keyword sequences
14748 7666 100 100     21473 if ( defined( $rwant_blank_line_after->{$i} )
14749             && $rwant_blank_line_after->{$i} == 1 )
14750             {
14751 12         68 $self->want_blank_line();
14752             }
14753              
14754 7666         12430 $i++;
14755              
14756 7666         12288 my $last_line_type = $line_type;
14757 7666         19282 $line_type = $line_of_tokens->{_line_type};
14758 7666         16888 my $input_line = $line_of_tokens->{_line_text};
14759              
14760             # _line_type codes are:
14761             # SYSTEM - system-specific code before hash-bang line
14762             # CODE - line of perl code (including comments)
14763             # POD_START - line starting pod, such as '=head'
14764             # POD - pod documentation text
14765             # POD_END - last line of pod section, '=cut'
14766             # HERE - text of here-document
14767             # HERE_END - last line of here-doc (target word)
14768             # FORMAT - format section
14769             # FORMAT_END - last line of format section, '.'
14770             # SKIP - code skipping section
14771             # SKIP_END - last line of code skipping section, '#>>V'
14772             # DATA_START - __DATA__ line
14773             # DATA - unidentified text following __DATA__
14774             # END_START - __END__ line
14775             # END - unidentified text following __END__
14776             # ERROR - we are in big trouble, probably not a perl script
14777              
14778             # put a blank line after an =cut which comes before __END__ and __DATA__
14779             # (required by podchecker)
14780 7666 100 100     18336 if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
14781 13         29 $i_last_POD_END = $i;
14782 13         94 $file_writer_object->reset_consecutive_blank_lines();
14783 13 50 66     125 if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
14784 0         0 $self->want_blank_line();
14785             }
14786             }
14787              
14788             # handle line of code..
14789 7666 100       15165 if ( $line_type eq 'CODE' ) {
14790              
14791 7493         14947 my $CODE_type = $line_of_tokens->{_code_type};
14792 7493         12185 $in_format_skipping_section = $CODE_type eq 'FS';
14793              
14794             # Handle blank lines
14795 7493 100       14236 if ( $CODE_type eq 'BL' ) {
14796              
14797             # Keep this blank? Start with the flag -kbl=n, where
14798             # n=0 ignore all old blank lines
14799             # n=1 stable: keep old blanks, but limited by -mbl=n
14800             # n=2 keep all old blank lines, regardless of -mbl=n
14801             # If n=0 we delete all old blank lines and let blank line
14802             # rules generate any needed blank lines.
14803 807         1815 my $kgb_keep = $rOpts_keep_old_blank_lines;
14804              
14805             # Then delete lines requested by the keyword-group logic if
14806             # allowed
14807 807 100 100     4481 if ( $kgb_keep == 1
      100        
14808             && defined( $rwant_blank_line_after->{$i} )
14809             && $rwant_blank_line_after->{$i} == 2 )
14810             {
14811 3         5 $kgb_keep = 0;
14812             }
14813              
14814             # But always keep a blank line following an =cut
14815 807 50 66     2886 if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
14816 0         0 $kgb_keep = 1;
14817             }
14818              
14819 807 100       1918 if ($kgb_keep) {
14820 779         2862 $self->flush($CODE_type);
14821 779         3554 $file_writer_object->write_blank_code_line(
14822             $rOpts_keep_old_blank_lines == 2 );
14823 779         1616 $self->[_last_line_leading_type_] = 'b';
14824             }
14825 807         2109 next;
14826             }
14827             else {
14828              
14829             # Let logger see all non-blank lines of code. This is a slow
14830             # operation so we avoid it if it is not going to be saved.
14831 6686 100 66     15277 if ( $save_logfile && $logger_object ) {
14832 6         29 $logger_object->black_box( $line_of_tokens,
14833             $vertical_aligner_object->get_output_line_number );
14834             }
14835             }
14836              
14837             # Handle Format Skipping (FS) and Verbatim (VB) Lines
14838 6686 100 100     23899 if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
14839 98         399 $self->write_unindented_line($input_line);
14840 98         367 $file_writer_object->reset_consecutive_blank_lines();
14841 98         227 next;
14842             }
14843              
14844             # Handle all other lines of code
14845 6588         16610 $self->process_line_of_CODE($line_of_tokens);
14846             }
14847              
14848             # handle line of non-code..
14849             else {
14850              
14851             # set special flags
14852 173         343 my $skip_line = 0;
14853 173 100 100     956 if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
    100          
    100          
14854              
14855             # Pod docs should have a preceding blank line. But stay
14856             # out of __END__ and __DATA__ sections, because
14857             # the user may be using this section for any purpose whatsoever
14858 67 100       171 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
  12         24  
14859 67 100       145 if ( $rOpts->{'trim-pod'} ) {
14860 6         11 chomp $input_line;
14861 6         24 $input_line =~ s/\s+$//;
14862 6         11 $input_line .= "\n";
14863             }
14864 67 100 100     354 if ( !$skip_line
      100        
      100        
14865             && !$in_format_skipping_section
14866             && $line_type eq 'POD_START'
14867             && !$self->[_saw_END_or_DATA_] )
14868             {
14869 9         1025 $self->want_blank_line();
14870             }
14871             }
14872              
14873             # leave the blank counters in a predictable state
14874             # after __END__ or __DATA__
14875             elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
14876 7         36 $file_writer_object->reset_consecutive_blank_lines();
14877 7         16 $self->[_saw_END_or_DATA_] = 1;
14878             }
14879              
14880             # Patch to avoid losing blank lines after a code-skipping block;
14881             # fixes case c047.
14882             elsif ( $line_type eq 'SKIP_END' ) {
14883 2         14 $file_writer_object->reset_consecutive_blank_lines();
14884             }
14885             else {
14886             ## some other line type
14887             }
14888              
14889             # write unindented non-code line
14890 173 100       396 if ( !$skip_line ) {
14891 161         431 $self->write_unindented_line($input_line);
14892             }
14893             }
14894             }
14895 561         2550 return;
14896              
14897             } ## end sub process_all_lines
14898              
14899             { ## closure keyword_group_scan
14900              
14901             # this is the return var
14902             my $rhash_of_desires;
14903              
14904             # user option variables for -kgb
14905             my (
14906              
14907             $rOpts_kgb_after,
14908             $rOpts_kgb_before,
14909             $rOpts_kgb_delete,
14910             $rOpts_kgb_inside,
14911             $rOpts_kgb_size_max,
14912             $rOpts_kgb_size_min,
14913              
14914             );
14915              
14916             # group variables, initialized by kgb_initialize_group_vars
14917             my ( $ibeg, $iend, $count, $level_beg, $K_closing );
14918             my ( @iblanks, @group, @subgroup );
14919              
14920             # line variables, updated by sub keyword_group_scan
14921             my ( $line_type, $CODE_type, $K_first, $K_last );
14922             my $number_of_groups_seen;
14923              
14924             #------------------------
14925             # -kgb helper subroutines
14926             #------------------------
14927              
14928             sub kgb_initialize_options {
14929              
14930             # check and initialize user options for -kgb
14931             # return error flag:
14932             # true for some input error, do not continue
14933             # false if ok
14934              
14935             # Local copies of the various control parameters
14936 549     549 0 1544 $rOpts_kgb_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
14937 549         1294 $rOpts_kgb_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
14938 549         1300 $rOpts_kgb_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
14939 549         1355 $rOpts_kgb_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
14940              
14941             # A range of sizes can be input with decimal notation like 'min.max'
14942             # with any number of dots between the two numbers. Examples:
14943             # string => min max matches
14944             # 1.1 1 1 exactly 1
14945             # 1.3 1 3 1,2, or 3
14946             # 1..3 1 3 1,2, or 3
14947             # 5 5 - 5 or more
14948             # 6. 6 - 6 or more
14949             # .2 - 2 up to 2
14950             # 1.0 1 0 nothing
14951 549         1458 my $rOpts_kgb_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
14952 549         3277 ( $rOpts_kgb_size_min, $rOpts_kgb_size_max ) = split /\.+/,
14953             $rOpts_kgb_size;
14954 549 50 33     8287 if ( $rOpts_kgb_size_min && $rOpts_kgb_size_min !~ /^\d+$/
      33        
      33        
14955             || $rOpts_kgb_size_max && $rOpts_kgb_size_max !~ /^\d+$/ )
14956             {
14957 0         0 Warn(<<EOM);
14958             Unexpected value for -kgbs: '$rOpts_kgb_size'; expecting 'min' or 'min.max';
14959             ignoring all -kgb flags
14960             EOM
14961              
14962             # Turn this option off so that this message does not keep repeating
14963             # during iterations and other files.
14964 0         0 $rOpts->{'keyword-group-blanks-size'} = EMPTY_STRING;
14965 0         0 return $rhash_of_desires;
14966             }
14967 549 50       2161 $rOpts_kgb_size_min = 1 unless ($rOpts_kgb_size_min);
14968              
14969 549 50 33     2243 if ( $rOpts_kgb_size_max && $rOpts_kgb_size_max < $rOpts_kgb_size_min )
14970             {
14971 0         0 return $rhash_of_desires;
14972             }
14973              
14974             # check codes for $rOpts_kgb_before and
14975             # $rOpts_kgb_after:
14976             # 0 = never (delete if exist)
14977             # 1 = stable (keep unchanged)
14978             # 2 = always (insert if missing)
14979 549   66     5809 my $ok = $rOpts_kgb_size_min > 0
14980             && ( $rOpts_kgb_before != 1
14981             || $rOpts_kgb_after != 1
14982             || $rOpts_kgb_inside
14983             || $rOpts_kgb_delete );
14984              
14985 549 100       2165 return $rhash_of_desires if ( !$ok );
14986              
14987 6         13 return;
14988             } ## end sub kgb_initialize_options
14989              
14990             sub kgb_initialize_group_vars {
14991              
14992             # Definitions:
14993             # $ibeg = first line index of this entire group
14994             # $iend = last line index of this entire group
14995             # $count = total number of keywords seen in this entire group
14996             # $level_beg = indentation level of this group
14997             # @group = [ $i, $token, $count ] =list of all keywords & blanks
14998             # @subgroup = $j, index of group where token changes
14999             # @iblanks = line indexes of blank lines in input stream in this group
15000             # where i=starting line index
15001             # token (the keyword)
15002             # count = number of this token in this subgroup
15003             # j = index in group where token changes
15004 31     31 0 48 $ibeg = -1;
15005 31         54 $iend = undef;
15006 31         45 $level_beg = -1;
15007 31         42 $K_closing = undef;
15008 31         47 $count = 0;
15009 31         75 @group = ();
15010 31         45 @subgroup = ();
15011 31         43 @iblanks = ();
15012 31         52 return;
15013             } ## end sub kgb_initialize_group_vars
15014              
15015             sub kgb_initialize_line_vars {
15016 187     187 0 286 $CODE_type = EMPTY_STRING;
15017 187         267 $K_first = undef;
15018 187         242 $K_last = undef;
15019 187         247 $line_type = EMPTY_STRING;
15020 187         242 return;
15021             } ## end sub kgb_initialize_line_vars
15022              
15023             sub kgb_initialize {
15024              
15025             # initialize all closure variables for -kgb
15026             # return:
15027             # true to cause immediate exit (something is wrong)
15028             # false to continue ... all is okay
15029              
15030             # This is the return variable:
15031 549     549 0 1789 $rhash_of_desires = {};
15032              
15033             # initialize and check user options;
15034 549         2298 my $quit = kgb_initialize_options();
15035 549 100       1868 if ($quit) { return $quit }
  543         1482  
15036              
15037             # initialize variables for the current group and subgroups:
15038 6         32 kgb_initialize_group_vars();
15039              
15040             # initialize variables for the most recently seen line:
15041 6         28 kgb_initialize_line_vars();
15042              
15043 6         11 $number_of_groups_seen = 0;
15044              
15045             # all okay
15046 6         11 return;
15047             } ## end sub kgb_initialize
15048              
15049             sub kgb_insert_blank_after {
15050 12     12 0 35 my ($i) = @_;
15051 12         37 $rhash_of_desires->{$i} = 1;
15052 12         22 my $ip = $i + 1;
15053 12 50 33     49 if ( defined( $rhash_of_desires->{$ip} )
15054             && $rhash_of_desires->{$ip} == 2 )
15055             {
15056 0         0 $rhash_of_desires->{$ip} = 0;
15057             }
15058 12         33 return;
15059             } ## end sub kgb_insert_blank_after
15060              
15061             sub kgb_split_into_sub_groups {
15062              
15063             # place blanks around long sub-groups of keywords
15064             # ...if requested
15065 9 50   9 0 30 return unless ($rOpts_kgb_inside);
15066              
15067             # loop over sub-groups, index k
15068 9         19 push @subgroup, scalar @group;
15069 9         18 my $kbeg = 1;
15070 9         21 my $kend = @subgroup - 1;
15071 9         38 foreach my $k ( $kbeg .. $kend ) {
15072              
15073             # index j runs through all keywords found
15074 23         48 my $j_b = $subgroup[ $k - 1 ];
15075 23         38 my $j_e = $subgroup[$k] - 1;
15076              
15077             # index i is the actual line number of a keyword
15078 23         33 my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
  23         55  
15079 23         34 my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
  23         43  
15080 23         50 my $num = $count_e - $count_b + 1;
15081              
15082             # This subgroup runs from line $ib to line $ie-1, but may contain
15083             # blank lines
15084 23 100       62 if ( $num >= $rOpts_kgb_size_min ) {
15085              
15086             # if there are blank lines, we require that at least $num lines
15087             # be non-blank up to the boundary with the next subgroup.
15088 5         23 my $nog_b = my $nog_e = 1;
15089 5 50 33     38 if ( @iblanks && !$rOpts_kgb_delete ) {
15090 0         0 my $j_bb = $j_b + $num - 1;
15091 0         0 my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
  0         0  
15092 0         0 $nog_b = $count_bb - $count_b + 1 == $num;
15093              
15094 0         0 my $j_ee = $j_e - ( $num - 1 );
15095 0         0 my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
  0         0  
15096 0         0 $nog_e = $count_e - $count_ee + 1 == $num;
15097             }
15098 5 100 66     28 if ( $nog_b && $k > $kbeg ) {
15099 3         12 kgb_insert_blank_after( $i_b - 1 );
15100             }
15101 5 100 66     32 if ( $nog_e && $k < $kend ) {
15102             my ( $i_ep, $tok_ep, $count_ep ) =
15103 2         11 @{ $group[ $j_e + 1 ] };
  2         7  
15104 2         37 kgb_insert_blank_after( $i_ep - 1 );
15105             }
15106             }
15107             }
15108 9         21 return;
15109             } ## end sub kgb_split_into_sub_groups
15110              
15111             sub kgb_delete_if_blank {
15112 0     0 0 0 my ( $self, $i ) = @_;
15113              
15114             # delete line $i if it is blank
15115 0         0 my $rlines = $self->[_rlines_];
15116 0 0 0     0 return if ( $i < 0 || $i >= @{$rlines} );
  0         0  
15117 0 0       0 return if ( $rlines->[$i]->{_line_type} ne 'CODE' );
15118 0         0 my $code_type = $rlines->[$i]->{_code_type};
15119 0 0       0 if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
  0         0  
15120 0         0 return;
15121             } ## end sub kgb_delete_if_blank
15122              
15123             sub kgb_delete_inner_blank_lines {
15124              
15125             # always remove unwanted trailing blank lines from our list
15126 6 100   6 0 31 return unless (@iblanks);
15127 1         7 while ( my $ibl = pop(@iblanks) ) {
15128 1 50       6 if ( $ibl < $iend ) { push @iblanks, $ibl; last }
  1         3  
  1         2  
15129 0         0 $iend = $ibl;
15130             }
15131              
15132             # now mark mark interior blank lines for deletion if requested
15133 1 50       5 return unless ($rOpts_kgb_delete);
15134              
15135 1         7 while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
  3         12  
15136              
15137 1         2 return;
15138             } ## end sub kgb_delete_inner_blank_lines
15139              
15140             sub kgb_end_group {
15141              
15142             # end a group of keywords
15143 25     25 0 53 my ( $self, $bad_ending ) = @_;
15144 25 100 66     108 if ( defined($ibeg) && $ibeg >= 0 ) {
15145              
15146             # then handle sufficiently large groups
15147 9 100       27 if ( $count >= $rOpts_kgb_size_min ) {
15148              
15149 6         16 $number_of_groups_seen++;
15150              
15151             # do any blank deletions regardless of the count
15152 6         33 kgb_delete_inner_blank_lines();
15153              
15154 6         21 my $rlines = $self->[_rlines_];
15155 6 50       25 if ( $ibeg > 0 ) {
15156 6         19 my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
15157              
15158             # patch for hash bang line which is not currently marked as
15159             # a comment; mark it as a comment
15160 6 100 100     43 if ( $ibeg == 1 && !$code_type ) {
15161 2         8 my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
15162 2 100 66     19 $code_type = 'BC'
15163             if ( $line_text && $line_text =~ /^#/ );
15164             }
15165              
15166             # Do not insert a blank after a comment
15167             # (this could be subject to a flag in the future)
15168 6 100       44 if ( $code_type !~ /(?:BC|SBC|SBCX)/ ) {
15169 4 50       15 if ( $rOpts_kgb_before == INSERT ) {
    0          
15170 4         18 kgb_insert_blank_after( $ibeg - 1 );
15171              
15172             }
15173             elsif ( $rOpts_kgb_before == DELETE ) {
15174 0         0 $self->kgb_delete_if_blank( $ibeg - 1 );
15175             }
15176             else {
15177             ## == STABLE
15178             }
15179             }
15180             }
15181              
15182             # We will only put blanks before code lines. We could loosen
15183             # this rule a little, but we have to be very careful because
15184             # for example we certainly don't want to drop a blank line
15185             # after a line like this:
15186             # my $var = <<EOM;
15187 6 100 66     34 if ( $line_type eq 'CODE' && defined($K_first) ) {
15188              
15189             # - Do not put a blank before a line of different level
15190             # - Do not put a blank line if we ended the search badly
15191             # - Do not put a blank at the end of the file
15192             # - Do not put a blank line before a hanging side comment
15193 5         14 my $rLL = $self->[_rLL_];
15194 5         13 my $level = $rLL->[$K_first]->[_LEVEL_];
15195 5         10 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
15196              
15197 5 50 66     37 if ( $level == $level_beg
      100        
      66        
      66        
15198             && $ci_level == 0
15199             && !$bad_ending
15200 3         32 && $iend < @{$rlines}
15201             && $CODE_type ne 'HSC' )
15202             {
15203 3 50       15 if ( $rOpts_kgb_after == INSERT ) {
    0          
15204 3         14 kgb_insert_blank_after($iend);
15205             }
15206             elsif ( $rOpts_kgb_after == DELETE ) {
15207 0         0 $self->kgb_delete_if_blank( $iend + 1 );
15208             }
15209             else {
15210             ## == STABLE
15211             }
15212             }
15213             }
15214             }
15215 9         36 kgb_split_into_sub_groups();
15216             }
15217              
15218             # reset for another group
15219 25         66 kgb_initialize_group_vars();
15220              
15221 25         35 return;
15222             } ## end sub kgb_end_group
15223              
15224             sub kgb_find_container_end {
15225              
15226             # If the keyword line is continued onto subsequent lines, find the
15227             # closing token '$K_closing' so that we can easily skip past the
15228             # contents of the container.
15229              
15230             # We only set this value if we find a simple list, meaning
15231             # -contents only one level deep
15232             # -not welded
15233              
15234 75     75 0 160 my ($self) = @_;
15235              
15236             # First check: skip if next line is not one deeper
15237 75         158 my $Knext_nonblank = $self->K_next_nonblank($K_last);
15238 75 50       146 return if ( !defined($Knext_nonblank) );
15239 75         107 my $rLL = $self->[_rLL_];
15240 75         127 my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
15241 75 100       172 return if ( $level_next != $level_beg + 1 );
15242              
15243             # Find the parent container of the first token on the next line
15244 7         33 my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
15245 7 50       17 return unless ( defined($parent_seqno) );
15246              
15247             # Must not be a weld (can be unstable)
15248             return
15249 7 50 33     21 if ( $total_weld_count
15250             && $self->is_welded_at_seqno($parent_seqno) );
15251              
15252             # Opening container must exist and be on this line
15253 7         16 my $Ko = $self->[_K_opening_container_]->{$parent_seqno};
15254 7 50 33     43 return if ( !defined($Ko) || $Ko <= $K_first || $Ko > $K_last );
      33        
15255              
15256             # Verify that the closing container exists and is on a later line
15257 7         13 my $Kc = $self->[_K_closing_container_]->{$parent_seqno};
15258 7 50 33     39 return if ( !defined($Kc) || $Kc <= $K_last );
15259              
15260             # That's it
15261 7         17 $K_closing = $Kc;
15262              
15263 7         14 return;
15264             } ## end sub kgb_find_container_end
15265              
15266             sub kgb_add_to_group {
15267 75     75 0 159 my ( $self, $i, $token, $level ) = @_;
15268              
15269             # End the previous group if we have reached the maximum
15270             # group size
15271 75 50 33     150 if ( $rOpts_kgb_size_max && @group >= $rOpts_kgb_size_max ) {
15272 0         0 $self->kgb_end_group();
15273             }
15274              
15275 75 100       188 if ( @group == 0 ) {
15276 9         19 $ibeg = $i;
15277 9         16 $level_beg = $level;
15278 9         22 $count = 0;
15279             }
15280              
15281 75         111 $count++;
15282 75         96 $iend = $i;
15283              
15284             # New sub-group?
15285 75 100 100     249 if ( !@group || $token ne $group[-1]->[1] ) {
15286 23         46 push @subgroup, scalar(@group);
15287             }
15288 75         226 push @group, [ $i, $token, $count ];
15289              
15290             # remember if this line ends in an open container
15291 75         210 $self->kgb_find_container_end();
15292              
15293 75         104 return;
15294             } ## end sub kgb_add_to_group
15295              
15296             #---------------------
15297             # -kgb main subroutine
15298             #---------------------
15299              
15300             sub keyword_group_scan {
15301 561     561 0 1243 my $self = shift;
15302              
15303             # Called once per file to process --keyword-group-blanks-* parameters.
15304              
15305             # Task:
15306             # Manipulate blank lines around keyword groups (kgb* flags)
15307             # Scan all lines looking for runs of consecutive lines beginning with
15308             # selected keywords. Example keywords are 'my', 'our', 'local', ... but
15309             # they may be anything. We will set flags requesting that blanks be
15310             # inserted around and within them according to input parameters. Note
15311             # that we are scanning the lines as they came in in the input stream, so
15312             # they are not necessarily well formatted.
15313              
15314             # Returns:
15315             # The output of this sub is a return hash ref whose keys are the indexes
15316             # of lines after which we desire a blank line. For line index $i:
15317             # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
15318             # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
15319              
15320             # Nothing to do if no blanks can be output. This test added to fix
15321             # case b760.
15322 561 100       2048 if ( !$rOpts_maximum_consecutive_blank_lines ) {
15323 12         39 return $rhash_of_desires;
15324             }
15325              
15326             #---------------
15327             # initialization
15328             #---------------
15329 549         2385 my $quit = kgb_initialize();
15330 549 100       1931 if ($quit) { return $rhash_of_desires }
  543         1242  
15331              
15332 6         16 my $rLL = $self->[_rLL_];
15333 6         17 my $rlines = $self->[_rlines_];
15334              
15335 6         34 $self->kgb_end_group();
15336 6         28 my $i = -1;
15337             my $Opt_repeat_count =
15338 6         35 $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
15339              
15340             #----------------------------------
15341             # loop over all lines of the source
15342             #----------------------------------
15343 6         12 foreach my $line_of_tokens ( @{$rlines} ) {
  6         22  
15344              
15345 181         229 $i++;
15346             last
15347 181 50 33     367 if ( $Opt_repeat_count > 0
15348             && $number_of_groups_seen >= $Opt_repeat_count );
15349              
15350 181         355 kgb_initialize_line_vars();
15351              
15352 181         314 $line_type = $line_of_tokens->{_line_type};
15353              
15354             # always end a group at non-CODE
15355 181 100       351 if ( $line_type ne 'CODE' ) { $self->kgb_end_group(); next }
  5         16  
  5         13  
15356              
15357 176         254 $CODE_type = $line_of_tokens->{_code_type};
15358              
15359             # end any group at a format skipping line
15360 176 50 66     352 if ( $CODE_type && $CODE_type eq 'FS' ) {
15361 0         0 $self->kgb_end_group();
15362 0         0 next;
15363             }
15364              
15365             # continue in a verbatim (VB) type; it may be quoted text
15366 176 100       295 if ( $CODE_type eq 'VB' ) {
15367 6 50       12 if ( $ibeg >= 0 ) { $iend = $i; }
  6         12  
15368 6         12 next;
15369             }
15370              
15371             # and continue in blank (BL) types
15372 170 100       300 if ( $CODE_type eq 'BL' ) {
15373 5 100       19 if ( $ibeg >= 0 ) {
15374 3         8 $iend = $i;
15375 3         8 push @{iblanks}, $i;
15376              
15377             # propagate current subgroup token
15378 3         6 my $tok = $group[-1]->[1];
15379 3         13 push @group, [ $i, $tok, $count ];
15380             }
15381 5         10 next;
15382             }
15383              
15384             # examine the first token of this line
15385 165         240 my $rK_range = $line_of_tokens->{_rK_range};
15386 165         216 ( $K_first, $K_last ) = @{$rK_range};
  165         316  
15387 165 50       352 if ( !defined($K_first) ) {
15388              
15389             # Somewhat unexpected blank line..
15390             # $rK_range is normally defined for line type CODE, but this can
15391             # happen for example if the input line was a single semicolon
15392             # which is being deleted. In that case there was code in the
15393             # input file but it is not being retained. So we can silently
15394             # return.
15395 0         0 return $rhash_of_desires;
15396             }
15397              
15398 165         246 my $level = $rLL->[$K_first]->[_LEVEL_];
15399 165         267 my $type = $rLL->[$K_first]->[_TYPE_];
15400 165         272 my $token = $rLL->[$K_first]->[_TOKEN_];
15401 165         227 my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
15402              
15403             # End a group 'badly' at an unexpected level. This will prevent
15404             # blank lines being incorrectly placed after the end of the group.
15405             # We are looking for any deviation from two acceptable patterns:
15406             # PATTERN 1: a simple list; secondary lines are at level+1
15407             # PATTERN 2: a long statement; all secondary lines same level
15408             # This was added as a fix for case b1177, in which a complex
15409             # structure got incorrectly inserted blank lines.
15410 165 100       302 if ( $ibeg >= 0 ) {
15411              
15412             # Check for deviation from PATTERN 1, simple list:
15413 118 100 100     376 if ( defined($K_closing) && $K_first < $K_closing ) {
    100          
15414 19 100       39 $self->kgb_end_group(1) if ( $level != $level_beg + 1 );
15415             }
15416              
15417             # Check for deviation from PATTERN 2, single statement:
15418 1         14 elsif ( $level != $level_beg ) { $self->kgb_end_group(1) }
15419             else {
15420             ## no deviation
15421             }
15422             }
15423              
15424             # Do not look for keywords in lists ( keyword 'my' can occur in
15425             # lists, see case b760); fixed for c048.
15426 165 100       298 if ( $self->is_list_by_K($K_first) ) {
15427 27 100       57 if ( $ibeg >= 0 ) { $iend = $i }
  15         19  
15428 27         51 next;
15429             }
15430              
15431             # see if this is a code type we seek (i.e. comment)
15432 138 50 66     275 if ( $CODE_type
      33        
15433             && $keyword_group_list_comment_pattern
15434             && $CODE_type =~ /$keyword_group_list_comment_pattern/ )
15435             {
15436              
15437 0         0 my $tok = $CODE_type;
15438              
15439             # Continuing a group
15440 0 0 0     0 if ( $ibeg >= 0 && $level == $level_beg ) {
15441 0         0 $self->kgb_add_to_group( $i, $tok, $level );
15442             }
15443              
15444             # Start new group
15445             else {
15446              
15447             # first end old group if any; we might be starting new
15448             # keywords at different level
15449 0 0       0 if ( $ibeg >= 0 ) { $self->kgb_end_group(); }
  0         0  
15450 0         0 $self->kgb_add_to_group( $i, $tok, $level );
15451             }
15452 0         0 next;
15453             }
15454              
15455             # See if it is a keyword we seek, but never start a group in a
15456             # continuation line; the code may be badly formatted.
15457 138 100 100     844 if ( $ci_level == 0
    100 100        
15458             && $type eq 'k'
15459             && $token =~ /$keyword_group_list_pattern/ )
15460             {
15461              
15462             # Continuing a keyword group
15463 75 100 66     235 if ( $ibeg >= 0 && $level == $level_beg ) {
15464 66         162 $self->kgb_add_to_group( $i, $token, $level );
15465             }
15466              
15467             # Start new keyword group
15468             else {
15469              
15470             # first end old group if any; we might be starting new
15471             # keywords at different level
15472 9 50       26 if ( $ibeg >= 0 ) { $self->kgb_end_group(); }
  0         0  
15473 9         36 $self->kgb_add_to_group( $i, $token, $level );
15474             }
15475 75         144 next;
15476             }
15477              
15478             # This is not one of our keywords, but we are in a keyword group
15479             # so see if we should continue or quit
15480             elsif ( $ibeg >= 0 ) {
15481              
15482             # - bail out on a large level change; we may have walked into a
15483             # data structure or anonymous sub code.
15484 35 50 33     137 if ( $level > $level_beg + 1 || $level < $level_beg ) {
15485 0         0 $self->kgb_end_group(1);
15486 0         0 next;
15487             }
15488              
15489             # - keep going on a continuation line of the same level, since
15490             # it is probably a continuation of our previous keyword,
15491             # - and keep going past hanging side comments because we never
15492             # want to interrupt them.
15493 35 100 100     144 if ( ( ( $level == $level_beg ) && $ci_level > 0 )
      100        
15494             || $CODE_type eq 'HSC' )
15495             {
15496 25         32 $iend = $i;
15497 25         51 next;
15498             }
15499              
15500             # - continue if if we are within in a container which started
15501             # with the line of the previous keyword.
15502 10 100 100     56 if ( defined($K_closing) && $K_first <= $K_closing ) {
15503              
15504             # continue if entire line is within container
15505 5 100       17 if ( $K_last <= $K_closing ) { $iend = $i; next }
  3         17  
  3         9  
15506              
15507             # continue at ); or }; or ];
15508 2         10 my $KK = $K_closing + 1;
15509 2 100       15 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
15510 1 50       5 if ( $KK < $K_last ) {
15511 0 0       0 if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
  0         0  
15512 0 0 0     0 if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' )
15513             {
15514 0         0 $self->kgb_end_group(1);
15515 0         0 next;
15516             }
15517             }
15518 1         3 $iend = $i;
15519 1         4 next;
15520             }
15521              
15522 1         5 $self->kgb_end_group(1);
15523 1         2 next;
15524             }
15525              
15526             # - end the group if none of the above
15527 5         16 $self->kgb_end_group();
15528 5         24 next;
15529             }
15530              
15531             # not in a keyword group; continue
15532 28         55 else { next }
15533             } ## end of loop over all lines
15534              
15535 6         31 $self->kgb_end_group();
15536 6         19 return $rhash_of_desires;
15537              
15538             } ## end sub keyword_group_scan
15539             } ## end closure keyword_group_scan
15540              
15541             #######################################
15542             # CODE SECTION 7: Process lines of code
15543             #######################################
15544              
15545             { ## begin closure process_line_of_CODE
15546              
15547             # The routines in this closure receive lines of code and combine them into
15548             # 'batches' and send them along. A 'batch' is the unit of code which can be
15549             # processed further as a unit. It has the property that it is the largest
15550             # amount of code into which which perltidy is free to place one or more
15551             # line breaks within it without violating any constraints.
15552              
15553             # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
15554              
15555             # flags needed by the store routine
15556             my $line_of_tokens;
15557             my $no_internal_newlines;
15558             my $CODE_type;
15559             my $current_line_starts_in_quote;
15560              
15561             # range of K of tokens for the current line
15562             my ( $K_first, $K_last );
15563              
15564             my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
15565             $rblock_type_of_seqno, $ri_starting_one_line_block );
15566              
15567             # past stored nonblank tokens and flags
15568             my (
15569             $K_last_nonblank_code, $K_dangling_elsif,
15570             $is_static_block_comment, $last_CODE_type,
15571             $last_line_had_side_comment, $next_parent_seqno,
15572             $next_slevel,
15573             );
15574              
15575             # Called once at the start of a new file
15576             sub initialize_process_line_of_CODE {
15577 561     561 0 1422 $K_last_nonblank_code = undef;
15578 561         1242 $K_dangling_elsif = 0;
15579 561         1174 $is_static_block_comment = 0;
15580 561         1198 $last_line_had_side_comment = 0;
15581 561         1337 $next_parent_seqno = SEQ_ROOT;
15582 561         1242 $next_slevel = undef;
15583 561         1073 return;
15584             } ## end sub initialize_process_line_of_CODE
15585              
15586             # Batch variables: these describe the current batch of code being formed
15587             # and sent down the pipeline. They are initialized in the next
15588             # sub.
15589             my (
15590             $rbrace_follower, $index_start_one_line_block,
15591             $starting_in_quote, $ending_in_quote,
15592             );
15593              
15594             # Called before the start of each new batch
15595             sub initialize_batch_variables {
15596              
15597             # Initialize array values for a new batch. Any changes here must be
15598             # carefully coordinated with sub store_token_to_go.
15599              
15600 5122     5122 0 9116 $max_index_to_go = UNDEFINED_INDEX;
15601 5122         8584 $summed_lengths_to_go[0] = 0;
15602 5122         8460 $nesting_depth_to_go[0] = 0;
15603 5122         9806 $ri_starting_one_line_block = [];
15604              
15605             # Redefine some sparse arrays.
15606             # It is more efficient to redefine these sparse arrays and rely on
15607             # undef's instead of initializing to 0's. Testing showed that using
15608             # @array=() is more efficient than $#array=-1
15609              
15610 5122         9835 @old_breakpoint_to_go = ();
15611 5122         8448 @forced_breakpoint_to_go = ();
15612 5122         8921 @block_type_to_go = ();
15613 5122         8306 @mate_index_to_go = ();
15614 5122         9339 @type_sequence_to_go = ();
15615              
15616             # NOTE: @nobreak_to_go is sparse and could be treated this way, but
15617             # testing showed that there would be very little efficiency gain
15618             # because an 'if' test must be added in store_token_to_go.
15619              
15620             # The initialization code for the remaining batch arrays is as follows
15621             # and can be activated for testing. But profiling shows that it is
15622             # time-consuming to re-initialize the batch arrays and is not necessary
15623             # because the maximum valid token, $max_index_to_go, is carefully
15624             # controlled. This means however that it is not possible to do any
15625             # type of filter or map operation directly on these arrays. And it is
15626             # not possible to use negative indexes. As a precaution against program
15627             # changes which might do this, sub pad_array_to_go adds some undefs at
15628             # the end of the current batch of data.
15629              
15630             ## 0 && do { #<<<
15631             ## @nobreak_to_go = ();
15632             ## @token_lengths_to_go = ();
15633             ## @levels_to_go = ();
15634             ## @ci_levels_to_go = ();
15635             ## @tokens_to_go = ();
15636             ## @K_to_go = ();
15637             ## @types_to_go = ();
15638             ## @leading_spaces_to_go = ();
15639             ## @reduced_spaces_to_go = ();
15640             ## @inext_to_go = ();
15641             ## @parent_seqno_to_go = ();
15642             ## };
15643              
15644 5122         7893 $rbrace_follower = undef;
15645 5122         7902 $ending_in_quote = 0;
15646              
15647 5122         7211 $index_start_one_line_block = undef;
15648              
15649             # initialize forced breakpoint vars associated with each output batch
15650 5122         7626 $forced_breakpoint_count = 0;
15651 5122         7459 $index_max_forced_break = UNDEFINED_INDEX;
15652 5122         7215 $forced_breakpoint_undo_count = 0;
15653              
15654 5122         13739 return;
15655             } ## end sub initialize_batch_variables
15656              
15657             sub leading_spaces_to_go {
15658              
15659             # return the number of indentation spaces for a token in the output
15660             # stream
15661              
15662 5029     5029 0 9255 my ($ii) = @_;
15663 5029 50       10498 return 0 if ( $ii < 0 );
15664 5029         8334 my $indentation = $leading_spaces_to_go[$ii];
15665 5029 100       13390 return ref($indentation) ? $indentation->get_spaces() : $indentation;
15666             } ## end sub leading_spaces_to_go
15667              
15668             sub create_one_line_block {
15669              
15670             # set index starting next one-line block
15671             # call with no args to delete the current one-line block
15672 1329     1329 0 2684 ($index_start_one_line_block) = @_;
15673 1329         2089 return;
15674             } ## end sub create_one_line_block
15675              
15676             # Routine to place the current token into the output stream.
15677             # Called once per output token.
15678              
15679 39     39   494 use constant DEBUG_STORE => 0;
  39         139  
  39         55830  
15680              
15681             sub store_token_to_go {
15682              
15683 54926     54926 0 90223 my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
15684              
15685             #-------------------------------------------------------
15686             # Token storage utility for sub process_line_of_CODE.
15687             # Add one token to the next batch of '_to_go' variables.
15688             #-------------------------------------------------------
15689              
15690             # Input parameters:
15691             # $Ktoken_vars = the index K in the global token array
15692             # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
15693             # unless they are temporarily being overridden
15694              
15695             #------------------------------------------------------------------
15696             # NOTE: called once per token so coding efficiency is critical here.
15697             # All changes need to be benchmarked with Devel::NYTProf.
15698             #------------------------------------------------------------------
15699              
15700             my (
15701              
15702             $type,
15703             $token,
15704             $ci_level,
15705             $level,
15706             $seqno,
15707             $length,
15708              
15709 54926         75261 ) = @{$rtoken_vars}[
  54926         149356  
15710              
15711             _TYPE_,
15712             _TOKEN_,
15713             _CI_LEVEL_,
15714             _LEVEL_,
15715             _TYPE_SEQUENCE_,
15716             _TOKEN_LENGTH_,
15717              
15718             ];
15719              
15720             # Check for emergency flush...
15721             # The K indexes in the batch must always be a continuous sequence of
15722             # the global token array. The batch process programming assumes this.
15723             # If storing this token would cause this relation to fail we must dump
15724             # the current batch before storing the new token. It is extremely rare
15725             # for this to happen. One known example is the following two-line
15726             # snippet when run with parameters
15727             # --noadd-newlines --space-terminal-semicolon:
15728             # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
15729             # $yy=1;
15730 54926 100       94601 if ( $max_index_to_go >= 0 ) {
15731 50160 50 66     146263 if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) {
    50          
15732 0         0 $self->flush_batch_of_CODE();
15733             }
15734              
15735             # Do not output consecutive blank tokens ... this should not
15736             # happen, but it is worth checking. Later code can then make the
15737             # simplifying assumption that blank tokens are not consecutive.
15738             elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
15739              
15740 0         0 if (DEVEL_MODE) {
15741              
15742             # if this happens, it is may be that consecutive blanks
15743             # were inserted into the token stream in 'respace_tokens'
15744             my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
15745             Fault("consecutive blanks near line $lno; please fix");
15746             }
15747 0         0 return;
15748             }
15749             else {
15750             ## all ok
15751             }
15752             }
15753              
15754             # Do not start a batch with a blank token.
15755             # Fixes cases b149 b888 b984 b985 b986 b987
15756             else {
15757 4766 100       11207 if ( $type eq 'b' ) { return }
  202         470  
15758             }
15759              
15760             # Update counter and do initializations if first token of new batch
15761 54724 100       97417 if ( !++$max_index_to_go ) {
15762              
15763             # Reset flag '$starting_in_quote' for a new batch. It must be set
15764             # to the value of '$in_continued_quote', but here for efficiency we
15765             # set it to zero, which is its normal value. Then in coding below
15766             # we will change it if we find we are actually in a continued quote.
15767 4564         7081 $starting_in_quote = 0;
15768              
15769             # Update the next parent sequence number for each new batch.
15770              
15771             #----------------------------------------
15772             # Begin coding from sub parent_seqno_by_K
15773             #----------------------------------------
15774              
15775             # The following is equivalent to this call but much faster:
15776             # $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
15777              
15778 4564         7373 $next_parent_seqno = SEQ_ROOT;
15779 4564 100       8598 if ($seqno) {
15780 886         2579 $next_parent_seqno = $rparent_of_seqno->{$seqno};
15781             }
15782             else {
15783 3678         7330 my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
15784 3678 100       8102 if ( defined($Kt) ) {
15785 3385         7623 my $type_sequence_t = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
15786 3385         6210 my $type_t = $rLL->[$Kt]->[_TYPE_];
15787              
15788             # if next container token is closing, it is the parent seqno
15789 3385 100       9604 if ( $is_closing_type{$type_t} ) {
15790 522         1291 $next_parent_seqno = $type_sequence_t;
15791             }
15792              
15793             # otherwise we want its parent container
15794             else {
15795             $next_parent_seqno =
15796 2863         7485 $rparent_of_seqno->{$type_sequence_t};
15797             }
15798             }
15799             }
15800 4564 50       9665 $next_parent_seqno = SEQ_ROOT
15801             if ( !defined($next_parent_seqno) );
15802              
15803             #--------------------------------------
15804             # End coding from sub parent_seqno_by_K
15805             #--------------------------------------
15806              
15807 4564         8163 $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
15808             }
15809              
15810             # Clip levels to zero if there are level errors in the file.
15811             # We had to wait until now for reasons explained in sub 'write_line'.
15812 54724 50       94527 if ( $level < 0 ) { $level = 0 }
  0         0  
15813              
15814             # Safety check that length is defined. This is slow and should not be
15815             # needed now, so just do it in DEVEL_MODE to check programming changes.
15816             # Formerly needed for --indent-only, in which the entire set of tokens
15817             # is normally turned into type 'q'. Lengths are now defined in sub
15818             # 'respace_tokens' so this check is no longer needed.
15819 54724         67111 if ( DEVEL_MODE && !defined($length) ) {
15820             my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
15821             $length = length($token);
15822             Fault(<<EOM);
15823             undefined length near line $lno; num chars=$length, token='$token'
15824             EOM
15825             }
15826              
15827             #----------------------------
15828             # add this token to the batch
15829             #----------------------------
15830 54724         86595 $K_to_go[$max_index_to_go] = $Ktoken_vars;
15831 54724         91044 $types_to_go[$max_index_to_go] = $type;
15832 54724         86755 $tokens_to_go[$max_index_to_go] = $token;
15833 54724         76035 $ci_levels_to_go[$max_index_to_go] = $ci_level;
15834 54724         78095 $levels_to_go[$max_index_to_go] = $level;
15835 54724         74889 $nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
15836 54724         73350 $token_lengths_to_go[$max_index_to_go] = $length;
15837              
15838             # Skip point initialization for these sparse arrays - undef's okay;
15839             # See also related code in sub initialize_batch_variables.
15840             ## $old_breakpoint_to_go[$max_index_to_go] = 0;
15841             ## $forced_breakpoint_to_go[$max_index_to_go] = 0;
15842             ## $block_type_to_go[$max_index_to_go] = EMPTY_STRING;
15843             ## $type_sequence_to_go[$max_index_to_go] = $seqno;
15844              
15845             # NOTE: nobreak_to_go can be treated as a sparse array, but testing
15846             # showed that there is almost no efficiency gain because an if test
15847             # would need to be added.
15848              
15849             # We keep a running sum of token lengths from the start of this batch:
15850             # summed_lengths_to_go[$i] = total length to just before token $i
15851             # summed_lengths_to_go[$i+1] = total length to just after token $i
15852 54724         84273 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
15853             $summed_lengths_to_go[$max_index_to_go] + $length;
15854              
15855             # Initialize some sequence-dependent variables to their normal values
15856 54724         83106 $parent_seqno_to_go[$max_index_to_go] = $next_parent_seqno;
15857 54724         76934 $nesting_depth_to_go[$max_index_to_go] = $next_slevel;
15858              
15859             # Then fix them at container tokens:
15860 54724 100       89240 if ($seqno) {
15861              
15862 9146         18335 $type_sequence_to_go[$max_index_to_go] = $seqno;
15863              
15864             $block_type_to_go[$max_index_to_go] =
15865 9146         16715 $rblock_type_of_seqno->{$seqno};
15866              
15867 9146 100       22726 if ( $is_opening_token{$token} ) {
    100          
15868              
15869 4412         8279 my $slevel = $rdepth_of_opening_seqno->[$seqno];
15870 4412         7006 $nesting_depth_to_go[$max_index_to_go] = $slevel;
15871 4412         6675 $next_slevel = $slevel + 1;
15872              
15873 4412         7020 $next_parent_seqno = $seqno;
15874              
15875             }
15876             elsif ( $is_closing_token{$token} ) {
15877              
15878 4362         7683 $next_slevel = $rdepth_of_opening_seqno->[$seqno];
15879 4362         7078 my $slevel = $next_slevel + 1;
15880 4362         6748 $nesting_depth_to_go[$max_index_to_go] = $slevel;
15881              
15882 4362         8659 my $parent_seqno = $rparent_of_seqno->{$seqno};
15883 4362 50       9235 $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
15884 4362         6769 $parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
15885 4362         7029 $next_parent_seqno = $parent_seqno;
15886              
15887             }
15888             else {
15889             # ternary token: nothing to do
15890             }
15891             }
15892              
15893             # Define the indentation that this token will have in two cases:
15894             # Without CI = reduced_spaces_to_go
15895             # With CI = leading_spaces_to_go
15896 54724         98271 $leading_spaces_to_go[$max_index_to_go] =
15897             $reduced_spaces_to_go[$max_index_to_go] =
15898             $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
15899 54724 100       92478 if ($ci_level) {
15900 32969         45780 $leading_spaces_to_go[$max_index_to_go] +=
15901             $rOpts_continuation_indentation;
15902             }
15903              
15904             # Correct these values if we are starting in a continued quote
15905 54724 100 100     99238 if ( $current_line_starts_in_quote
15906             && $Ktoken_vars == $K_first )
15907             {
15908             # in a continued quote - correct value set above if first token
15909 19 50       91 if ( $max_index_to_go == 0 ) { $starting_in_quote = 1 }
  19         58  
15910              
15911 19         48 $leading_spaces_to_go[$max_index_to_go] = 0;
15912 19         46 $reduced_spaces_to_go[$max_index_to_go] = 0;
15913             }
15914              
15915 54724         66870 DEBUG_STORE && do {
15916             my ( $a, $b, $c ) = caller();
15917             print {*STDOUT}
15918             "STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
15919             };
15920 54724         83496 return;
15921             } ## end sub store_token_to_go
15922              
15923             sub flush_batch_of_CODE {
15924              
15925             # Finish and process the current batch.
15926             # This must be the only call to grind_batch_of_CODE()
15927 5463     5463 0 10053 my ($self) = @_;
15928              
15929             # If a batch has been started ...
15930 5463 100       11995 if ( $max_index_to_go >= 0 ) {
15931              
15932             # Create an array to hold variables for this batch
15933 4561         8601 my $this_batch = [];
15934              
15935 4561 100       9737 $this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote);
15936 4561 100       9165 $this_batch->[_ending_in_quote_] = 1 if ($ending_in_quote);
15937              
15938 4561 100 100     15475 if ( $CODE_type || $last_CODE_type ) {
15939 1190 100       4026 $this_batch->[_batch_CODE_type_] =
15940             $K_to_go[$max_index_to_go] >= $K_first
15941             ? $CODE_type
15942             : $last_CODE_type;
15943             }
15944              
15945             $last_line_had_side_comment =
15946 4561   100     14661 ( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' );
15947              
15948             # The flag $is_static_block_comment applies to the line which just
15949             # arrived. So it only applies if we are outputting that line.
15950 4561 100 66     11136 if ( $is_static_block_comment && !$last_line_had_side_comment ) {
15951 13         46 $this_batch->[_is_static_block_comment_] =
15952             $K_to_go[0] == $K_first;
15953             }
15954              
15955 4561         9909 $this_batch->[_ri_starting_one_line_block_] =
15956             $ri_starting_one_line_block;
15957              
15958 4561         8001 $self->[_this_batch_] = $this_batch;
15959              
15960             #-------------------
15961             # process this batch
15962             #-------------------
15963 4561         14661 $self->grind_batch_of_CODE();
15964              
15965             # Done .. this batch is history
15966 4561         8369 $self->[_this_batch_] = undef;
15967              
15968 4561         11334 initialize_batch_variables();
15969             }
15970              
15971 5463         9362 return;
15972             } ## end sub flush_batch_of_CODE
15973              
15974             sub end_batch {
15975              
15976             # End the current batch, EXCEPT for a few special cases
15977 4970     4970 0 9670 my ($self) = @_;
15978              
15979 4970 50       10863 if ( $max_index_to_go < 0 ) {
15980              
15981             # nothing to do .. this is harmless but wastes time.
15982 0         0 if (DEVEL_MODE) {
15983             Fault("sub end_batch called with nothing to do; please fix\n");
15984             }
15985 0         0 return;
15986             }
15987              
15988             # Exceptions when a line does not end with a comment... (fixes c058)
15989 4970 100       11832 if ( $types_to_go[$max_index_to_go] ne '#' ) {
15990              
15991             # Exception 1: Do not end line in a weld
15992             return
15993             if ( $total_weld_count
15994 3974 100 100     10594 && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
15995              
15996             # Exception 2: just set a tentative breakpoint if we might be in a
15997             # one-line block
15998 3926 100       10039 if ( defined($index_start_one_line_block) ) {
15999 432         1638 $self->set_forced_breakpoint($max_index_to_go);
16000 432         847 return;
16001             }
16002             }
16003              
16004 4490         11716 $self->flush_batch_of_CODE();
16005 4490         9452 return;
16006             } ## end sub end_batch
16007              
16008             sub flush_vertical_aligner {
16009 1818     1818 0 3940 my ($self) = @_;
16010 1818         3637 my $vao = $self->[_vertical_aligner_object_];
16011 1818         7061 $vao->flush();
16012 1818         3036 return;
16013             } ## end sub flush_vertical_aligner
16014              
16015             # flush is called to output any tokens in the pipeline, so that
16016             # an alternate source of lines can be written in the correct order
16017             sub flush {
16018 1752     1752 0 4228 my ( $self, $CODE_type_flush ) = @_;
16019              
16020             # end the current batch with 1 exception
16021              
16022 1752         2997 $index_start_one_line_block = undef;
16023              
16024             # Exception: if we are flushing within the code stream only to insert
16025             # blank line(s), then we can keep the batch intact at a weld. This
16026             # improves formatting of -ce. See test 'ce1.ce'
16027 1752 100 66     6037 if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) {
16028 779 100       2255 $self->end_batch() if ( $max_index_to_go >= 0 );
16029             }
16030              
16031             # otherwise, we have to shut things down completely.
16032 973         2569 else { $self->flush_batch_of_CODE() }
16033              
16034 1752         5841 $self->flush_vertical_aligner();
16035 1752         2900 return;
16036             } ## end sub flush
16037              
16038             my %is_assignment_or_fat_comma;
16039              
16040             BEGIN {
16041 39     39   709 %is_assignment_or_fat_comma = %is_assignment;
16042 39         164381 $is_assignment_or_fat_comma{'=>'} = 1;
16043             }
16044              
16045             sub add_missing_else {
16046              
16047             # Add a missing 'else' block.
16048             # $K_dangling_elsif = index of closing elsif brace not followed by else
16049 1     1 0 5 my ($self) = @_;
16050              
16051             # Make sure everything looks okay
16052 1 50 33     11 if ( !$K_dangling_elsif
      33        
16053             || $K_dangling_elsif < $K_first
16054             || $rLL->[$K_dangling_elsif]->[_TYPE_] ne '}' )
16055             {
16056 0         0 DEVEL_MODE && Fault("could not find closing elsif brace\n");
16057             }
16058              
16059 1         5 my $comment = $rOpts->{'add-missing-else-comment'};
16060              
16061             # Safety check
16062 1 50       16 if ( substr( $comment, 0, 1 ) ne '#' ) { $comment = '#' . $comment }
  0         0  
16063              
16064             # Calculate indentation
16065 1         6 my $level = $radjusted_levels->[$K_dangling_elsif];
16066 1         4 my $spaces = SPACE x ( $level * $rOpts_indent_columns );
16067 1         4 my $line1 = $spaces . "else {\n";
16068 1         3 my $line3 = $spaces . "}\n";
16069 1         3 $spaces .= SPACE x $rOpts_indent_columns;
16070 1         4 my $line2 = $spaces . $comment . "\n";
16071              
16072             # clear the output pipeline
16073 1         4 $self->flush();
16074              
16075 1         2 my $file_writer_object = $self->[_file_writer_object_];
16076              
16077 1         7 $file_writer_object->write_code_line($line1);
16078 1         5 $file_writer_object->write_code_line($line2);
16079 1         7 $file_writer_object->write_code_line($line3);
16080 1         7 return;
16081             }
16082              
16083             sub process_line_of_CODE {
16084              
16085 6588     6588 0 12821 my ( $self, $my_line_of_tokens ) = @_;
16086              
16087             #----------------------------------------------------------------
16088             # This routine is called once per INPUT line to format all of the
16089             # tokens on that line.
16090             #----------------------------------------------------------------
16091              
16092             # It outputs full-line comments and blank lines immediately.
16093              
16094             # For lines of code:
16095             # - Tokens are copied one-by-one from the global token
16096             # array $rLL to a set of '_to_go' arrays which collect batches of
16097             # tokens. This is done with calls to 'store_token_to_go'.
16098             # - A batch is closed and processed upon reaching a well defined
16099             # structural break point (i.e. code block boundary) or forced
16100             # breakpoint (i.e. side comment or special user controls).
16101             # - Subsequent stages of formatting make additional line breaks
16102             # appropriate for lists and logical structures, and as necessary to
16103             # keep line lengths below the requested maximum line length.
16104              
16105             #-----------------------------------
16106             # begin initialize closure variables
16107             #-----------------------------------
16108 6588         13056 $line_of_tokens = $my_line_of_tokens;
16109 6588         13229 my $rK_range = $line_of_tokens->{_rK_range};
16110 6588 50       15953 if ( !defined( $rK_range->[0] ) ) {
16111              
16112             # Empty line: This can happen if tokens are deleted, for example
16113             # with the -mangle parameter
16114 0         0 return;
16115             }
16116              
16117 6588         9539 ( $K_first, $K_last ) = @{$rK_range};
  6588         14555  
16118 6588         11376 $last_CODE_type = $CODE_type;
16119 6588         10648 $CODE_type = $line_of_tokens->{_code_type};
16120 6588         12435 $current_line_starts_in_quote = $line_of_tokens->{_starting_in_quote};
16121              
16122 6588         73845 $rLL = $self->[_rLL_];
16123 6588         14706 $radjusted_levels = $self->[_radjusted_levels_];
16124 6588         11906 $rparent_of_seqno = $self->[_rparent_of_seqno_];
16125 6588         12637 $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
16126 6588         10636 $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
16127              
16128             #---------------------------------
16129             # end initialize closure variables
16130             #---------------------------------
16131              
16132             # This flag will become nobreak_to_go and should be set to 2 to prevent
16133             # a line break AFTER the current token.
16134 6588         10321 $no_internal_newlines = 0;
16135 6588 100 66     23245 if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
16136 119         211 $no_internal_newlines = 2;
16137             }
16138              
16139 6588         11053 my $input_line = $line_of_tokens->{_line_text};
16140              
16141 6588         10322 my ( $is_block_comment, $has_side_comment );
16142 6588 100       20010 if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
16143 1065 100       2788 if ( $K_last == $K_first ) { $is_block_comment = 1 }
  701         1338  
16144 364         764 else { $has_side_comment = 1 }
16145             }
16146              
16147 6588         10939 my $is_static_block_comment_without_leading_space =
16148             $CODE_type eq 'SBCX';
16149 6588   100     18433 $is_static_block_comment =
16150             $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
16151              
16152             # check for a $VERSION statement
16153 6588 100       13416 if ( $CODE_type eq 'VER' ) {
16154 4         10 $self->[_saw_VERSION_in_this_file_] = 1;
16155 4         8 $no_internal_newlines = 2;
16156             }
16157              
16158             # Add interline blank if any
16159 6588         10131 my $last_old_nonblank_type = "b";
16160 6588         9775 my $first_new_nonblank_token = EMPTY_STRING;
16161 6588         9880 my $K_first_true = $K_first;
16162 6588 100       13802 if ( $max_index_to_go >= 0 ) {
16163 2485         5002 $last_old_nonblank_type = $types_to_go[$max_index_to_go];
16164 2485         6281 $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
16165 2485 100 66     18979 if ( !$is_block_comment
      66        
      66        
16166             && $types_to_go[$max_index_to_go] ne 'b'
16167             && $K_first > 0
16168             && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
16169             {
16170 2309         4115 $K_first -= 1;
16171             }
16172             }
16173              
16174 6588         10663 my $rtok_first = $rLL->[$K_first];
16175              
16176 6588         12482 my $in_quote = $line_of_tokens->{_ending_in_quote};
16177 6588         10162 $ending_in_quote = $in_quote;
16178              
16179             #------------------------------------
16180             # Handle a block (full-line) comment.
16181             #------------------------------------
16182 6588 100       13542 if ($is_block_comment) {
16183              
16184 701 100       2406 if ( $rOpts->{'delete-block-comments'} ) {
16185 21         70 $self->flush();
16186 21         68 return;
16187             }
16188              
16189 680         1348 $index_start_one_line_block = undef;
16190 680 100       2025 $self->end_batch() if ( $max_index_to_go >= 0 );
16191              
16192             # output a blank line before block comments
16193 680 100 100     4077 if (
      66        
      100        
      66        
      100        
      100        
16194             # unless we follow a blank or comment line
16195             $self->[_last_line_leading_type_] ne '#'
16196             && $self->[_last_line_leading_type_] ne 'b'
16197              
16198             # only if allowed
16199             && $rOpts->{'blanks-before-comments'}
16200              
16201             # if this is NOT an empty comment, unless it follows a side
16202             # comment and could become a hanging side comment.
16203             && (
16204             $rtok_first->[_TOKEN_] ne '#'
16205             || ( $last_line_had_side_comment
16206             && $rLL->[$K_first]->[_LEVEL_] > 0 )
16207             )
16208              
16209             # not after a short line ending in an opening token
16210             # because we already have space above this comment.
16211             # Note that the first comment in this if block, after
16212             # the 'if (', does not get a blank line because of this.
16213             && !$self->[_last_output_short_opening_token_]
16214              
16215             # never before static block comments
16216             && !$is_static_block_comment
16217             )
16218             {
16219 50         204 $self->flush(); # switching to new output stream
16220 50         121 my $file_writer_object = $self->[_file_writer_object_];
16221 50         250 $file_writer_object->write_blank_code_line();
16222 50         133 $self->[_last_line_leading_type_] = 'b';
16223             }
16224              
16225 680 100 100     5109 if (
      100        
      100        
16226             $rOpts->{'indent-block-comments'}
16227             && ( !$rOpts->{'indent-spaced-block-comments'}
16228             || $input_line =~ /^\s+/ )
16229             && !$is_static_block_comment_without_leading_space
16230             )
16231             {
16232 632         2377 my $Ktoken_vars = $K_first;
16233 632         1335 my $rtoken_vars = $rLL->[$Ktoken_vars];
16234 632         2588 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16235 632         2134 $self->end_batch();
16236             }
16237             else {
16238              
16239             # switching to new output stream
16240 48         178 $self->flush();
16241              
16242             # Note that last arg in call here is 'undef' for comments
16243 48         105 my $file_writer_object = $self->[_file_writer_object_];
16244 48         277 $file_writer_object->write_code_line(
16245             $rtok_first->[_TOKEN_] . "\n", undef );
16246 48         139 $self->[_last_line_leading_type_] = '#';
16247             }
16248 680         2521 return;
16249             }
16250              
16251             #--------------------------------------------
16252             # Compare input/output indentation in logfile
16253             #--------------------------------------------
16254 5887 100       13887 if ( $self->[_save_logfile_] ) {
16255              
16256             my $guessed_indentation_level =
16257 5         8 $line_of_tokens->{_guessed_indentation_level};
16258              
16259             # Compare input/output indentation except for:
16260             # - hanging side comments
16261             # - continuation lines (have unknown leading blank space)
16262             # - and lines which are quotes (they may have been outdented)
16263 5   66     85 my $exception =
16264             $CODE_type eq 'HSC'
16265             || $rtok_first->[_CI_LEVEL_] > 0
16266             || $guessed_indentation_level == 0
16267             && $rtok_first->[_TYPE_] eq 'Q';
16268              
16269 5 100       14 if ( !$exception ) {
16270 3         9 my $input_line_number = $line_of_tokens->{_line_number};
16271 3         14 $self->compare_indentation_levels( $K_first,
16272             $guessed_indentation_level, $input_line_number );
16273             }
16274             }
16275              
16276             #-----------------------------------------
16277             # Handle a line marked as indentation-only
16278             #-----------------------------------------
16279              
16280 5887 100       13002 if ( $CODE_type eq 'IO' ) {
16281 12         42 $self->flush();
16282 12         21 my $line = $input_line;
16283              
16284             # Fix for rt #125506 Unexpected string formatting
16285             # in which leading space of a terminal quote was removed
16286 12         84 $line =~ s/\s+$//;
16287 12 100       52 $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
16288              
16289 12         22 my $Ktoken_vars = $K_first;
16290              
16291             # We work with a copy of the token variables and change the
16292             # first token to be the entire line as a quote variable
16293 12         25 my $rtoken_vars = $rLL->[$Ktoken_vars];
16294 12         43 $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
16295              
16296             # Patch: length is not really important here but must be defined
16297 12         25 $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
16298              
16299 12         42 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16300 12         35 $self->end_batch();
16301 12         53 return;
16302             }
16303              
16304             #---------------------------
16305             # Handle all other lines ...
16306             #---------------------------
16307              
16308 5875         8807 $K_dangling_elsif = 0;
16309              
16310             # This is a good place to kill incomplete one-line blocks
16311 5875 100       14054 if ( $max_index_to_go >= 0 ) {
16312              
16313             # For -iob and -lp, mark essential old breakpoints.
16314             # Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
16315             # See related code below.
16316 2444 50 66     7416 if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
16317 0         0 my $type_first = $rLL->[$K_first_true]->[_TYPE_];
16318 0 0       0 if ( $is_assignment_or_fat_comma{$type_first} ) {
16319 0         0 $old_breakpoint_to_go[$max_index_to_go] = 1;
16320             }
16321             }
16322              
16323 2444 100 100     15547 if (
      100        
      100        
      100        
16324              
16325             # this check needed -mangle (for example rt125012)
16326             (
16327             ( !$index_start_one_line_block )
16328             && ( $last_old_nonblank_type eq ';' )
16329             && ( $first_new_nonblank_token ne '}' )
16330             )
16331              
16332             # Patch for RT #98902. Honor request to break at old commas.
16333             || ( $rOpts_break_at_old_comma_breakpoints
16334             && $last_old_nonblank_type eq ',' )
16335             )
16336             {
16337 30 100       112 $forced_breakpoint_to_go[$max_index_to_go] = 1
16338             if ($rOpts_break_at_old_comma_breakpoints);
16339 30         54 $index_start_one_line_block = undef;
16340 30         109 $self->end_batch();
16341             }
16342              
16343             # Keep any requested breaks before this line. Note that we have to
16344             # use the original K_first because it may have been reduced above
16345             # to add a blank. The value of the flag is as follows:
16346             # 1 => hard break, flush the batch
16347             # 2 => soft break, set breakpoint and continue building the batch
16348             # added check on max_index_to_go for c177
16349 2444 100 100     9892 if ( $max_index_to_go >= 0
16350             && $self->[_rbreak_before_Kfirst_]->{$K_first_true} )
16351             {
16352 9         18 $index_start_one_line_block = undef;
16353 9 100       31 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
16354 4         13 $self->set_forced_breakpoint($max_index_to_go);
16355             }
16356             else {
16357 5         15 $self->end_batch();
16358             }
16359             }
16360             }
16361              
16362             #--------------------------------------
16363             # loop to process the tokens one-by-one
16364             #--------------------------------------
16365 5875         17676 $self->process_line_inner_loop($has_side_comment);
16366              
16367             # if there is anything left in the output buffer ...
16368 5875 100       14849 if ( $max_index_to_go >= 0 ) {
16369              
16370 3264         6637 my $type = $rLL->[$K_last]->[_TYPE_];
16371 3264         6232 my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
16372              
16373             # we have to flush ..
16374 3264 100 100     39824 if (
      100        
      100        
      100        
      66        
      100        
      100        
      100        
      66        
      33        
      66        
16375              
16376             # if there is a side comment...
16377             $type eq '#'
16378              
16379             # if this line ends in a quote
16380             # NOTE: This is critically important for insuring that quoted
16381             # lines do not get processed by things like -sot and -sct
16382             || $in_quote
16383              
16384             # if this is a VERSION statement
16385             || $CODE_type eq 'VER'
16386              
16387             # to keep a label at the end of a line
16388             || ( $type eq 'J' && $rOpts_break_after_labels != 2 )
16389              
16390             # if we have a hard break request
16391             || $break_flag && $break_flag != 2
16392              
16393             # if we are instructed to keep all old line breaks
16394             || !$rOpts->{'delete-old-newlines'}
16395              
16396             # if this is a line of the form 'use overload'. A break here in
16397             # the input file is a good break because it will allow the
16398             # operators which follow to be formatted well. Without this
16399             # break the formatting with -ci=4 -xci is poor, for example.
16400              
16401             # use overload
16402             # '+' => sub {
16403             # print length $_[2], "\n";
16404             # my ( $x, $y ) = _order(@_);
16405             # Number::Roman->new( int $x + $y );
16406             # },
16407             # '-' => sub {
16408             # my ( $x, $y ) = _order(@_);
16409             # Number::Roman->new( int $x - $y );
16410             # };
16411             || ( $max_index_to_go == 2
16412             && $types_to_go[0] eq 'k'
16413             && $tokens_to_go[0] eq 'use'
16414             && $tokens_to_go[$max_index_to_go] eq 'overload' )
16415             )
16416             {
16417 562         1229 $index_start_one_line_block = undef;
16418 562         1580 $self->end_batch();
16419             }
16420              
16421             else {
16422              
16423             # Check for a soft break request
16424 2702 50 33     7089 if ( $break_flag && $break_flag == 2 ) {
16425 0         0 $self->set_forced_breakpoint($max_index_to_go);
16426             }
16427              
16428             # mark old line breakpoints in current output stream
16429 2702 50 33     7119 if (
      66        
16430             !$rOpts_ignore_old_breakpoints
16431              
16432             # Mark essential old breakpoints if combination -iob -lp is
16433             # used. These two options do not work well together, but
16434             # we can avoid turning -iob off by ignoring -iob at certain
16435             # essential line breaks. See also related code above.
16436             # Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
16437             || ( $rOpts_line_up_parentheses
16438             && $is_assignment_or_fat_comma{$type} )
16439             )
16440             {
16441 2692         5911 $old_breakpoint_to_go[$max_index_to_go] = 1;
16442             }
16443             }
16444             }
16445              
16446 5875 100 100     15215 if ( $K_dangling_elsif && $rOpts_add_missing_else ) {
16447 1         4 $self->add_missing_else();
16448             }
16449              
16450 5875         18998 return;
16451             } ## end sub process_line_of_CODE
16452              
16453             sub process_line_inner_loop {
16454              
16455 5875     5875 0 11141 my ( $self, $has_side_comment ) = @_;
16456              
16457             #--------------------------------------------------------------------
16458             # Loop to move all tokens from one input line to a newly forming batch
16459             #--------------------------------------------------------------------
16460              
16461             # Do not start a new batch with a blank space
16462 5875 100 100     21545 if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
16463 20         43 $K_first++;
16464             }
16465              
16466 5875         13816 foreach my $Ktoken_vars ( $K_first .. $K_last ) {
16467              
16468 54232         88106 my $rtoken_vars = $rLL->[$Ktoken_vars];
16469              
16470             #--------------
16471             # handle blanks
16472             #--------------
16473 54232 100       120619 if ( $rtoken_vars->[_TYPE_] eq 'b' ) {
16474 19152         43945 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16475 19152         31765 next;
16476             }
16477              
16478             #------------------
16479             # handle non-blanks
16480             #------------------
16481 35080         51043 my $type = $rtoken_vars->[_TYPE_];
16482              
16483             # If we are continuing after seeing a right curly brace, flush
16484             # buffer unless we see what we are looking for, as in
16485             # } else ...
16486 35080 100       59296 if ($rbrace_follower) {
16487 198         702 my $token = $rtoken_vars->[_TOKEN_];
16488 198 100       875 if ( !$rbrace_follower->{$token} ) {
16489 157 100       740 $self->end_batch() if ( $max_index_to_go >= 0 );
16490             }
16491 198         577 $rbrace_follower = undef;
16492             }
16493              
16494             my (
16495 35080         50873 $block_type, $type_sequence,
16496             $is_opening_BLOCK, $is_closing_BLOCK,
16497             $nobreak_BEFORE_BLOCK
16498             );
16499              
16500 35080 100       64811 if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
16501              
16502 9096         17112 my $token = $rtoken_vars->[_TOKEN_];
16503 9096         14446 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
16504 9096         16019 $block_type = $rblock_type_of_seqno->{$type_sequence};
16505              
16506 9096 100 66     30052 if ( $block_type
      100        
      100        
16507             && $token eq $type
16508             && $block_type ne 't'
16509             && !$self->[_rshort_nested_]->{$type_sequence} )
16510             {
16511              
16512 1938 100       6331 if ( $type eq '{' ) {
    50          
16513 969         1628 $is_opening_BLOCK = 1;
16514 969         1759 $nobreak_BEFORE_BLOCK = $no_internal_newlines;
16515             }
16516             elsif ( $type eq '}' ) {
16517 969         1930 $is_closing_BLOCK = 1;
16518 969         1860 $nobreak_BEFORE_BLOCK = $no_internal_newlines;
16519             }
16520             else {
16521             ## error - block should be enclosed by curly brace
16522 0         0 DEVEL_MODE && Fault(<<EOM);
16523             block type '$block_type' has unexpected container type '$type'
16524             EOM
16525             }
16526             }
16527             }
16528              
16529             #---------------------
16530             # handle side comments
16531             #---------------------
16532 35080 100       59426 if ($has_side_comment) {
16533              
16534             # if at last token ...
16535 2196 100       4556 if ( $Ktoken_vars == $K_last ) {
16536 364         1382 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16537 364         1016 next;
16538             }
16539              
16540             # if before last token ... do not allow breaks which would
16541             # promote a side comment to a block comment
16542 1832 100 100     7719 if ( $Ktoken_vars == $K_last - 1
      100        
16543             || $Ktoken_vars == $K_last - 2
16544             && $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' )
16545             {
16546 364         695 $no_internal_newlines = 2;
16547             }
16548             }
16549              
16550             # Process non-blank and non-comment tokens ...
16551              
16552             #-----------------
16553             # handle semicolon
16554             #-----------------
16555 34716 100       88782 if ( $type eq ';' ) {
    100          
    100          
    100          
16556              
16557 2544         6031 my $next_nonblank_token_type = 'b';
16558 2544         4900 my $next_nonblank_token = EMPTY_STRING;
16559 2544 100       6181 if ( $Ktoken_vars < $K_last ) {
16560 530         1101 my $Knnb = $Ktoken_vars + 1;
16561 530 100       1802 $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
16562 530         1153 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
16563 530         951 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
16564             }
16565              
16566 2544 50 66     6916 if ( $rOpts_break_at_old_semicolon_breakpoints
      66        
      33        
16567             && ( $Ktoken_vars == $K_first )
16568             && $max_index_to_go >= 0
16569             && !defined($index_start_one_line_block) )
16570             {
16571 1         9 $self->end_batch();
16572             }
16573              
16574 2544         6751 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16575              
16576 2544 100 100     20370 $self->end_batch()
      100        
      100        
16577             if (
16578             !$no_internal_newlines
16579             && ( !$rOpts_keep_interior_semicolons
16580             || $Ktoken_vars >= $K_last )
16581             && ( $next_nonblank_token ne '}' )
16582             );
16583             }
16584              
16585             #-----------
16586             # handle '{'
16587             #-----------
16588             elsif ($is_opening_BLOCK) {
16589              
16590             # Tentatively output this token. This is required before
16591             # calling starting_one_line_block. We may have to unstore
16592             # it, though, if we have to break before it.
16593 969         3009 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16594              
16595             # Look ahead to see if we might form a one-line block..
16596 969         4864 my $too_long =
16597             $self->starting_one_line_block( $Ktoken_vars,
16598             $K_last_nonblank_code, $K_last );
16599 969         3730 $self->clear_breakpoint_undo_stack();
16600              
16601             # to simplify the logic below, set a flag to indicate if
16602             # this opening brace is far from the keyword which introduces it
16603 969         1669 my $keyword_on_same_line = 1;
16604 969 0 66     6296 if (
      66        
      0        
      33        
16605             $max_index_to_go >= 0
16606             && defined($K_last_nonblank_code)
16607             && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
16608             && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
16609             || $too_long )
16610             )
16611             {
16612 0         0 $keyword_on_same_line = 0;
16613             }
16614              
16615             # Break before '{' if requested with -bl or -bli flag
16616 969         2195 my $want_break = $self->[_rbrace_left_]->{$type_sequence};
16617              
16618             # But do not break if this token is welded to the left
16619 969 100 100     3438 if ( $total_weld_count
16620             && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
16621             {
16622 21         58 $want_break = 0;
16623             }
16624              
16625             # Break BEFORE an opening '{' ...
16626 969 100 100     5631 if (
      33        
      66        
16627              
16628             # if requested
16629             $want_break
16630              
16631             # and we were unable to start looking for a block,
16632             && !defined($index_start_one_line_block)
16633              
16634             # or if it will not be on same line as its keyword, so that
16635             # it will be outdented (eval.t, overload.t), and the user
16636             # has not insisted on keeping it on the right
16637             || ( !$keyword_on_same_line
16638             && !$rOpts_opening_brace_always_on_right )
16639             )
16640             {
16641              
16642             # but only if allowed
16643 50 50       139 if ( !$nobreak_BEFORE_BLOCK ) {
16644              
16645             # since we already stored this token, we must unstore it
16646 50         186 $self->unstore_token_to_go();
16647              
16648             # then output the line
16649 50 100       197 $self->end_batch() if ( $max_index_to_go >= 0 );
16650              
16651             # and now store this token at the start of a new line
16652 50         178 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16653             }
16654             }
16655              
16656             # now output this line
16657             $self->end_batch()
16658 969 100 66     4913 if ( $max_index_to_go >= 0 && !$no_internal_newlines );
16659             }
16660              
16661             #-----------
16662             # handle '}'
16663             #-----------
16664             elsif ($is_closing_BLOCK) {
16665              
16666 969         2024 my $next_nonblank_token_type = 'b';
16667 969         1860 my $next_nonblank_token = EMPTY_STRING;
16668 969         1745 my $Knnb;
16669 969 100       2590 if ( $Ktoken_vars < $K_last ) {
16670 417         894 $Knnb = $Ktoken_vars + 1;
16671 417 100       1510 $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
16672 417         1038 $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
16673 417         938 $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
16674             }
16675              
16676             # If there is a pending one-line block ..
16677 969 100       2549 if ( defined($index_start_one_line_block) ) {
16678              
16679             # Fix for b1208: if a side comment follows this closing
16680             # brace then we must include its length in the length test
16681             # ... unless the -issl flag is set (fixes b1307-1309).
16682             # Assume a minimum of 1 blank space to the comment.
16683 355         686 my $added_length = 0;
16684 355 100 100     1361 if ( $has_side_comment
      100        
16685             && !$rOpts_ignore_side_comment_lengths
16686             && $next_nonblank_token_type eq '#' )
16687             {
16688 17         54 $added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_];
16689             }
16690              
16691             # we have to terminate it if..
16692 355 50       1209 if (
16693              
16694             # it is too long (final length may be different from
16695             # initial estimate). note: must allow 1 space for this
16696             # token
16697             $self->excess_line_length( $index_start_one_line_block,
16698             $max_index_to_go ) + $added_length >= 0
16699             )
16700             {
16701 0         0 $index_start_one_line_block = undef;
16702             }
16703             }
16704              
16705             # put a break before this closing curly brace if appropriate
16706             $self->end_batch()
16707 969 100 100     4562 if ( $max_index_to_go >= 0
      100        
16708             && !$nobreak_BEFORE_BLOCK
16709             && !defined($index_start_one_line_block) );
16710              
16711             # store the closing curly brace
16712 969         3034 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16713              
16714             # ok, we just stored a closing curly brace. Often, but
16715             # not always, we want to end the line immediately.
16716             # So now we have to check for special cases.
16717              
16718             # if this '}' successfully ends a one-line block..
16719 969         2158 my $one_line_block_type = EMPTY_STRING;
16720 969         1950 my $keep_going;
16721 969 100       2728 if ( defined($index_start_one_line_block) ) {
16722              
16723             # Remember the type of token just before the
16724             # opening brace. It would be more general to use
16725             # a stack, but this will work for one-line blocks.
16726 355         795 $one_line_block_type =
16727             $types_to_go[$index_start_one_line_block];
16728              
16729             # we have to actually make it by removing tentative
16730             # breaks that were set within it
16731 355         1536 $self->undo_forced_breakpoint_stack(0);
16732              
16733             # For -lp, extend the nobreak to include a trailing
16734             # terminal ','. This is because the -lp indentation was
16735             # not known when making one-line blocks, so we may be able
16736             # to move the line back to fit. Otherwise we may create a
16737             # needlessly stranded comma on the next line.
16738 355         788 my $iend_nobreak = $max_index_to_go - 1;
16739 355 100 100     1190 if ( $rOpts_line_up_parentheses
      66        
16740             && $next_nonblank_token_type eq ','
16741             && $Knnb eq $K_last )
16742             {
16743 1         3 my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
16744             my $is_excluded =
16745 1         3 $self->[_ris_excluded_lp_container_]->{$p_seqno};
16746 1 50       4 $iend_nobreak = $max_index_to_go if ( !$is_excluded );
16747             }
16748              
16749 355         1423 $self->set_nobreaks( $index_start_one_line_block,
16750             $iend_nobreak );
16751              
16752             # save starting block indexes so that sub correct_lp can
16753             # check and adjust -lp indentation (c098)
16754 355         551 push @{$ri_starting_one_line_block},
  355         977  
16755             $index_start_one_line_block;
16756              
16757             # then re-initialize for the next one-line block
16758 355         715 $index_start_one_line_block = undef;
16759              
16760             # then decide if we want to break after the '}' ..
16761             # We will keep going to allow certain brace followers as in:
16762             # do { $ifclosed = 1; last } unless $losing;
16763             #
16764             # But make a line break if the curly ends a
16765             # significant block:
16766 355 100 100     2793 if (
      66        
16767             (
16768             $is_block_without_semicolon{$block_type}
16769              
16770             # Follow users break point for
16771             # one line block types U & G, such as a 'try' block
16772             || $one_line_block_type =~ /^[UG]$/
16773             && $Ktoken_vars == $K_last
16774             )
16775              
16776             # if needless semicolon follows we handle it later
16777             && $next_nonblank_token ne ';'
16778             )
16779             {
16780 88 100       333 $self->end_batch()
16781             unless ($no_internal_newlines);
16782             }
16783             }
16784              
16785             # set string indicating what we need to look for brace follower
16786             # tokens
16787 969 100 100     7501 if ( $is_if_unless_elsif_else{$block_type} ) {
    100          
    100          
    100          
16788 188         470 $rbrace_follower = undef;
16789             }
16790             elsif ( $block_type eq 'do' ) {
16791 45         204 $rbrace_follower = \%is_do_follower;
16792 45 100       286 if (
16793             $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
16794             )
16795             {
16796 3         12 $rbrace_follower = { ')' => 1 };
16797             }
16798             }
16799              
16800             # added eval for borris.t
16801             elsif ($is_sort_map_grep_eval{$block_type}
16802             || $one_line_block_type eq 'G' )
16803             {
16804 133         332 $rbrace_follower = undef;
16805 133         278 $keep_going = 1;
16806             }
16807              
16808             # anonymous sub
16809             elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
16810 173 100       530 if ($one_line_block_type) {
16811              
16812 81         209 $rbrace_follower = \%is_anon_sub_1_brace_follower;
16813              
16814             # Exceptions to help keep -lp intact, see git #74 ...
16815             # Exception 1: followed by '}' on this line
16816 81 100 100     654 if ( $Ktoken_vars < $K_last
    100 100        
16817             && $next_nonblank_token eq '}' )
16818             {
16819 2         18 $rbrace_follower = undef;
16820 2         5 $keep_going = 1;
16821             }
16822              
16823             # Exception 2: followed by '}' on next line if -lp set.
16824             # The -lp requirement allows the formatting to follow
16825             # old breaks when -lp is not used, minimizing changes.
16826             # Fixes issue c087.
16827             elsif ($Ktoken_vars == $K_last
16828             && $rOpts_line_up_parentheses )
16829             {
16830 1         3 my $K_closing_container =
16831             $self->[_K_closing_container_];
16832 1         4 my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
16833 1         3 my $Kc = $K_closing_container->{$p_seqno};
16834             my $is_excluded =
16835 1         3 $self->[_ris_excluded_lp_container_]->{$p_seqno};
16836 1   33     15 $keep_going =
16837             ( defined($Kc)
16838             && $rLL->[$Kc]->[_TOKEN_] eq '}'
16839             && !$is_excluded
16840             && $Kc - $Ktoken_vars <= 2 );
16841 1 50       5 $rbrace_follower = undef if ($keep_going);
16842             }
16843             else {
16844             ## not an exception
16845             }
16846             }
16847             else {
16848 92         242 $rbrace_follower = \%is_anon_sub_brace_follower;
16849             }
16850             }
16851              
16852             # None of the above: specify what can follow a closing
16853             # brace of a block which is not an
16854             # if/elsif/else/do/sort/map/grep/eval
16855             # Testfiles:
16856             # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
16857             else {
16858 430         1094 $rbrace_follower = \%is_other_brace_follower;
16859             }
16860              
16861             # See if an elsif block is followed by another elsif or else;
16862             # complain if not.
16863 969 100       2805 if ( $block_type eq 'elsif' ) {
16864              
16865             # more code on this line ? ( this is unusual )
16866 27 100 66     188 if ( $next_nonblank_token_type ne 'b'
16867             && $next_nonblank_token_type ne '#' )
16868             {
16869             # check for 'elsif' or 'else'
16870 8 50       37 if ( !$is_elsif_else{$next_nonblank_token} ) {
16871 0         0 write_logfile_entry("(No else block)\n");
16872              
16873             # Note that we cannot add a missing else block
16874             # in this case because more code follows the
16875             # closing elsif brace on the same line.
16876 0 0 0     0 if ( $rOpts_warn_missing_else && !DEVEL_MODE ) {
16877 0         0 my $lno =
16878             $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
16879 0         0 warning("$lno: No else block\n");
16880             }
16881             }
16882             }
16883              
16884             # no more code on this line, so check on next line
16885             else {
16886 19         114 my $K_next = $self->K_next_code($K_last);
16887 19 50 66     165 if ( !defined($K_next)
      66        
16888             || $rLL->[$K_next]->[_TYPE_] ne 'k'
16889             || !$is_elsif_else{ $rLL->[$K_next]->[_TOKEN_] } )
16890             {
16891 6         14 $K_dangling_elsif = $Ktoken_vars;
16892 6         38 write_logfile_entry("(No else block)\n");
16893 6 50 50     55 if ( $rOpts_warn_missing_else && !DEVEL_MODE ) {
16894 0         0 my $lno =
16895             $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
16896 0 0       0 if ($rOpts_add_missing_else) {
16897 0         0 warning(
16898             "$lno: Adding missing else block\n");
16899             }
16900             else {
16901 0         0 warning(
16902             "$lno: No else block (use -ame to add one)\n"
16903             );
16904             }
16905             }
16906             }
16907             }
16908             }
16909              
16910             # keep going after certain block types (map,sort,grep,eval)
16911             # added eval for borris.t
16912 969 100 100     4564 if ($keep_going) {
    100          
    100          
16913              
16914             # keep going
16915 136         284 $rbrace_follower = undef;
16916              
16917             }
16918              
16919             # if no more tokens, postpone decision until re-entering
16920             elsif ( ( $next_nonblank_token_type eq 'b' )
16921             && $rOpts_add_newlines )
16922             {
16923 513 100       1591 if ( !$rbrace_follower ) {
16924 160 100 66     993 $self->end_batch()
16925             if (!$no_internal_newlines
16926             && $max_index_to_go >= 0 );
16927             }
16928             }
16929             elsif ($rbrace_follower) {
16930              
16931 292 100       970 if ( $rbrace_follower->{$next_nonblank_token} ) {
16932              
16933             # Fix for b1385: keep break after a comma following a
16934             # 'do' block. This could also be used for other block
16935             # types, but that would cause a significant change in
16936             # existing formatting without much benefit.
16937 192 0 100     975 if ( $next_nonblank_token eq ','
      66        
      33        
      33        
16938             && $Knnb eq $K_last
16939             && $block_type eq 'do'
16940             && $rOpts_add_newlines
16941             && $self->is_trailing_comma($Knnb) )
16942             {
16943 0         0 $self->[_rbreak_after_Klast_]->{$K_last} = 1;
16944             }
16945             }
16946             else {
16947 100 100 100     608 $self->end_batch()
16948             if (!$no_internal_newlines
16949             && $max_index_to_go >= 0 );
16950             }
16951              
16952 292         723 $rbrace_follower = undef;
16953             }
16954              
16955             else {
16956 28 100 100     169 $self->end_batch()
16957             if ( !$no_internal_newlines && $max_index_to_go >= 0 );
16958             }
16959              
16960             } ## end treatment of closing block token
16961              
16962             #------------------------------
16963             # handle here_doc target string
16964             #------------------------------
16965             elsif ( $type eq 'h' ) {
16966              
16967             # no newlines after seeing here-target
16968 9         42 $no_internal_newlines = 2;
16969 9         50 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16970             }
16971              
16972             #-----------------------------
16973             # handle all other token types
16974             #-----------------------------
16975             else {
16976              
16977 30225         66869 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
16978              
16979             # break after a label if requested
16980 30225 100 100     59414 if ( $rOpts_break_after_labels
      100        
16981             && $type eq 'J'
16982             && $rOpts_break_after_labels == 1 )
16983             {
16984 3 50       11 $self->end_batch()
16985             unless ($no_internal_newlines);
16986             }
16987             }
16988              
16989             # remember previous nonblank, non-comment OUTPUT token
16990 34716         59182 $K_last_nonblank_code = $Ktoken_vars;
16991              
16992             } ## end of loop over all tokens in this line
16993 5875         10435 return;
16994             } ## end sub process_line_inner_loop
16995              
16996             } ## end closure process_line_of_CODE
16997              
16998             sub is_trailing_comma {
16999 0     0 0 0 my ( $self, $KK ) = @_;
17000              
17001             # Given:
17002             # $KK - index of a comma in token list
17003             # Return:
17004             # true if the comma at index $KK is a trailing comma
17005             # false if not
17006              
17007 0         0 my $rLL = $self->[_rLL_];
17008 0         0 my $type_KK = $rLL->[$KK]->[_TYPE_];
17009 0 0       0 if ( $type_KK ne ',' ) {
17010 0         0 DEVEL_MODE
17011             && Fault("Bad call: expected type ',' but received '$type_KK'\n");
17012 0         0 return;
17013             }
17014 0         0 my $Knnb = $self->K_next_nonblank($KK);
17015 0 0       0 if ( defined($Knnb) ) {
17016 0         0 my $type_sequence = $rLL->[$Knnb]->[_TYPE_SEQUENCE_];
17017 0         0 my $type_Knnb = $rLL->[$Knnb]->[_TYPE_];
17018 0 0 0     0 if ( $type_sequence && $is_closing_type{$type_Knnb} ) {
17019 0         0 return 1;
17020             }
17021             }
17022 0         0 return;
17023             } ## end sub is_trailing_comma
17024              
17025             sub tight_paren_follows {
17026              
17027 45     45 0 162 my ( $self, $K_to_go_0, $K_ic ) = @_;
17028              
17029             # Input parameters:
17030             # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
17031             # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
17032             # Return parameter:
17033             # false if we want a break after the closing do brace
17034             # true if we do not want a break after the closing do brace
17035              
17036             # We are at the closing brace of a 'do' block. See if this brace is
17037             # followed by a closing paren, and if so, set a flag which indicates
17038             # that we do not want a line break between the '}' and ')'.
17039              
17040             # xxxxx ( ...... do { ... } ) {
17041             # ^-------looking at this brace, K_ic
17042              
17043             # Subscript notation:
17044             # _i = inner container (braces in this case)
17045             # _o = outer container (parens in this case)
17046             # _io = inner opening = '{'
17047             # _ic = inner closing = '}'
17048             # _oo = outer opening = '('
17049             # _oc = outer closing = ')'
17050              
17051             # |--K_oo |--K_oc = outer container
17052             # xxxxx ( ...... do { ...... } ) {
17053             # |--K_io |--K_ic = inner container
17054              
17055             # In general, the safe thing to do is return a 'false' value
17056             # if the statement appears to be complex. This will have
17057             # the downstream side-effect of opening up outer containers
17058             # to help make complex code readable. But for simpler
17059             # do blocks it can be preferable to keep the code compact
17060             # by returning a 'true' value.
17061              
17062 45 50       160 return unless defined($K_ic);
17063 45         129 my $rLL = $self->[_rLL_];
17064              
17065             # we should only be called at a closing block
17066 45         168 my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
17067 45 50       155 return unless ($seqno_i); # shouldn't happen;
17068              
17069             # This only applies if the next nonblank is a ')'
17070 45         205 my $K_oc = $self->K_next_nonblank($K_ic);
17071 45 100       402 return unless defined($K_oc);
17072 44         116 my $token_next = $rLL->[$K_oc]->[_TOKEN_];
17073 44 100       207 return unless ( $token_next eq ')' );
17074              
17075 7         23 my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
17076 7         20 my $K_io = $self->[_K_opening_container_]->{$seqno_i};
17077 7         22 my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
17078 7 50 33     37 return unless ( defined($K_io) && defined($K_oo) );
17079              
17080             # RULE 1: Do not break before a closing signature paren
17081             # (regardless of complexity). This is a fix for issue git#22.
17082             # Looking for something like:
17083             # sub xxx ( ... do { ... } ) {
17084             # ^----- next block_type
17085 7         205 my $K_test = $self->K_next_nonblank($K_oc);
17086 7 100 66     64 if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
17087 3         7 my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
17088 3 50       10 if ($seqno_test) {
17089 3 50 66     29 if ( $self->[_ris_asub_block_]->{$seqno_test}
17090             || $self->[_ris_sub_block_]->{$seqno_test} )
17091             {
17092 3         14 return 1;
17093             }
17094             }
17095             }
17096              
17097             # RULE 2: Break if the contents within braces appears to be 'complex'. We
17098             # base this decision on the number of tokens between braces.
17099              
17100             # xxxxx ( ... do { ... } ) {
17101             # ^^^^^^
17102              
17103             # Although very simple, it has the advantages of (1) being insensitive to
17104             # changes in lengths of identifier names, (2) easy to understand, implement
17105             # and test. A test case for this is 't/snippets/long_line.in'.
17106              
17107             # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
17108             # if ( do { $2 !~ /&/ } ) { ... }
17109              
17110             # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
17111             # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
17112              
17113             # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
17114             # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
17115              
17116 4 50       30 return if ( $K_ic - $K_io > 16 );
17117              
17118             # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
17119             # As with the previous rule, we decide based on the token count
17120              
17121             # xxxxx ( ... do { ... } ) {
17122             # ^^^^^^^^
17123              
17124             # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
17125             # $K_io - $K_oo = 4 [Pass Rule 3]
17126             # if ( do { $2 !~ /&/ } ) { ... }
17127              
17128             # Example: $K_ic - $K_oo = 10 [Pass rule 2]
17129             # $K_io - $K_oo = 9 [Pass rule 3]
17130             # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
17131              
17132 0 0       0 return if ( $K_io - $K_oo > 9 );
17133              
17134             # RULE 4: Break if we have already broken this batch of output tokens
17135 0 0       0 return if ( $K_oo < $K_to_go_0 );
17136              
17137             # RULE 5: Break if input is not on one line
17138             # For example, we will set the flag for the following expression
17139             # written in one line:
17140              
17141             # This has: $K_ic - $K_oo = 10 [Pass rule 2]
17142             # $K_io - $K_oo = 8 [Pass rule 3]
17143             # $self->debug( 'Error: ' . do { local $/; <$err> } );
17144              
17145             # but we break after the brace if it is on multiple lines on input, since
17146             # the user may prefer it on multiple lines:
17147              
17148             # [Fail rule 5]
17149             # $self->debug(
17150             # 'Error: ' . do { local $/; <$err> }
17151             # );
17152              
17153 0 0       0 if ( !$rOpts_ignore_old_breakpoints ) {
17154 0         0 my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
17155 0         0 my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
17156 0 0       0 return if ( $iline_oo != $iline_oc );
17157             }
17158              
17159             # OK to keep the paren tight
17160 0         0 return 1;
17161             } ## end sub tight_paren_follows
17162              
17163             my %is_brace_semicolon_colon;
17164              
17165             BEGIN {
17166 39     39   342 my @q = qw( { } ; : );
17167 39         82050 @is_brace_semicolon_colon{@q} = (1) x scalar(@q);
17168             }
17169              
17170             sub starting_one_line_block {
17171              
17172             # After seeing an opening curly brace, look for the closing brace and see
17173             # if the entire block will fit on a line. This routine is not always right
17174             # so a check is made later (at the closing brace) to make sure we really
17175             # have a one-line block. We have to do this preliminary check, though,
17176             # because otherwise we would always break at a semicolon within a one-line
17177             # block if the block contains multiple statements.
17178              
17179             # Given:
17180             # $Kj = index of opening brace
17181             # $K_last_nonblank = index of previous nonblank code token
17182             # $K_last = index of last token of input line
17183              
17184             # Calls 'create_one_line_block' if one-line block might be formed.
17185              
17186             # Also returns a flag '$too_long':
17187             # true = distance from opening keyword to OPENING brace exceeds
17188             # the maximum line length.
17189             # false (simple return) => not too long
17190             # Note that this flag is for distance from the statement start to the
17191             # OPENING brace, not the closing brace.
17192              
17193 969     969 0 2691 my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
17194              
17195 969         1951 my $rbreak_container = $self->[_rbreak_container_];
17196 969         1833 my $rshort_nested = $self->[_rshort_nested_];
17197 969         1741 my $rLL = $self->[_rLL_];
17198 969         1700 my $K_opening_container = $self->[_K_opening_container_];
17199 969         1738 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
17200              
17201             # kill any current block - we can only go 1 deep
17202 969         3132 create_one_line_block();
17203              
17204 969         1680 my $i_start = 0;
17205              
17206             # This routine should not have been called if there are no tokens in the
17207             # 'to_go' arrays of previously stored tokens. A previous call to
17208             # 'store_token_to_go' should have stored an opening brace. An error here
17209             # indicates that a programming change may have caused a flush operation to
17210             # clean out the previously stored tokens.
17211 969 50 33     4433 if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
17212 0         0 Fault("program bug: store_token_to_go called incorrectly\n")
17213             if (DEVEL_MODE);
17214 0         0 return;
17215             }
17216              
17217             # Return if block should be broken
17218 969         1991 my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
17219 969 100       2660 if ( $rbreak_container->{$type_sequence_j} ) {
17220 20         58 return;
17221             }
17222              
17223 949         1874 my $ris_bli_container = $self->[_ris_bli_container_];
17224 949         1788 my $is_bli = $ris_bli_container->{$type_sequence_j};
17225              
17226 949         1966 my $block_type = $rblock_type_of_seqno->{$type_sequence_j};
17227 949 50       2318 $block_type = EMPTY_STRING unless ( defined($block_type) );
17228              
17229 949         1688 my $previous_nonblank_token = EMPTY_STRING;
17230 949         1722 my $i_last_nonblank = -1;
17231 949 100       2461 if ( defined($K_last_nonblank) ) {
17232 931         1699 $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
17233 931 100       2372 if ( $i_last_nonblank >= 0 ) {
17234 827         1758 $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
17235             }
17236             }
17237              
17238             #---------------------------------------------------------------------
17239             # find the starting keyword for this block (such as 'if', 'else', ...)
17240             #---------------------------------------------------------------------
17241 949 100 100     12020 if (
    100 100        
    100 100        
    50 33        
      33        
17242             $max_index_to_go == 0
17243             ##|| $block_type =~ /^[\{\}\;\:]$/
17244             || $is_brace_semicolon_colon{$block_type}
17245             || substr( $block_type, 0, 7 ) eq 'package'
17246             )
17247             {
17248 148         373 $i_start = $max_index_to_go;
17249             }
17250              
17251             # the previous nonblank token should start these block types
17252             elsif (
17253             $i_last_nonblank >= 0
17254             && ( $previous_nonblank_token eq $block_type
17255             || $self->[_ris_asub_block_]->{$type_sequence_j}
17256             || $self->[_ris_sub_block_]->{$type_sequence_j}
17257             || substr( $block_type, -2, 2 ) eq '()' )
17258             )
17259             {
17260 577         1275 $i_start = $i_last_nonblank;
17261              
17262             # For signatures and extended syntax ...
17263             # If this brace follows a parenthesized list, we should look back to
17264             # find the keyword before the opening paren because otherwise we might
17265             # form a one line block which stays intact, and cause the parenthesized
17266             # expression to break open. That looks bad.
17267 577 100       1899 if ( $tokens_to_go[$i_start] eq ')' ) {
17268              
17269             # Find the opening paren
17270 33         104 my $K_start = $K_to_go[$i_start];
17271 33 50       126 return unless defined($K_start);
17272 33         99 my $seqno = $type_sequence_to_go[$i_start];
17273 33 50       105 return unless ($seqno);
17274 33         86 my $K_opening = $K_opening_container->{$seqno};
17275 33 50       125 return if ( !defined($K_opening) );
17276 33         104 my $i_opening = $i_start + ( $K_opening - $K_start );
17277              
17278             # give up if not on this line
17279 33 50       107 return if ( $i_opening < 0 );
17280 33         66 $i_start = $i_opening;
17281              
17282             # go back one token before the opening paren
17283 33 50       99 if ( $i_start > 0 ) { $i_start-- }
  33         61  
17284 33 100 66     180 if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
  19         47  
17285 33         81 my $lev = $levels_to_go[$i_start];
17286 33 100       131 if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return }
  2         9  
17287             }
17288             }
17289              
17290             elsif ( $previous_nonblank_token eq ')' ) {
17291              
17292             # For something like "if (xxx) {", the keyword "if" will be
17293             # just after the most recent break. This will be 0 unless
17294             # we have just killed a one-line block and are starting another.
17295             # (doif.t)
17296             # Note: cannot use inext_index_to_go[] here because that array
17297             # is still being constructed.
17298 220         570 $i_start = $index_max_forced_break + 1;
17299 220 100       769 if ( $types_to_go[$i_start] eq 'b' ) {
17300 2         6 $i_start++;
17301             }
17302              
17303             # Patch to avoid breaking short blocks defined with extended_syntax:
17304             # Strip off any trailing () which was added in the parser to mark
17305             # the opening keyword. For example, in the following
17306             # create( TypeFoo $e) {$bubba}
17307             # the blocktype would be marked as create()
17308 220         492 my $stripped_block_type = $block_type;
17309 220 50       792 if ( substr( $block_type, -2, 2 ) eq '()' ) {
17310 0         0 $stripped_block_type = substr( $block_type, 0, -2 );
17311             }
17312 220 100       732 if ( $tokens_to_go[$i_start] ne $stripped_block_type ) {
17313 10         36 return;
17314             }
17315             }
17316              
17317             # patch for SWITCH/CASE to retain one-line case/when blocks
17318             elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
17319              
17320             # Note: cannot use inext_index_to_go[] here because that array
17321             # is still being constructed.
17322 4         11 $i_start = $index_max_forced_break + 1;
17323 4 50       13 if ( $types_to_go[$i_start] eq 'b' ) {
17324 0         0 $i_start++;
17325             }
17326 4 50       11 if ( $tokens_to_go[$i_start] ne $block_type ) {
17327 0         0 return;
17328             }
17329             }
17330             else {
17331              
17332             #-------------------------------------------
17333             # Couldn't find start - return too_long flag
17334             #-------------------------------------------
17335 0         0 return 1;
17336             }
17337              
17338 937         3243 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
17339              
17340 937         2224 my $maximum_line_length =
17341             $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
17342              
17343             # see if distance to the opening container is too great to even start
17344 937 100       2492 if ( $pos > $maximum_line_length ) {
17345              
17346             #------------------------------
17347             # too long to the opening token
17348             #------------------------------
17349 14         48 return 1;
17350             }
17351              
17352             #-----------------------------------------------------------------------
17353             # OK so far: the statement is not to long just to the OPENING token. Now
17354             # see if everything to the closing token will fit on one line
17355             #-----------------------------------------------------------------------
17356              
17357             # This is part of an update to fix cases b562 .. b983
17358 923         2134 my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j};
17359 923 50       2347 return unless ( defined($K_closing) );
17360 923         2555 my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
17361             $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
17362              
17363 923         1932 my $excess = $pos + 1 + $container_length - $maximum_line_length;
17364              
17365             # Add a small tolerance for welded tokens (case b901)
17366 923 100 100     2734 if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) {
17367 24         57 $excess += 2;
17368             }
17369              
17370 923 100       2483 if ( $excess > 0 ) {
17371              
17372             # line is too long... there is no chance of forming a one line block
17373             # if the excess is more than 1 char
17374 273 100       1081 return if ( $excess > 1 );
17375              
17376             # ... and give up if it is not a one-line block on input.
17377             # note: for a one-line block on input, it may be possible to keep
17378             # it as a one-line block (by removing a needless semicolon ).
17379 2         10 my $K_start = $K_to_go[$i_start];
17380 2         9 my $ldiff =
17381             $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
17382 2 50       13 return if ($ldiff);
17383             }
17384              
17385             #------------------------------------------------------------------
17386             # Loop to check contents and length of the potential one-line block
17387             #------------------------------------------------------------------
17388 650         2116 foreach my $Ki ( $Kj + 1 .. $K_last ) {
17389              
17390             # old whitespace could be arbitrarily large, so don't use it
17391 3306 100       7433 if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
  1261         1884  
17392 2045         3050 else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
17393              
17394             # ignore some small blocks
17395 3306         5242 my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
17396 3306         4431 my $nobreak = $rshort_nested->{$type_sequence_i};
17397              
17398             # Return false result if we exceed the maximum line length,
17399 3306 50 100     12605 if ( $pos > $maximum_line_length ) {
    100 100        
    100 100        
    100 100        
      100        
      100        
17400 0         0 return;
17401             }
17402              
17403             # keep going for non-containers
17404             elsif ( !$type_sequence_i ) {
17405              
17406             }
17407              
17408             # return if we encounter another opening brace before finding the
17409             # closing brace.
17410             elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
17411             && $rLL->[$Ki]->[_TYPE_] eq '{'
17412             && $rblock_type_of_seqno->{$type_sequence_i}
17413             && !$nobreak )
17414             {
17415 26         82 return;
17416             }
17417              
17418             # if we find our closing brace..
17419             elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
17420             && $rLL->[$Ki]->[_TYPE_] eq '}'
17421             && $rblock_type_of_seqno->{$type_sequence_i}
17422             && !$nobreak )
17423             {
17424              
17425             # be sure any trailing comment also fits on the line
17426 334         676 my $Ki_nonblank = $Ki;
17427 334 100       973 if ( $Ki_nonblank < $K_last ) {
17428 183         347 $Ki_nonblank++;
17429 183 100 66     994 if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
17430             && $Ki_nonblank < $K_last )
17431             {
17432 111         242 $Ki_nonblank++;
17433             }
17434             }
17435              
17436             # Patch for one-line sort/map/grep/eval blocks with side comments:
17437             # We will ignore the side comment length for sort/map/grep/eval
17438             # because this can lead to statements which change every time
17439             # perltidy is run. Here is an example from Denis Moskowitz which
17440             # oscillates between these two states without this patch:
17441              
17442             ## --------
17443             ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
17444             ## @baz;
17445             ##
17446             ## grep {
17447             ## $_->foo ne 'bar'
17448             ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
17449             ## @baz;
17450             ## --------
17451              
17452             # When the first line is input it gets broken apart by the main
17453             # line break logic in sub process_line_of_CODE.
17454             # When the second line is input it gets recombined by
17455             # process_line_of_CODE and passed to the output routines. The
17456             # output routines (break_long_lines) do not break it apart
17457             # because the bond strengths are set to the highest possible value
17458             # for grep/map/eval/sort blocks, so the first version gets output.
17459             # It would be possible to fix this by changing bond strengths,
17460             # but they are high to prevent errors in older versions of perl.
17461             # See c100 for eval test.
17462 334 100 100     1940 if ( $Ki < $K_last
      100        
      100        
      100        
      66        
17463             && $rLL->[$K_last]->[_TYPE_] eq '#'
17464             && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
17465             && !$rOpts_ignore_side_comment_lengths
17466             && !$is_sort_map_grep_eval{$block_type}
17467             && $K_last - $Ki_nonblank <= 2 )
17468             {
17469             # Only include the side comment for if/else/elsif/unless if it
17470             # immediately follows (because the current '$rbrace_follower'
17471             # logic for these will give an immediate brake after these
17472             # closing braces). So for example a line like this
17473             # if (...) { ... } ; # very long comment......
17474             # will already break like this:
17475             # if (...) { ... }
17476             # ; # very long comment......
17477             # so we do not need to include the length of the comment, which
17478             # would break the block. Project 'bioperl' has coding like this.
17479             ## !~ /^(if|else|elsif|unless)$/
17480 19 50 66     118 if ( !$is_if_unless_elsif_else{$block_type}
17481             || $K_last == $Ki_nonblank )
17482             {
17483 19         53 $Ki_nonblank = $K_last;
17484 19         52 $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
17485              
17486 19 50       86 if ( $Ki_nonblank > $Ki + 1 ) {
17487              
17488             # source whitespace could be anything, assume
17489             # at least one space before the hash on output
17490 19 100       111 if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
17491 17         66 $pos += 1;
17492             }
17493 2         7 else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
17494             }
17495              
17496 19 50       91 if ( $pos >= $maximum_line_length ) {
17497 0         0 return;
17498             }
17499             }
17500             }
17501              
17502             #--------------------------
17503             # ok, it's a one-line block
17504             #--------------------------
17505 334         1049 create_one_line_block($i_start);
17506 334         1035 return;
17507             }
17508              
17509             # just keep going for other characters
17510             else {
17511             }
17512             }
17513              
17514             #--------------------------------------------------
17515             # End Loop to examine tokens in potential one-block
17516             #--------------------------------------------------
17517              
17518             # We haven't hit the closing brace, but there is still space. So the
17519             # question here is, should we keep going to look at more lines in hopes of
17520             # forming a new one-line block, or should we stop right now. The problem
17521             # with continuing is that we will not be able to honor breaks before the
17522             # opening brace if we continue.
17523              
17524             # Typically we will want to keep trying to make one-line blocks for things
17525             # like sort/map/grep/eval. But it is not always a good idea to make as
17526             # many one-line blocks as possible, so other types are not done. The user
17527             # can always use -mangle.
17528              
17529             # If we want to keep going, we will create a new one-line block.
17530             # The blocks which we can keep going are in a hash, but we never want
17531             # to continue if we are at a '-bli' block.
17532 290 100 66     1457 if ( $want_one_line_block{$block_type} && !$is_bli ) {
17533 47         145 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence_j};
17534             my $semicolon_count = $rtype_count
17535 47 100 100     253 && $rtype_count->{';'} ? $rtype_count->{';'} : 0;
17536              
17537             # Ignore a terminal semicolon in the count
17538 47 100       151 if ( $semicolon_count <= 2 ) {
17539 44         101 my $K_closing_container = $self->[_K_closing_container_];
17540 44         87 my $K_closing_j = $K_closing_container->{$type_sequence_j};
17541 44         168 my $Kp = $self->K_previous_nonblank($K_closing_j);
17542 44 100 66     368 if ( defined($Kp)
17543             && $rLL->[$Kp]->[_TYPE_] eq ';' )
17544             {
17545 23         55 $semicolon_count -= 1;
17546             }
17547             }
17548 47 100 66     227 if ( $semicolon_count <= 0 ) {
    100          
17549 26         75 create_one_line_block($i_start);
17550             }
17551             elsif ( $semicolon_count == 1 && $block_type eq 'eval' ) {
17552              
17553             # Mark short broken eval blocks for possible later use in
17554             # avoiding adding spaces before a 'package' line. This is not
17555             # essential but helps keep newer and older formatting the same.
17556 18         60 $self->[_ris_short_broken_eval_block_]->{$type_sequence_j} = 1;
17557             }
17558             else {
17559             ## ok
17560             }
17561             }
17562 290         771 return;
17563             } ## end sub starting_one_line_block
17564              
17565             sub unstore_token_to_go {
17566              
17567             # remove most recent token from output stream
17568 50     50 0 96 my $self = shift;
17569 50 100       130 if ( $max_index_to_go > 0 ) {
17570 47         85 $max_index_to_go--;
17571             }
17572             else {
17573 3         6 $max_index_to_go = UNDEFINED_INDEX;
17574             }
17575 50         89 return;
17576             } ## end sub unstore_token_to_go
17577              
17578             sub compare_indentation_levels {
17579              
17580             # Check to see if output line tabbing agrees with input line
17581             # this can be very useful for debugging a script which has an extra
17582             # or missing brace.
17583              
17584 3     3 0 9 my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
17585 3 50       8 return unless ( defined($K_first) );
17586              
17587 3         9 my $rLL = $self->[_rLL_];
17588              
17589             # ignore a line with a leading blank token - issue c195
17590 3         6 my $type = $rLL->[$K_first]->[_TYPE_];
17591 3 50       16 return if ( $type eq 'b' );
17592              
17593 3         10 my $structural_indentation_level = $self->[_radjusted_levels_]->[$K_first];
17594              
17595             # record max structural depth for log file
17596 3 50       10 if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
17597 0         0 $self->[_maximum_BLOCK_level_] = $structural_indentation_level;
17598 0         0 $self->[_maximum_BLOCK_level_at_line_] = $line_number;
17599             }
17600              
17601 3         8 my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
17602             my $is_closing_block =
17603             $type_sequence
17604 3   0     10 && $self->[_rblock_type_of_seqno_]->{$type_sequence}
17605             && $type eq '}';
17606              
17607 3 50       9 if ( $guessed_indentation_level ne $structural_indentation_level ) {
17608 0         0 $self->[_last_tabbing_disagreement_] = $line_number;
17609              
17610 0 0       0 if ($is_closing_block) {
17611              
17612 0 0       0 if ( !$self->[_in_brace_tabbing_disagreement_] ) {
17613 0         0 $self->[_in_brace_tabbing_disagreement_] = $line_number;
17614             }
17615 0 0       0 if ( !$self->[_first_brace_tabbing_disagreement_] ) {
17616 0         0 $self->[_first_brace_tabbing_disagreement_] = $line_number;
17617             }
17618             }
17619              
17620 0 0       0 if ( !$self->[_in_tabbing_disagreement_] ) {
17621 0         0 $self->[_tabbing_disagreement_count_]++;
17622              
17623 0 0       0 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
17624 0         0 write_logfile_entry(
17625             "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
17626             );
17627             }
17628 0         0 $self->[_in_tabbing_disagreement_] = $line_number;
17629 0 0       0 $self->[_first_tabbing_disagreement_] = $line_number
17630             unless ( $self->[_first_tabbing_disagreement_] );
17631             }
17632             }
17633             else {
17634              
17635 3 50       14 $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
17636              
17637 3         10 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
17638 3 50       8 if ($in_tabbing_disagreement) {
17639              
17640 0 0       0 if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
17641 0         0 write_logfile_entry(
17642             "End indentation disagreement from input line $in_tabbing_disagreement\n"
17643             );
17644              
17645 0 0       0 if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
17646             {
17647 0         0 write_logfile_entry(
17648             "No further tabbing disagreements will be noted\n");
17649             }
17650             }
17651 0         0 $self->[_in_tabbing_disagreement_] = 0;
17652              
17653             }
17654             }
17655 3         8 return;
17656             } ## end sub compare_indentation_levels
17657              
17658             ###################################################
17659             # CODE SECTION 8: Utilities for setting breakpoints
17660             ###################################################
17661              
17662             { ## begin closure set_forced_breakpoint
17663              
17664             my @forced_breakpoint_undo_stack;
17665              
17666             # These are global vars for efficiency:
17667             # my $forced_breakpoint_count;
17668             # my $forced_breakpoint_undo_count;
17669             # my $index_max_forced_break;
17670              
17671             # Break before or after certain tokens based on user settings
17672             my %break_before_or_after_token;
17673              
17674             BEGIN {
17675              
17676             # Updated to use all operators. This fixes case b1054
17677             # Here is the previous simplified version:
17678             ## my @q = qw( . : ? and or xor && || );
17679 39     39   751 my @q = @all_operators;
17680              
17681 39         204 push @q, ',';
17682 39         3769 @break_before_or_after_token{@q} = (1) x scalar(@q);
17683             } ## end BEGIN
17684              
17685             sub set_fake_breakpoint {
17686              
17687             # Just bump up the breakpoint count as a signal that there are breaks.
17688             # This is useful if we have breaks but may want to postpone deciding
17689             # where to make them.
17690 213     213 0 450 $forced_breakpoint_count++;
17691 213         416 return;
17692             } ## end sub set_fake_breakpoint
17693              
17694 39     39   401 use constant DEBUG_FORCE => 0;
  39         125  
  39         28993  
17695              
17696             sub set_forced_breakpoint {
17697 3946     3946 0 7889 my ( $self, $i ) = @_;
17698              
17699             # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
17700              
17701             # Exceptions:
17702             # - If the token at index $i is a blank, backup to $i-1 to
17703             # get to the previous nonblank token.
17704             # - For certain tokens, the break may be placed BEFORE the token
17705             # at index $i, depending on user break preference settings.
17706             # - If a break is made after an opening token, then a break will
17707             # also be made before the corresponding closing token.
17708              
17709             # Returns '$i_nonblank':
17710             # = index of the token after which the breakpoint was actually placed
17711             # = undef if breakpoint was not set.
17712 3946         6030 my $i_nonblank;
17713              
17714 3946 50 33     14048 if ( !defined($i) || $i < 0 ) {
17715              
17716             # Calls with bad index $i are harmless but waste time and should
17717             # be caught and eliminated during code development.
17718 0         0 if (DEVEL_MODE) {
17719             my ( $a, $b, $c ) = caller();
17720             Fault(
17721             "Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
17722             );
17723             }
17724 0         0 return;
17725             }
17726              
17727             # Break after token $i
17728 3946         8986 $i_nonblank = $self->set_forced_breakpoint_AFTER($i);
17729              
17730             # If we break at an opening container..break at the closing
17731 3946         6143 my $set_closing;
17732 3946 100 100     13944 if ( defined($i_nonblank)
17733             && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
17734             {
17735 1584         2826 $set_closing = 1;
17736 1584         4364 $self->set_closing_breakpoint($i_nonblank);
17737             }
17738              
17739 3946         5474 DEBUG_FORCE && do {
17740             my ( $a, $b, $c ) = caller();
17741             my $msg =
17742             "FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
17743             if ( !defined($i_nonblank) ) {
17744             $i = EMPTY_STRING unless defined($i);
17745             $msg .= " but could not set break after i='$i'\n";
17746             }
17747             else {
17748             my $nobr = $nobreak_to_go[$i_nonblank];
17749             $nobr = 0 if ( !defined($nobr) );
17750             $msg .= <<EOM;
17751             set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobr
17752             EOM
17753             if ( defined($set_closing) ) {
17754             $msg .=
17755             " Also set closing breakpoint corresponding to this token\n";
17756             }
17757             }
17758             print {*STDOUT} $msg;
17759             };
17760              
17761 3946         7173 return $i_nonblank;
17762             } ## end sub set_forced_breakpoint
17763              
17764             sub set_forced_breakpoint_AFTER {
17765 4500     4500 0 8002 my ( $self, $i ) = @_;
17766              
17767             # This routine is only called by sub set_forced_breakpoint and
17768             # sub set_closing_breakpoint.
17769              
17770             # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
17771              
17772             # Exceptions:
17773             # - If the token at index $i is a blank, backup to $i-1 to
17774             # get to the previous nonblank token.
17775             # - For certain tokens, the break may be placed BEFORE the token
17776             # at index $i, depending on user break preference settings.
17777              
17778             # Returns:
17779             # - the index of the token after which the break was set, or
17780             # - undef if no break was set
17781              
17782 4500 50       9055 return if ( !defined($i) );
17783 4500 50       8842 return if ( $i < 0 );
17784              
17785             # Back up at a blank so we have a token to examine.
17786             # This was added to fix for cases like b932 involving an '=' break.
17787 4500 100 100     15559 if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
  752         1391  
17788              
17789             # Never break between welded tokens
17790             return
17791             if ( $total_weld_count
17792 4500 100 100     10511 && $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
17793              
17794 4457         7713 my $token = $tokens_to_go[$i];
17795 4457         6663 my $type = $types_to_go[$i];
17796              
17797             # For certain tokens, use user settings to decide if we break before or
17798             # after it
17799 4457 100 66     19836 if ( $break_before_or_after_token{$token}
    100 66        
      66        
17800             && ( $type eq $token || $type eq 'k' ) )
17801             {
17802 1925 100 66     5656 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
  238         450  
17803             }
17804              
17805             # breaks are forced before 'if' and 'unless'
17806 12         41 elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
17807             else {
17808             ## ok
17809             }
17810              
17811 4457 100 66     14640 if ( $i >= 0 && $i <= $max_index_to_go ) {
17812 4451 100       9548 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
17813              
17814 4451 100 66     20788 if ( $i_nonblank >= 0
      100        
17815             && !$nobreak_to_go[$i_nonblank]
17816             && !$forced_breakpoint_to_go[$i_nonblank] )
17817             {
17818 3523         6254 $forced_breakpoint_to_go[$i_nonblank] = 1;
17819              
17820 3523 100       7179 if ( $i_nonblank > $index_max_forced_break ) {
17821 2393         3631 $index_max_forced_break = $i_nonblank;
17822             }
17823 3523         4789 $forced_breakpoint_count++;
17824 3523         6852 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
17825             = $i_nonblank;
17826              
17827             # success
17828 3523         7936 return $i_nonblank;
17829             }
17830             }
17831 934         2095 return;
17832             } ## end sub set_forced_breakpoint_AFTER
17833              
17834             sub clear_breakpoint_undo_stack {
17835 969     969 0 2057 my ($self) = @_;
17836 969         1607 $forced_breakpoint_undo_count = 0;
17837 969         1537 return;
17838             }
17839              
17840 39     39   400 use constant DEBUG_UNDOBP => 0;
  39         104  
  39         24563  
17841              
17842             sub undo_forced_breakpoint_stack {
17843              
17844 451     451 0 1130 my ( $self, $i_start ) = @_;
17845              
17846             # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
17847             # remove all breakpoints from the top of the 'undo stack' down to and
17848             # including index $i_start.
17849              
17850             # The 'undo stack' is a stack of all breakpoints made for a batch of
17851             # code.
17852              
17853 451 50       1232 if ( $i_start < 0 ) {
17854 0         0 $i_start = 0;
17855 0         0 my ( $a, $b, $c ) = caller();
17856              
17857             # Bad call, can only be due to a recent programming change.
17858 0         0 Fault(
17859             "Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
17860             ) if (DEVEL_MODE);
17861 0         0 return;
17862             }
17863              
17864 451         1308 while ( $forced_breakpoint_undo_count > $i_start ) {
17865 750         1384 my $i =
17866             $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
17867 750 50 33     2764 if ( $i >= 0 && $i <= $max_index_to_go ) {
17868 750         1314 $forced_breakpoint_to_go[$i] = 0;
17869 750         1149 $forced_breakpoint_count--;
17870              
17871 750         1731 DEBUG_UNDOBP && do {
17872             my ( $a, $b, $c ) = caller();
17873             print {*STDOUT}
17874             "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
17875             };
17876             }
17877              
17878             # shouldn't happen, but not a critical error
17879             else {
17880 0         0 if (DEVEL_MODE) {
17881             my ( $a, $b, $c ) = caller();
17882             Fault(<<EOM);
17883             Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go
17884             EOM
17885             }
17886             }
17887             }
17888 451         856 return;
17889             } ## end sub undo_forced_breakpoint_stack
17890             } ## end closure set_forced_breakpoint
17891              
17892             { ## begin closure set_closing_breakpoint
17893              
17894             my %postponed_breakpoint;
17895              
17896             sub initialize_postponed_breakpoint {
17897 561     561 0 1820 %postponed_breakpoint = ();
17898 561         1085 return;
17899             }
17900              
17901             sub has_postponed_breakpoint {
17902 2988     2988 0 5566 my ($seqno) = @_;
17903 2988         7763 return $postponed_breakpoint{$seqno};
17904             }
17905              
17906             sub set_closing_breakpoint {
17907              
17908             # set a breakpoint at a matching closing token
17909 2254     2254 0 4816 my ( $self, $i_break ) = @_;
17910              
17911 2254 100       5056 if ( defined( $mate_index_to_go[$i_break] ) ) {
17912              
17913             # Don't reduce the '2' in the statement below.
17914             # Test files: attrib.t, BasicLyx.pm.html
17915 561 100       1773 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
17916              
17917             # break before } ] and ), but sub set_forced_breakpoint will decide
17918             # to break before or after a ? and :
17919 554 100       1397 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
17920 554         1421 $self->set_forced_breakpoint_AFTER(
17921             $mate_index_to_go[$i_break] - $inc );
17922             }
17923             }
17924             else {
17925 1693         3115 my $type_sequence = $type_sequence_to_go[$i_break];
17926 1693 50       3608 if ($type_sequence) {
17927 1693         3715 $postponed_breakpoint{$type_sequence} = 1;
17928             }
17929             }
17930 2254         4079 return;
17931             } ## end sub set_closing_breakpoint
17932             } ## end closure set_closing_breakpoint
17933              
17934             #########################################
17935             # CODE SECTION 9: Process batches of code
17936             #########################################
17937              
17938             { ## begin closure grind_batch_of_CODE
17939              
17940             # The routines in this closure begin the processing of a 'batch' of code.
17941              
17942             # A variable to keep track of consecutive nonblank lines so that we can
17943             # insert occasional blanks
17944             my @nonblank_lines_at_depth;
17945              
17946             # A variable to remember maximum size of previous batches; this is needed
17947             # by the logical padding routine
17948             my $peak_batch_size;
17949             my $batch_count;
17950              
17951             # variables to keep track of indentation of unmatched containers.
17952             my %saved_opening_indentation;
17953              
17954             sub initialize_grind_batch_of_CODE {
17955 561     561 0 1656 @nonblank_lines_at_depth = ();
17956 561         1218 $peak_batch_size = 0;
17957 561         1086 $batch_count = 0;
17958 561         2226 %saved_opening_indentation = ();
17959 561         1114 return;
17960             } ## end sub initialize_grind_batch_of_CODE
17961              
17962             # sub grind_batch_of_CODE receives sections of code which are the longest
17963             # possible lines without a break. In other words, it receives what is left
17964             # after applying all breaks forced by blank lines, block comments, side
17965             # comments, pod text, and structural braces. Its job is to break this code
17966             # down into smaller pieces, if necessary, which fit within the maximum
17967             # allowed line length. Then it sends the resulting lines of code on down
17968             # the pipeline to the VerticalAligner package, breaking the code into
17969             # continuation lines as necessary. The batch of tokens are in the "to_go"
17970             # arrays. The name 'grind' is slightly suggestive of a machine continually
17971             # breaking down long lines of code, but mainly it is unique and easy to
17972             # remember and find with an editor search.
17973              
17974             # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
17975             # together in the following way:
17976              
17977             # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
17978             # combines them into the largest sequences of tokens which might form a new
17979             # line.
17980             # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
17981             # lines.
17982              
17983             # So sub 'process_line_of_CODE' builds up the longest possible continuous
17984             # sequences of tokens, regardless of line length, and then
17985             # grind_batch_of_CODE breaks these sequences back down into the new output
17986             # lines.
17987              
17988             # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
17989              
17990 39     39   404 use constant DEBUG_GRIND => 0;
  39         148  
  39         14876  
17991              
17992             sub check_grind_input {
17993              
17994             # Check for valid input to sub grind_batch_of_CODE. An error here
17995             # would most likely be due to an error in 'sub store_token_to_go'.
17996 0     0 0 0 my ($self) = @_;
17997              
17998             # Be sure there are tokens in the batch
17999 0 0       0 if ( $max_index_to_go < 0 ) {
18000 0         0 Fault(<<EOM);
18001             sub grind incorrectly called with max_index_to_go=$max_index_to_go
18002             EOM
18003             }
18004 0         0 my $Klimit = $self->[_Klimit_];
18005              
18006             # The local batch tokens must be a continuous part of the global token
18007             # array.
18008 0         0 my $KK;
18009 0         0 foreach my $ii ( 0 .. $max_index_to_go ) {
18010              
18011 0         0 my $Km = $KK;
18012              
18013 0         0 $KK = $K_to_go[$ii];
18014 0 0 0     0 if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
      0        
18015 0 0       0 $KK = '(undef)' unless defined($KK);
18016 0         0 Fault(<<EOM);
18017             at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
18018             EOM
18019             }
18020              
18021 0 0 0     0 if ( $ii > 0 && $KK != $Km + 1 ) {
18022 0         0 my $im = $ii - 1;
18023 0         0 Fault(<<EOM);
18024             Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1
18025             EOM
18026             }
18027             }
18028 0         0 return;
18029             } ## end sub check_grind_input
18030              
18031             # This filter speeds up a critical if-test
18032             my %quick_filter;
18033              
18034             BEGIN {
18035 39     39   288 my @q = qw# L { ( [ R ] ) } ? : f => #;
18036 39         139 push @q, ',';
18037 39         198987 @quick_filter{@q} = (1) x scalar(@q);
18038             }
18039              
18040             sub grind_batch_of_CODE {
18041              
18042 4561     4561 0 8012 my ($self) = @_;
18043              
18044             #-----------------------------------------------------------------
18045             # This sub directs the formatting of one complete batch of tokens.
18046             # The tokens of the batch are in the '_to_go' arrays.
18047             #-----------------------------------------------------------------
18048              
18049 4561         7929 my $this_batch = $self->[_this_batch_];
18050 4561         8686 $this_batch->[_peak_batch_size_] = $peak_batch_size;
18051 4561         7861 $this_batch->[_batch_count_] = ++$batch_count;
18052              
18053 4561         6674 $self->check_grind_input() if (DEVEL_MODE);
18054              
18055             # This routine is only called from sub flush_batch_of_code, so that
18056             # routine is a better spot for debugging.
18057 4561         6180 DEBUG_GRIND && do {
18058             my $token = my $type = EMPTY_STRING;
18059             if ( $max_index_to_go >= 0 ) {
18060             $token = $tokens_to_go[$max_index_to_go];
18061             $type = $types_to_go[$max_index_to_go];
18062             }
18063             my $output_str = EMPTY_STRING;
18064             if ( $max_index_to_go > 20 ) {
18065             my $mm = $max_index_to_go - 10;
18066             $output_str =
18067             join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... "
18068             . join( EMPTY_STRING,
18069             @tokens_to_go[ $mm .. $max_index_to_go ] );
18070             }
18071             else {
18072             $output_str = join EMPTY_STRING,
18073             @tokens_to_go[ 0 .. $max_index_to_go ];
18074             }
18075             print {*STDOUT} <<EOM;
18076             grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
18077             $output_str
18078             EOM
18079             };
18080              
18081             # Remove any trailing blank, which is possible (c192 has example)
18082 4561 100 66     17856 if ( $max_index_to_go >= 0 && $types_to_go[$max_index_to_go] eq 'b' ) {
18083 223         501 $max_index_to_go -= 1;
18084             }
18085              
18086 4561 50       9593 return if ( $max_index_to_go < 0 );
18087              
18088 4561         6854 my $lp_object_count_this_batch;
18089 4561 100       9313 if ($rOpts_line_up_parentheses) {
18090 302         891 $this_batch->[_lp_object_count_this_batch_] =
18091             $lp_object_count_this_batch = $self->set_lp_indentation();
18092             }
18093              
18094             #-----------------------------------------------------------
18095             # Shortcut for block comments. But not for block comments
18096             # with lp because they must use the lp corrector step below.
18097             #-----------------------------------------------------------
18098 4561 100 100     15779 if ( !$max_index_to_go
      100        
18099             && $types_to_go[0] eq '#'
18100             && !$lp_object_count_this_batch )
18101             {
18102 629         1321 my $ibeg = 0;
18103 629         1703 $this_batch->[_ri_first_] = [$ibeg];
18104 629         1525 $this_batch->[_ri_last_] = [$ibeg];
18105              
18106 629         2798 $self->convey_batch_to_vertical_aligner();
18107              
18108 629         1575 my $level = $levels_to_go[$ibeg];
18109 629         1529 $self->[_last_line_leading_type_] = $types_to_go[$ibeg];
18110 629         1351 $self->[_last_line_leading_level_] = $level;
18111 629         1389 $nonblank_lines_at_depth[$level] = 1;
18112 629         1333 return;
18113             }
18114              
18115             #-------------
18116             # Normal route
18117             #-------------
18118              
18119 3932         7224 my $rLL = $self->[_rLL_];
18120              
18121             #-------------------------------------------------------
18122             # Loop over the batch to initialize some batch variables
18123             #-------------------------------------------------------
18124 3932         6288 my $comma_count_in_batch = 0;
18125 3932         9469 my @colon_list;
18126             my @ix_seqno_controlling_ci;
18127 3932         0 my %comma_arrow_count;
18128 3932         6005 my $comma_arrow_count_contained = 0;
18129 3932         9258 my @unmatched_closing_indexes_in_this_batch;
18130             my @unmatched_opening_indexes_in_this_batch;
18131              
18132 3932         0 my @i_for_semicolon;
18133 3932         9002 foreach my $i ( 0 .. $max_index_to_go ) {
18134              
18135 53822 100       95097 if ( $types_to_go[$i] eq 'b' ) {
18136 18727         31013 $inext_to_go[$i] = $inext_to_go[ $i - 1 ] = $i + 1;
18137 18727         26599 next;
18138             }
18139              
18140 35095         49755 $inext_to_go[$i] = $i + 1;
18141              
18142             # This is an optional shortcut to save a bit of time by skipping
18143             # most tokens. Note: the filter may need to be updated if the
18144             # next 'if' tests are ever changed to include more token types.
18145 35095 100       70995 next if ( !$quick_filter{ $types_to_go[$i] } );
18146              
18147 13062         19683 my $type = $types_to_go[$i];
18148              
18149             # gather info needed by sub break_long_lines
18150 13062 100       27067 if ( $type_sequence_to_go[$i] ) {
    100          
    100          
    50          
18151 9096         13617 my $seqno = $type_sequence_to_go[$i];
18152 9096         13129 my $token = $tokens_to_go[$i];
18153              
18154             # remember indexes of any tokens controlling xci
18155             # in this batch. This list is needed by sub undo_ci.
18156 9096 100       17864 if ( $self->[_ris_seqno_controlling_ci_]->{$seqno} ) {
18157 120         203 push @ix_seqno_controlling_ci, $i;
18158             }
18159              
18160 9096 100       16769 if ( $is_opening_sequence_token{$token} ) {
18161 4548 100       9482 if ( $self->[_rbreak_container_]->{$seqno} ) {
18162 22         105 $self->set_forced_breakpoint($i);
18163             }
18164 4548         8092 push @unmatched_opening_indexes_in_this_batch, $i;
18165 4548 100       10679 if ( $type eq '?' ) {
18166 186         659 push @colon_list, $type;
18167             }
18168             }
18169             else { ## $is_closing_sequence_token{$token}
18170              
18171 4548 100 100     16759 if ( $i > 0 && $self->[_rbreak_container_]->{$seqno} ) {
18172 3         10 $self->set_forced_breakpoint( $i - 1 );
18173             }
18174              
18175 4548         7783 my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
18176 4548 100 66     14083 if ( defined($i_mate) && $i_mate >= 0 ) {
18177 3730 50       7594 if ( $type_sequence_to_go[$i_mate] ==
18178             $type_sequence_to_go[$i] )
18179             {
18180 3730         6403 $mate_index_to_go[$i] = $i_mate;
18181 3730         5914 $mate_index_to_go[$i_mate] = $i;
18182 3730         5654 my $cac = $comma_arrow_count{$seqno};
18183 3730 100       7665 $comma_arrow_count_contained += $cac if ($cac);
18184             }
18185             else {
18186 0         0 push @unmatched_opening_indexes_in_this_batch,
18187             $i_mate;
18188 0         0 push @unmatched_closing_indexes_in_this_batch, $i;
18189             }
18190             }
18191             else {
18192 818         1939 push @unmatched_closing_indexes_in_this_batch, $i;
18193             }
18194 4548 100       10865 if ( $type eq ':' ) {
18195 186         651 push @colon_list, $type;
18196             }
18197             }
18198              
18199             } ## end if ($seqno)
18200              
18201 2916         4707 elsif ( $type eq ',' ) { $comma_count_in_batch++; }
18202             elsif ( $type eq '=>' ) {
18203 1016 100       2406 if (@unmatched_opening_indexes_in_this_batch) {
18204 948         1479 my $j = $unmatched_opening_indexes_in_this_batch[-1];
18205 948         1594 my $seqno = $type_sequence_to_go[$j];
18206 948         2233 $comma_arrow_count{$seqno}++;
18207             }
18208             }
18209             elsif ( $type eq 'f' ) {
18210 34         80 push @i_for_semicolon, $i;
18211             }
18212             else {
18213             ## not a special type
18214             }
18215              
18216             } ## end for ( my $i = 0 ; $i <=...)
18217              
18218             # Break at a single interior C-style for semicolon in this batch (c154)
18219 3932 100 100     12064 if ( @i_for_semicolon && @i_for_semicolon == 1 ) {
18220 2         3 my $i = $i_for_semicolon[0];
18221 2         5 my $inext = $inext_to_go[$i];
18222 2 50 33     8 if ( $inext <= $max_index_to_go && $types_to_go[$inext] ne '#' ) {
18223 2         7 $self->set_forced_breakpoint($i);
18224             }
18225             }
18226              
18227 3932         7703 my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
18228             @unmatched_closing_indexes_in_this_batch;
18229              
18230 3932 100       8884 if (@unmatched_opening_indexes_in_this_batch) {
18231 714         2211 $this_batch->[_runmatched_opening_indexes_] =
18232             \@unmatched_opening_indexes_in_this_batch;
18233             }
18234              
18235 3932 100       8227 if (@ix_seqno_controlling_ci) {
18236 40         107 $this_batch->[_rix_seqno_controlling_ci_] =
18237             \@ix_seqno_controlling_ci;
18238             }
18239              
18240             #------------------------
18241             # Set special breakpoints
18242             #------------------------
18243             # If this line ends in a code block brace, set breaks at any
18244             # previous closing code block braces to breakup a chain of code
18245             # blocks on one line. This is very rare but can happen for
18246             # user-defined subs. For example we might be looking at this:
18247             # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
18248 3932         6106 my $saw_good_break; # flag to force breaks even if short line
18249 3932 100 100     13756 if (
      100        
18250              
18251             # looking for opening or closing block brace
18252             $block_type_to_go[$max_index_to_go]
18253              
18254             # never any good breaks if just one token
18255             && $max_index_to_go > 0
18256              
18257             # but not one of these which are never duplicated on a line:
18258             # until|while|for|if|elsif|else
18259             && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
18260             }
18261             )
18262             {
18263 356         821 my $lev = $nesting_depth_to_go[$max_index_to_go];
18264              
18265             # Walk backwards from the end and
18266             # set break at any closing block braces at the same level.
18267             # But quit if we are not in a chain of blocks.
18268 356         1444 foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) {
18269 754 100       1846 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
18270 732 50       1584 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
18271              
18272 732 50       3461 if ( $block_type_to_go[$i] ) {
    100          
18273 0 0       0 if ( $tokens_to_go[$i] eq '}' ) {
18274 0         0 $self->set_forced_breakpoint($i);
18275 0         0 $saw_good_break = 1;
18276             }
18277             }
18278              
18279             # quit if we see anything besides words, function, blanks
18280             # at this level
18281 324         761 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
18282             else {
18283             ## keep going
18284             }
18285             }
18286             }
18287              
18288             #-----------------------------------------------
18289             # insertion of any blank lines before this batch
18290             #-----------------------------------------------
18291              
18292 3932         6624 my $imin = 0;
18293 3932         6320 my $imax = $max_index_to_go;
18294              
18295             # trim any blank tokens - for safety, but should not be necessary
18296 3932 50       8810 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
  0         0  
18297 3932 50       8454 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
  0         0  
18298              
18299 3932 50       8441 if ( $imin > $imax ) {
18300 0         0 if (DEVEL_MODE) {
18301             my $K0 = $K_to_go[0];
18302             my $lno = EMPTY_STRING;
18303             if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
18304             Fault(<<EOM);
18305             Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
18306             EOM
18307             }
18308 0         0 return;
18309             }
18310              
18311 3932         7404 my $last_line_leading_type = $self->[_last_line_leading_type_];
18312 3932         6812 my $last_line_leading_level = $self->[_last_line_leading_level_];
18313              
18314 3932         6439 my $leading_type = $types_to_go[0];
18315 3932         6429 my $leading_level = $levels_to_go[0];
18316              
18317             # add blank line(s) before certain key types but not after a comment
18318 3932 100       8924 if ( $last_line_leading_type ne '#' ) {
18319 3078         5199 my $blank_count = 0;
18320 3078         5339 my $leading_token = $tokens_to_go[0];
18321              
18322             # break before certain key blocks except one-liners
18323 3078 100 100     14240 if ( $leading_type eq 'k' ) {
    100          
    100          
18324 1142 100 100     8585 if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
    100 66        
18325 7 100       26 $blank_count = $rOpts->{'blank-lines-before-subs'}
18326             if ( terminal_type_i( 0, $max_index_to_go ) ne '}' );
18327             }
18328              
18329             # Break before certain block types if we haven't had a
18330             # break at this level for a while. This is the
18331             # difficult decision..
18332             elsif ($last_line_leading_type ne 'b'
18333             && $is_if_unless_while_until_for_foreach{$leading_token} )
18334             {
18335 102         286 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
18336 102 50       366 if ( !defined($lc) ) { $lc = 0 }
  0         0  
18337              
18338             # patch for RT #128216: no blank line inserted at a level
18339             # change
18340 102 100       444 if ( $levels_to_go[0] != $last_line_leading_level ) {
18341 32         83 $lc = 0;
18342             }
18343              
18344 102 50 100     871 if ( $rOpts->{'blanks-before-blocks'}
      66        
      66        
18345             && $lc >= $rOpts->{'long-block-line-count'}
18346             && $self->consecutive_nonblank_lines() >=
18347             $rOpts->{'long-block-line-count'}
18348             && terminal_type_i( 0, $max_index_to_go ) ne '}' )
18349             {
18350 1         3 $blank_count = 1;
18351             }
18352             }
18353             else {
18354             ## no blank
18355             }
18356             }
18357              
18358             # blank lines before subs except declarations and one-liners
18359             # Fix for c250: added new type 'P', changed 'i' to 'S'
18360             elsif ( $leading_type eq 'S' || $leading_type eq 'P' ) {
18361             my $special_identifier =
18362 73         289 $self->[_ris_special_identifier_token_]->{$leading_token};
18363 73 50       242 if ($special_identifier) {
18364             ## $leading_token =~ /$SUB_PATTERN/
18365 73 100       236 if ( $special_identifier eq 'sub' ) {
    50          
18366              
18367 55 100       262 $blank_count = $rOpts->{'blank-lines-before-subs'}
18368             if ( terminal_type_i( 0, $max_index_to_go ) !~
18369             /^[\;\}\,]$/ );
18370             }
18371              
18372             # break before all package declarations
18373             ## substr( $leading_token, 0, 8 ) eq 'package '
18374             elsif ( $special_identifier eq 'package' ) {
18375              
18376             # ... except in a very short eval block
18377 18         31 my $pseqno = $parent_seqno_to_go[0];
18378             $blank_count = $rOpts->{'blank-lines-before-packages'}
18379             if (
18380 18 50       67 !$self->[_ris_short_broken_eval_block_]->{$pseqno}
18381             );
18382             }
18383             else {
18384 0         0 DEVEL_MODE && Fault(<<EOM);
18385             Found special identifier '$special_identifier', but expecting 'sub' or 'package'
18386             EOM
18387             }
18388             }
18389             }
18390              
18391             # Check for blank lines wanted before a closing brace
18392             elsif ( $leading_token eq '}' ) {
18393 604 50 66     2355 if ( $rOpts->{'blank-lines-before-closing-block'}
      33        
18394             && $block_type_to_go[0]
18395             && $block_type_to_go[0] =~
18396             /$blank_lines_before_closing_block_pattern/ )
18397             {
18398 2         7 my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
18399 2 50       6 if ( $nblanks > $blank_count ) {
18400 2         5 $blank_count = $nblanks;
18401             }
18402             }
18403             }
18404             else {
18405             ## ok
18406             }
18407              
18408 3078 100       7374 if ($blank_count) {
18409              
18410             # future: send blank line down normal path to VerticalAligner?
18411 43         168 $self->flush_vertical_aligner();
18412 43         124 my $file_writer_object = $self->[_file_writer_object_];
18413 43         229 $file_writer_object->require_blank_code_lines($blank_count);
18414             }
18415             }
18416              
18417             # update blank line variables and count number of consecutive
18418             # non-blank, non-comment lines at this level
18419 3932 100 100     18327 if ( $leading_level == $last_line_leading_level
      100        
18420             && $leading_type ne '#'
18421             && defined( $nonblank_lines_at_depth[$leading_level] ) )
18422             {
18423 2297         4010 $nonblank_lines_at_depth[$leading_level]++;
18424             }
18425             else {
18426 1635         4843 $nonblank_lines_at_depth[$leading_level] = 1;
18427             }
18428              
18429 3932         7133 $self->[_last_line_leading_type_] = $leading_type;
18430 3932         6549 $self->[_last_line_leading_level_] = $leading_level;
18431              
18432             #--------------------------
18433             # scan lists and long lines
18434             #--------------------------
18435              
18436             # Flag to remember if we called sub 'pad_array_to_go'.
18437             # Some routines (break_lists(), break_long_lines() ) need some
18438             # extra tokens added at the end of the batch. Most batches do not
18439             # use these routines, so we will avoid calling 'pad_array_to_go'
18440             # unless it is needed.
18441 3932         9526 my $called_pad_array_to_go;
18442              
18443             # set all forced breakpoints for good list formatting
18444             my $is_long_line;
18445 3932         0 my $multiple_old_lines_in_batch;
18446 3932 100       8714 if ( $max_index_to_go > 0 ) {
    100          
18447 3273         10330 $is_long_line =
18448             $self->excess_line_length( $imin, $max_index_to_go ) > 0;
18449              
18450 3273         6057 my $Kbeg = $K_to_go[0];
18451 3273         5330 my $Kend = $K_to_go[$max_index_to_go];
18452 3273         7574 $multiple_old_lines_in_batch =
18453             $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
18454             }
18455              
18456             # Optional optimization: avoid calling break_lists for a single block
18457             # brace. This is done by turning off the flag $is_unbalanced_batch.
18458             elsif ($is_unbalanced_batch) {
18459 496         1082 my $block_type = $block_type_to_go[0];
18460 496 100 100     3220 if ( $block_type
      100        
18461             && !$lp_object_count_this_batch
18462             && $is_block_without_semicolon{$block_type} )
18463             {
18464             # opening blocks can skip break_lists call if no commas in
18465             # container.
18466 192 100       610 if ( $leading_type eq '{' ) {
18467 14         46 my $seqno = $type_sequence_to_go[0];
18468 14         1140 my $rtype_count = $self->[_rtype_count_by_seqno_]->{$seqno};
18469 14 50       51 if ($rtype_count) {
18470 14         51 my $comma_count = $rtype_count->{','};
18471 14 50       112 if ( !$comma_count ) {
18472 14         58 $is_unbalanced_batch = 0;
18473             }
18474             }
18475             }
18476              
18477             # closing block braces can be skipped
18478             else {
18479 178         391 $is_unbalanced_batch = 0;
18480             }
18481              
18482             }
18483             }
18484             else {
18485             ## ok - single token
18486             }
18487              
18488 3932         7580 my $rbond_strength_bias = [];
18489 3932 100 100     29457 if (
      100        
      33        
      66        
      66        
      66        
      66        
18490             $is_long_line
18491             || $multiple_old_lines_in_batch
18492              
18493             # must always call break_lists() with unbalanced batches because
18494             # it is maintaining some stacks
18495             || $is_unbalanced_batch
18496              
18497             # call break_lists if we might want to break at commas
18498             || (
18499             $comma_count_in_batch
18500             && ( $rOpts_maximum_fields_per_table > 0
18501             && $rOpts_maximum_fields_per_table <= $comma_count_in_batch
18502             || $rOpts_comma_arrow_breakpoints == 0 )
18503             )
18504              
18505             # call break_lists if user may want to break open some one-line
18506             # hash references
18507             || ( $comma_arrow_count_contained
18508             && $rOpts_comma_arrow_breakpoints != 3 )
18509             )
18510             {
18511             # add a couple of extra terminal blank tokens
18512 1745         6637 $self->pad_array_to_go();
18513 1745         2760 $called_pad_array_to_go = 1;
18514              
18515 1745         6124 my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias );
18516 1745   66     5996 $saw_good_break ||= $sgb;
18517             }
18518              
18519             # let $ri_first and $ri_last be references to lists of
18520             # first and last tokens of line fragments to output..
18521 3932         7301 my ( $ri_first, $ri_last );
18522              
18523             #-----------------------------
18524             # a single token uses one line
18525             #-----------------------------
18526 3932 100       8153 if ( !$max_index_to_go ) {
18527 659         1490 $ri_first = [$imin];
18528 659         1549 $ri_last = [$imax];
18529             }
18530              
18531             # for multiple tokens
18532             else {
18533              
18534             #-------------------------
18535             # write a single line if..
18536             #-------------------------
18537 3273 100 100     18149 if (
18538             (
18539              
18540             # this line is 'short'
18541             !$is_long_line
18542              
18543             # and we didn't see a good breakpoint
18544             && !$saw_good_break
18545              
18546             # and we don't already have an interior breakpoint
18547             && !$forced_breakpoint_count
18548             )
18549              
18550             # or, we aren't allowed to add any newlines
18551             || !$rOpts_add_newlines
18552              
18553             )
18554             {
18555 2160         4640 $ri_first = [$imin];
18556 2160         4602 $ri_last = [$imax];
18557             }
18558              
18559             #-----------------------------
18560             # otherwise use multiple lines
18561             #-----------------------------
18562             else {
18563              
18564             # add a couple of extra terminal blank tokens if we haven't
18565             # already done so
18566 1113 50       2842 $self->pad_array_to_go() unless ($called_pad_array_to_go);
18567              
18568 1113         5014 ( $ri_first, $ri_last, my $rbond_strength_to_go ) =
18569             $self->break_long_lines( $saw_good_break, \@colon_list,
18570             $rbond_strength_bias );
18571              
18572 1113         5091 $self->break_all_chain_tokens( $ri_first, $ri_last );
18573              
18574             $self->break_equals( $ri_first, $ri_last )
18575 1113 100       1890 if @{$ri_first} >= 3;
  1113         5943  
18576              
18577             # now we do a correction step to clean this up a bit
18578             # (The only time we would not do this is for debugging)
18579             $self->recombine_breakpoints( $ri_first, $ri_last,
18580             $rbond_strength_to_go )
18581 1113 100 100     3446 if ( $rOpts_recombine && @{$ri_first} > 1 );
  1084         5695  
18582              
18583 1113 100       5387 $self->insert_final_ternary_breaks( $ri_first, $ri_last )
18584             if (@colon_list);
18585             }
18586              
18587 3273 100 66     9946 $self->insert_breaks_before_list_opening_containers( $ri_first,
18588             $ri_last )
18589             if ( %break_before_container_types && $max_index_to_go > 0 );
18590              
18591             # Check for a phantom semicolon at the end of the batch
18592 3273 100 66     9458 if ( !$token_lengths_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
18593 19         116 $self->unmask_phantom_token($imax);
18594             }
18595              
18596 3273 100       7549 if ( $rOpts_one_line_block_semicolons == 0 ) {
18597 6         22 $self->delete_one_line_semicolons( $ri_first, $ri_last );
18598             }
18599              
18600             # Remember the largest batch size processed. This is needed by the
18601             # logical padding routine to avoid padding the first nonblank token
18602 3273 100       7560 if ( $max_index_to_go > $peak_batch_size ) {
18603 959         1961 $peak_batch_size = $max_index_to_go;
18604             }
18605             }
18606              
18607             #-------------------
18608             # -lp corrector step
18609             #-------------------
18610 3932 100       8227 if ($lp_object_count_this_batch) {
18611 134         506 $self->correct_lp_indentation( $ri_first, $ri_last );
18612             }
18613              
18614             #--------------------
18615             # ship this batch out
18616             #--------------------
18617 3932         7222 $this_batch->[_ri_first_] = $ri_first;
18618 3932         6304 $this_batch->[_ri_last_] = $ri_last;
18619              
18620 3932         13150 $self->convey_batch_to_vertical_aligner();
18621              
18622             #-------------------------------------------------------------------
18623             # Write requested number of blank lines after an opening block brace
18624             #-------------------------------------------------------------------
18625 3932 100       10360 if ($rOpts_blank_lines_after_opening_block) {
18626 6         11 my $iterm = $imax;
18627 6 50 33     21 if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
18628 0         0 $iterm -= 1;
18629 0 0 0     0 if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
18630 0         0 $iterm -= 1;
18631             }
18632             }
18633              
18634 6 50 66     43 if ( $types_to_go[$iterm] eq '{'
      33        
18635             && $block_type_to_go[$iterm]
18636             && $block_type_to_go[$iterm] =~
18637             /$blank_lines_after_opening_block_pattern/ )
18638             {
18639 2         4 my $nblanks = $rOpts_blank_lines_after_opening_block;
18640 2         10 $self->flush_vertical_aligner();
18641 2         4 my $file_writer_object = $self->[_file_writer_object_];
18642 2         10 $file_writer_object->require_blank_code_lines($nblanks);
18643             }
18644             }
18645              
18646 3932         13128 return;
18647             } ## end sub grind_batch_of_CODE
18648              
18649             sub iprev_to_go {
18650 4088     4088 0 7561 my ($i) = @_;
18651              
18652             # Given index $i of a token in the '_to_go' arrays, return
18653             # the index of the previous nonblank token.
18654 4088 100 100     16159 return $i - 1 > 0
18655             && $types_to_go[ $i - 1 ] eq 'b' ? $i - 2 : $i - 1;
18656             }
18657              
18658             sub unmask_phantom_token {
18659 19     19 0 82 my ( $self, $iend ) = @_;
18660              
18661             # Turn a phantom token into a real token.
18662              
18663             # Input parameter:
18664             # $iend = the index in the output batch array of this token.
18665              
18666             # Phantom tokens are specially marked token types (such as ';') with
18667             # no token text which only become real tokens if they occur at the end
18668             # of an output line. At one time phantom ',' tokens were handled
18669             # here, but now they are processed elsewhere.
18670              
18671 19         54 my $rLL = $self->[_rLL_];
18672 19         50 my $KK = $K_to_go[$iend];
18673 19         56 my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
18674              
18675 19         86 my $type = $types_to_go[$iend];
18676 19 50       81 return unless ( $type eq ';' );
18677 19         47 my $tok = $type;
18678 19         48 my $tok_len = length($tok);
18679 19 50       81 if ( $want_left_space{$type} != WS_NO ) {
18680 0         0 $tok = SPACE . $tok;
18681 0         0 $tok_len += 1;
18682             }
18683              
18684 19         47 $tokens_to_go[$iend] = $tok;
18685 19         43 $token_lengths_to_go[$iend] = $tok_len;
18686              
18687 19         55 $rLL->[$KK]->[_TOKEN_] = $tok;
18688 19         48 $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
18689              
18690 19         117 $self->note_added_semicolon($line_number);
18691              
18692             # This changes the summed lengths of the rest of this batch
18693 19         105 foreach ( $iend .. $max_index_to_go ) {
18694 19         87 $summed_lengths_to_go[ $_ + 1 ] += $tok_len;
18695             }
18696 19         52 return;
18697             } ## end sub unmask_phantom_token
18698              
18699             sub save_opening_indentation {
18700              
18701             # This should be called after each batch of tokens is output. It
18702             # saves indentations of lines of all unmatched opening tokens.
18703             # These will be used by sub get_opening_indentation.
18704              
18705 842     842 0 2483 my ( $self, $ri_first, $ri_last, $rindentation_list,
18706             $runmatched_opening_indexes )
18707             = @_;
18708              
18709 842 100       2335 $runmatched_opening_indexes = []
18710             if ( !defined($runmatched_opening_indexes) );
18711              
18712             # QW INDENTATION PATCH 1:
18713             # Also save indentation for multiline qw quotes
18714 842         1633 my @i_qw;
18715             my $seqno_qw_opening;
18716 842 100       2517 if ( $types_to_go[$max_index_to_go] eq 'q' ) {
18717 149         310 my $KK = $K_to_go[$max_index_to_go];
18718             $seqno_qw_opening =
18719 149         330 $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
18720 149 100       394 if ($seqno_qw_opening) {
18721 32         95 push @i_qw, $max_index_to_go;
18722             }
18723             }
18724              
18725             # we need to save indentations of any unmatched opening tokens
18726             # in this batch because we may need them in a subsequent batch.
18727 842         1488 foreach ( @{$runmatched_opening_indexes}, @i_qw ) {
  842         2465  
18728              
18729 850         1810 my $seqno = $type_sequence_to_go[$_];
18730              
18731 850 100       2328 if ( !$seqno ) {
18732 32 50 33     225 if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
18733 32         88 $seqno = $seqno_qw_opening;
18734             }
18735             else {
18736              
18737             # shouldn't happen
18738 0         0 $seqno = 'UNKNOWN';
18739 0         0 DEVEL_MODE && Fault("unable to find sequence number\n");
18740             }
18741             }
18742              
18743 850         2429 $saved_opening_indentation{$seqno} = [
18744             lookup_opening_indentation(
18745             $_, $ri_first, $ri_last, $rindentation_list
18746             )
18747             ];
18748             }
18749 842         1980 return;
18750             } ## end sub save_opening_indentation
18751              
18752             sub get_saved_opening_indentation {
18753 868     868 0 1836 my ($seqno) = @_;
18754 868         2054 my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
18755              
18756 868 50       3128 if ($seqno) {
18757 868 50       2486 if ( $saved_opening_indentation{$seqno} ) {
18758             ( $indent, $offset, $is_leading ) =
18759 868         1436 @{ $saved_opening_indentation{$seqno} };
  868         2320  
18760 868         1619 $exists = 1;
18761             }
18762             }
18763              
18764             # some kind of serious error it doesn't exist
18765             # (example is badfile.t)
18766              
18767 868         3175 return ( $indent, $offset, $is_leading, $exists );
18768             } ## end sub get_saved_opening_indentation
18769             } ## end closure grind_batch_of_CODE
18770              
18771             sub lookup_opening_indentation {
18772              
18773             # get the indentation of the line in the current output batch
18774             # which output a selected opening token
18775             #
18776             # given:
18777             # $i_opening - index of an opening token in the current output batch
18778             # whose line indentation we need
18779             # $ri_first - reference to list of the first index $i for each output
18780             # line in this batch
18781             # $ri_last - reference to list of the last index $i for each output line
18782             # in this batch
18783             # $rindentation_list - reference to a list containing the indentation
18784             # used for each line. (NOTE: the first slot in
18785             # this list is the last returned line number, and this is
18786             # followed by the list of indentations).
18787             #
18788             # return
18789             # -the indentation of the line which contained token $i_opening
18790             # -and its offset (number of columns) from the start of the line
18791              
18792 1400     1400 0 3501 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
18793              
18794 1400 50       2179 if ( !@{$ri_last} ) {
  1400         3631  
18795              
18796             # An error here implies a bug introduced by a recent program change.
18797             # Every batch of code has lines, so this should never happen.
18798 0         0 if (DEVEL_MODE) {
18799             Fault("Error in opening_indentation: no lines");
18800             }
18801 0         0 return ( 0, 0, 0 );
18802             }
18803              
18804 1400         2809 my $nline = $rindentation_list->[0]; # line number of previous lookup
18805              
18806             # reset line location if necessary
18807 1400 100       3739 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
18808              
18809             # find the correct line
18810 1400 50       3408 if ( $i_opening <= $ri_last->[-1] ) {
18811 1400         4006 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
  5302         8796  
18812             }
18813              
18814             # Error - token index is out of bounds - shouldn't happen
18815             # A program bug has been introduced in one of the calling routines.
18816             # We better stop here.
18817             else {
18818 0         0 my $i_last_line = $ri_last->[-1];
18819 0         0 if (DEVEL_MODE) {
18820             Fault(<<EOM);
18821             Program bug in call to lookup_opening_indentation - index out of range
18822             called with index i_opening=$i_opening > $i_last_line = max index of last line
18823             This batch has max index = $max_index_to_go,
18824             EOM
18825             }
18826 0         0 $nline = $#{$ri_last};
  0         0  
18827             }
18828              
18829 1400         2516 $rindentation_list->[0] =
18830             $nline; # save line number to start looking next call
18831 1400         2464 my $ibeg = $ri_start->[$nline];
18832 1400         3584 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
18833 1400         2927 my $is_leading = ( $ibeg == $i_opening );
18834 1400         6493 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
18835             } ## end sub lookup_opening_indentation
18836              
18837             sub terminal_type_i {
18838              
18839             # returns type of last token on this line (terminal token), as follows:
18840             # returns # for a full-line comment
18841             # returns ' ' for a blank line
18842             # otherwise returns final token type
18843              
18844 69     69 0 227 my ( $ibeg, $iend ) = @_;
18845              
18846             # Start at the end and work backwards
18847 69         141 my $i = $iend;
18848 69         167 my $type_i = $types_to_go[$i];
18849              
18850             # Check for side comment
18851 69 100       256 if ( $type_i eq '#' ) {
18852 8         20 $i--;
18853 8 50       34 if ( $i < $ibeg ) {
18854 0 0       0 return wantarray ? ( $type_i, $ibeg ) : $type_i;
18855             }
18856 8         20 $type_i = $types_to_go[$i];
18857             }
18858              
18859             # Skip past a blank
18860 69 100       252 if ( $type_i eq 'b' ) {
18861 7         15 $i--;
18862 7 50       33 if ( $i < $ibeg ) {
18863 0 0       0 return wantarray ? ( $type_i, $ibeg ) : $type_i;
18864             }
18865 7         19 $type_i = $types_to_go[$i];
18866             }
18867              
18868             # Found it..make sure it is a BLOCK termination,
18869             # but hide a terminal } after sort/map/grep/eval/do because it is not
18870             # necessarily the end of the line. (terminal.t)
18871 69         187 my $block_type = $block_type_to_go[$i];
18872 69 100 66     409 if (
      66        
18873             $type_i eq '}'
18874             && ( !$block_type
18875             || $is_sort_map_grep_eval_do{$block_type} )
18876             )
18877             {
18878 1         3 $type_i = 'b';
18879             }
18880 69 100       530 return wantarray ? ( $type_i, $i ) : $type_i;
18881             } ## end sub terminal_type_i
18882              
18883             sub pad_array_to_go {
18884              
18885             # To simplify coding in break_lists and set_bond_strengths, it helps to
18886             # create some extra blank tokens at the end of the arrays. We also add
18887             # some undef's to help guard against using invalid data.
18888 1745     1745 0 3442 my ($self) = @_;
18889 1745         3953 $K_to_go[ $max_index_to_go + 1 ] = undef;
18890 1745         3627 $tokens_to_go[ $max_index_to_go + 1 ] = EMPTY_STRING;
18891 1745         3557 $tokens_to_go[ $max_index_to_go + 2 ] = EMPTY_STRING;
18892 1745         3182 $tokens_to_go[ $max_index_to_go + 3 ] = undef;
18893 1745         3166 $types_to_go[ $max_index_to_go + 1 ] = 'b';
18894 1745         3477 $types_to_go[ $max_index_to_go + 2 ] = 'b';
18895 1745         3219 $types_to_go[ $max_index_to_go + 3 ] = undef;
18896 1745         3180 $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
18897 1745         3818 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
18898             $nesting_depth_to_go[$max_index_to_go];
18899              
18900             # /^[R\}\)\]]$/
18901 1745 100       6149 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
    100          
18902 225 50       1039 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
18903              
18904             # Nesting depths are set to be >=0 in sub write_line, so it should
18905             # not be possible to get here unless the code has a bracing error
18906             # which leaves a closing brace with zero nesting depth.
18907 0 0       0 if ( !get_saw_brace_error() ) {
18908 0         0 if (DEVEL_MODE) {
18909             Fault(<<EOM);
18910             Program bug in pad_array_to_go: hit nesting error which should have been caught
18911             EOM
18912             }
18913             }
18914             }
18915             else {
18916 225         576 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
18917             }
18918             }
18919              
18920             # /^[L\{\(\[]$/
18921             elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
18922 562         1315 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
18923             }
18924             else {
18925             ## must be ? or :
18926             }
18927 1745         3000 return;
18928             } ## end sub pad_array_to_go
18929              
18930             sub break_all_chain_tokens {
18931              
18932             # scan the current breakpoints looking for breaks at certain "chain
18933             # operators" (. : && || + etc) which often occur repeatedly in a long
18934             # statement. If we see a break at any one, break at all similar tokens
18935             # within the same container.
18936             #
18937 1113     1113 0 2702 my ( $self, $ri_left, $ri_right ) = @_;
18938              
18939 1113         3929 my %saw_chain_type;
18940             my %left_chain_type;
18941 1113         0 my %right_chain_type;
18942 1113         0 my %interior_chain_type;
18943 1113         1803 my $nmax = @{$ri_right} - 1;
  1113         2523  
18944              
18945             # scan the left and right end tokens of all lines
18946 1113         1992 my $count = 0;
18947 1113         3036 for my $n ( 0 .. $nmax ) {
18948 3991         6040 my $il = $ri_left->[$n];
18949 3991         5826 my $ir = $ri_right->[$n];
18950 3991         6187 my $typel = $types_to_go[$il];
18951 3991         5844 my $typer = $types_to_go[$ir];
18952 3991 100       7741 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
18953 3991 100       7331 $typer = '+' if ( $typer eq '-' );
18954 3991 100       7083 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
18955 3991 100       7327 $typer = '*' if ( $typer eq '/' );
18956              
18957 3991 100       8195 my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel;
18958 3991 100       7041 my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer;
18959 3991 100 100     9144 if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) {
18960 321 100       811 next if ( $typel eq '?' );
18961 255         529 push @{ $left_chain_type{$keyl} }, $il;
  255         690  
18962 255         500 $saw_chain_type{$keyl} = 1;
18963 255         408 $count++;
18964             }
18965 3925 100 100     10637 if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) {
18966 48 100       157 next if ( $typer eq '?' );
18967 47         83 push @{ $right_chain_type{$keyr} }, $ir;
  47         165  
18968 47         1103 $saw_chain_type{$keyr} = 1;
18969 47         108 $count++;
18970             }
18971             }
18972 1113 100       4667 return unless $count;
18973              
18974             # now look for any interior tokens of the same types
18975 124         357 $count = 0;
18976 124         295 my $has_interior_dot_or_plus;
18977 124         441 for my $n ( 0 .. $nmax ) {
18978 781         1167 my $il = $ri_left->[$n];
18979 781         1102 my $ir = $ri_right->[$n];
18980 781         1597 foreach my $i ( $il + 1 .. $ir - 1 ) {
18981 4183         6189 my $type = $types_to_go[$i];
18982 4183 100       6513 my $key = $type eq 'k' ? $tokens_to_go[$i] : $type;
18983 4183 100       6934 $key = '+' if ( $key eq '-' );
18984 4183 100       6788 $key = '*' if ( $key eq '/' );
18985 4183 100       7829 if ( $saw_chain_type{$key} ) {
18986 193         296 push @{ $interior_chain_type{$key} }, $i;
  193         445  
18987 193         284 $count++;
18988 193   100     707 $has_interior_dot_or_plus ||= ( $key eq '.' || $key eq '+' );
      100        
18989             }
18990             }
18991             }
18992 124 100       928 return unless $count;
18993              
18994 33         254 my @keys = keys %saw_chain_type;
18995              
18996             # quit if just ONE continuation line with leading . For example--
18997             # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
18998             # . $contents;
18999             # Fixed for b1399.
19000 33 50 66     260 if ( $has_interior_dot_or_plus && $nmax == 1 && @keys == 1 ) {
      33        
19001 0         0 return;
19002             }
19003              
19004             # now make a list of all new break points
19005 33         93 my @insert_list;
19006              
19007             # loop over all chain types
19008 33         120 foreach my $key (@keys) {
19009              
19010             # loop over all interior chain tokens
19011 41         84 foreach my $itest ( @{ $interior_chain_type{$key} } ) {
  41         143  
19012              
19013             # loop over all left end tokens of same type
19014 193 100       447 if ( $left_chain_type{$key} ) {
19015 71 50       185 next if $nobreak_to_go[ $itest - 1 ];
19016 71         113 foreach my $i ( @{ $left_chain_type{$key} } ) {
  71         145  
19017 146 100       355 next unless $self->in_same_container_i( $i, $itest );
19018 15         60 push @insert_list, $itest - 1;
19019              
19020             # Break at matching ? if this : is at a different level.
19021             # For example, the ? before $THRf_DEAD in the following
19022             # should get a break if its : gets a break.
19023             #
19024             # my $flags =
19025             # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
19026             # : ( $_ & 4 ) ? $THRf_R_DETACHED
19027             # : $THRf_R_JOINABLE;
19028 15 100 66     66 if ( $key eq ':'
19029             && $levels_to_go[$i] != $levels_to_go[$itest] )
19030             {
19031 1         3 my $i_question = $mate_index_to_go[$itest];
19032 1 50 33     9 if ( defined($i_question) && $i_question > 0 ) {
19033 1         4 push @insert_list, $i_question - 1;
19034             }
19035             }
19036 15         29 last;
19037             }
19038             }
19039              
19040             # loop over all right end tokens of same type
19041 193 100       460 if ( $right_chain_type{$key} ) {
19042 122 50       255 next if $nobreak_to_go[$itest];
19043 122         170 foreach my $i ( @{ $right_chain_type{$key} } ) {
  122         240  
19044 227 100       438 next unless $self->in_same_container_i( $i, $itest );
19045 31         86 push @insert_list, $itest;
19046              
19047             # break at matching ? if this : is at a different level
19048 31 50 33     130 if ( $key eq ':'
19049             && $levels_to_go[$i] != $levels_to_go[$itest] )
19050             {
19051 0         0 my $i_question = $mate_index_to_go[$itest];
19052 0 0       0 if ( defined($i_question) ) {
19053 0         0 push @insert_list, $i_question;
19054             }
19055             }
19056 31         73 last;
19057             }
19058             }
19059             }
19060             }
19061              
19062             # insert any new break points
19063 33 100       144 if (@insert_list) {
19064 20         111 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
19065             }
19066 33         195 return;
19067             } ## end sub break_all_chain_tokens
19068              
19069             sub insert_additional_breaks {
19070              
19071             # this routine will add line breaks at requested locations after
19072             # sub break_long_lines has made preliminary breaks.
19073              
19074 101     101 0 335 my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
19075 101         222 my $i_f;
19076             my $i_l;
19077 101         182 my $line_number = 0;
19078 101         194 foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
  210         1383  
  101         547  
19079              
19080 216 50       506 next if ( $nobreak_to_go[$i_break_left] );
19081              
19082 216         1330 $i_f = $ri_first->[$line_number];
19083 216         335 $i_l = $ri_last->[$line_number];
19084 216         512 while ( $i_break_left >= $i_l ) {
19085 383         541 $line_number++;
19086              
19087             # shouldn't happen unless caller passes bad indexes
19088 383 50       504 if ( $line_number >= @{$ri_last} ) {
  383         766  
19089 0         0 if (DEVEL_MODE) {
19090             Fault(<<EOM);
19091             Non-fatal program bug: couldn't set break at $i_break_left
19092             EOM
19093             }
19094 0         0 return;
19095             }
19096 383         566 $i_f = $ri_first->[$line_number];
19097 383         749 $i_l = $ri_last->[$line_number];
19098             }
19099              
19100             # Do not leave a blank at the end of a line; back up if necessary
19101 216 100       565 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
  11         21  
19102              
19103 216         342 my $i_break_right = $inext_to_go[$i_break_left];
19104 216 50 66     1110 if ( $i_break_left >= $i_f
      66        
      33        
19105             && $i_break_left < $i_l
19106             && $i_break_right > $i_f
19107             && $i_break_right <= $i_l )
19108             {
19109 101         186 splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
  101         379  
19110 101         195 splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
  101         358  
19111             }
19112             }
19113 101         300 return;
19114             } ## end sub insert_additional_breaks
19115              
19116             { ## begin closure in_same_container_i
19117             my $ris_break_token;
19118             my $ris_comma_token;
19119              
19120             BEGIN {
19121              
19122             # all cases break on seeing commas at same level
19123 39     39   324 my @q = qw( => );
19124 39         118 push @q, ',';
19125 39         148 @{$ris_comma_token}{@q} = (1) x scalar(@q);
  39         226  
19126              
19127             # Non-ternary text also breaks on seeing any of qw(? : || or )
19128             # Example: we would not want to break at any of these .'s
19129             # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
19130 39         207 push @q, qw( or || ? : );
19131 39         124 @{$ris_break_token}{@q} = (1) x scalar(@q);
  39         39225  
19132             } ## end BEGIN
19133              
19134             sub in_same_container_i {
19135              
19136             # Check to see if tokens at i1 and i2 are in the same container, and
19137             # not separated by certain characters: => , ? : || or
19138             # This is an interface between the _to_go arrays to the rLL array
19139 374     374 0 624 my ( $self, $i1, $i2 ) = @_;
19140              
19141             # quick check
19142 374         601 my $parent_seqno_1 = $parent_seqno_to_go[$i1];
19143 374 100       988 return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
19144              
19145 58 100       159 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
  52         140  
19146 58         130 my $K1 = $K_to_go[$i1];
19147 58         109 my $K2 = $K_to_go[$i2];
19148 58         117 my $rLL = $self->[_rLL_];
19149              
19150 58         112 my $depth_1 = $nesting_depth_to_go[$i1];
19151 58 50       149 return if ( $depth_1 < 0 );
19152              
19153             # Shouldn't happen since i1 and i2 have same parent:
19154 58 50       153 return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
19155              
19156             # Select character set to scan for
19157 58         116 my $type_1 = $types_to_go[$i1];
19158 58 100       175 my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
19159              
19160             # Fast preliminary loop to verify that tokens are in the same container
19161 58         123 my $KK = $K1;
19162 58         99 while (1) {
19163 326         557 $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
19164 326 100       592 last if !defined($KK);
19165 323 100       589 last if ( $KK >= $K2 );
19166 268         1923 my $ii = $i1 + $KK - $K1;
19167 268         412 my $depth_i = $nesting_depth_to_go[$ii];
19168 268 50       462 return if ( $depth_i < $depth_1 );
19169 268 100       579 next if ( $depth_i > $depth_1 );
19170 51 100       150 if ( $type_1 ne ':' ) {
19171 45         96 my $tok_i = $tokens_to_go[$ii];
19172 45 50 33     224 return if ( $tok_i eq '?' || $tok_i eq ':' );
19173             }
19174             }
19175              
19176             # Slow loop checking for certain characters
19177              
19178             #-----------------------------------------------------
19179             # This is potentially a slow routine and not critical.
19180             # For safety just give up for large differences.
19181             # See test file 'infinite_loop.txt'
19182             #-----------------------------------------------------
19183 58 50       213 return if ( $i2 - $i1 > 200 );
19184              
19185 58         196 foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
19186              
19187 1668         2211 my $depth_i = $nesting_depth_to_go[$ii];
19188 1668 100       2829 next if ( $depth_i > $depth_1 );
19189 400 50       669 return if ( $depth_i < $depth_1 );
19190 400         576 my $tok_i = $tokens_to_go[$ii];
19191 400 100       828 return if ( $rbreak->{$tok_i} );
19192             }
19193 47         191 return 1;
19194             } ## end sub in_same_container_i
19195             } ## end closure in_same_container_i
19196              
19197             sub break_equals {
19198              
19199             # Look for assignment operators that could use a breakpoint.
19200             # For example, in the following snippet
19201             #
19202             # $HOME = $ENV{HOME}
19203             # || $ENV{LOGDIR}
19204             # || $pw[7]
19205             # || die "no home directory for user $<";
19206             #
19207             # we could break at the = to get this, which is a little nicer:
19208             # $HOME =
19209             # $ENV{HOME}
19210             # || $ENV{LOGDIR}
19211             # || $pw[7]
19212             # || die "no home directory for user $<";
19213             #
19214             # The logic here follows the logic in set_logical_padding, which
19215             # will add the padding in the second line to improve alignment.
19216             #
19217 502     502 0 1466 my ( $self, $ri_left, $ri_right ) = @_;
19218 502         984 my $nmax = @{$ri_right} - 1;
  502         1149  
19219 502 50       1626 return if ( $nmax < 2 );
19220              
19221             # scan the left ends of first two lines
19222 502         1087 my $tokbeg = EMPTY_STRING;
19223 502         861 my $depth_beg;
19224 502         1430 for my $n ( 1 .. 2 ) {
19225 533         1239 my $il = $ri_left->[$n];
19226 533         1160 my $typel = $types_to_go[$il];
19227 533         1148 my $tokenl = $tokens_to_go[$il];
19228 533 100       1699 my $keyl = $typel eq 'k' ? $tokenl : $typel;
19229              
19230 533         1119 my $has_leading_op = $is_chain_operator{$keyl};
19231 533 100       1797 return unless ($has_leading_op);
19232 50 100       239 if ( $n > 1 ) {
19233             return
19234 19 100 66     236 unless ( $tokenl eq $tokbeg
19235             && $nesting_depth_to_go[$il] eq $depth_beg );
19236             }
19237 46         109 $tokbeg = $tokenl;
19238 46         125 $depth_beg = $nesting_depth_to_go[$il];
19239             }
19240              
19241             # now look for any interior tokens of the same types
19242 15         48 my $il = $ri_left->[0];
19243 15         46 my $ir = $ri_right->[0];
19244              
19245             # now make a list of all new break points
19246 15         38 my @insert_list;
19247 15         80 foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) {
19248 132         205 my $type = $types_to_go[$i];
19249 132 100 66     339 if ( $is_assignment{$type}
19250             && $nesting_depth_to_go[$i] eq $depth_beg )
19251             {
19252 1 50       4 if ( $want_break_before{$type} ) {
19253 0         0 push @insert_list, $i - 1;
19254             }
19255             else {
19256 1         3 push @insert_list, $i;
19257             }
19258             }
19259             }
19260              
19261             # Break after a 'return' followed by a chain of operators
19262             # return ( $^O !~ /win32|dos/i )
19263             # && ( $^O ne 'VMS' )
19264             # && ( $^O ne 'OS2' )
19265             # && ( $^O ne 'MacOS' );
19266             # To give:
19267             # return
19268             # ( $^O !~ /win32|dos/i )
19269             # && ( $^O ne 'VMS' )
19270             # && ( $^O ne 'OS2' )
19271             # && ( $^O ne 'MacOS' );
19272 15         74 my $i = 0;
19273 15 100 100     165 if ( $types_to_go[$i] eq 'k'
      66        
      100        
19274             && $tokens_to_go[$i] eq 'return'
19275             && $ir > $il
19276             && $nesting_depth_to_go[$i] eq $depth_beg )
19277             {
19278 4         11 push @insert_list, $i;
19279             }
19280              
19281 15 100       91 return unless (@insert_list);
19282              
19283             # One final check...
19284             # scan second and third lines and be sure there are no assignments
19285             # we want to avoid breaking at an = to make something like this:
19286             # unless ( $icon =
19287             # $html_icons{"$type-$state"}
19288             # or $icon = $html_icons{$type}
19289             # or $icon = $html_icons{$state} )
19290 5         19 for my $n ( 1 .. 2 ) {
19291 10         22 my $il_n = $ri_left->[$n];
19292 10         21 my $ir_n = $ri_right->[$n];
19293 10         29 foreach my $i ( $il_n + 1 .. $ir_n ) {
19294 100         137 my $type = $types_to_go[$i];
19295             return
19296 100 50 33     206 if ( $is_assignment{$type}
19297             && $nesting_depth_to_go[$i] eq $depth_beg );
19298             }
19299             }
19300              
19301             # ok, insert any new break point
19302 5 50       30 if (@insert_list) {
19303 5         23 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
19304             }
19305 5         22 return;
19306             } ## end sub break_equals
19307              
19308             { ## begin closure recombine_breakpoints
19309              
19310             # This routine is called once per batch to see if it would be better
19311             # to combine some of the lines into which the batch has been broken.
19312              
19313             my %is_amp_amp;
19314             my %is_math_op;
19315             my %is_plus_minus;
19316             my %is_mult_div;
19317              
19318             BEGIN {
19319              
19320 39     39   210 my @q;
19321 39         177 @q = qw( && || );
19322 39         204 @is_amp_amp{@q} = (1) x scalar(@q);
19323              
19324 39         160 @q = qw( + - * / );
19325 39         203 @is_math_op{@q} = (1) x scalar(@q);
19326              
19327 39         147 @q = qw( + - );
19328 39         182 @is_plus_minus{@q} = (1) x scalar(@q);
19329              
19330 39         164 @q = qw( * / );
19331 39         30073 @is_mult_div{@q} = (1) x scalar(@q);
19332             } ## end BEGIN
19333              
19334             sub Debug_dump_breakpoints {
19335              
19336             # Debug routine to dump current breakpoints...not normally called
19337             # We are given indexes to the current lines:
19338             # $ri_beg = ref to array of BEGinning indexes of each line
19339             # $ri_end = ref to array of ENDing indexes of each line
19340 0     0 0 0 my ( $self, $ri_beg, $ri_end, $msg ) = @_;
19341 0         0 print {*STDOUT} "----Dumping breakpoints from: $msg----\n";
  0         0  
19342 0         0 for my $n ( 0 .. @{$ri_end} - 1 ) {
  0         0  
19343 0         0 my $ibeg = $ri_beg->[$n];
19344 0         0 my $iend = $ri_end->[$n];
19345 0         0 my $text = EMPTY_STRING;
19346 0         0 foreach my $i ( $ibeg .. $iend ) {
19347 0         0 $text .= $tokens_to_go[$i];
19348             }
19349 0         0 print {*STDOUT} "$n ($ibeg:$iend) $text\n";
  0         0  
19350             }
19351 0         0 print {*STDOUT} "----\n";
  0         0  
19352 0         0 return;
19353             } ## end sub Debug_dump_breakpoints
19354              
19355             sub delete_one_line_semicolons {
19356              
19357 6     6 0 11 my ( $self, $ri_beg, $ri_end ) = @_;
19358 6         13 my $rLL = $self->[_rLL_];
19359 6         12 my $K_opening_container = $self->[_K_opening_container_];
19360              
19361             # Walk down the lines of this batch and delete any semicolons
19362             # terminating one-line blocks;
19363 6         7 my $nmax = @{$ri_end} - 1;
  6         13  
19364              
19365 6         16 foreach my $n ( 0 .. $nmax ) {
19366 6         13 my $i_beg = $ri_beg->[$n];
19367 6         11 my $i_e = $ri_end->[$n];
19368 6         9 my $K_beg = $K_to_go[$i_beg];
19369 6         13 my $K_e = $K_to_go[$i_e];
19370 6         8 my $K_end = $K_e;
19371 6         13 my $type_end = $rLL->[$K_end]->[_TYPE_];
19372 6 100       16 if ( $type_end eq '#' ) {
19373 2         6 $K_end = $self->K_previous_nonblank($K_end);
19374 2 50       6 if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
  2         5  
19375             }
19376              
19377             # we are looking for a line ending in closing brace
19378             next
19379 6 50 33     27 unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
19380              
19381             # ...and preceded by a semicolon on the same line
19382 6         17 my $K_semicolon = $self->K_previous_nonblank($K_end);
19383 6 50       15 next unless defined($K_semicolon);
19384 6         11 my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
19385 6 50       15 next if ( $i_semicolon <= $i_beg );
19386 6 50       16 next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
19387              
19388             # Safety check - shouldn't happen - not critical
19389             # This is not worth throwing a Fault, except in DEVEL_MODE
19390 6 50       14 if ( $types_to_go[$i_semicolon] ne ';' ) {
19391 0         0 DEVEL_MODE
19392             && Fault("unexpected type looking for semicolon");
19393 0         0 next;
19394             }
19395              
19396             # ... with the corresponding opening brace on the same line
19397 6         13 my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
19398 6         13 my $K_opening = $K_opening_container->{$type_sequence};
19399 6 50       15 next unless ( defined($K_opening) );
19400 6         12 my $i_opening = $i_beg + ( $K_opening - $K_beg );
19401 6 50       14 next if ( $i_opening < $i_beg );
19402              
19403             # ... and only one semicolon between these braces
19404 6         11 my $semicolon_count = 0;
19405 6         17 foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
19406 22 100       50 if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
19407 2         5 $semicolon_count++;
19408 2         5 last;
19409             }
19410             }
19411 6 100       18 next if ($semicolon_count);
19412              
19413             # ...ok, then make the semicolon invisible
19414 4         7 my $len = $token_lengths_to_go[$i_semicolon];
19415 4         11 $tokens_to_go[$i_semicolon] = EMPTY_STRING;
19416 4         7 $token_lengths_to_go[$i_semicolon] = 0;
19417 4         7 $rLL->[$K_semicolon]->[_TOKEN_] = EMPTY_STRING;
19418 4         7 $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
19419 4         11 foreach ( $i_semicolon .. $max_index_to_go ) {
19420 16         28 $summed_lengths_to_go[ $_ + 1 ] -= $len;
19421             }
19422             }
19423 6         10 return;
19424             } ## end sub delete_one_line_semicolons
19425              
19426 39     39   421 use constant DEBUG_RECOMBINE => 0;
  39         120  
  39         38669  
19427              
19428             sub recombine_breakpoints {
19429              
19430 732     732 0 2095 my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_;
19431              
19432             # This sub implements the 'recombine' operation on a batch.
19433             # Its task is to combine some of these lines back together to
19434             # improve formatting. The need for this arises because
19435             # sub 'break_long_lines' is very liberal in setting line breaks
19436             # for long lines, always setting breaks at good breakpoints, even
19437             # when that creates small lines. Sometimes small line fragments
19438             # are produced which would look better if they were combined.
19439              
19440             # Input parameters:
19441             # $ri_beg = ref to array of BEGinning indexes of each line
19442             # $ri_end = ref to array of ENDing indexes of each line
19443             # $rbond_strength_to_go = array of bond strengths pulling
19444             # tokens together, used to decide where best to recombine lines.
19445              
19446             #-------------------------------------------------------------------
19447             # Do nothing under extreme stress; use <= 2 for c171.
19448             # (NOTE: New optimizations make this unnecessary. But removing this
19449             # check is not really useful because this condition only occurs in
19450             # test runs, and another formatting pass will fix things anyway.)
19451             # This routine has a long history of improvements. Some past
19452             # relevant issues are : c118, c167, c171, c186, c187, c193, c200.
19453             #-------------------------------------------------------------------
19454 732 100       2017 return if ( $high_stress_level <= 2 );
19455              
19456 731         1217 my $nmax_start = @{$ri_end} - 1;
  731         1555  
19457 731 50       1785 return if ( $nmax_start <= 0 );
19458              
19459 731         1547 my $iend_max = $ri_end->[$nmax_start];
19460 731 100       2215 if ( $types_to_go[$iend_max] eq '#' ) {
19461 46         185 $iend_max = iprev_to_go($iend_max);
19462             }
19463 731   66     3159 my $has_terminal_semicolon =
19464             $iend_max >= 0 && $types_to_go[$iend_max] eq ';';
19465              
19466             #--------------------------------------------------------------------
19467             # Break into the smallest possible sub-sections to improve efficiency
19468             #--------------------------------------------------------------------
19469              
19470             # Also make a list of all good joining tokens between the lines
19471             # n-1 and n.
19472 731         1252 my @joint;
19473              
19474 731         1465 my $rsections = [];
19475 731         1371 my $nbeg_sec = 0;
19476 731         1132 my $nend_sec;
19477 731         1405 my $nmax_section = 0;
19478 731         1933 foreach my $nn ( 1 .. $nmax_start ) {
19479 2748         4968 my $ibeg_1 = $ri_beg->[ $nn - 1 ];
19480 2748         4221 my $iend_1 = $ri_end->[ $nn - 1 ];
19481 2748         4129 my $iend_2 = $ri_end->[$nn];
19482 2748         4081 my $ibeg_2 = $ri_beg->[$nn];
19483              
19484             # Define certain good joint tokens
19485 2748         4274 my ( $itok, $itokp, $itokm );
19486 2748         4347 foreach my $itest ( $iend_1, $ibeg_2 ) {
19487 5496         8272 my $type = $types_to_go[$itest];
19488 5496 100 100     30673 if ( $is_math_op{$type}
      100        
      100        
19489             || $is_amp_amp{$type}
19490             || $is_assignment{$type}
19491             || $type eq ':' )
19492             {
19493 376         814 $itok = $itest;
19494             }
19495             }
19496              
19497             # joint[$nn] = index of joint character
19498 2748         4945 $joint[$nn] = $itok;
19499              
19500             # Update the section list
19501 2748         5900 my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
19502 2748 100 100     10091 if (
      100        
      100        
19503             $excess <= 1
19504              
19505             # The number 5 here is an arbitrary small number intended
19506             # to keep most small matches in one sub-section.
19507             || ( defined($nend_sec)
19508             && ( $nn < 5 || $nmax_start - $nn < 5 ) )
19509             )
19510             {
19511 2586         5281 $nend_sec = $nn;
19512             }
19513             else {
19514 162 100       871 if ( defined($nend_sec) ) {
19515 29         62 push @{$rsections}, [ $nbeg_sec, $nend_sec ];
  29         117  
19516 29         74 my $num = $nend_sec - $nbeg_sec;
19517 29 100       97 if ( $num > $nmax_section ) { $nmax_section = $num }
  19         59  
19518 29         59 $nbeg_sec = $nn;
19519 29         62 $nend_sec = undef;
19520             }
19521 162         419 $nbeg_sec = $nn;
19522             }
19523             }
19524              
19525 731 100       2540 if ( defined($nend_sec) ) {
19526 657         1281 push @{$rsections}, [ $nbeg_sec, $nend_sec ];
  657         2223  
19527 657         1456 my $num = $nend_sec - $nbeg_sec;
19528 657 100       1796 if ( $num > $nmax_section ) { $nmax_section = $num }
  648         1256  
19529             }
19530              
19531 731         1237 my $num_sections = @{$rsections};
  731         1372  
19532              
19533 731         1139 if ( DEBUG_RECOMBINE > 1 ) {
19534             print {*STDOUT} <<EOM;
19535             sections=$num_sections; nmax_sec=$nmax_section
19536             EOM
19537             }
19538              
19539 731         1143 if ( DEBUG_RECOMBINE > 0 ) {
19540             my $max = 0;
19541             print {*STDOUT}
19542             "-----\n$num_sections sections found for nmax=$nmax_start\n";
19543             foreach my $sect ( @{$rsections} ) {
19544             my ( $nbeg, $nend ) = @{$sect};
19545             my $num = $nend - $nbeg;
19546             if ( $num > $max ) { $max = $num }
19547             print {*STDOUT} "$nbeg $nend\n";
19548             }
19549             print {*STDOUT} "max size=$max of $nmax_start lines\n";
19550             }
19551              
19552             # Loop over all sub-sections. Note that we have to work backwards
19553             # from the end of the batch since the sections use original line
19554             # numbers, and the line numbers change as we go.
19555 731         1331 while ( my $section = pop @{$rsections} ) {
  1417         4521  
19556 686         1139 my ( $nbeg, $nend ) = @{$section};
  686         1627  
19557 686         6952 $self->recombine_section_loop(
19558             {
19559             _ri_beg => $ri_beg,
19560             _ri_end => $ri_end,
19561             _nbeg => $nbeg,
19562             _nend => $nend,
19563             _rjoint => \@joint,
19564             _rbond_strength_to_go => $rbond_strength_to_go,
19565             _has_terminal_semicolon => $has_terminal_semicolon,
19566             }
19567             );
19568             }
19569              
19570 731         1871 return;
19571             } ## end sub recombine_breakpoints
19572              
19573             sub recombine_section_loop {
19574 686     686 0 1853 my ( $self, $rhash ) = @_;
19575              
19576             # Recombine breakpoints for one section of lines in the current batch
19577              
19578             # Given:
19579             # $ri_beg, $ri_end = ref to arrays with token indexes of the first
19580             # and last line
19581             # $nbeg, $nend = line numbers bounding this section
19582             # $rjoint = ref to array of good joining tokens per line
19583              
19584             # Update: $ri_beg, $ri_end, $rjoint if lines are joined
19585              
19586             # Returns:
19587             # nothing
19588              
19589             #-------------
19590             # Definitions:
19591             #-------------
19592             # $rhash = {
19593              
19594             # _ri_beg = ref to array with starting token index by line
19595             # _ri_end = ref to array with ending token index by line
19596             # _nbeg = first line number of this section
19597             # _nend = last line number of this section
19598             # _rjoint = ref to array of good joining tokens for each line
19599             # _rbond_strength_to_go = array of bond strengths
19600             # _has_terminal_semicolon = true if last line of batch has ';'
19601              
19602             # _num_freeze = fixed number of lines at end of this batch
19603             # _optimization_on = true during final optimization loop
19604             # _num_compares = total number of line compares made so far
19605             # _pair_list = list of line pairs in optimal search order
19606              
19607             # };
19608              
19609             #-------------
19610             # How it works
19611             #-------------
19612              
19613             # We are working with a sequence of output lines and looking at
19614             # each pair. We must decide if it is better to join each of
19615             # these line pairs.
19616              
19617             # The brute force method is to loop through all line pairs and
19618             # join the best possible pair, as determined by either some
19619             # logical criterion or by the maximum 'bond strength' assigned
19620             # to the joining token. Then keep doing this until there are
19621             # no remaining line pairs to join.
19622              
19623             # This works, but a problem is that it can theoretically take
19624             # on the order of N^2 comparisons in some pathological cases.
19625             # This can require an excessive amount of run time.
19626              
19627             # We can avoid excessive run time by conceptually dividing the
19628             # work into two phases. In the first phase we make any joints
19629             # required by user settings or logic other than the strength of
19630             # joints. In the second phase we make any remaining joints
19631             # based on strengths. To do this optimally, we do a preliminary
19632             # sort on joint strengths and always loop in that order. That
19633             # way, we can stop a search on the first joint strength because
19634             # it will be the maximum.
19635              
19636             # This method is very fast, requiring no more than 3*N line
19637             # comparisons, where N is the number of lines (see below).
19638              
19639 686         1760 my $ri_beg = $rhash->{_ri_beg};
19640 686         1392 my $ri_end = $rhash->{_ri_end};
19641              
19642             # Line index range of this section:
19643 686         1210 my $nbeg = $rhash->{_nbeg}; # stays constant
19644 686         1334 my $nend = $rhash->{_nend}; # will decrease
19645              
19646             # $nmax_batch = starting number of lines in the full batch
19647             # $num_freeze = number of lines following this section to leave alone
19648 686         1166 my $nmax_batch = @{$ri_end} - 1;
  686         1477  
19649 686         2717 $rhash->{_num_freeze} = $nmax_batch - $nend;
19650              
19651             # Setup the list of line pairs to test. This stores the following
19652             # values for each line pair:
19653             # [ $n=index of the second line of the pair, $bs=bond strength]
19654 686         1368 my @pair_list;
19655 686         1341 my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go};
19656 686         2125 foreach my $n ( $nbeg + 1 .. $nend ) {
19657 2586         4267 my $iend_1 = $ri_end->[ $n - 1 ];
19658 2586         3838 my $ibeg_2 = $ri_beg->[$n];
19659 2586         3552 my $bs_tweak = 0;
19660 2586 100       5516 if ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { $bs_tweak = 0.25 }
  69         122  
19661 2586         4438 my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
19662 2586         6910 push @pair_list, [ $n, $bs ];
19663             }
19664              
19665             # Any order for testing is possible, but optimization is only possible
19666             # if we sort the line pairs on decreasing joint strength.
19667             @pair_list =
19668 686 50       5281 sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @pair_list;
  4022         10904  
19669 686         2032 $rhash->{_rpair_list} = \@pair_list;
19670              
19671             #----------------
19672             # Iteration limit
19673             #----------------
19674              
19675             # This is now a very fast loop which runs in O(n) time, but a
19676             # check on total number of iterations is retained to guard
19677             # against future programming errors.
19678              
19679             # Most cases require roughly 1 comparison per line pair (1 full pass).
19680             # The upper bound is estimated to be about 3 comparisons per line pair
19681             # unless optimization is deactivated. The approximate breakdown is:
19682             # 1 pass with 1 compare per joint to do any special cases, plus
19683             # 1 pass with up to 2 compares per joint in optimization mode
19684             # The most extreme cases in my collection are:
19685             # camel1.t - needs 2.7 compares per line (12 without optimization)
19686             # ternary.t - needs 2.8 compares per line (12 without optimization)
19687             # c206 - needs 3.3 compares per line, found with random testing
19688             # So a value of MAX_COMPARE_RATIO = 4 looks like an upper bound as
19689             # long as optimization is used. A value of 20 should allow all code to
19690             # pass even if optimization is turned off for testing.
19691 39     39   365 use constant MAX_COMPARE_RATIO => DEVEL_MODE ? 4 : 20;
  39         118  
  39         222491  
19692              
19693 686         1752 my $num_pairs = $nend - $nbeg + 1;
19694 686         1455 my $max_compares = MAX_COMPARE_RATIO * $num_pairs;
19695              
19696             # Always start with optimization off
19697 686         1553 $rhash->{_num_compares} = 0;
19698 686         1561 $rhash->{_optimization_on} = 0;
19699 686         1541 $rhash->{_ix_best_last} = 0;
19700              
19701             #--------------------------------------------
19702             # loop until there are no more recombinations
19703             #--------------------------------------------
19704 686         1371 my $nmax_last = $nmax_batch + 1;
19705 686         1135 while (1) {
19706              
19707             # Stop when the number of lines in the batch does not decrease
19708 1494         2243 $nmax_batch = @{$ri_end} - 1;
  1494         2694  
19709 1494 100       3669 if ( $nmax_batch >= $nmax_last ) {
19710 686         1534 last;
19711             }
19712 808         1378 $nmax_last = $nmax_batch;
19713              
19714             #-----------------------------------------
19715             # inner loop to find next best combination
19716             #-----------------------------------------
19717 808         3289 $self->recombine_inner_loop($rhash);
19718              
19719             # Iteration limit check:
19720 808 50       2451 if ( $rhash->{_num_compares} > $max_compares ) {
19721              
19722             # See note above; should only get here on a programming error
19723 0         0 if (DEVEL_MODE) {
19724             my $ibeg = $ri_beg->[$nbeg];
19725             my $Kbeg = $K_to_go[$ibeg];
19726             my $lno = $self->[_rLL_]->[$Kbeg]->[_LINE_INDEX_];
19727             Fault(<<EOM);
19728             inner loop passes =$rhash->{_num_compares} exceeds max=$max_compares, near line $lno
19729             EOM
19730             }
19731 0         0 last;
19732             }
19733              
19734             } ## end iteration loop
19735              
19736 686         1131 if (DEBUG_RECOMBINE) {
19737             my $ratio = sprintf "%0.3f", $rhash->{_num_compares} / $num_pairs;
19738             print {*STDOUT}
19739             "exiting recombine_inner_loop with $nmax_last lines, opt=$rhash->{_optimization_on}, starting pairs=$num_pairs, num_compares=$rhash->{_num_compares}, ratio=$ratio\n";
19740             }
19741              
19742 686         4272 return;
19743             } ## end sub recombine_section_loop
19744              
19745             sub recombine_inner_loop {
19746 808     808 0 1913 my ( $self, $rhash ) = @_;
19747              
19748             # This is the inner loop of the recombine operation. We look at all of
19749             # the remaining joints in this section and select the best joint to be
19750             # recombined. If a recombination is made, the number of lines
19751             # in this section will be reduced by one.
19752              
19753             # Returns: nothing
19754              
19755 808         1698 my $rK_weld_right = $self->[_rK_weld_right_];
19756 808         1511 my $rK_weld_left = $self->[_rK_weld_left_];
19757              
19758 808         1610 my $ri_beg = $rhash->{_ri_beg};
19759 808         1526 my $ri_end = $rhash->{_ri_end};
19760 808         1416 my $nbeg = $rhash->{_nbeg};
19761 808         1519 my $rjoint = $rhash->{_rjoint};
19762 808         1546 my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go};
19763 808         1501 my $rpair_list = $rhash->{_rpair_list};
19764              
19765             # This will remember the best joint:
19766 808         1332 my $n_best = 0;
19767 808         1326 my $bs_best = 0.;
19768 808         1197 my $ix_best = 0;
19769 808         1266 my $num_bs = 0;
19770              
19771             # The range of lines in this group is $nbeg to $nstop
19772 808         1150 my $nmax = @{$ri_end} - 1;
  808         1432  
19773 808         1589 my $nstop = $nmax - $rhash->{_num_freeze};
19774 808         1490 my $num_joints = $nstop - $nbeg;
19775              
19776             # Turn off optimization if just two joints remain to allow
19777             # special two-line logic to be checked (c193)
19778 808 100 100     3022 if ( $rhash->{_optimization_on} && $num_joints <= 2 ) {
19779 42         120 $rhash->{_optimization_on} = 0;
19780             }
19781              
19782             # Start where we ended the last search
19783 808         1493 my $ix_start = $rhash->{_ix_best_last};
19784              
19785             # Keep the starting index in bounds
19786 808         2792 $ix_start = max( 0, $ix_start );
19787              
19788             # Make a search order list which cycles around to visit
19789             # all line pairs.
19790 808         1850 my $ix_max = @{$rpair_list} - 1;
  808         1655  
19791 808         2784 my @ix_list = ( $ix_start .. $ix_max, 0 .. $ix_start - 1 );
19792 808         1561 my $ix_last = $ix_list[-1];
19793              
19794             #-------------------------
19795             # loop over all line pairs
19796             #-------------------------
19797 808         1333 my $incomplete_loop;
19798 808         1819 foreach my $ix (@ix_list) {
19799 2915         5082 my $item = $rpair_list->[$ix];
19800 2915         4054 my ( $n, $bs ) = @{$item};
  2915         5396  
19801              
19802             # This flag will be true if we 'last' out of this loop early.
19803             # We cannot turn on optimization if this is true.
19804 2915         4857 $incomplete_loop = $ix != $ix_last;
19805              
19806             # Update the count of the number of times through this inner loop
19807 2915         4507 $rhash->{_num_compares}++;
19808              
19809             #----------------------------------------------------------
19810             # If we join the current pair of lines,
19811             # line $n-1 will become the left part of the joined line
19812             # line $n will become the right part of the joined line
19813             #
19814             # Here are Indexes of the endpoint tokens of the two lines:
19815             #
19816             # -----line $n-1--- | -----line $n-----
19817             # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
19818             # ^
19819             # |
19820             # We want to decide if we should remove the line break
19821             # between the tokens at $iend_1 and $ibeg_2
19822             #
19823             # We will apply a number of ad-hoc tests to see if joining
19824             # here will look ok. The code will just move to the next
19825             # pair if the join doesn't look good. If we get through
19826             # the gauntlet of tests, the lines will be recombined.
19827             #----------------------------------------------------------
19828             #
19829             # beginning and ending tokens of the lines we are working on
19830 2915         4778 my $ibeg_1 = $ri_beg->[ $n - 1 ];
19831 2915         4410 my $iend_1 = $ri_end->[ $n - 1 ];
19832 2915         4235 my $iend_2 = $ri_end->[$n];
19833 2915         4012 my $ibeg_2 = $ri_beg->[$n];
19834              
19835             # The combined line cannot be too long
19836 2915         5801 my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
19837 2915 100       6531 next if ( $excess > 0 );
19838              
19839 2526         4278 my $type_iend_1 = $types_to_go[$iend_1];
19840 2526         3825 my $type_iend_2 = $types_to_go[$iend_2];
19841 2526         3900 my $type_ibeg_1 = $types_to_go[$ibeg_1];
19842 2526         3788 my $type_ibeg_2 = $types_to_go[$ibeg_2];
19843              
19844 2526         3358 DEBUG_RECOMBINE > 1 && do {
19845             print {*STDOUT}
19846             "RECOMBINE: ix=$ix iend1=$iend_1 iend2=$iend_2 n=$n nmax=$nmax if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
19847             };
19848              
19849             # If line $n is the last line, we set some flags and
19850             # do any special checks for it
19851 2526         3608 my $this_line_is_semicolon_terminated;
19852 2526 100       5248 if ( $n == $nmax ) {
19853              
19854 610 100       2294 if ( $type_ibeg_2 eq '{' ) {
19855              
19856             # join isolated ')' and '{' if requested (git #110)
19857 40 50 66     216 if ( $rOpts_cuddled_paren_brace
      66        
      33        
19858             && $type_iend_1 eq '}'
19859             && $iend_1 == $ibeg_1
19860             && $ibeg_2 == $iend_2 )
19861             {
19862 1 50 33     10 if ( $tokens_to_go[$iend_1] eq ')'
19863             && $tokens_to_go[$ibeg_2] eq '{' )
19864             {
19865 1         3 $n_best = $n;
19866 1         2 $ix_best = $ix;
19867 1         3 last;
19868             }
19869             }
19870              
19871             # otherwise, a terminal '{' should stay where it is
19872             # unless preceded by a fat comma
19873 39 50       219 next if ( $type_iend_1 ne '=>' );
19874             }
19875              
19876             $this_line_is_semicolon_terminated =
19877 570         1232 $rhash->{_has_terminal_semicolon};
19878              
19879             }
19880              
19881             #----------------------------------------------------------
19882             # Recombine Section 0:
19883             # Examine the special token joining this line pair, if any.
19884             # Put as many tests in this section to avoid duplicate code
19885             # and to make formatting independent of whether breaks are
19886             # to the left or right of an operator.
19887             #----------------------------------------------------------
19888              
19889 2486         3930 my $itok = $rjoint->[$n];
19890 2486 100       4835 if ($itok) {
19891 339         1105 my $ok_0 = recombine_section_0( $itok, $ri_beg, $ri_end, $n );
19892 339 100       1013 next if ( !$ok_0 );
19893             }
19894              
19895             #----------------------------------------------------------
19896             # Recombine Section 1:
19897             # Join welded nested containers immediately
19898             #----------------------------------------------------------
19899              
19900 2319 50 33     4888 if (
      66        
19901             $total_weld_count
19902             && ( $type_sequence_to_go[$iend_1]
19903             && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
19904             || $type_sequence_to_go[$ibeg_2]
19905             && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
19906             )
19907             {
19908 0         0 $n_best = $n;
19909 0         0 $ix_best = $ix;
19910 0         0 last;
19911             }
19912              
19913             #----------------------------------------------------------
19914             # Recombine Section 2:
19915             # Examine token at $iend_1 (right end of first line of pair)
19916             #----------------------------------------------------------
19917              
19918 2319         5530 my ( $ok_2, $skip_Section_3 ) =
19919             recombine_section_2( $ri_beg, $ri_end, $n,
19920             $this_line_is_semicolon_terminated );
19921 2319 100       6112 next if ( !$ok_2 );
19922              
19923             #----------------------------------------------------------
19924             # Recombine Section 3:
19925             # Examine token at $ibeg_2 (left end of second line of pair)
19926             #----------------------------------------------------------
19927              
19928             # Join lines identified above as capable of
19929             # causing an outdented line with leading closing paren.
19930             # Note that we are skipping the rest of this section
19931             # and the rest of the loop to do the join.
19932 618 100       2580 if ($skip_Section_3) {
19933 12         30 $forced_breakpoint_to_go[$iend_1] = 0;
19934 12         33 $n_best = $n;
19935 12         26 $ix_best = $ix;
19936 12         32 $incomplete_loop = 1;
19937 12         39 last;
19938             }
19939              
19940 606         1976 my ( $ok_3, $bs_tweak ) =
19941             recombine_section_3( $ri_beg, $ri_end, $n,
19942             $this_line_is_semicolon_terminated );
19943 606 100       1686 next if ( !$ok_3 );
19944              
19945             #----------------------------------------------------------
19946             # Recombine Section 4:
19947             # Combine the lines if we arrive here and it is possible
19948             #----------------------------------------------------------
19949              
19950             # honor hard breakpoints
19951 376 100       1229 next if ( $forced_breakpoint_to_go[$iend_1] );
19952              
19953 149         283 if (DEVEL_MODE) {
19954              
19955             # This fault can only occur if an array index error has been
19956             # introduced by a recent programming change.
19957             my $bs_check = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
19958             if ( $bs_check != $bs ) {
19959             Fault(<<EOM);
19960             bs=$bs != $bs_check for break after type $type_iend_1 ix=$ix n=$n
19961             EOM
19962             }
19963             }
19964              
19965             # Require a few extra spaces before recombining lines if we
19966             # are at an old breakpoint unless this is a simple list or
19967             # terminal line. The goal is to avoid oscillating between
19968             # two quasi-stable end states. For example this snippet
19969             # caused problems:
19970              
19971             ## my $this =
19972             ## bless {
19973             ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
19974             ## },
19975             ## $type;
19976             next
19977 149 50 66     542 if ( $old_breakpoint_to_go[$iend_1]
      66        
      33        
      33        
19978             && !$this_line_is_semicolon_terminated
19979             && $n < $nmax
19980             && $excess + 4 > 0
19981             && $type_iend_2 ne ',' );
19982              
19983             # do not recombine if we would skip in indentation levels
19984 149 100       438 if ( $n < $nmax ) {
19985 138         320 my $if_next = $ri_beg->[ $n + 1 ];
19986             next
19987             if (
19988 138 50 66     532 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
      0        
      33        
19989             && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
19990              
19991             # but an isolated 'if (' is undesirable
19992             && !(
19993             $n == 1
19994             && $iend_1 - $ibeg_1 <= 2
19995             && $type_ibeg_1 eq 'k'
19996             && $tokens_to_go[$ibeg_1] eq 'if'
19997             && $tokens_to_go[$iend_1] ne '('
19998             )
19999             );
20000             }
20001              
20002             ## OLD: honor no-break's
20003             ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257
20004              
20005             # remember the pair with the greatest bond strength
20006 149 100       421 if ( !$n_best ) {
20007              
20008             # First good joint ...
20009 109         216 $n_best = $n;
20010 109         208 $ix_best = $ix;
20011 109         230 $bs_best = $bs;
20012 109         281 $num_bs = 1;
20013              
20014             # In optimization mode: stop on the first acceptable joint
20015             # because we already know it has the highest strength
20016 109 100       448 if ( $rhash->{_optimization_on} == 1 ) {
20017 40         91 last;
20018             }
20019             }
20020             else {
20021              
20022             # Second and later joints ..
20023 40         69 $num_bs++;
20024              
20025             # save maximum strength; in case of a tie select min $n
20026 40 50 66     260 if ( $bs > $bs_best || $bs == $bs_best && $n < $n_best ) {
      33        
20027 0         0 $n_best = $n;
20028 0         0 $ix_best = $ix;
20029 0         0 $bs_best = $bs;
20030             }
20031             }
20032              
20033             } ## end loop over all line pairs
20034              
20035             #---------------------------------------------------
20036             # recombine the pair with the greatest bond strength
20037             #---------------------------------------------------
20038 808 100       2649 if ($n_best) {
20039 122         214 DEBUG_RECOMBINE > 1
20040             && print "BEST: nb=$n_best nbeg=$nbeg stop=$nstop bs=$bs_best\n";
20041 122         247 splice @{$ri_beg}, $n_best, 1;
  122         373  
20042 122         258 splice @{$ri_end}, $n_best - 1, 1;
  122         290  
20043 122         219 splice @{$rjoint}, $n_best, 1;
  122         262  
20044              
20045 122         235 splice @{$rpair_list}, $ix_best, 1;
  122         246  
20046              
20047             # Update the line indexes in the pair list:
20048             # Old $n values greater than the best $n decrease by 1
20049             # because of the splice we just did.
20050 122         309 foreach my $item ( @{$rpair_list} ) {
  122         693  
20051 726         1036 my $n_old = $item->[0];
20052 726 100       1603 if ( $n_old > $n_best ) { $item->[0] -= 1 }
  361         668  
20053             }
20054              
20055             # Store the index of this location for starting the next search.
20056             # We must subtract 1 to get an updated index because the splice
20057             # above just removed the best pair.
20058             # BUT CAUTION: if this is the first pair in the pair list, then
20059             # this produces an invalid index. So this index must be tested
20060             # before use in the next pass through the outer loop.
20061 122         418 $rhash->{_ix_best_last} = $ix_best - 1;
20062              
20063             # Turn on optimization if ...
20064 122 100 100     929 if (
      100        
20065              
20066             # it is not already on, and
20067             !$rhash->{_optimization_on}
20068              
20069             # we have not taken a shortcut to get here, and
20070             && !$incomplete_loop
20071              
20072             # we have seen a good break on strength, and
20073             && $num_bs
20074              
20075             )
20076             {
20077              
20078             # To deactivate optimization for testing purposes, the next
20079             # line can be commented out. This will increase run time.
20080 69         184 $rhash->{_optimization_on} = 1;
20081 69         157 if (DEBUG_RECOMBINE) {
20082             my $num_compares = $rhash->{_num_compares};
20083             my $pair_count = @ix_list;
20084             print {*STDOUT}
20085             "Entering optimization phase at $num_compares compares, pair count = $pair_count\n";
20086             }
20087             }
20088             }
20089 808         2200 return;
20090             } ## end sub recombine_inner_loop
20091              
20092             sub recombine_section_0 {
20093 339     339 0 836 my ( $itok, $ri_beg, $ri_end, $n ) = @_;
20094              
20095             # Recombine Section 0:
20096             # Examine special candidate joining token $itok
20097              
20098             # Given:
20099             # $itok = index of token at a possible join of lines $n-1 and $n
20100              
20101             # Return:
20102             # true => ok to combine
20103             # false => do not combine lines
20104              
20105             # Here are Indexes of the endpoint tokens of the two lines:
20106             #
20107             # -----line $n-1--- | -----line $n-----
20108             # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
20109             # ^ ^
20110             # | |
20111             # ------------$itok is one of these tokens
20112              
20113             # Put as many tests in this section to avoid duplicate code
20114             # and to make formatting independent of whether breaks are
20115             # to the left or right of an operator.
20116              
20117 339         501 my $nmax = @{$ri_end} - 1;
  339         621  
20118 339         636 my $ibeg_1 = $ri_beg->[ $n - 1 ];
20119 339         597 my $iend_1 = $ri_end->[ $n - 1 ];
20120 339         612 my $ibeg_2 = $ri_beg->[$n];
20121 339         621 my $iend_2 = $ri_end->[$n];
20122              
20123 339 50       1753 if ($itok) {
20124              
20125 339         631 my $type = $types_to_go[$itok];
20126              
20127 339 100       1584 if ( $type eq ':' ) {
    100          
    100          
    50          
20128              
20129             # do not join at a colon unless it disobeys the
20130             # break request
20131 103 100       338 if ( $itok eq $iend_1 ) {
20132 1 50       5 return unless $want_break_before{$type};
20133             }
20134             else {
20135 102 50       359 return if $want_break_before{$type};
20136             }
20137             } ## end if ':'
20138              
20139             # handle math operators + - * /
20140             elsif ( $is_math_op{$type} ) {
20141              
20142             # Combine these lines if this line is a single
20143             # number, or if it is a short term with same
20144             # operator as the previous line. For example, in
20145             # the following code we will combine all of the
20146             # short terms $A, $B, $C, $D, $E, $F, together
20147             # instead of leaving them one per line:
20148             # my $time =
20149             # $A * $B * $C * $D * $E * $F *
20150             # ( 2. * $eps * $sigma * $area ) *
20151             # ( 1. / $tcold**3 - 1. / $thot**3 );
20152              
20153             # This can be important in math-intensive code.
20154              
20155 87         136 my $good_combo;
20156              
20157 87         232 my $itokp = min( $inext_to_go[$itok], $iend_2 );
20158 87         184 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
20159 87         247 my $itokm = max( iprev_to_go($itok), $ibeg_1 );
20160 87         216 my $itokmm = max( iprev_to_go($itokm), $ibeg_1 );
20161              
20162             # check for a number on the right
20163 87 100       245 if ( $types_to_go[$itokp] eq 'n' ) {
20164              
20165             # ok if nothing else on right
20166 26 100       79 if ( $itokp == $iend_2 ) {
20167 2         9 $good_combo = 1;
20168             }
20169             else {
20170              
20171             # look one more token to right..
20172             # okay if math operator or some termination
20173             $good_combo =
20174             ( ( $itokpp == $iend_2 )
20175 24   100     197 && $is_math_op{ $types_to_go[$itokpp] } )
20176             || $types_to_go[$itokpp] =~ /^[#,;]$/;
20177             }
20178             }
20179              
20180             # check for a number on the left
20181 87 100 100     382 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
20182              
20183             # okay if nothing else to left
20184 15 100       71 if ( $itokm == $ibeg_1 ) {
20185 6         23 $good_combo = 1;
20186             }
20187              
20188             # otherwise look one more token to left
20189             else {
20190              
20191             # okay if math operator, comma, or assignment
20192             $good_combo = ( $itokmm == $ibeg_1 )
20193             && ( $is_math_op{ $types_to_go[$itokmm] }
20194             || $types_to_go[$itokmm] =~ /^[,]$/
20195 9   66     84 || $is_assignment{ $types_to_go[$itokmm] } );
20196             }
20197             }
20198              
20199             # look for a single short token either side of the
20200             # operator
20201 87 100       254 if ( !$good_combo ) {
20202              
20203             # Slight adjustment factor to make results
20204             # independent of break before or after operator
20205             # in long summed lists. (An operator and a
20206             # space make two spaces).
20207 68 100       215 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
20208              
20209             $good_combo =
20210              
20211             # numbers or id's on both sides of this joint
20212             $types_to_go[$itokp] =~ /^[in]$/
20213             && $types_to_go[$itokm] =~ /^[in]$/
20214              
20215             # one of the two lines must be short:
20216             && (
20217             (
20218             # no more than 2 nonblank tokens right
20219             # of joint
20220             $itokpp == $iend_2
20221              
20222             # short
20223             && token_sequence_length( $itokp, $iend_2 ) <
20224             $two + $rOpts_short_concatenation_item_length
20225             )
20226             || (
20227             # no more than 2 nonblank tokens left of
20228             # joint
20229             $itokmm == $ibeg_1
20230              
20231             # short
20232             && token_sequence_length( $ibeg_1, $itokm ) <
20233             2 - $two + $rOpts_short_concatenation_item_length
20234             )
20235              
20236             )
20237              
20238             # keep pure terms; don't mix +- with */
20239             && !(
20240             $is_plus_minus{$type}
20241             && ( $is_mult_div{ $types_to_go[$itokmm] }
20242             || $is_mult_div{ $types_to_go[$itokpp] } )
20243             )
20244             && !(
20245             $is_mult_div{$type}
20246             && ( $is_plus_minus{ $types_to_go[$itokmm] }
20247 68   66     463 || $is_plus_minus{ $types_to_go[$itokpp] } )
20248             )
20249              
20250             ;
20251             }
20252              
20253             # it is also good to combine if we can reduce to 2
20254             # lines
20255 87 100       232 if ( !$good_combo ) {
20256              
20257             # index on other line where same token would be
20258             # in a long chain.
20259 64 100       156 my $iother = ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
20260              
20261 64   33     267 $good_combo =
20262             $n == 2
20263             && $n == $nmax
20264             && $types_to_go[$iother] ne $type;
20265             }
20266              
20267 87 100       246 return unless ($good_combo);
20268              
20269             } ## end math
20270              
20271             elsif ( $is_amp_amp{$type} ) {
20272             ##TBD
20273             } ## end &&, ||
20274              
20275             elsif ( $is_assignment{$type} ) {
20276             ##TBD
20277             }
20278             else {
20279             ## ok - not a special type
20280             }
20281             ## end assignment
20282             }
20283              
20284             # ok to combine lines
20285 172         380 return 1;
20286             } ## end sub recombine_section_0
20287              
20288             sub recombine_section_2 {
20289              
20290 2319     2319 0 4624 my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_;
20291              
20292             # Recombine Section 2:
20293             # Examine token at $iend_1 (right end of first line of pair)
20294              
20295             # Here are Indexes of the endpoint tokens of the two lines:
20296             #
20297             # -----line $n-1--- | -----line $n-----
20298             # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
20299             # ^
20300             # |
20301             # -----Section 2 looks at this token
20302              
20303             # Returns:
20304             # (nothing) => do not join lines
20305             # 1, skip_Section_3 => ok to join lines
20306              
20307             # $skip_Section_3 is a flag for skipping the next section
20308 2319         3397 my $skip_Section_3 = 0;
20309              
20310 2319         3167 my $nmax = @{$ri_end} - 1;
  2319         3805  
20311 2319         3892 my $ibeg_1 = $ri_beg->[ $n - 1 ];
20312 2319         3558 my $iend_1 = $ri_end->[ $n - 1 ];
20313 2319         3684 my $iend_2 = $ri_end->[$n];
20314 2319         3355 my $ibeg_2 = $ri_beg->[$n];
20315 2319 100       5092 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
20316 2319         3379 my $ibeg_nmax = $ri_beg->[$nmax];
20317              
20318 2319         3606 my $type_iend_1 = $types_to_go[$iend_1];
20319 2319         3372 my $type_iend_2 = $types_to_go[$iend_2];
20320 2319         3341 my $type_ibeg_1 = $types_to_go[$ibeg_1];
20321 2319         3389 my $type_ibeg_2 = $types_to_go[$ibeg_2];
20322              
20323             # an isolated '}' may join with a ';' terminated segment
20324 2319 100       11763 if ( $type_iend_1 eq '}' ) {
    100          
    50          
    100          
    100          
    50          
    50          
    100          
    100          
    100          
    50          
20325              
20326             # Check for cases where combining a semicolon terminated
20327             # statement with a previous isolated closing paren will
20328             # allow the combined line to be outdented. This is
20329             # generally a good move. For example, we can join up
20330             # the last two lines here:
20331             # (
20332             # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
20333             # $size, $atime, $mtime, $ctime, $blksize, $blocks
20334             # )
20335             # = stat($file);
20336             #
20337             # to get:
20338             # (
20339             # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
20340             # $size, $atime, $mtime, $ctime, $blksize, $blocks
20341             # ) = stat($file);
20342             #
20343             # which makes the parens line up.
20344             #
20345             # Another example, from Joe Matarazzo, probably looks best
20346             # with the 'or' clause appended to the trailing paren:
20347             # $self->some_method(
20348             # PARAM1 => 'foo',
20349             # PARAM2 => 'bar'
20350             # ) or die "Some_method didn't work";
20351             #
20352             # But we do not want to do this for something like the -lp
20353             # option where the paren is not outdentable because the
20354             # trailing clause will be far to the right.
20355             #
20356             # The logic here is synchronized with the logic in sub
20357             # sub get_final_indentation, which actually does
20358             # the outdenting.
20359             #
20360             my $combine_ok = $this_line_is_semicolon_terminated
20361              
20362             # only one token on last line
20363             && $ibeg_1 == $iend_1
20364              
20365             # must be structural paren
20366             && $tokens_to_go[$iend_1] eq ')'
20367              
20368             # style must allow outdenting,
20369 345   66     2207 && !$closing_token_indentation{')'}
20370              
20371             # but leading colons probably line up with a
20372             # previous colon or question (count could be wrong).
20373             && $type_ibeg_2 ne ':'
20374              
20375             # only one step in depth allowed. this line must not
20376             # begin with a ')' itself.
20377             && ( $nesting_depth_to_go[$iend_1] ==
20378             $nesting_depth_to_go[$iend_2] + 1 );
20379              
20380             # But only combine leading '&&', '||', if no previous && || :
20381             # seen. This count includes these tokens at all levels. The
20382             # idea is that seeing these at any level can make it hard to read
20383             # formatting if we recombine.
20384 345 100       978 if ( $is_amp_amp{$type_ibeg_2} ) {
20385 16         51 foreach my $n_t ( reverse( 0 .. $n - 2 ) ) {
20386 15         33 my $ibeg_t = $ri_beg->[$n_t];
20387 15         31 my $type_t = $types_to_go[$ibeg_t];
20388 15 100 66     84 if ( $is_amp_amp{$type_t} || $type_t eq ':' ) {
20389 5         14 $combine_ok = 0;
20390 5         15 last;
20391             }
20392             }
20393             }
20394              
20395 345   66     1567 $skip_Section_3 ||= $combine_ok;
20396              
20397             # YVES patch 2 of 2:
20398             # Allow cuddled eval chains, like this:
20399             # eval {
20400             # #STUFF;
20401             # 1; # return true
20402             # } or do {
20403             # #handle error
20404             # };
20405             # This patch works together with a patch in
20406             # setting adjusted indentation (where the closing eval
20407             # brace is outdented if possible).
20408             # The problem is that an 'eval' block has continuation
20409             # indentation and it looks better to undo it in some
20410             # cases. If we do not use this patch we would get:
20411             # eval {
20412             # #STUFF;
20413             # 1; # return true
20414             # }
20415             # or do {
20416             # #handle error
20417             # };
20418             # The alternative, for uncuddled style, is to create
20419             # a patch in get_final_indentation which undoes
20420             # the indentation of a leading line like 'or do {'.
20421             # This doesn't work well with -icb through
20422 345 50 100     1786 if (
      100        
      100        
      33        
      66        
20423             $block_type_to_go[$iend_1]
20424             && $rOpts_brace_follower_vertical_tightness > 0
20425             && (
20426              
20427             # -bfvt=1, allow cuddled eval chains [default]
20428             (
20429             $tokens_to_go[$iend_2] eq '{'
20430             && $block_type_to_go[$iend_1] eq 'eval'
20431             && !ref( $leading_spaces_to_go[$iend_1] )
20432             && !$rOpts_indent_closing_brace
20433             )
20434              
20435             # -bfvt=2, allow most brace followers [part of git #110]
20436             || ( $rOpts_brace_follower_vertical_tightness > 1
20437             && $ibeg_1 == $iend_1 )
20438              
20439             )
20440              
20441             && (
20442             ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
20443             || ( $type_ibeg_2 eq 'k'
20444             && $is_and_or{ $tokens_to_go[$ibeg_2] } )
20445             || $is_if_unless{ $tokens_to_go[$ibeg_2] }
20446             )
20447             )
20448             {
20449 8   50     48 $skip_Section_3 ||= 1;
20450             }
20451              
20452             return
20453             unless (
20454 345 100 100     2692 $skip_Section_3
      66        
20455              
20456             # handle '.' and '?' specially below
20457             || ( $type_ibeg_2 =~ /^[\.\?]$/ )
20458              
20459             # fix for c054 (unusual -pbp case)
20460             || $type_ibeg_2 eq '=='
20461              
20462             );
20463             }
20464              
20465             elsif ( $type_iend_1 eq '{' ) {
20466              
20467             # YVES
20468             # honor breaks at opening brace
20469             # Added to prevent recombining something like this:
20470             # } || eval { package main;
20471 597 100       2424 return if ( $forced_breakpoint_to_go[$iend_1] );
20472             }
20473              
20474             # do not recombine lines with ending &&, ||,
20475             elsif ( $is_amp_amp{$type_iend_1} ) {
20476 0 0       0 return unless ( $want_break_before{$type_iend_1} );
20477             }
20478              
20479             # Identify and recombine a broken ?/: chain
20480             elsif ( $type_iend_1 eq '?' ) {
20481              
20482             # Do not recombine different levels
20483             return
20484 1 50       5 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
20485              
20486             # do not recombine unless next line ends in :
20487 1 50       5 return unless $type_iend_2 eq ':';
20488             }
20489              
20490             # for lines ending in a comma...
20491             elsif ( $type_iend_1 eq ',' ) {
20492              
20493             # Do not recombine at comma which is following the
20494             # input bias.
20495             # NOTE: this could be controlled by a special flag,
20496             # but it seems to work okay.
20497 805 100       2730 return if ( $old_breakpoint_to_go[$iend_1] );
20498              
20499             # An isolated '},' may join with an identifier + ';'
20500             # This is useful for the class of a 'bless' statement
20501             # (bless.t)
20502 140 100 100     559 if ( $type_ibeg_1 eq '}'
20503             && $type_ibeg_2 eq 'i' )
20504             {
20505             return
20506 1 50 33     11 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
      33        
20507             && ( $iend_2 == ( $ibeg_2 + 1 ) )
20508             && $this_line_is_semicolon_terminated );
20509              
20510             # override breakpoint
20511 0         0 $forced_breakpoint_to_go[$iend_1] = 0;
20512             }
20513              
20514             # but otherwise ..
20515             else {
20516              
20517             # do not recombine after a comma unless this will
20518             # leave just 1 more line
20519 139 100       415 return if ( $n + 1 < $nmax );
20520              
20521             # do not recombine if there is a change in
20522             # indentation depth
20523             return
20524 27 100       123 if ( $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
20525              
20526             # do not recombine a "complex expression" after a
20527             # comma. "complex" means no parens.
20528 10         21 my $saw_paren;
20529 10         34 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
20530 29 50       100 if ( $tokens_to_go[$ii] eq '(' ) {
20531 0         0 $saw_paren = 1;
20532 0         0 last;
20533             }
20534             }
20535 10 50       44 return if $saw_paren;
20536             }
20537             }
20538              
20539             # opening paren..
20540             elsif ( $type_iend_1 eq '(' ) {
20541              
20542             # No longer doing this
20543             }
20544              
20545             elsif ( $type_iend_1 eq ')' ) {
20546              
20547             # No longer doing this
20548             }
20549              
20550             # keep a terminal for-semicolon
20551             elsif ( $type_iend_1 eq 'f' ) {
20552 8         22 return;
20553             }
20554              
20555             # if '=' at end of line ...
20556             elsif ( $is_assignment{$type_iend_1} ) {
20557              
20558             # keep break after = if it was in input stream
20559             # this helps prevent 'blinkers'
20560             return
20561             if (
20562 78 100 66     543 $old_breakpoint_to_go[$iend_1]
20563              
20564             # don't strand an isolated '='
20565             && $iend_1 != $ibeg_1
20566             );
20567              
20568 42   66     256 my $is_short_quote =
20569             ( $type_ibeg_2 eq 'Q'
20570             && $ibeg_2 == $iend_2
20571             && token_sequence_length( $ibeg_2, $ibeg_2 ) <
20572             $rOpts_short_concatenation_item_length );
20573 42   33     187 my $is_ternary = (
20574             $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
20575             && $types_to_go[$ibeg_3] eq ':' )
20576             );
20577              
20578             # always join an isolated '=', a short quote, or if this
20579             # will put ?/: at start of adjacent lines
20580 42 50 33     365 if ( $ibeg_1 != $iend_1
      33        
20581             && !$is_short_quote
20582             && !$is_ternary )
20583             {
20584 42   66     561 my $combine_ok = (
20585             (
20586              
20587             # unless we can reduce this to two lines
20588             $nmax < $n + 2
20589              
20590             # or three lines, the last with a leading
20591             # semicolon
20592             || ( $nmax == $n + 2
20593             && $types_to_go[$ibeg_nmax] eq ';' )
20594              
20595             # or the next line ends with a here doc
20596             || $type_iend_2 eq 'h'
20597              
20598             # or the next line ends in an open paren or
20599             # brace and the break hasn't been forced
20600             # [dima.t]
20601             || (!$forced_breakpoint_to_go[$iend_1]
20602             && $type_iend_2 eq '{' )
20603             )
20604              
20605             # do not recombine if the two lines might align
20606             # well this is a very approximate test for this
20607             && (
20608              
20609             # RT#127633 - the leading tokens are not
20610             # operators
20611             ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
20612              
20613             # or they are different
20614             || ( $ibeg_3 >= 0
20615             && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
20616             )
20617             );
20618              
20619 42 100       206 return if ( !$combine_ok );
20620              
20621 21 100 33     124 if (
      66        
20622              
20623             # Recombine if we can make two lines
20624             $nmax >= $n + 2
20625              
20626             # -lp users often prefer this:
20627             # my $title = function($env, $env, $sysarea,
20628             # "bubba Borrower Entry");
20629             # so we will recombine if -lp is used we have
20630             # ending comma
20631             && !(
20632             $ibeg_3 > 0
20633             && ref( $leading_spaces_to_go[$ibeg_3] )
20634             && $type_iend_2 eq ','
20635             )
20636             )
20637             {
20638              
20639             # otherwise, scan the rhs line up to last token for
20640             # complexity. Note that we are not counting the last token
20641             # in case it is an opening paren.
20642 1         5 my $ok = simple_rhs( $ri_end, $n, $nmax, $ibeg_2, $iend_2 );
20643 1 50       4 return if ( !$ok );
20644              
20645             }
20646             }
20647              
20648 21 100       150 if ( $tokens_to_go[$ibeg_2] !~ /^[\{\(\[]$/ ) {
20649 19         56 $forced_breakpoint_to_go[$iend_1] = 0;
20650             }
20651             }
20652              
20653             # for keywords..
20654             elsif ( $type_iend_1 eq 'k' ) {
20655              
20656             # make major control keywords stand out
20657             # (recombine.t)
20658             return
20659             if (
20660              
20661             #/^(last|next|redo|return)$/
20662 26 100 100     182 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
20663              
20664             # but only if followed by multiple lines
20665             && $n < $nmax
20666             );
20667              
20668 15 50       66 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
20669             return
20670 0 0       0 unless $want_break_before{ $tokens_to_go[$iend_1] };
20671             }
20672             }
20673             elsif ( $type_iend_1 eq '.' ) {
20674              
20675             # NOTE: the logic here should match that of section 3 so that
20676             # line breaks are independent of choice of break before or after.
20677             # It would be nice to combine them in section 0, but the
20678             # special junction case ') .' makes that difficult.
20679             # This section added to fix issues c172, c174.
20680 0         0 my $i_next_nonblank = $ibeg_2;
20681 0         0 my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] -
20682             $summed_lengths_to_go[$ibeg_1];
20683 0         0 my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] -
20684             $summed_lengths_to_go[$ibeg_2];
20685 0         0 my $iend_1_minus = max( $ibeg_1, iprev_to_go($iend_1) );
20686              
20687 0   0     0 my $combine_ok = (
20688              
20689             # ... unless there is just one and we can reduce
20690             # this to two lines if we do. For example, this
20691             #
20692             #
20693             # $bodyA .=
20694             # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
20695             #
20696             # looks better than this:
20697             # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' .
20698             # '$args .= $pat;'
20699              
20700             # check for 2 lines, not in a long broken '.' chain
20701             ( $n == 2 && $n == $nmax && $type_iend_1 ne $type_iend_2 )
20702              
20703             # ... or this would strand a short quote , like this
20704             # "some long quote" .
20705             # "\n";
20706             || (
20707             $types_to_go[$i_next_nonblank] eq 'Q'
20708             && $i_next_nonblank >= $iend_2 - 2
20709             && $token_lengths_to_go[$i_next_nonblank] <
20710             $rOpts_short_concatenation_item_length
20711              
20712             # additional constraints to fix c167
20713             && ( $types_to_go[$iend_1_minus] ne 'Q'
20714             || $summed_len_2 < $summed_len_1 )
20715             )
20716             );
20717 0 0       0 return if ( !$combine_ok );
20718             }
20719             else {
20720             ## ok - not a special type
20721             }
20722 618         1871 return ( 1, $skip_Section_3 );
20723             } ## end sub recombine_section_2
20724              
20725             sub simple_rhs {
20726              
20727 1     1 0 4 my ( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ) = @_;
20728              
20729             # Scan line ibeg_2 to $iend_2 up to last token for complexity.
20730             # We are not counting the last token in case it is an opening paren.
20731             # Return:
20732             # true if rhs is simple, ok to recombine
20733             # false otherwise
20734              
20735 1         3 my $tv = 0;
20736 1         3 my $depth = $nesting_depth_to_go[$ibeg_2];
20737 1         4 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
20738 2 50       7 if ( $nesting_depth_to_go[$i] != $depth ) {
20739 0         0 $tv++;
20740 0 0       0 last if ( $tv > 1 );
20741             }
20742 2         5 $depth = $nesting_depth_to_go[$i];
20743             }
20744              
20745             # ok to recombine if no level changes before
20746             # last token
20747 1 50       4 if ( $tv > 0 ) {
20748              
20749             # otherwise, do not recombine if more than
20750             # two level changes.
20751 0 0       0 return if ( $tv > 1 );
20752              
20753             # check total complexity of the two
20754             # adjacent lines that will occur if we do
20755             # this join
20756 0 0       0 my $istop =
20757             ( $n < $nmax )
20758             ? $ri_end->[ $n + 1 ]
20759             : $iend_2;
20760 0         0 foreach my $i ( $iend_2 .. $istop ) {
20761 0 0       0 if ( $nesting_depth_to_go[$i] != $depth ) {
20762 0         0 $tv++;
20763 0 0       0 last if ( $tv > 2 );
20764             }
20765 0         0 $depth = $nesting_depth_to_go[$i];
20766             }
20767              
20768             # do not recombine if total is more than 2
20769             # level changes
20770 0 0       0 return if ( $tv > 2 );
20771             }
20772 1         3 return 1;
20773             } ## end sub simple_rhs
20774              
20775             sub recombine_section_3 {
20776              
20777 606     606 0 1371 my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_;
20778              
20779             # Recombine Section 3:
20780             # Examine token at $ibeg_2 (right end of first line of pair)
20781              
20782             # Here are Indexes of the endpoint tokens of the two lines:
20783             #
20784             # -----line $n-1--- | -----line $n-----
20785             # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
20786             # ^
20787             # |
20788             # -----Section 3 looks at this token
20789              
20790             # Returns:
20791             # (nothing) => do not join lines
20792             # 1, bs_tweak => ok to join lines
20793              
20794             # $bstweak is a small tolerance to add to bond strengths
20795 606         1003 my $bs_tweak = 0;
20796              
20797 606         913 my $nmax = @{$ri_end} - 1;
  606         1168  
20798 606         1135 my $ibeg_1 = $ri_beg->[ $n - 1 ];
20799 606         1043 my $iend_1 = $ri_end->[ $n - 1 ];
20800 606         1169 my $iend_2 = $ri_end->[$n];
20801 606         1046 my $ibeg_2 = $ri_beg->[$n];
20802              
20803 606 100       1564 my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
20804 606 100       1475 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
20805 606 100       1527 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
20806 606         1130 my $ibeg_nmax = $ri_beg->[$nmax];
20807              
20808 606         1153 my $type_iend_1 = $types_to_go[$iend_1];
20809 606         1064 my $type_iend_2 = $types_to_go[$iend_2];
20810 606         1074 my $type_ibeg_1 = $types_to_go[$ibeg_1];
20811 606         1081 my $type_ibeg_2 = $types_to_go[$ibeg_2];
20812              
20813             # handle lines with leading &&, ||
20814 606 100       3217 if ( $is_amp_amp{$type_ibeg_2} ) {
    100          
    100          
    100          
    50          
    100          
20815              
20816             # ok to recombine if it follows a ? or :
20817             # and is followed by an open paren..
20818             my $ok =
20819             ( $is_ternary{$type_ibeg_1} && $tokens_to_go[$iend_2] eq '(' )
20820              
20821             # or is followed by a ? or : at same depth
20822             #
20823             # We are looking for something like this. We can
20824             # recombine the && line with the line above to make the
20825             # structure more clear:
20826             # return
20827             # exists $G->{Attr}->{V}
20828             # && exists $G->{Attr}->{V}->{$u}
20829             # ? %{ $G->{Attr}->{V}->{$u} }
20830             # : ();
20831             #
20832             # We should probably leave something like this alone:
20833             # return
20834             # exists $G->{Attr}->{E}
20835             # && exists $G->{Attr}->{E}->{$u}
20836             # && exists $G->{Attr}->{E}->{$u}->{$v}
20837             # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
20838             # : ();
20839             # so that we either have all of the &&'s (or ||'s)
20840             # on one line, as in the first example, or break at
20841             # each one as in the second example. However, it
20842             # sometimes makes things worse to check for this because
20843             # it prevents multiple recombinations. So this is not done.
20844             || ( $ibeg_3 >= 0
20845 44   66     590 && $is_ternary{ $types_to_go[$ibeg_3] }
20846             && $nesting_depth_to_go[$ibeg_3] ==
20847             $nesting_depth_to_go[$ibeg_2] );
20848              
20849             # Combine a trailing && term with an || term: fix for
20850             # c060 This is rare but can happen.
20851 44 50 0     254 $ok ||= 1
      100        
      66        
      33        
20852             if ( $ibeg_3 < 0
20853             && $type_ibeg_2 eq '&&'
20854             && $type_ibeg_1 eq '||'
20855             && $nesting_depth_to_go[$ibeg_2] ==
20856             $nesting_depth_to_go[$ibeg_1] );
20857              
20858 44 50 66     262 return if !$ok && $want_break_before{$type_ibeg_2};
20859 1         4 $forced_breakpoint_to_go[$iend_1] = 0;
20860              
20861             # tweak the bond strength to give this joint priority
20862             # over ? and :
20863 1         3 $bs_tweak = 0.25;
20864             }
20865              
20866             # Identify and recombine a broken ?/: chain
20867             elsif ( $type_ibeg_2 eq '?' ) {
20868              
20869             # Do not recombine different levels
20870 87         180 my $lev = $levels_to_go[$ibeg_2];
20871 87 100       260 return if ( $lev ne $levels_to_go[$ibeg_1] );
20872              
20873             # Do not recombine a '?' if either next line or
20874             # previous line does not start with a ':'. The reasons
20875             # are that (1) no alignment of the ? will be possible
20876             # and (2) the expression is somewhat complex, so the
20877             # '?' is harder to see in the interior of the line.
20878 72   66     305 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
20879 72   100     303 my $precedes_colon = $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
20880 72 100 100     342 return unless ( $follows_colon || $precedes_colon );
20881              
20882             # we will always combining a ? line following a : line
20883 55 100       170 if ( !$follows_colon ) {
20884              
20885             # ...otherwise recombine only if it looks like a
20886             # chain. we will just look at a few nearby lines
20887             # to see if this looks like a chain.
20888 29         53 my $local_count = 0;
20889 29         73 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
20890 116 100 100     531 $local_count++
      66        
20891             if $ii >= 0
20892             && $types_to_go[$ii] eq ':'
20893             && $levels_to_go[$ii] == $lev;
20894             }
20895 29 100       155 return if ( $local_count <= 1 );
20896             }
20897 31         58 $forced_breakpoint_to_go[$iend_1] = 0;
20898             }
20899              
20900             # do not recombine lines with leading '.'
20901             elsif ( $type_ibeg_2 eq '.' ) {
20902 144         347 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
20903 144         251 my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] -
20904             $summed_lengths_to_go[$ibeg_1];
20905 144         223 my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] -
20906             $summed_lengths_to_go[$ibeg_2];
20907              
20908 144   66     1015 my $combine_ok = (
20909              
20910             # ... unless there is just one and we can reduce
20911             # this to two lines if we do. For example, this
20912             #
20913             #
20914             # $bodyA .=
20915             # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
20916             #
20917             # looks better than this:
20918             # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
20919             # . '$args .= $pat;'
20920              
20921             ( $n == 2 && $n == $nmax && $type_ibeg_1 ne $type_ibeg_2 )
20922              
20923             # ... or this would strand a short quote , like this
20924             # . "some long quote"
20925             # . "\n";
20926             || (
20927             $types_to_go[$i_next_nonblank] eq 'Q'
20928             && $i_next_nonblank >= $iend_2 - 1
20929             && $token_lengths_to_go[$i_next_nonblank] <
20930             $rOpts_short_concatenation_item_length
20931              
20932             # additional constraints to fix c167
20933             && (
20934             $types_to_go[$iend_1] ne 'Q'
20935              
20936             # allow a term shorter than the previous term
20937             || $summed_len_2 < $summed_len_1
20938              
20939             # or allow a short semicolon-terminated term if this
20940             # makes two lines (see c169)
20941             || ( $n == 2
20942             && $n == $nmax
20943             && $this_line_is_semicolon_terminated )
20944             )
20945             )
20946             );
20947              
20948 144 100       383 return if ( !$combine_ok );
20949             }
20950              
20951             # handle leading keyword..
20952             elsif ( $type_ibeg_2 eq 'k' ) {
20953              
20954             # handle leading "or"
20955 33 100 66     308 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
    100          
    100          
20956              
20957             my $combine_ok = (
20958             $this_line_is_semicolon_terminated
20959             && (
20960             $type_ibeg_1 eq '}'
20961             || (
20962              
20963             # following 'if' or 'unless' or 'or'
20964             $type_ibeg_1 eq 'k'
20965 8   100     114 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
20966              
20967             # important: only combine a very simple
20968             # or statement because the step below
20969             # may have combined a trailing 'and'
20970             # with this or, and we do not want to
20971             # then combine everything together
20972             && ( $iend_2 - $ibeg_2 <= 7 )
20973             )
20974             )
20975             );
20976              
20977 8 100       39 return if ( !$combine_ok );
20978              
20979             #X: RT #81854
20980 4 100       24 $forced_breakpoint_to_go[$iend_1] = 0
20981             if ( !$old_breakpoint_to_go[$iend_1] );
20982             }
20983              
20984             # handle leading 'and' and 'xor'
20985             elsif ($tokens_to_go[$ibeg_2] eq 'and'
20986             || $tokens_to_go[$ibeg_2] eq 'xor' )
20987             {
20988              
20989             # Decide if we will combine a single terminal 'and'
20990             # after an 'if' or 'unless'.
20991              
20992             # This looks best with the 'and' on the same
20993             # line as the 'if':
20994             #
20995             # $a = 1
20996             # if $seconds and $nu < 2;
20997             #
20998             # But this looks better as shown:
20999             #
21000             # $a = 1
21001             # if !$this->{Parents}{$_}
21002             # or $this->{Parents}{$_} eq $_;
21003             #
21004             return
21005             unless (
21006             $this_line_is_semicolon_terminated
21007             && (
21008              
21009             # following 'if' or 'unless' or 'or'
21010             $type_ibeg_1 eq 'k'
21011 8 100 66     75 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
      66        
      100        
21012             || $tokens_to_go[$ibeg_1] eq 'or' )
21013             )
21014             );
21015             }
21016              
21017             # handle leading "if" and "unless"
21018             elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
21019              
21020             # Combine something like:
21021             # next
21022             # if ( $lang !~ /${l}$/i );
21023             # into:
21024             # next if ( $lang !~ /${l}$/i );
21025             return
21026             unless (
21027             $this_line_is_semicolon_terminated
21028              
21029             # previous line begins with 'and' or 'or'
21030             && $type_ibeg_1 eq 'k'
21031 8 50 66     52 && $is_and_or{ $tokens_to_go[$ibeg_1] }
      33        
21032              
21033             );
21034             }
21035              
21036             # handle all other leading keywords
21037             else {
21038              
21039             # keywords look best at start of lines,
21040             # but combine things like "1 while"
21041 9 100       47 if ( !$is_assignment{$type_iend_1} ) {
21042             return
21043 8 50 33     81 if ( ( $type_iend_1 ne 'k' )
21044             && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
21045             }
21046             }
21047             }
21048              
21049             # similar treatment of && and || as above for 'and' and
21050             # 'or': NOTE: This block of code is currently bypassed
21051             # because of a previous block but is retained for possible
21052             # future use.
21053             elsif ( $is_amp_amp{$type_ibeg_2} ) {
21054              
21055             # maybe looking at something like:
21056             # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
21057              
21058             return
21059             unless (
21060             $this_line_is_semicolon_terminated
21061              
21062             # previous line begins with an 'if' or 'unless'
21063             # keyword
21064             && $type_ibeg_1 eq 'k'
21065 0 0 0     0 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
      0        
21066              
21067             );
21068             }
21069              
21070             # handle line with leading = or similar
21071             elsif ( $is_assignment{$type_ibeg_2} ) {
21072 11 50 33     56 return unless ( $n == 1 || $n == $nmax );
21073 11 50       44 return if ( $old_breakpoint_to_go[$iend_1] );
21074             return
21075             unless (
21076              
21077             # unless we can reduce this to two lines
21078 11 50 66     162 $nmax == 2
      66        
      33        
      33        
      33        
21079              
21080             # or three lines, the last with a leading semicolon
21081             || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
21082              
21083             # or the next line ends with a here doc
21084             || $type_iend_2 eq 'h'
21085              
21086             # or this is a short line ending in ;
21087             || ( $n == $nmax
21088             && $this_line_is_semicolon_terminated )
21089             );
21090 1         3 $forced_breakpoint_to_go[$iend_1] = 0;
21091             }
21092             else {
21093             ## ok - not a special type
21094             }
21095 376         1011 return ( 1, $bs_tweak );
21096             } ## end sub recombine_section_3
21097              
21098             } ## end closure recombine_breakpoints
21099              
21100             sub insert_final_ternary_breaks {
21101              
21102 81     81 0 323 my ( $self, $ri_left, $ri_right ) = @_;
21103              
21104             # Called once per batch to look for and do any final line breaks for
21105             # long ternary chains
21106              
21107 81         181 my $nmax = @{$ri_right} - 1;
  81         224  
21108              
21109             # scan the left and right end tokens of all lines
21110 81         205 my $i_first_colon = -1;
21111 81         284 for my $n ( 0 .. $nmax ) {
21112 264         468 my $il = $ri_left->[$n];
21113 264         421 my $ir = $ri_right->[$n];
21114 264         489 my $typel = $types_to_go[$il];
21115 264         463 my $typer = $types_to_go[$ir];
21116 264 100       701 return if ( $typel eq '?' );
21117 229 100       541 return if ( $typer eq '?' );
21118 228 100       512 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
  20         60  
  20         71  
21119 208 100       554 if ( $typer eq ':' ) { $i_first_colon = $ir; last; }
  1         3  
  1         2  
21120             }
21121              
21122             # For long ternary chains,
21123             # if the first : we see has its ? is in the interior
21124             # of a preceding line, then see if there are any good
21125             # breakpoints before the ?.
21126 45 100       267 if ( $i_first_colon > 0 ) {
21127 20         57 my $i_question = $mate_index_to_go[$i_first_colon];
21128 20 100 66     125 if ( defined($i_question) && $i_question > 0 ) {
21129 12         36 my @insert_list;
21130 12         90 foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) {
21131 133         228 my $token = $tokens_to_go[$ii];
21132 133         217 my $type = $types_to_go[$ii];
21133              
21134             # For now, a good break is either a comma or,
21135             # in a long chain, a 'return'.
21136             # Patch for RT #126633: added the $nmax>1 check to avoid
21137             # breaking after a return for a simple ternary. For longer
21138             # chains the break after return allows vertical alignment, so
21139             # it is still done. So perltidy -wba='?' will not break
21140             # immediately after the return in the following statement:
21141             # sub x {
21142             # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
21143             # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
21144             # }
21145 133 100 100     476 if (
      66        
21146             (
21147             $type eq ','
21148             || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
21149             )
21150             && $self->in_same_container_i( $ii, $i_question )
21151             )
21152             {
21153 1         4 push @insert_list, $ii;
21154 1         3 last;
21155             }
21156             }
21157              
21158             # insert any new break points
21159 12 100       80 if (@insert_list) {
21160 1         7 $self->insert_additional_breaks( \@insert_list, $ri_left,
21161             $ri_right );
21162             }
21163             }
21164             }
21165 45         242 return;
21166             } ## end sub insert_final_ternary_breaks
21167              
21168             sub insert_breaks_before_list_opening_containers {
21169              
21170 50     50 0 106 my ( $self, $ri_left, $ri_right ) = @_;
21171              
21172             # This routine is called once per batch to implement the parameters
21173             # --break-before-hash-brace, etc.
21174              
21175             # Nothing to do if none of these parameters has been set
21176 50 50       109 return unless %break_before_container_types;
21177              
21178 50         71 my $nmax = @{$ri_right} - 1;
  50         134  
21179 50 50       122 return if ( $nmax < 0 );
21180              
21181 50         86 my $rLL = $self->[_rLL_];
21182              
21183 50         93 my $rbreak_before_container_by_seqno =
21184             $self->[_rbreak_before_container_by_seqno_];
21185 50         85 my $rK_weld_left = $self->[_rK_weld_left_];
21186              
21187             # scan the ends of all lines
21188 50         84 my @insert_list;
21189 50         148 for my $n ( 0 .. $nmax ) {
21190 143         202 my $il = $ri_left->[$n];
21191 143         181 my $ir = $ri_right->[$n];
21192 143 100       267 next if ( $ir <= $il );
21193 122         174 my $Kl = $K_to_go[$il];
21194 122         207 my $Kr = $K_to_go[$ir];
21195 122         168 my $Kend = $Kr;
21196 122         227 my $type_end = $rLL->[$Kr]->[_TYPE_];
21197              
21198             # Backup before any side comment
21199 122 100       282 if ( $type_end eq '#' ) {
21200 4         13 $Kend = $self->K_previous_nonblank($Kr);
21201 4 50       11 next unless defined($Kend);
21202 4         6 $type_end = $rLL->[$Kend]->[_TYPE_];
21203             }
21204              
21205             # Backup to the start of any weld; fix for b1173.
21206 122 50       205 if ($total_weld_count) {
21207 0         0 my $Kend_test = $rK_weld_left->{$Kend};
21208 0 0 0     0 if ( defined($Kend_test) && $Kend_test > $Kl ) {
21209 0         0 $Kend = $Kend_test;
21210 0         0 $Kend_test = $rK_weld_left->{$Kend};
21211             }
21212              
21213             # Do not break if we did not back up to the start of a weld
21214             # (shouldn't happen)
21215 0 0       0 next if ( defined($Kend_test) );
21216             }
21217              
21218 122         194 my $token = $rLL->[$Kend]->[_TOKEN_];
21219 122 100       313 next if ( !$is_opening_token{$token} );
21220 30 50       77 next if ( $Kl >= $Kend - 1 );
21221              
21222 30         53 my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
21223 30 50       75 next if ( !defined($seqno) );
21224              
21225             # Use the flag which was previously set
21226 30 100       91 next unless ( $rbreak_before_container_by_seqno->{$seqno} );
21227              
21228             # Install a break before this opening token.
21229 14         52 my $Kbreak = $self->K_previous_nonblank($Kend);
21230 14         32 my $ibreak = $Kbreak - $Kl + $il;
21231 14 50       39 next if ( $ibreak < $il );
21232 14 50       34 next if ( $nobreak_to_go[$ibreak] );
21233 14         31 push @insert_list, $ibreak;
21234             }
21235              
21236             # insert any new break points
21237 50 100       121 if (@insert_list) {
21238 10         56 $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
21239             }
21240 50         100 return;
21241             } ## end sub insert_breaks_before_list_opening_containers
21242              
21243             sub note_added_semicolon {
21244 19     19 0 61 my ( $self, $line_number ) = @_;
21245 19         49 $self->[_last_added_semicolon_at_] = $line_number;
21246 19 100       84 if ( $self->[_added_semicolon_count_] == 0 ) {
21247 16         46 $self->[_first_added_semicolon_at_] = $line_number;
21248             }
21249 19         44 $self->[_added_semicolon_count_]++;
21250 19         84 write_logfile_entry("Added ';' here\n");
21251 19         33 return;
21252             } ## end sub note_added_semicolon
21253              
21254             sub note_deleted_semicolon {
21255 13     13 0 25 my ( $self, $line_number ) = @_;
21256 13         25 $self->[_last_deleted_semicolon_at_] = $line_number;
21257 13 100       33 if ( $self->[_deleted_semicolon_count_] == 0 ) {
21258 2         8 $self->[_first_deleted_semicolon_at_] = $line_number;
21259             }
21260 13         22 $self->[_deleted_semicolon_count_]++;
21261 13         50 write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
21262 13         20 return;
21263             } ## end sub note_deleted_semicolon
21264              
21265             sub note_embedded_tab {
21266 0     0 0 0 my ( $self, $line_number ) = @_;
21267 0         0 $self->[_embedded_tab_count_]++;
21268 0         0 $self->[_last_embedded_tab_at_] = $line_number;
21269 0 0       0 if ( !$self->[_first_embedded_tab_at_] ) {
21270 0         0 $self->[_first_embedded_tab_at_] = $line_number;
21271             }
21272              
21273 0 0       0 if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
21274 0         0 write_logfile_entry("Embedded tabs in quote or pattern\n");
21275             }
21276 0         0 return;
21277             } ## end sub note_embedded_tab
21278              
21279 39     39   466 use constant DEBUG_CORRECT_LP => 0;
  39         169  
  39         68503  
21280              
21281             sub correct_lp_indentation {
21282              
21283             # When the -lp option is used, we need to make a last pass through
21284             # each line to correct the indentation positions in case they differ
21285             # from the predictions. This is necessary because perltidy uses a
21286             # predictor/corrector method for aligning with opening parens. The
21287             # predictor is usually good, but sometimes stumbles. The corrector
21288             # tries to patch things up once the actual opening paren locations
21289             # are known.
21290 134     134 0 306 my ( $self, $ri_first, $ri_last ) = @_;
21291              
21292             # first remove continuation indentation if appropriate
21293 134         217 my $max_line = @{$ri_first} - 1;
  134         299  
21294              
21295             #---------------------------------------------------------------------------
21296             # PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
21297             #---------------------------------------------------------------------------
21298              
21299             # The point is that sub 'starting_one_line_block' made one-line blocks based
21300             # on default indentation, not -lp indentation. So some of the one-line
21301             # blocks may be too long when given -lp indentation. We will fix that now
21302             # if possible, using the list of these closing block indexes.
21303 134         296 my $ri_starting_one_line_block =
21304             $self->[_this_batch_]->[_ri_starting_one_line_block_];
21305 134 100       227 if ( @{$ri_starting_one_line_block} ) {
  134         375  
21306 5         44 $self->correct_lp_indentation_pass_1( $ri_first, $ri_last,
21307             $ri_starting_one_line_block );
21308             }
21309              
21310             #-------------------------------------------------------------------
21311             # PASS 2: look for and fix other problems in each line of this batch
21312             #-------------------------------------------------------------------
21313              
21314             # look at each output line ...
21315 134         352 foreach my $line ( 0 .. $max_line ) {
21316 576         898 my $ibeg = $ri_first->[$line];
21317 576         858 my $iend = $ri_last->[$line];
21318              
21319             # looking at each token in this output line ...
21320 576         1048 foreach my $i ( $ibeg .. $iend ) {
21321              
21322             # How many space characters to place before this token
21323             # for special alignment. Actual padding is done in the
21324             # continue block.
21325              
21326             # looking for next unvisited indentation item ...
21327 3869         5282 my $indentation = $leading_spaces_to_go[$i];
21328              
21329             # This is just for indentation objects (c098)
21330 3869 100       6989 next unless ( ref($indentation) );
21331              
21332             # Visit each indentation object just once
21333 3065 100       5684 next if ( $indentation->get_marked() );
21334              
21335             # Mark first visit
21336 608         1592 $indentation->set_marked(1);
21337              
21338             # Skip indentation objects which do not align with container tokens
21339 608         1242 my $align_seqno = $indentation->get_align_seqno();
21340 608 100       1342 next unless ($align_seqno);
21341              
21342             # Skip a container which is entirely on this line
21343 229         682 my $Ko = $self->[_K_opening_container_]->{$align_seqno};
21344 229         587 my $Kc = $self->[_K_closing_container_]->{$align_seqno};
21345 229 50 33     991 if ( defined($Ko) && defined($Kc) ) {
21346 229 100 100     932 next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
21347             }
21348              
21349             # Note on flag '$do_not_pad':
21350             # We want to avoid a situation like this, where the aligner
21351             # inserts whitespace before the '=' to align it with a previous
21352             # '=', because otherwise the parens might become mis-aligned in a
21353             # situation like this, where the '=' has become aligned with the
21354             # previous line, pushing the opening '(' forward beyond where we
21355             # want it.
21356             #
21357             # $mkFloor::currentRoom = '';
21358             # $mkFloor::c_entry = $c->Entry(
21359             # -width => '10',
21360             # -relief => 'sunken',
21361             # ...
21362             # );
21363             #
21364             # We leave it to the aligner to decide how to do this.
21365 130 100 66     490 if ( $line == 1 && $i == $ibeg ) {
21366 50         162 $self->[_this_batch_]->[_do_not_pad_] = 1;
21367             }
21368              
21369             #--------------------------------------------
21370             # Now see what the error is and try to fix it
21371             #--------------------------------------------
21372 130         388 my $closing_index = $indentation->get_closed();
21373 130         362 my $predicted_pos = $indentation->get_spaces();
21374              
21375             # Find actual position:
21376 130         300 my $actual_pos;
21377              
21378 130 100       356 if ( $i == $ibeg ) {
21379              
21380             # Case 1: token is first character of of batch - table lookup
21381 118 100       303 if ( $line == 0 ) {
21382              
21383 7         16 $actual_pos = $predicted_pos;
21384              
21385 7         20 my ( $indent, $offset, $is_leading, $exists ) =
21386             get_saved_opening_indentation($align_seqno);
21387 7 50       22 if ( defined($indent) ) {
21388              
21389             # NOTE: we could use '1' here if no space after
21390             # opening and '2' if want space; it is hardwired at 1
21391             # like -gnu-style. But it is probably best to leave
21392             # this alone because changing it would change
21393             # formatting of much existing code without any
21394             # significant benefit.
21395 7         18 $actual_pos = get_spaces($indent) + $offset + 1;
21396             }
21397             }
21398              
21399             # Case 2: token starts a new line - use length of previous line
21400             else {
21401              
21402 111         250 my $ibegm = $ri_first->[ $line - 1 ];
21403 111         189 my $iendm = $ri_last->[ $line - 1 ];
21404 111         288 $actual_pos = total_line_length( $ibegm, $iendm );
21405              
21406             # follow -pt style
21407 111 100       504 ++$actual_pos
21408             if ( $types_to_go[ $iendm + 1 ] eq 'b' );
21409              
21410             }
21411             }
21412              
21413             # Case 3: $i>$ibeg: token is mid-line - use length to previous token
21414             else {
21415              
21416 12         51 $actual_pos = total_line_length( $ibeg, $i - 1 );
21417              
21418             # for mid-line token, we must check to see if all
21419             # additional lines have continuation indentation,
21420             # and remove it if so. Otherwise, we do not get
21421             # good alignment.
21422 12 100       41 if ( $closing_index > $iend ) {
21423 10         37 my $ibeg_next = $ri_first->[ $line + 1 ];
21424 10 100       34 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
21425 9         29 $self->undo_lp_ci( $line, $i, $closing_index,
21426             $ri_first, $ri_last );
21427             }
21428             }
21429             }
21430              
21431             # By how many spaces (plus or minus) would we need to increase the
21432             # indentation to get alignment with the opening token?
21433 130         280 my $move_right = $actual_pos - $predicted_pos;
21434              
21435 130         194 if (DEBUG_CORRECT_LP) {
21436             my $tok = substr( $tokens_to_go[$i], 0, 8 );
21437             my $avail = $self->get_available_spaces_to_go($ibeg);
21438             print
21439             "CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
21440             }
21441              
21442             # nothing more to do if no error to correct (gnu2.t)
21443 130 100       356 if ( $move_right == 0 ) {
21444 52         232 $indentation->set_recoverable_spaces($move_right);
21445 52         121 next;
21446             }
21447              
21448             # Get any collapsed length defined for -xlp
21449             my $collapsed_length =
21450 78         173 $self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
21451 78 100       236 $collapsed_length = 0 unless ( defined($collapsed_length) );
21452              
21453 78         102 if (DEBUG_CORRECT_LP) {
21454             print
21455             "CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
21456             }
21457              
21458             # if we have not seen closure for this indentation in this batch,
21459             # and do not have a collapsed length estimate, we can only pass on
21460             # a request to the vertical aligner
21461 78 100 100     299 if ( $closing_index < 0 && !$collapsed_length ) {
21462 10         70 $indentation->set_recoverable_spaces($move_right);
21463 10         23 next;
21464             }
21465              
21466             # If necessary, look ahead to see if there is really any leading
21467             # whitespace dependent on this whitespace, and also find the
21468             # longest line using this whitespace. Since it is always safe to
21469             # move left if there are no dependents, we only need to do this if
21470             # we may have dependent nodes or need to move right.
21471              
21472 68         219 my $have_child = $indentation->get_have_child();
21473 68         130 my %saw_indentation;
21474 68         125 my $line_count = 1;
21475 68         206 $saw_indentation{$indentation} = $indentation;
21476              
21477             # How far can we move right before we hit the limit?
21478             # let $right_margen = the number of spaces that we can increase
21479             # the current indentation before hitting the maximum line length.
21480 68         110 my $right_margin = 0;
21481              
21482 68 100 100     225 if ( $have_child || $move_right > 0 ) {
21483 67         114 $have_child = 0;
21484              
21485             # include estimated collapsed length for incomplete containers
21486 67         102 my $max_length = 0;
21487 67 100       206 if ( $Kc > $K_to_go[$max_index_to_go] ) {
21488 3         5 $max_length = $collapsed_length + $predicted_pos;
21489             }
21490              
21491 67 100       183 if ( $i == $ibeg ) {
21492 61         147 my $length = total_line_length( $ibeg, $iend );
21493 61 100       191 if ( $length > $max_length ) { $max_length = $length }
  60         109  
21494             }
21495              
21496             # look ahead at the rest of the lines of this batch..
21497 67         208 foreach my $line_t ( $line + 1 .. $max_line ) {
21498 523         760 my $ibeg_t = $ri_first->[$line_t];
21499 523         691 my $iend_t = $ri_last->[$line_t];
21500 523 100       928 last if ( $closing_index <= $ibeg_t );
21501              
21502             # remember all different indentation objects
21503 463         636 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
21504 463         1168 $saw_indentation{$indentation_t} = $indentation_t;
21505 463         604 $line_count++;
21506              
21507             # remember longest line in the group
21508 463         707 my $length_t = total_line_length( $ibeg_t, $iend_t );
21509 463 100       1038 if ( $length_t > $max_length ) {
21510 96         205 $max_length = $length_t;
21511             }
21512             }
21513              
21514             $right_margin =
21515 67         201 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
21516             $max_length;
21517 67 50       190 if ( $right_margin < 0 ) { $right_margin = 0 }
  0         0  
21518             }
21519              
21520             my $first_line_comma_count =
21521 68         236 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
  541         1032  
21522 68         301 my $comma_count = $indentation->get_comma_count();
21523 68         225 my $arrow_count = $indentation->get_arrow_count();
21524              
21525             # This is a simple approximate test for vertical alignment:
21526             # if we broke just after an opening paren, brace, bracket,
21527             # and there are 2 or more commas in the first line,
21528             # and there are no '=>'s,
21529             # then we are probably vertically aligned. We could set
21530             # an exact flag in sub break_lists, but this is good
21531             # enough.
21532 68         180 my $indentation_count = keys %saw_indentation;
21533 68   66     375 my $is_vertically_aligned =
21534             ( $i == $ibeg
21535             && $first_line_comma_count > 1
21536             && $indentation_count == 1
21537             && ( $arrow_count == 0 || $arrow_count == $line_count ) );
21538              
21539             # Make the move if possible ..
21540 68 100 100     694 if (
      100        
      66        
      100        
      66        
      100        
21541              
21542             # we can always move left
21543             $move_right < 0
21544              
21545             # -xlp
21546              
21547             # incomplete container
21548             || ( $rOpts_extended_line_up_parentheses
21549             && $Kc > $K_to_go[$max_index_to_go] )
21550             || $closing_index < 0
21551              
21552             # but we should only move right if we are sure it will
21553             # not spoil vertical alignment
21554             || ( $comma_count == 0 )
21555             || ( $comma_count > 0 && !$is_vertically_aligned )
21556             )
21557             {
21558 62 100       154 my $move =
21559             ( $move_right <= $right_margin )
21560             ? $move_right
21561             : $right_margin;
21562              
21563 62         107 if (DEBUG_CORRECT_LP) {
21564             print
21565             "CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
21566             }
21567              
21568 62         208 foreach ( keys %saw_indentation ) {
21569 237         585 $saw_indentation{$_}
21570             ->permanently_decrease_available_spaces( -$move );
21571             }
21572             }
21573              
21574             # Otherwise, record what we want and the vertical aligner
21575             # will try to recover it.
21576             else {
21577 6         32 $indentation->set_recoverable_spaces($move_right);
21578             }
21579             } ## end loop over tokens in a line
21580             } ## end loop over lines
21581 134         406 return;
21582             } ## end sub correct_lp_indentation
21583              
21584             sub correct_lp_indentation_pass_1 {
21585 5     5 0 26 my ( $self, $ri_first, $ri_last, $ri_starting_one_line_block ) = @_;
21586              
21587             # So some of the one-line blocks may be too long when given -lp
21588             # indentation. We will fix that now if possible, using the list of these
21589             # closing block indexes.
21590              
21591 5         16 my @ilist = @{$ri_starting_one_line_block};
  5         26  
21592 5 50       28 return unless (@ilist);
21593              
21594 5         27 my $max_line = @{$ri_first} - 1;
  5         17  
21595 5         17 my $inext = shift(@ilist);
21596              
21597             # loop over lines, checking length of each with a one-line block
21598 5         18 my ( $ibeg, $iend );
21599 5         18 foreach my $line ( 0 .. $max_line ) {
21600 15         28 $iend = $ri_last->[$line];
21601 15 100       69 next if ( $inext > $iend );
21602 9         19 $ibeg = $ri_first->[$line];
21603              
21604             # This is just for lines with indentation objects (c098)
21605 9 100       43 my $excess =
21606             ref( $leading_spaces_to_go[$ibeg] )
21607             ? $self->excess_line_length( $ibeg, $iend )
21608             : 0;
21609              
21610 9 50       37 if ( $excess > 0 ) {
21611 0         0 my $available_spaces = $self->get_available_spaces_to_go($ibeg);
21612              
21613 0 0       0 if ( $available_spaces > 0 ) {
21614 0         0 my $delete_want = min( $available_spaces, $excess );
21615 0         0 my $deleted_spaces =
21616             $self->reduce_lp_indentation( $ibeg, $delete_want );
21617 0         0 $available_spaces = $self->get_available_spaces_to_go($ibeg);
21618             }
21619             }
21620              
21621             # skip forward to next one-line block to check
21622 9         30 while (@ilist) {
21623 4         7 $inext = shift @ilist;
21624 4 50       11 next if ( $inext <= $iend );
21625 4 50       13 last if ( $inext > $iend );
21626             }
21627 9 100       32 last if ( $inext <= $iend );
21628             }
21629 5         15 return;
21630             } ## end sub correct_lp_indentation_pass_1
21631              
21632             sub undo_lp_ci {
21633              
21634             # If there is a single, long parameter within parens, like this:
21635             #
21636             # $self->command( "/msg "
21637             # . $infoline->chan
21638             # . " You said $1, but did you know that it's square was "
21639             # . $1 * $1 . " ?" );
21640             #
21641             # we can remove the continuation indentation of the 2nd and higher lines
21642             # to achieve this effect, which is more pleasing:
21643             #
21644             # $self->command("/msg "
21645             # . $infoline->chan
21646             # . " You said $1, but did you know that it's square was "
21647             # . $1 * $1 . " ?");
21648              
21649 9     9 0 22 my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
21650             @_;
21651 9         14 my $max_line = @{$ri_first} - 1;
  9         16  
21652              
21653             # must be multiple lines
21654 9 50       22 return if ( $max_line <= $line_open );
21655              
21656 9         28 my $lev_start = $levels_to_go[$i_start];
21657 9         29 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
21658              
21659             # see if all additional lines in this container have continuation
21660             # indentation
21661 9         47 my $line_1 = 1 + $line_open;
21662 9         45 my $n = $line_open;
21663              
21664 9         26 while ( ++$n <= $max_line ) {
21665 9         20 my $ibeg = $ri_first->[$n];
21666 9         15 my $iend = $ri_last->[$n];
21667 9 50       24 if ( $ibeg eq $closing_index ) { $n--; last }
  0         0  
  0         0  
21668 9 50       23 return if ( $lev_start != $levels_to_go[$ibeg] );
21669 9 50       33 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
21670 0 0       0 last if ( $closing_index <= $iend );
21671             }
21672              
21673             # we can reduce the indentation of all continuation lines
21674 0         0 my $continuation_line_count = $n - $line_open;
21675 0         0 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
  0         0  
21676             (0) x ($continuation_line_count);
21677 0         0 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
21678 0         0 @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
  0         0  
21679 0         0 return;
21680             } ## end sub undo_lp_ci
21681              
21682             ################################################
21683             # CODE SECTION 10: Code to break long statements
21684             ################################################
21685              
21686 39     39   409 use constant DEBUG_BREAK_LINES => 0;
  39         132  
  39         39056  
21687              
21688             sub break_long_lines {
21689              
21690             #-----------------------------------------------------------
21691             # Break a batch of tokens into lines which do not exceed the
21692             # maximum line length.
21693             #-----------------------------------------------------------
21694              
21695 1113     1113 0 2802 my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
21696              
21697             # Input parameters:
21698             # $saw_good_break - a flag set by break_lists
21699             # $rcolon_list - ref to a list of all the ? and : tokens in the batch,
21700             # in order.
21701             # $rbond_strength_bias - small bond strength bias values set by break_lists
21702              
21703             # Output: returns references to the arrays:
21704             # @i_first
21705             # @i_last
21706             # which contain the indexes $i of the first and last tokens on each
21707             # line.
21708              
21709             # In addition, the array:
21710             # $forced_breakpoint_to_go[$i]
21711             # may be updated to be =1 for any index $i after which there must be
21712             # a break. This signals later routines not to undo the breakpoint.
21713              
21714             # Method:
21715             # This routine is called if a statement is longer than the maximum line
21716             # length, or if a preliminary scanning located desirable break points.
21717             # Sub break_lists has already looked at these tokens and set breakpoints
21718             # (in array $forced_breakpoint_to_go[$i]) where it wants breaks (for
21719             # example after commas, after opening parens, and before closing parens).
21720             # This routine will honor these breakpoints and also add additional
21721             # breakpoints as necessary to keep the line length below the maximum
21722             # requested. It bases its decision on where the 'bond strength' is
21723             # lowest.
21724              
21725 1113         2237 my @i_first = (); # the first index to output
21726 1113         2143 my @i_last = (); # the last index to output
21727 1113         1958 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
21728 1113 100       3208 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
  1         4  
21729              
21730             # Get the 'bond strengths' between tokens
21731 1113         4263 my $rbond_strength_to_go = $self->set_bond_strengths();
21732              
21733             # Add any comma bias set by break_lists
21734 1113 100       2250 if ( @{$rbond_strength_bias} ) {
  1113         3368  
21735 13         51 foreach my $item ( @{$rbond_strength_bias} ) {
  13         47  
21736 31         63 my ( $ii, $bias ) = @{$item};
  31         70  
21737 31 50 33     158 if ( $ii >= 0 && $ii <= $max_index_to_go ) {
21738 31         79 $rbond_strength_to_go->[$ii] += $bias;
21739             }
21740             else {
21741 0         0 if (DEVEL_MODE) {
21742             my $KK = $K_to_go[0];
21743             my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
21744             Fault(
21745             "Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n"
21746             );
21747             }
21748             }
21749             }
21750             }
21751              
21752 1113         2342 my $imin = 0;
21753 1113         2037 my $imax = $max_index_to_go;
21754 1113 50       3164 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
  0         0  
21755 1113 50       2995 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
  0         0  
21756              
21757 1113         1963 my $i_begin = $imin;
21758 1113         1943 my $last_break_strength = NO_BREAK;
21759 1113         1772 my $i_last_break = -1;
21760 1113         1759 my $line_count = 0;
21761              
21762             # see if any ?/:'s are in order
21763 1113         1748 my $colons_in_order = 1;
21764 1113         1981 my $last_tok = EMPTY_STRING;
21765 1113         1723 foreach ( @{$rcolon_list} ) {
  1113         2962  
21766 205 100       630 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
  9         26  
  9         28  
21767 196         411 $last_tok = $_;
21768             }
21769              
21770             # This is a sufficient but not necessary condition for colon chain
21771 1113   100     3269 my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
21772              
21773             #------------------------------------------
21774             # BEGINNING of main loop to set breakpoints
21775             # Keep iterating until we reach the end
21776             #------------------------------------------
21777 1113         4151 while ( $i_begin <= $imax ) {
21778              
21779             #------------------------------------------------------------------
21780             # Find the best next breakpoint based on token-token bond strengths
21781             #------------------------------------------------------------------
21782 3957         12114 my ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg ) =
21783             $self->break_lines_inner_loop(
21784              
21785             $i_begin,
21786             $i_last_break,
21787             $imax,
21788             $last_break_strength,
21789             $line_count,
21790             $rbond_strength_to_go,
21791             $saw_good_break,
21792              
21793             );
21794              
21795             # Now make any adjustments required by ternary breakpoint rules
21796 3957 100       6871 if ( @{$rcolon_list} ) {
  3957         10246  
21797              
21798 439         820 my $i_next_nonblank = $inext_to_go[$i_lowest];
21799              
21800             #-------------------------------------------------------
21801             # ?/: rule 1 : if a break here will separate a '?' on this
21802             # line from its closing ':', then break at the '?' instead.
21803             # But do not break a sequential chain of ?/: statements
21804             #-------------------------------------------------------
21805 439 100       1044 if ( !$is_colon_chain ) {
21806 383         974 foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
21807 1835 100       3564 next unless ( $tokens_to_go[$i] eq '?' );
21808              
21809             # do not break if statement is broken by side comment
21810             next
21811 66 50 33     457 if ( $tokens_to_go[$max_index_to_go] eq '#'
21812             && terminal_type_i( 0, $max_index_to_go ) !~
21813             /^[\;\}]$/ );
21814              
21815             # no break needed if matching : is also on the line
21816             next
21817 66 100 66     406 if ( defined( $mate_index_to_go[$i] )
21818             && $mate_index_to_go[$i] <= $i_next_nonblank );
21819              
21820 5         13 $i_lowest = $i;
21821 5 100       33 if ( $want_break_before{'?'} ) { $i_lowest-- }
  4         9  
21822 5         20 $i_next_nonblank = $inext_to_go[$i_lowest];
21823 5         11 last;
21824             }
21825             }
21826              
21827 439         809 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
21828              
21829             #-------------------------------------------------------------
21830             # ?/: rule 2 : if we break at a '?', then break at its ':'
21831             #
21832             # Note: this rule is also in sub break_lists to handle a break
21833             # at the start and end of a line (in case breaks are dictated
21834             # by side comments).
21835             #-------------------------------------------------------------
21836 439 100       1419 if ( $next_nonblank_type eq '?' ) {
    100          
21837 32         146 $self->set_closing_breakpoint($i_next_nonblank);
21838             }
21839             elsif ( $types_to_go[$i_lowest] eq '?' ) {
21840 4         13 $self->set_closing_breakpoint($i_lowest);
21841             }
21842             else {
21843             ## ok
21844             }
21845              
21846             #--------------------------------------------------------
21847             # ?/: rule 3 : if we break at a ':' then we save
21848             # its location for further work below. We may need to go
21849             # back and break at its '?'.
21850             #--------------------------------------------------------
21851 439 100       1273 if ( $next_nonblank_type eq ':' ) {
    100          
21852 88         240 push @i_colon_breaks, $i_next_nonblank;
21853             }
21854             elsif ( $types_to_go[$i_lowest] eq ':' ) {
21855 4         12 push @i_colon_breaks, $i_lowest;
21856             }
21857             else {
21858             ## ok
21859             }
21860              
21861             # here we should set breaks for all '?'/':' pairs which are
21862             # separated by this line
21863             }
21864              
21865             # guard against infinite loop (should never happen)
21866 3957 50       8361 if ( $i_lowest <= $i_last_break ) {
21867 0         0 DEVEL_MODE
21868             && Fault("i_lowest=$i_lowest <= i_last_break=$i_last_break\n");
21869 0         0 $i_lowest = $imax;
21870             }
21871              
21872             DEBUG_BREAK_LINES
21873 3957         5547 && print {*STDOUT}
21874             "BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
21875              
21876 3957         5702 $line_count++;
21877              
21878             # save this line segment, after trimming blanks at the ends
21879 3957 50       10557 push( @i_first,
21880             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
21881 3957 100       8593 push( @i_last,
21882             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
21883              
21884             # set a forced breakpoint at a container opening, if necessary, to
21885             # signal a break at a closing container. Excepting '(' for now.
21886 3957 100 100     16356 if (
      100        
21887             (
21888             $tokens_to_go[$i_lowest] eq '{'
21889             || $tokens_to_go[$i_lowest] eq '['
21890             )
21891             && !$forced_breakpoint_to_go[$i_lowest]
21892             )
21893             {
21894 10         48 $self->set_closing_breakpoint($i_lowest);
21895             }
21896              
21897             # get ready to find the next breakpoint
21898 3957         5992 $last_break_strength = $lowest_strength;
21899 3957         5537 $i_last_break = $i_lowest;
21900 3957         5907 $i_begin = $i_lowest + 1;
21901              
21902             # skip past a blank
21903 3957 100 100     14244 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
21904 2297         5096 $i_begin++;
21905             }
21906             }
21907              
21908             #-------------------------------------------------
21909             # END of main loop to set continuation breakpoints
21910             #-------------------------------------------------
21911              
21912             #-----------------------------------------------------------
21913             # ?/: rule 4 -- if we broke at a ':', then break at
21914             # corresponding '?' unless this is a chain of ?: expressions
21915             #-----------------------------------------------------------
21916 1113 100       3671 if (@i_colon_breaks) {
21917 49   100     347 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
21918 49 100       206 if ( !$is_chain ) {
21919 38         230 $self->do_colon_breaks( \@i_colon_breaks, \@i_first, \@i_last );
21920             }
21921             }
21922              
21923 1113         5518 return ( \@i_first, \@i_last, $rbond_strength_to_go );
21924             } ## end sub break_long_lines
21925              
21926             # small bond strength numbers to help break ties
21927 39     39   433 use constant TINY_BIAS => 0.0001;
  39         125  
  39         3126  
21928 39     39   361 use constant MAX_BIAS => 0.001;
  39         132  
  39         75519  
21929              
21930             sub break_lines_inner_loop {
21931              
21932             #-----------------------------------------------------------------
21933             # Find the best next breakpoint in index range ($i_begin .. $imax)
21934             # which, if possible, does not exceed the maximum line length.
21935             #-----------------------------------------------------------------
21936              
21937             my (
21938 3957     3957 0 9336 $self, #
21939              
21940             $i_begin,
21941             $i_last_break,
21942             $imax,
21943             $last_break_strength,
21944             $line_count,
21945             $rbond_strength_to_go,
21946             $saw_good_break,
21947              
21948             ) = @_;
21949              
21950             # Given:
21951             # $i_begin = first index of range
21952             # $i_last_break = index of previous break
21953             # $imax = last index of range
21954             # $last_break_strength = bond strength of last break
21955             # $line_count = number of output lines so far
21956             # $rbond_strength_to_go = ref to array of bond strengths
21957             # $saw_good_break = true if old line had a good breakpoint
21958              
21959             # Returns:
21960             # $i_lowest = index of best breakpoint
21961             # $lowest_strength = 'bond strength' at best breakpoint
21962             # $leading_alignment_type = special token type after break
21963             # $Msg = string of debug info
21964              
21965 3957         6421 my $Msg = EMPTY_STRING;
21966 3957         5674 my $strength = NO_BREAK;
21967 3957         5869 my $i_test = $i_begin - 1;
21968 3957         5696 my $i_lowest = -1;
21969 3957         6331 my $starting_sum = $summed_lengths_to_go[$i_begin];
21970 3957         5733 my $lowest_strength = NO_BREAK;
21971 3957         5749 my $leading_alignment_type = EMPTY_STRING;
21972 3957         8480 my $leading_spaces = leading_spaces_to_go($i_begin);
21973 3957         8483 my $maximum_line_length =
21974             $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
21975             DEBUG_BREAK_LINES
21976 3957         5369 && do {
21977             $Msg .= "updating leading spaces to be $leading_spaces at i=$i_begin\n";
21978             };
21979              
21980             # Do not separate an isolated bare word from an opening paren.
21981             # Alternate Fix #2 for issue b1299. This waits as long as possible
21982             # to make the decision.
21983             # Note for fix #c250: to keep line breaks unchanged under -extrude when
21984             # switching from 'i' to 'S' for subs, we would have to also check 'S', i.e.
21985             # =~/^[Si]$/. But this was never necessary at a sub signature, so we leave
21986             # it alone and allow the new version to be different for --extrude. For a
21987             # test file run perl527/signatures.t with --extrude.
21988 3957 50 66     13257 if ( $types_to_go[$i_begin] eq 'i'
21989             && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
21990             {
21991 0         0 my $i_next_nonblank = $inext_to_go[$i_begin];
21992 0 0       0 if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
21993 0         0 $rbond_strength_to_go->[$i_begin] = NO_BREAK;
21994             }
21995             }
21996              
21997             # Avoid a break which would strand a single punctuation
21998             # token. For example, we do not want to strand a leading
21999             # '.' which is followed by a long quoted string.
22000             # But note that we do want to do this with -extrude (l=1)
22001             # so please test any changes to this code on -extrude.
22002 3957 100 100     20626 if (
      100        
      100        
      100        
      100        
22003             ( $i_begin < $imax )
22004             && ( $tokens_to_go[$i_begin] eq $types_to_go[$i_begin] )
22005             && !$forced_breakpoint_to_go[$i_begin]
22006             && !(
22007              
22008             # Allow break after a closing eval brace. This is an
22009             # approximate way to simulate a forced breakpoint made in
22010             # Section B below. No differences have been found, but if
22011             # necessary the full logic of Section B could be used here
22012             # (see c165).
22013             $tokens_to_go[$i_begin] eq '}'
22014             && $block_type_to_go[$i_begin]
22015             && $block_type_to_go[$i_begin] eq 'eval'
22016             )
22017             && (
22018             (
22019             $leading_spaces +
22020             $summed_lengths_to_go[ $i_begin + 1 ] -
22021             $starting_sum
22022             ) < $maximum_line_length
22023             )
22024             )
22025             {
22026 521         1814 $i_test = min( $imax, $inext_to_go[$i_begin] ) - 1;
22027 521         790 DEBUG_BREAK_LINES && do {
22028             $Msg .= " :skip ahead at i=$i_test";
22029             };
22030             }
22031              
22032             #-------------------------------------------------------
22033             # Begin INNER_LOOP over the indexes in the _to_go arrays
22034             #-------------------------------------------------------
22035 3957         8635 while ( ++$i_test <= $imax ) {
22036 33287         47540 my $type = $types_to_go[$i_test];
22037 33287         45059 my $token = $tokens_to_go[$i_test];
22038 33287         44339 my $i_next_nonblank = $inext_to_go[$i_test];
22039 33287         44733 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
22040 33287         45863 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
22041 33287         44061 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
22042              
22043             #---------------------------------------------------------------
22044             # Section A: Get token-token strength and handle any adjustments
22045             #---------------------------------------------------------------
22046              
22047             # adjustments to the previous bond strength may have been made, and
22048             # we must keep the bond strength of a token and its following blank
22049             # the same;
22050 33287         43699 my $last_strength = $strength;
22051 33287         46194 $strength = $rbond_strength_to_go->[$i_test];
22052 33287 100       58383 if ( $type eq 'b' ) { $strength = $last_strength }
  10924         14985  
22053              
22054             # reduce strength a bit to break ties at an old comma breakpoint ...
22055 33287 100 100     84764 if (
      66        
      100        
      100        
      100        
22056              
22057             $old_breakpoint_to_go[$i_test]
22058              
22059             # Patch: limited to just commas to avoid blinking states
22060             && $type eq ','
22061              
22062             # which is a 'good' breakpoint, meaning ...
22063             # we don't want to break before it
22064             && !$want_break_before{$type}
22065              
22066             # and either we want to break before the next token
22067             # or the next token is not short (i.e. not a '*', '/' etc.)
22068             && $i_next_nonblank <= $imax
22069             && ( $want_break_before{$next_nonblank_type}
22070             || $token_lengths_to_go[$i_next_nonblank] > 2
22071             || $next_nonblank_type eq ','
22072             || $is_opening_type{$next_nonblank_type} )
22073             )
22074             {
22075 503         965 $strength -= TINY_BIAS;
22076 503         768 DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
22077             }
22078              
22079             # otherwise increase strength a bit if this token would be at the
22080             # maximum line length. This is necessary to avoid blinking
22081             # in the above example when the -iob flag is added.
22082             else {
22083 32784         50068 my $len =
22084             $leading_spaces +
22085             $summed_lengths_to_go[ $i_test + 1 ] -
22086             $starting_sum;
22087 32784 100       56441 if ( $len >= $maximum_line_length ) {
22088 323         561 $strength += TINY_BIAS;
22089 323         532 DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
22090             }
22091             }
22092              
22093             #-------------------------------------
22094             # Section B: Handle forced breakpoints
22095             #-------------------------------------
22096 33287         41846 my $must_break;
22097              
22098             # Force an immediate break at certain operators
22099             # with lower level than the start of the line,
22100             # unless we've already seen a better break.
22101             #
22102             # Note on an issue with a preceding '?' :
22103              
22104             # There may be a break at a previous ? if the line is long. Because
22105             # of this we do not want to force a break if there is a previous ? on
22106             # this line. For now the best way to do this is to not break if we
22107             # have seen a lower strength point, which is probably a ?.
22108             #
22109             # Example of unwanted breaks we are avoiding at a '.' following a ?
22110             # from pod2html using perltidy -gnu:
22111             # )
22112             # ? "\n&lt;A NAME=\""
22113             # . $value
22114             # . "\"&gt;\n$text&lt;/A&gt;\n"
22115             # : "\n$type$pod2.html\#" . $value . "\"&gt;$text&lt;\/A&gt;\n";
22116 33287 100 100     88885 if (
      100        
      100        
22117             ( $strength <= $lowest_strength )
22118             && ( $nesting_depth_to_go[$i_begin] >
22119             $nesting_depth_to_go[$i_next_nonblank] )
22120             && (
22121             $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
22122             || (
22123             $next_nonblank_type eq 'k'
22124              
22125             ## /^(and|or)$/ # note: includes 'xor' now
22126             && $is_and_or{$next_nonblank_token}
22127             )
22128             )
22129             )
22130             {
22131 28         114 $self->set_forced_breakpoint($i_next_nonblank);
22132             DEBUG_BREAK_LINES
22133 28         65 && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
22134             }
22135              
22136 33287 100 100     167994 if (
      100        
      66        
      100        
      66        
      100        
      100        
      100        
      100        
      100        
22137              
22138             # Try to put a break where requested by break_lists
22139             $forced_breakpoint_to_go[$i_test]
22140              
22141             # break between ) { in a continued line so that the '{' can
22142             # be outdented
22143             # See similar logic in break_lists which catches instances
22144             # where a line is just something like ') {'. We have to
22145             # be careful because the corresponding block keyword might
22146             # not be on the first line, such as 'for' here:
22147             #
22148             # eval {
22149             # for ("a") {
22150             # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
22151             # }
22152             # };
22153             #
22154             || (
22155             $line_count
22156             && ( $token eq ')' )
22157             && ( $next_nonblank_type eq '{' )
22158             && ($next_nonblank_block_type)
22159             && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
22160              
22161             # RT #104427: Dont break before opening sub brace because
22162             # sub block breaks handled at higher level, unless
22163             # it looks like the preceding list is long and broken
22164             && !(
22165              
22166             (
22167             $next_nonblank_block_type =~ /$SUB_PATTERN/
22168             || $matches_ASUB{$next_nonblank_block_type}
22169             )
22170             && ( $nesting_depth_to_go[$i_begin] ==
22171             $nesting_depth_to_go[$i_next_nonblank] )
22172             )
22173              
22174             && !$rOpts_opening_brace_always_on_right
22175             )
22176              
22177             # There is an implied forced break at a terminal opening brace
22178             || ( ( $type eq '{' ) && ( $i_test == $imax ) )
22179             )
22180             {
22181              
22182             # Forced breakpoints must sometimes be overridden, for example
22183             # because of a side comment causing a NO_BREAK. It is easier
22184             # to catch this here than when they are set.
22185 2707 50       6830 if ( $strength < NO_BREAK - 1 ) {
22186 2707         4355 $strength = $lowest_strength - TINY_BIAS;
22187 2707         3989 $must_break = 1;
22188             DEBUG_BREAK_LINES
22189 2707         3781 && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
22190             }
22191             }
22192              
22193             # quit if a break here would put a good terminal token on
22194             # the next line and we already have a possible break
22195 33287 100 100     106609 if (
      100        
      100        
22196             ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
22197             && !$must_break
22198             && (
22199             (
22200             $leading_spaces +
22201             $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
22202             $starting_sum
22203             ) > $maximum_line_length
22204             )
22205             )
22206             {
22207 45 100       171 if ( $i_lowest >= 0 ) {
22208 11         30 DEBUG_BREAK_LINES && do {
22209             $Msg .= " :quit at good terminal='$next_nonblank_type'";
22210             };
22211 11         23 last;
22212             }
22213             }
22214              
22215             #------------------------------------------------------------
22216             # Section C: Look for the lowest bond strength between tokens
22217             #------------------------------------------------------------
22218 33276 100 100     77775 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) {
22219              
22220             # break at previous best break if it would have produced
22221             # a leading alignment of certain common tokens, and it
22222             # is different from the latest candidate break
22223 14275 100       24289 if ($leading_alignment_type) {
22224 108         176 DEBUG_BREAK_LINES && do {
22225             $Msg .=
22226             " :last at leading_alignment='$leading_alignment_type'";
22227             };
22228 108         193 last;
22229             }
22230              
22231             # Force at least one breakpoint if old code had good
22232             # break It is only called if a breakpoint is required or
22233             # desired. This will probably need some adjustments
22234             # over time. A goal is to try to be sure that, if a new
22235             # side comment is introduced into formatted text, then
22236             # the same breakpoints will occur. scbreak.t
22237 14167 50 100     30661 if (
      100        
      66        
      100        
      66        
      66        
      33        
22238             $i_test == $imax # we are at the end
22239             && !$forced_breakpoint_count
22240             && $saw_good_break # old line had good break
22241             && $type =~ /^[#;\{]$/ # and this line ends in
22242             # ';' or side comment
22243             && $i_last_break < 0 # and we haven't made a break
22244             && $i_lowest >= 0 # and we saw a possible break
22245             && $i_lowest < $imax - 1 # (but not just before this ;)
22246             && $strength - $lowest_strength < 0.5 * WEAK # and it's good
22247             )
22248             {
22249              
22250 6         12 DEBUG_BREAK_LINES && do {
22251             $Msg .= " :last at good old break\n";
22252             };
22253 6         13 last;
22254             }
22255              
22256             # Do not skip past an important break point in a short final
22257             # segment. For example, without this check we would miss the
22258             # break at the final / in the following code:
22259             #
22260             # $depth_stop =
22261             # ( $tau * $mass_pellet * $q_0 *
22262             # ( 1. - exp( -$t_stop / $tau ) ) -
22263             # 4. * $pi * $factor * $k_ice *
22264             # ( $t_melt - $t_ice ) *
22265             # $r_pellet *
22266             # $t_stop ) /
22267             # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
22268             #
22269 14161 100 100     42929 if (
      66        
      100        
      100        
      66        
22270             $line_count > 2
22271             && $i_lowest >= 0 # and we saw a possible break
22272             && $i_lowest < $i_test
22273             && $i_test > $imax - 2
22274             && $nesting_depth_to_go[$i_begin] >
22275             $nesting_depth_to_go[$i_lowest]
22276             && $lowest_strength < $last_break_strength - .5 * WEAK
22277             )
22278             {
22279             # Make this break for math operators for now
22280 6         19 my $ir = $inext_to_go[$i_lowest];
22281 6         29 my $il = iprev_to_go($ir);
22282 6 100 100     97 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
22283             || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
22284             {
22285 3         5 DEBUG_BREAK_LINES && do {
22286             $Msg .= " :last-noskip_short";
22287             };
22288 3         6 last;
22289             }
22290             }
22291              
22292             # Update the minimum bond strength location
22293 14158         19579 $lowest_strength = $strength;
22294 14158         18037 $i_lowest = $i_test;
22295 14158 100       24277 if ($must_break) {
22296 2707         3783 DEBUG_BREAK_LINES && do {
22297             $Msg .= " :last-must_break";
22298             };
22299 2707         5071 last;
22300             }
22301              
22302             # set flags to remember if a break here will produce a
22303             # leading alignment of certain common tokens
22304 11451 100 100     39670 if ( $line_count > 0
      100        
22305             && $i_test < $imax
22306             && ( $lowest_strength - $last_break_strength <= MAX_BIAS ) )
22307             {
22308 3517         7875 my $i_last_end = iprev_to_go($i_begin);
22309 3517         6543 my $tok_beg = $tokens_to_go[$i_begin];
22310 3517         5094 my $type_beg = $types_to_go[$i_begin];
22311 3517 50 100     16039 if (
      66        
      66        
      66        
      100        
      33        
      66        
      33        
      100        
22312              
22313             # check for leading alignment of certain tokens
22314             (
22315             $tok_beg eq $next_nonblank_token
22316             && $is_chain_operator{$tok_beg}
22317             && ( $type_beg eq 'k'
22318             || $type_beg eq $tok_beg )
22319             && $nesting_depth_to_go[$i_begin] >=
22320             $nesting_depth_to_go[$i_next_nonblank]
22321             )
22322              
22323             || ( $tokens_to_go[$i_last_end] eq $token
22324             && $is_chain_operator{$token}
22325             && ( $type eq 'k' || $type eq $token )
22326             && $nesting_depth_to_go[$i_last_end] >=
22327             $nesting_depth_to_go[$i_test] )
22328             )
22329             {
22330 109         236 $leading_alignment_type = $next_nonblank_type;
22331             }
22332             }
22333             }
22334              
22335             #-----------------------------------------------------------
22336             # Section D: See if the maximum line length will be exceeded
22337             #-----------------------------------------------------------
22338              
22339             # Quit if there are no more tokens to test
22340 30452 100       51625 last if ( $i_test >= $imax );
22341              
22342             # Keep going if we have not reached the limit
22343 29805         46639 my $excess =
22344             $leading_spaces +
22345             $summed_lengths_to_go[ $i_test + 2 ] -
22346             $starting_sum -
22347             $maximum_line_length;
22348              
22349 29805 100       47237 if ( $excess < 0 ) {
    100          
22350 29227         56683 next;
22351             }
22352             elsif ( $excess == 0 ) {
22353              
22354             # To prevent blinkers we will avoid leaving a token exactly at
22355             # the line length limit unless it is the last token or one of
22356             # several "good" types.
22357             #
22358             # The following code was a blinker with -pbp before this
22359             # modification:
22360             # $last_nonblank_token eq '('
22361             # && $is_indirect_object_taker{ $paren_type
22362             # [$paren_depth] }
22363             # The issue causing the problem is that if the
22364             # term [$paren_depth] gets broken across a line then
22365             # the whitespace routine doesn't see both opening and closing
22366             # brackets and will format like '[ $paren_depth ]'. This
22367             # leads to an oscillation in length depending if we break
22368             # before the closing bracket or not.
22369 157 100 100     1454 if ( $i_test + 1 < $imax
      100        
22370             && $next_nonblank_type ne ','
22371             && !$is_closing_type{$next_nonblank_type} )
22372             {
22373             # too long
22374 115         252 DEBUG_BREAK_LINES && do {
22375             $Msg .= " :too_long";
22376             }
22377             }
22378             else {
22379 42         184 next;
22380             }
22381             }
22382             else {
22383             # too long
22384             }
22385              
22386             # a break here makes the line too long ...
22387              
22388 536         915 DEBUG_BREAK_LINES && do {
22389             my $ltok = $token;
22390             my $rtok =
22391             $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING;
22392             my $i_testp2 = $i_test + 2;
22393             if ( $i_testp2 > $max_index_to_go + 1 ) {
22394             $i_testp2 = $max_index_to_go + 1;
22395             }
22396             if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
22397             if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
22398             print {*STDOUT}
22399             "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] str=$strength $ltok $rtok\n";
22400             };
22401              
22402             # Exception: allow one extra terminal token after exceeding line length
22403             # if it would strand this token.
22404 536 100 100     2318 if ( $i_lowest == $i_test
      100        
      100        
      100        
22405             && $token_lengths_to_go[$i_test] > 1
22406             && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
22407             && $rOpts_fuzzy_line_length )
22408             {
22409 3         16 DEBUG_BREAK_LINES && do {
22410             $Msg .= " :do_not_strand next='$next_nonblank_type'";
22411             };
22412 3         10 next;
22413             }
22414              
22415             # Stop if here if we have a solution and the line will be too long
22416 533 100       1543 if ( $i_lowest >= 0 ) {
22417 475         687 DEBUG_BREAK_LINES && do {
22418             $Msg .=
22419             " :Done-too_long && i_lowest=$i_lowest at itest=$i_test, imax=$imax";
22420             };
22421 475         902 last;
22422             }
22423             }
22424              
22425             #-----------------------------------------------------
22426             # End INNER_LOOP over the indexes in the _to_go arrays
22427             #-----------------------------------------------------
22428              
22429             # Be sure we return an index in the range ($ibegin .. $imax).
22430             # We will break at imax if no other break was found.
22431 3957 50       9212 if ( $i_lowest < 0 ) { $i_lowest = $imax }
  0         0  
22432              
22433 3957         15433 return ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg );
22434             } ## end sub break_lines_inner_loop
22435              
22436             sub do_colon_breaks {
22437 38     38 0 154 my ( $self, $ri_colon_breaks, $ri_first, $ri_last ) = @_;
22438              
22439             # using a simple method for deciding if we are in a ?/: chain --
22440             # this is a chain if it has multiple ?/: pairs all in order;
22441             # otherwise not.
22442             # Note that if line starts in a ':' we count that above as a break
22443              
22444 38         95 my @insert_list = ();
22445 38         77 foreach ( @{$ri_colon_breaks} ) {
  38         117  
22446 65         143 my $i_question = $mate_index_to_go[$_];
22447 65 100       186 if ( defined($i_question) ) {
22448 57 100       1089 if ( $want_break_before{'?'} ) {
22449 56         136 $i_question = iprev_to_go($i_question);
22450             }
22451              
22452 57 50       219 if ( $i_question >= 0 ) {
22453 57         130 push @insert_list, $i_question;
22454             }
22455             }
22456 65         283 $self->insert_additional_breaks( \@insert_list, $ri_first, $ri_last );
22457             }
22458 38         107 return;
22459             } ## end sub do_colon_breaks
22460              
22461             ###########################################
22462             # CODE SECTION 11: Code to break long lists
22463             ###########################################
22464              
22465             { ## begin closure break_lists
22466              
22467             # These routines and variables are involved in finding good
22468             # places to break long lists.
22469              
22470 39     39   468 use constant DEBUG_BREAK_LISTS => 0;
  39         125  
  39         38272  
22471              
22472             my (
22473              
22474             $block_type,
22475             $current_depth,
22476             $depth,
22477             $i,
22478             $i_last_colon,
22479             $i_line_end,
22480             $i_line_start,
22481             $i_last_nonblank_token,
22482             $last_nonblank_block_type,
22483             $last_nonblank_token,
22484             $last_nonblank_type,
22485             $last_old_breakpoint_count,
22486             $minimum_depth,
22487             $next_nonblank_block_type,
22488             $next_nonblank_token,
22489             $next_nonblank_type,
22490             $old_breakpoint_count,
22491             $starting_breakpoint_count,
22492             $starting_depth,
22493             $token,
22494             $type,
22495             $type_sequence,
22496              
22497             );
22498              
22499             my (
22500              
22501             @breakpoint_stack,
22502             @breakpoint_undo_stack,
22503             @comma_index,
22504             @container_type,
22505             @identifier_count_stack,
22506             @index_before_arrow,
22507             @interrupted_list,
22508             @item_count_stack,
22509             @last_comma_index,
22510             @last_dot_index,
22511             @last_nonblank_type,
22512             @old_breakpoint_count_stack,
22513             @opening_structure_index_stack,
22514             @rfor_semicolon_list,
22515             @has_old_logical_breakpoints,
22516             @rand_or_list,
22517             @i_equals,
22518             @override_cab3,
22519             @type_sequence_stack,
22520              
22521             );
22522              
22523             # these arrays must retain values between calls
22524             my ( @has_broken_sublist, @dont_align, @want_comma_break );
22525              
22526             my $length_tol;
22527             my $lp_tol_boost;
22528              
22529             sub initialize_break_lists {
22530 561     561 0 2252 @dont_align = ();
22531 561         1656 @has_broken_sublist = ();
22532 561         1529 @want_comma_break = ();
22533              
22534             #---------------------------------------------------
22535             # Set tolerances to prevent formatting instabilities
22536             #---------------------------------------------------
22537              
22538             # Define tolerances to use when checking if closed
22539             # containers will fit on one line. This is necessary to avoid
22540             # formatting instability. The basic tolerance is based on the
22541             # following:
22542              
22543             # - Always allow for at least one extra space after a closing token so
22544             # that we do not strand a comma or semicolon. (oneline.t).
22545              
22546             # - Use an increased line length tolerance when -ci > -i to avoid
22547             # blinking states (case b923 and others).
22548 561         2531 $length_tol =
22549             1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
22550              
22551             # In addition, it may be necessary to use a few extra tolerance spaces
22552             # when -lp is used and/or when -xci is used. The history of this
22553             # so far is as follows:
22554              
22555             # FIX1: At least 3 characters were been found to be required for -lp
22556             # to fixes cases b1059 b1063 b1117.
22557              
22558             # FIX2: Further testing showed that we need a total of 3 extra spaces
22559             # when -lp is set for non-lists, and at least 2 spaces when -lp and
22560             # -xci are set.
22561             # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
22562             # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
22563             # b1165
22564              
22565             # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
22566             # 'find_token_starting_list' to go back before an initial blank space.
22567             # This fixed these three cases, and allowed the tolerances to be
22568             # reduced to continue to fix all other known cases of instability.
22569             # This gives the current tolerance formulation.
22570              
22571 561         1286 $lp_tol_boost = 0;
22572              
22573 561 100       2155 if ($rOpts_line_up_parentheses) {
22574              
22575             # boost tol for combination -lp -xci
22576 31 100       134 if ($rOpts_extended_continuation_indentation) {
22577 3         9 $lp_tol_boost = 2;
22578             }
22579              
22580             # boost tol for combination -lp and any -vtc > 0, but only for
22581             # non-list containers
22582             else {
22583 28         163 foreach ( keys %closing_vertical_tightness ) {
22584             next
22585 168 50       411 unless ( $closing_vertical_tightness{$_} );
22586 0         0 $lp_tol_boost = 1; # Fixes B1193;
22587 0         0 last;
22588             }
22589             }
22590             }
22591              
22592             # Define a level where list formatting becomes highly stressed and
22593             # needs to be simplified. Introduced for case b1262.
22594             # $list_stress_level = min($stress_level_alpha, $stress_level_beta + 2);
22595             # This is now '$high_stress_level'.
22596              
22597 561         1148 return;
22598             } ## end sub initialize_break_lists
22599              
22600             # routine to define essential variables when we go 'up' to
22601             # a new depth
22602             sub check_for_new_minimum_depth {
22603 2373     2373 0 5284 my ( $self, $depth_t, $seqno ) = @_;
22604 2373 50       5554 if ( $depth_t < $minimum_depth ) {
22605              
22606 2373         3784 $minimum_depth = $depth_t;
22607              
22608             # these arrays need not retain values between calls
22609 2373         4585 my $old_seqno = $type_sequence_stack[$depth_t];
22610 2373   100     8331 my $changed_seqno = !defined($old_seqno) || $old_seqno != $seqno;
22611 2373         4123 $type_sequence_stack[$depth_t] = $seqno;
22612 2373         4021 $override_cab3[$depth_t] = undef;
22613 2373 50 33     6602 if ( $rOpts_comma_arrow_breakpoints == 3 && $seqno ) {
22614 0         0 $override_cab3[$depth_t] = $self->[_roverride_cab3_]->{$seqno};
22615             }
22616 2373         4010 $breakpoint_stack[$depth_t] = $starting_breakpoint_count;
22617 2373         4383 $container_type[$depth_t] = EMPTY_STRING;
22618 2373         3857 $identifier_count_stack[$depth_t] = 0;
22619 2373         3795 $index_before_arrow[$depth_t] = -1;
22620 2373         3717 $interrupted_list[$depth_t] = 1;
22621 2373         3546 $item_count_stack[$depth_t] = 0;
22622 2373         3946 $last_nonblank_type[$depth_t] = EMPTY_STRING;
22623 2373         3716 $opening_structure_index_stack[$depth_t] = -1;
22624              
22625 2373         3732 $breakpoint_undo_stack[$depth_t] = undef;
22626 2373         3859 $comma_index[$depth_t] = undef;
22627 2373         3628 $last_comma_index[$depth_t] = undef;
22628 2373         3526 $last_dot_index[$depth_t] = undef;
22629 2373         3579 $old_breakpoint_count_stack[$depth_t] = undef;
22630 2373         3680 $has_old_logical_breakpoints[$depth_t] = 0;
22631 2373         5126 $rand_or_list[$depth_t] = [];
22632 2373         4323 $rfor_semicolon_list[$depth_t] = [];
22633 2373         3906 $i_equals[$depth_t] = -1;
22634              
22635             # these arrays must retain values between calls
22636 2373 100 100     9120 if ( $changed_seqno || !defined( $has_broken_sublist[$depth_t] ) ) {
22637 888         1767 $dont_align[$depth_t] = 0;
22638 888         1606 $has_broken_sublist[$depth_t] = 0;
22639 888         1872 $want_comma_break[$depth_t] = 0;
22640             }
22641             }
22642 2373         3868 return;
22643             } ## end sub check_for_new_minimum_depth
22644              
22645             # routine to decide which commas to break at within a container;
22646             # returns:
22647             # $bp_count = number of comma breakpoints set
22648             # $do_not_break_apart = a flag indicating if container need not
22649             # be broken open
22650             sub set_comma_breakpoints {
22651              
22652 543     543 0 1498 my ( $self, $dd, $rbond_strength_bias ) = @_;
22653 543         964 my $bp_count = 0;
22654 543         946 my $do_not_break_apart = 0;
22655              
22656             # anything to do?
22657 543 50       1555 if ( $item_count_stack[$dd] ) {
22658              
22659             # Do not break a list unless there are some non-line-ending commas.
22660             # This avoids getting different results with only non-essential
22661             # commas, and fixes b1192.
22662 543         1141 my $seqno = $type_sequence_stack[$dd];
22663              
22664             my $real_comma_count =
22665 543 50       2311 $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
22666              
22667             # handle commas not in containers...
22668 543 100       1757 if ( $dont_align[$dd] ) {
    100          
22669 40         230 $self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias );
22670             }
22671              
22672             # handle commas within containers...
22673             elsif ($real_comma_count) {
22674 497         875 my $fbc = $forced_breakpoint_count;
22675              
22676             # always open comma lists not preceded by keywords,
22677             # barewords, identifiers (that is, anything that doesn't
22678             # look like a function call)
22679             # c250: added new sub identifier type 'S'
22680 497         1850 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiUS]$/;
22681              
22682 497         7813 $self->table_maker(
22683             {
22684             depth => $dd,
22685             i_opening_paren => $opening_structure_index_stack[$dd],
22686             i_closing_paren => $i,
22687             item_count => $item_count_stack[$dd],
22688             identifier_count => $identifier_count_stack[$dd],
22689             rcomma_index => $comma_index[$dd],
22690             next_nonblank_type => $next_nonblank_type,
22691             list_type => $container_type[$dd],
22692             interrupted => $interrupted_list[$dd],
22693             rdo_not_break_apart => \$do_not_break_apart,
22694             must_break_open => $must_break_open,
22695             has_broken_sublist => $has_broken_sublist[$dd],
22696             }
22697             );
22698 497         2229 $bp_count = $forced_breakpoint_count - $fbc;
22699 497 100       1596 $do_not_break_apart = 0 if $must_break_open;
22700             }
22701             else {
22702             ## no real commas, nothing to do
22703             }
22704             }
22705 543         1412 return ( $bp_count, $do_not_break_apart );
22706             } ## end sub set_comma_breakpoints
22707              
22708             # These types are excluded at breakpoints to prevent blinking
22709             # Switched from excluded to included as part of fix for b1214
22710             my %is_uncontained_comma_break_included_type;
22711              
22712             BEGIN {
22713              
22714 39     39   407 my @q = qw< k R } ) ] Y Z U w i q Q .
22715             = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
22716 39         27345 @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
22717             } ## end BEGIN
22718              
22719             sub do_uncontained_comma_breaks {
22720              
22721             # Handle commas not in containers...
22722             # This is a catch-all routine for commas that we
22723             # don't know what to do with because the don't fall
22724             # within containers. We will bias the bond strength
22725             # to break at commas which ended lines in the input
22726             # file. This usually works better than just trying
22727             # to put as many items on a line as possible. A
22728             # downside is that if the input file is garbage it
22729             # won't work very well. However, the user can always
22730             # prevent following the old breakpoints with the
22731             # -iob flag.
22732 40     40 0 122 my ( $self, $dd, $rbond_strength_bias ) = @_;
22733              
22734             # Check added for issue c131; an error here would be due to an
22735             # error initializing @comma_index when entering depth $dd.
22736 40         80 if (DEVEL_MODE) {
22737             foreach my $ii ( @{ $comma_index[$dd] } ) {
22738             if ( $ii < 0 || $ii > $max_index_to_go ) {
22739             my $KK = $K_to_go[0];
22740             my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
22741             Fault(<<EOM);
22742             Bad comma index near line $lno: i=$ii must be between 0 and $max_index_to_go
22743             EOM
22744             }
22745             }
22746             }
22747              
22748 40         106 my $bias = -.01;
22749 40         90 my $old_comma_break_count = 0;
22750 40         85 foreach my $ii ( @{ $comma_index[$dd] } ) {
  40         121  
22751              
22752 89 100       258 if ( $old_breakpoint_to_go[$ii] ) {
22753 34         71 $old_comma_break_count++;
22754              
22755             # Store the bias info for use by sub set_bond_strength
22756 34         56 push @{$rbond_strength_bias}, [ $ii, $bias ];
  34         162  
22757              
22758             # reduce bias magnitude to force breaks in order
22759 34         83 $bias *= 0.99;
22760             }
22761             }
22762              
22763             # Also put a break before the first comma if
22764             # (1) there was a break there in the input, and
22765             # (2) there was exactly one old break before the first comma break
22766             # (3) OLD: there are multiple old comma breaks
22767             # (3) NEW: there are one or more old comma breaks (see return example)
22768             # (4) the first comma is at the starting level ...
22769             # ... fixes cases b064 b065 b068 b210 b747
22770             # (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
22771             # ... fixes b1220. If ci>0 we are in the middle of a snippet,
22772             # maybe because -boc has been forcing out previous lines.
22773              
22774             # For example, we will follow the user and break after
22775             # 'print' in this snippet:
22776             # print
22777             # "conformability (Not the same dimension)\n",
22778             # "\t", $have, " is ", text_unit($hu), "\n",
22779             # "\t", $want, " is ", text_unit($wu), "\n",
22780             # ;
22781             #
22782             # Another example, just one comma, where we will break after
22783             # the return:
22784             # return
22785             # $x * cos($a) - $y * sin($a),
22786             # $x * sin($a) + $y * cos($a);
22787              
22788             # Breaking a print statement:
22789             # print SAVEOUT
22790             # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
22791             # ( $? & 128 ) ? " -- core dumped" : "", "\n";
22792             #
22793             # But we will not force a break after the opening paren here
22794             # (causes a blinker):
22795             # $heap->{stream}->set_output_filter(
22796             # poe::filter::reference->new('myotherfreezer') ),
22797             # ;
22798             #
22799 40         122 my $i_first_comma = $comma_index[$dd]->[0];
22800 40         101 my $level_comma = $levels_to_go[$i_first_comma];
22801 40         109 my $ci_start = $ci_levels_to_go[0];
22802              
22803             # Here we want to use the value of ci before any -xci adjustment
22804 40 50 66     190 if ( $ci_start && $rOpts_extended_continuation_indentation ) {
22805 0         0 my $K0 = $K_to_go[0];
22806 0 0       0 if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
  0         0  
22807             }
22808 40 100 100     322 if ( !$ci_start
      100        
22809             && $old_breakpoint_to_go[$i_first_comma]
22810             && $level_comma == $levels_to_go[0] )
22811             {
22812 8         17 my $ibreak = -1;
22813 8         16 my $obp_count = 0;
22814 8         26 foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) {
22815 62 100       130 if ( $old_breakpoint_to_go[$ii] ) {
22816 3         13 $obp_count++;
22817 3 50       23 last if ( $obp_count > 1 );
22818 3 50       13 $ibreak = $ii
22819             if ( $levels_to_go[$ii] == $level_comma );
22820             }
22821             }
22822              
22823             # Changed rule from multiple old commas to just one here:
22824 8 50 66     56 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
      66        
22825             {
22826 3         9 my $ibreak_m = $ibreak;
22827 3 50       15 $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
22828 3 50       23 if ( $ibreak_m >= 0 ) {
22829              
22830             # In order to avoid blinkers we have to be fairly
22831             # restrictive:
22832              
22833             # OLD Rules:
22834             # Rule 1: Do not to break before an opening token
22835             # Rule 2: avoid breaking at ternary operators
22836             # (see b931, which is similar to the above print example)
22837             # Rule 3: Do not break at chain operators to fix case b1119
22838             # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
22839              
22840             # NEW Rule, replaced above rules after case b1214:
22841             # only break at one of the included types
22842              
22843             # Be sure to test any changes to these rules against runs
22844             # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
22845             # series.
22846 3         9 my $type_m = $types_to_go[$ibreak_m];
22847              
22848             # Switched from excluded to included for b1214. If necessary
22849             # the token could also be checked if type_m eq 'k'
22850 3 50       13 if ( $is_uncontained_comma_break_included_type{$type_m} ) {
22851              
22852             # Rule added to fix b1449:
22853             # Do not break before a '?' if -nbot is set
22854             # Otherwise, we may alternately arrive here and
22855             # set the break, or not, depending on the input.
22856 3         8 my $no_break;
22857 3         9 my $ibreak_p = $inext_to_go[$ibreak_m];
22858 3 50 33     22 if ( !$rOpts_break_at_old_ternary_breakpoints
22859             && $ibreak_p <= $max_index_to_go )
22860             {
22861 0         0 my $type_p = $types_to_go[$ibreak_p];
22862 0         0 $no_break = $type_p eq '?';
22863             }
22864              
22865 3 50       19 $self->set_forced_breakpoint($ibreak)
22866             if ( !$no_break );
22867             }
22868             }
22869             }
22870             }
22871 40         111 return;
22872             } ## end sub do_uncontained_comma_breaks
22873              
22874             my %is_logical_container;
22875             my %quick_filter;
22876              
22877             BEGIN {
22878 39     39   305 my @q = qw# if elsif unless while and or err not && | || ? : ! #;
22879 39         438 @is_logical_container{@q} = (1) x scalar(@q);
22880              
22881             # This filter will allow most tokens to skip past a section of code
22882 39         550 %quick_filter = %is_assignment;
22883 39         226 @q = qw# => . ; < > ~ #;
22884 39         193 push @q, ',';
22885 39         127 push @q, 'f'; # added for ';' for issue c154
22886 39         90827 @quick_filter{@q} = (1) x scalar(@q);
22887             } ## end BEGIN
22888              
22889             sub set_for_semicolon_breakpoints {
22890 2541     2541 0 5132 my ( $self, $dd ) = @_;
22891              
22892             # Set breakpoints for semicolons in C-style 'for' containers
22893 2541         3782 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
  2541         6093  
22894 9         23 $self->set_forced_breakpoint($_);
22895             }
22896 2541         4581 return;
22897             } ## end sub set_for_semicolon_breakpoints
22898              
22899             sub set_logical_breakpoints {
22900 69     69 0 231 my ( $self, $dd ) = @_;
22901              
22902             # Set breakpoints at logical operators
22903 69 50 100     559 if (
      66        
22904             $item_count_stack[$dd] == 0
22905             && $is_logical_container{ $container_type[$dd] }
22906              
22907             || $has_old_logical_breakpoints[$dd]
22908             )
22909             {
22910              
22911             # Look for breaks in this order:
22912             # 0 1 2 3
22913             # or and || &&
22914 69         208 foreach my $i ( 0 .. 3 ) {
22915 210 100       525 if ( $rand_or_list[$dd][$i] ) {
22916 42         91 foreach ( @{ $rand_or_list[$dd][$i] } ) {
  42         154  
22917 67         200 $self->set_forced_breakpoint($_);
22918             }
22919              
22920             # break at any 'if' and 'unless' too
22921 42         142 foreach ( @{ $rand_or_list[$dd][4] } ) {
  42         224  
22922 5         17 $self->set_forced_breakpoint($_);
22923             }
22924 42         149 $rand_or_list[$dd] = [];
22925 42         104 last;
22926             }
22927             }
22928             }
22929 69         143 return;
22930             } ## end sub set_logical_breakpoints
22931              
22932             sub is_unbreakable_container {
22933              
22934             # never break a container of one of these types
22935             # because bad things can happen (map1.t)
22936 1237     1237 0 2432 my $dd = shift;
22937 1237         6861 return $is_sort_map_grep{ $container_type[$dd] };
22938             } ## end sub is_unbreakable_container
22939              
22940             sub break_lists {
22941              
22942 1745     1745 0 3980 my ( $self, $is_long_line, $rbond_strength_bias ) = @_;
22943              
22944             #--------------------------------------------------------------------
22945             # This routine is called once per batch, if the batch is a list, to
22946             # set line breaks so that hierarchical structure can be displayed and
22947             # so that list items can be vertically aligned. The output of this
22948             # routine is stored in the array @forced_breakpoint_to_go, which is
22949             # used by sub 'break_long_lines' to set final breakpoints. This is
22950             # probably the most complex routine in perltidy, so I have
22951             # broken it into pieces and over-commented it.
22952             #--------------------------------------------------------------------
22953              
22954 1745         3214 $starting_depth = $nesting_depth_to_go[0];
22955              
22956 1745         3364 $block_type = SPACE;
22957 1745         2923 $current_depth = $starting_depth;
22958 1745         2782 $i = -1;
22959 1745         2692 $i_last_colon = -1;
22960 1745         2632 $i_line_end = -1;
22961 1745         2645 $i_line_start = -1;
22962 1745         3106 $last_nonblank_token = ';';
22963 1745         2928 $last_nonblank_type = ';';
22964 1745         3042 $last_nonblank_block_type = SPACE;
22965 1745         2716 $last_old_breakpoint_count = 0;
22966 1745         5939 $minimum_depth = $current_depth + 1; # forces update in check below
22967 1745         2673 $old_breakpoint_count = 0;
22968 1745         2846 $starting_breakpoint_count = $forced_breakpoint_count;
22969 1745         2832 $token = ';';
22970 1745         2913 $type = ';';
22971 1745         2763 $type_sequence = EMPTY_STRING;
22972              
22973 1745         2687 my $total_depth_variation = 0;
22974 1745         2661 my $i_old_assignment_break;
22975 1745         2753 my $depth_last = $starting_depth;
22976 1745         2750 my $comma_follows_last_closing_token;
22977              
22978 1745 50       8426 $self->check_for_new_minimum_depth( $current_depth,
22979             $parent_seqno_to_go[0] )
22980             if ( $current_depth < $minimum_depth );
22981              
22982 1745         2972 my $i_want_previous_break = -1;
22983              
22984 1745         2810 my $saw_good_breakpoint;
22985              
22986             #----------------------------------------
22987             # Main loop over all tokens in this batch
22988             #----------------------------------------
22989 1745         4848 while ( ++$i <= $max_index_to_go ) {
22990 34872 100       61644 if ( $type ne 'b' ) {
22991 22157         29462 $i_last_nonblank_token = $i - 1;
22992 22157         30320 $last_nonblank_type = $type;
22993 22157         29827 $last_nonblank_token = $token;
22994 22157         28525 $last_nonblank_block_type = $block_type;
22995             }
22996 34872         49206 $type = $types_to_go[$i];
22997 34872         47381 $block_type = $block_type_to_go[$i];
22998 34872         47381 $token = $tokens_to_go[$i];
22999 34872         46038 $type_sequence = $type_sequence_to_go[$i];
23000              
23001 34872         45290 my $i_next_nonblank = $inext_to_go[$i];
23002 34872         48179 $next_nonblank_type = $types_to_go[$i_next_nonblank];
23003 34872         47835 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
23004 34872         46279 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
23005              
23006             #-------------------------------------------
23007             # Loop Section A: Look for special breakpoints...
23008             #-------------------------------------------
23009              
23010             # set break if flag was set
23011 34872 100       58381 if ( $i_want_previous_break >= 0 ) {
23012 17         91 $self->set_forced_breakpoint($i_want_previous_break);
23013 17         37 $i_want_previous_break = -1;
23014             }
23015              
23016 34872         44425 $last_old_breakpoint_count = $old_breakpoint_count;
23017              
23018             # Check for a good old breakpoint ..
23019 34872 100       58669 if ( $old_breakpoint_to_go[$i] ) {
23020 2495         8567 ( $i_want_previous_break, $i_old_assignment_break ) =
23021             $self->examine_old_breakpoint( $i_next_nonblank,
23022             $i_want_previous_break, $i_old_assignment_break );
23023             }
23024              
23025 34872 100       66882 next if ( $type eq 'b' );
23026              
23027 22157         35082 $depth = $nesting_depth_to_go[ $i + 1 ];
23028              
23029 22157         31809 $total_depth_variation += abs( $depth - $depth_last );
23030 22157         28613 $depth_last = $depth;
23031              
23032             # safety check - be sure we always break after a comment
23033             # Shouldn't happen .. an error here probably means that the
23034             # nobreak flag did not get turned off correctly during
23035             # formatting.
23036 22157 100       38567 if ( $type eq '#' ) {
23037 134 50       474 if ( $i != $max_index_to_go ) {
23038 0         0 if (DEVEL_MODE) {
23039             Fault(<<EOM);
23040             Non-fatal program bug: backup logic required to break after a comment
23041             EOM
23042             }
23043 0         0 $nobreak_to_go[$i] = 0;
23044 0         0 $self->set_forced_breakpoint($i);
23045             } ## end if ( $i != $max_index_to_go)
23046             } ## end if ( $type eq '#' )
23047              
23048             # Force breakpoints at certain tokens in long lines.
23049             # Note that such breakpoints will be undone later if these tokens
23050             # are fully contained within parens on a line.
23051 22157 100 100     48224 if (
      100        
      66        
      66        
      100        
      66        
      66        
      66        
23052              
23053             # break before a keyword within a line
23054             $type eq 'k'
23055             && $i > 0
23056              
23057             # if one of these keywords:
23058             && $is_if_unless_while_until_for_foreach{$token}
23059              
23060             # but do not break at something like '1 while'
23061             && ( $last_nonblank_type ne 'n' || $i > 2 )
23062              
23063             # and let keywords follow a closing 'do' brace
23064             && ( !$last_nonblank_block_type
23065             || $last_nonblank_block_type ne 'do' )
23066              
23067             && (
23068             $is_long_line
23069              
23070             # or container is broken (by side-comment, etc)
23071             || (
23072             $next_nonblank_token eq '('
23073             && ( !defined( $mate_index_to_go[$i_next_nonblank] )
23074             || $mate_index_to_go[$i_next_nonblank] < $i )
23075             )
23076             )
23077             )
23078             {
23079 8         49 $self->set_forced_breakpoint( $i - 1 );
23080             }
23081              
23082             # remember locations of '||' and '&&' for possible breaks if we
23083             # decide this is a long logical expression.
23084 22157 100       70463 if ( $type eq '||' ) {
    100          
    100          
    100          
    100          
23085 61         169 push @{ $rand_or_list[$depth][2] }, $i;
  61         221  
23086 61 100 100     393 ++$has_old_logical_breakpoints[$depth]
      66        
23087             if ( ( $i == $i_line_start || $i == $i_line_end )
23088             && $rOpts_break_at_old_logical_breakpoints );
23089             }
23090             elsif ( $type eq '&&' ) {
23091 55         131 push @{ $rand_or_list[$depth][3] }, $i;
  55         164  
23092 55 100 100     346 ++$has_old_logical_breakpoints[$depth]
      100        
23093             if ( ( $i == $i_line_start || $i == $i_line_end )
23094             && $rOpts_break_at_old_logical_breakpoints );
23095             }
23096             elsif ( $type eq 'f' ) {
23097 28         77 push @{ $rfor_semicolon_list[$depth] }, $i;
  28         67  
23098             }
23099             elsif ( $type eq 'k' ) {
23100 1374 100 100     7495 if ( $token eq 'and' ) {
    100          
    100          
23101 44         82 push @{ $rand_or_list[$depth][1] }, $i;
  44         119  
23102 44 100 66     242 ++$has_old_logical_breakpoints[$depth]
      66        
23103             if ( ( $i == $i_line_start || $i == $i_line_end )
23104             && $rOpts_break_at_old_logical_breakpoints );
23105             }
23106              
23107             # break immediately at 'or's which are probably not in a logical
23108             # block -- but we will break in logical breaks below so that
23109             # they do not add to the forced_breakpoint_count
23110             elsif ( $token eq 'or' ) {
23111 40         137 push @{ $rand_or_list[$depth][0] }, $i;
  40         172  
23112 40 100 100     287 ++$has_old_logical_breakpoints[$depth]
      66        
23113             if ( ( $i == $i_line_start || $i == $i_line_end )
23114             && $rOpts_break_at_old_logical_breakpoints );
23115 40 100       185 if ( $is_logical_container{ $container_type[$depth] } ) {
23116             }
23117             else {
23118 31 100 100     172 if ($is_long_line) { $self->set_forced_breakpoint($i) }
  16 100 66     54  
23119             elsif ( ( $i == $i_line_start || $i == $i_line_end )
23120             && $rOpts_break_at_old_logical_breakpoints )
23121             {
23122 4         10 $saw_good_breakpoint = 1;
23123             }
23124             else {
23125             ## not a good break
23126             }
23127             }
23128             }
23129             elsif ( $token eq 'if' || $token eq 'unless' ) {
23130 120         273 push @{ $rand_or_list[$depth][4] }, $i;
  120         530  
23131 120 100 66     981 if ( ( $i == $i_line_start || $i == $i_line_end )
      66        
23132             && $rOpts_break_at_old_logical_breakpoints )
23133             {
23134 7         40 $self->set_forced_breakpoint($i);
23135             }
23136             }
23137             else {
23138             ## not one of: 'and' 'or' 'if' 'unless'
23139             }
23140             }
23141             elsif ( $is_assignment{$type} ) {
23142 506         1508 $i_equals[$depth] = $i;
23143             }
23144             else {
23145             ## not a good breakpoint type
23146             }
23147              
23148             #-----------------------------------------
23149             # Loop Section B: Handle a sequenced token
23150             #-----------------------------------------
23151 22157 100       38449 if ($type_sequence) {
23152 6140         15557 $self->break_lists_type_sequence;
23153             }
23154              
23155             #------------------------------------------
23156             # Loop Section C: Handle Increasing Depth..
23157             #------------------------------------------
23158              
23159             # hardened against bad input syntax: depth jump must be 1 and type
23160             # must be opening..fixes c102
23161 22157 100 66     71209 if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
    100 66        
23162 3022         7273 $self->break_lists_increasing_depth();
23163             }
23164              
23165             #------------------------------------------
23166             # Loop Section D: Handle Decreasing Depth..
23167             #------------------------------------------
23168              
23169             # hardened against bad input syntax: depth jump must be 1 and type
23170             # must be closing .. fixes c102
23171             elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
23172              
23173 2858         8709 $self->break_lists_decreasing_depth();
23174              
23175 2858   100     8866 $comma_follows_last_closing_token =
23176             $next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
23177              
23178             }
23179             else {
23180             ## not a depth change
23181             }
23182              
23183             #----------------------------------
23184             # Loop Section E: Handle this token
23185             #----------------------------------
23186              
23187 22157         30545 $current_depth = $depth;
23188              
23189             # most token types can skip the rest of this loop
23190 22157 100       55369 next if ( !$quick_filter{$type} );
23191              
23192             # Turn off comma alignment if we are sure that this is not a list
23193             # environment. To be safe, we will do this if we see certain
23194             # non-list tokens, such as ';', '=', and also the environment is
23195             # not a list.
23196             ## $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type}
23197 4952 100       14804 if ( $is_non_list_type{$type} ) {
    100          
    100          
    50          
23198 1456 100       5090 if ( !$self->is_in_list_by_i($i) ) {
23199 1446         2779 $dont_align[$depth] = 1;
23200 1446         2528 $want_comma_break[$depth] = 0;
23201 1446         2345 $index_before_arrow[$depth] = -1;
23202              
23203             # no special comma breaks in C-style 'for' terms (c154)
23204 1446 100       4646 if ( $type eq 'f' ) { $last_comma_index[$depth] = undef }
  28         85  
23205             }
23206             }
23207              
23208             # handle any commas
23209             elsif ( $type eq ',' ) {
23210 2396         5740 $self->study_comma($comma_follows_last_closing_token);
23211             }
23212              
23213             # handle comma-arrow
23214             elsif ( $type eq '=>' ) {
23215 984 50       2254 next if ( $last_nonblank_type eq '=>' );
23216 984 100       2119 next if $rOpts_break_at_old_comma_breakpoints;
23217             next
23218 978 50 33     2552 if ( $rOpts_comma_arrow_breakpoints == 3
23219             && !defined( $override_cab3[$depth] ) );
23220 978         1619 $want_comma_break[$depth] = 1;
23221 978         1540 $index_before_arrow[$depth] = $i_last_nonblank_token;
23222 978         2115 next;
23223             }
23224              
23225             elsif ( $type eq '.' ) {
23226 116         289 $last_dot_index[$depth] = $i;
23227             }
23228              
23229             else {
23230              
23231             # error : no code to handle a type in %quick_filter
23232 0         0 DEVEL_MODE && Fault(<<EOM);
23233             Missing code to handle token type '$type' which is in the quick_filter
23234             EOM
23235             }
23236              
23237             } ## end while ( ++$i <= $max_index_to_go)
23238              
23239             #-------------------------------------------
23240             # END of loop over all tokens in this batch
23241             # Now set breaks for any unfinished lists ..
23242             #-------------------------------------------
23243              
23244 1745         5862 foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) {
23245              
23246 2537         4619 $interrupted_list[$dd] = 1;
23247 2537 100       5861 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
23248 2537 100       5644 $self->set_comma_breakpoints( $dd, $rbond_strength_bias )
23249             if ( $item_count_stack[$dd] );
23250 2537 100       5597 $self->set_logical_breakpoints($dd)
23251             if ( $has_old_logical_breakpoints[$dd] );
23252 2537         7656 $self->set_for_semicolon_breakpoints($dd);
23253              
23254             # break open container...
23255 2537         4010 my $i_opening = $opening_structure_index_stack[$dd];
23256 2537 100 66     10500 if ( defined($i_opening) && $i_opening >= 0 ) {
23257              
23258 792   66     2346 my $no_break = (
23259             is_unbreakable_container($dd)
23260              
23261             # Avoid a break which would place an isolated ' or "
23262             # on a line
23263             || ( $type eq 'Q'
23264             && $i_opening >= $max_index_to_go - 2
23265             && ( $token eq "'" || $token eq '"' ) )
23266             );
23267              
23268 792 100       3078 $self->set_forced_breakpoint($i_opening)
23269             if ( !$no_break );
23270             }
23271             } ## end for ( my $dd = $current_depth...)
23272              
23273             #----------------------------------------
23274             # Return the flag '$saw_good_breakpoint'.
23275             #----------------------------------------
23276             # This indicates if the input file had some good breakpoints. This
23277             # flag will be used to force a break in a line shorter than the
23278             # allowed line length.
23279 1745 100 100     8424 if ( $has_old_logical_breakpoints[$current_depth] ) {
    100 100        
      66        
23280 31         74 $saw_good_breakpoint = 1;
23281             }
23282              
23283             # A complex line with one break at an = has a good breakpoint.
23284             # This is not complex ($total_depth_variation=0):
23285             # $res1
23286             # = 10;
23287             #
23288             # This is complex ($total_depth_variation=6):
23289             # $res2 =
23290             # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
23291              
23292             # The check ($i_old_.. < $max_index_to_go) was added to fix b1333
23293             elsif ($i_old_assignment_break
23294             && $total_depth_variation > 4
23295             && $old_breakpoint_count == 1
23296             && $i_old_assignment_break < $max_index_to_go )
23297             {
23298 12         27 $saw_good_breakpoint = 1;
23299             }
23300             else {
23301             ## not a good breakpoint
23302             }
23303              
23304 1745         3890 return $saw_good_breakpoint;
23305             } ## end sub break_lists
23306              
23307             sub study_comma {
23308              
23309             # study and store info for a list comma
23310              
23311 2396     2396 0 4797 my ( $self, $comma_follows_last_closing_token ) = @_;
23312              
23313 2396         3880 $last_dot_index[$depth] = undef;
23314 2396         3808 $last_comma_index[$depth] = $i;
23315              
23316             # break here if this comma follows a '=>'
23317             # but not if there is a side comment after the comma
23318 2396 100       5002 if ( $want_comma_break[$depth] ) {
23319              
23320 610 100       2614 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
23321 145 50       549 if ($rOpts_comma_arrow_breakpoints) {
23322 145         365 $want_comma_break[$depth] = 0;
23323 145         392 return;
23324             }
23325             }
23326              
23327 465 50       2044 $self->set_forced_breakpoint($i)
23328             unless ( $next_nonblank_type eq '#' );
23329              
23330             # break before the previous token if it looks safe
23331             # Example of something that we will not try to break before:
23332             # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
23333             # Also we don't want to break at a binary operator (like +):
23334             # $c->createOval(
23335             # $x + $R, $y +
23336             # $R => $x - $R,
23337             # $y - $R, -fill => 'black',
23338             # );
23339 465         1112 my $ibreak = $index_before_arrow[$depth] - 1;
23340 465 100 66     2525 if ( $ibreak > 0
23341             && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
23342             {
23343 460 100       1246 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
  142         269  
23344 460 100       1130 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
  451         761  
23345 460 100       1680 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
23346              
23347             # don't break before a comma, as in the following:
23348             # ( LONGER_THAN,=> 1,
23349             # EIGHTY_CHARACTERS,=> 2,
23350             # CAUSES_FORMATTING,=> 3,
23351             # LIKE_THIS,=> 4,
23352             # );
23353             # This example is for -tso but should be general rule
23354 453 50 33     1962 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
23355             && $tokens_to_go[ $ibreak + 1 ] ne ',' )
23356             {
23357 453         1085 $self->set_forced_breakpoint($ibreak);
23358             }
23359             }
23360             }
23361              
23362 465         940 $want_comma_break[$depth] = 0;
23363 465         875 $index_before_arrow[$depth] = -1;
23364              
23365             # handle list which mixes '=>'s and ','s:
23366             # treat any list items so far as an interrupted list
23367 465         770 $interrupted_list[$depth] = 1;
23368 465         1280 return;
23369             }
23370              
23371             # Break after all commas above starting depth...
23372             # But only if the last closing token was followed by a comma,
23373             # to avoid breaking a list operator (issue c119)
23374 1786 100 100     4486 if ( $depth < $starting_depth
      100        
23375             && $comma_follows_last_closing_token
23376             && !$dont_align[$depth] )
23377             {
23378 8 50       54 $self->set_forced_breakpoint($i)
23379             unless ( $next_nonblank_type eq '#' );
23380 8         24 return;
23381             }
23382              
23383             # add this comma to the list..
23384 1778         2791 my $item_count = $item_count_stack[$depth];
23385 1778 100       3730 if ( $item_count == 0 ) {
23386              
23387             # but do not form a list with no opening structure
23388             # for example:
23389              
23390             # open INFILE_COPY, ">$input_file_copy"
23391             # or die ("very long message");
23392 543 100 100     2137 if ( ( $opening_structure_index_stack[$depth] < 0 )
23393             && $self->is_in_block_by_i($i) )
23394             {
23395 29         76 $dont_align[$depth] = 1;
23396             }
23397             }
23398              
23399 1778         3453 $comma_index[$depth][$item_count] = $i;
23400 1778         2644 ++$item_count_stack[$depth];
23401 1778 100       5657 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
23402 411         759 $identifier_count_stack[$depth]++;
23403             }
23404 1778         3941 return;
23405             } ## end sub study_comma
23406              
23407             my %poor_types;
23408             my %poor_keywords;
23409             my %poor_next_types;
23410             my %poor_next_keywords;
23411              
23412             BEGIN {
23413              
23414             # Setup filters for detecting very poor breaks to ignore.
23415             # b1097: old breaks after type 'L' and before 'R' are poor
23416             # b1450: old breaks at 'eq' and related operators are poor
23417 39     39   310 my @q = qw(== <= >= !=);
23418              
23419 39         209 @{poor_types}{@q} = (1) x scalar(@q);
23420 39         144 @{poor_next_types}{@q} = (1) x scalar(@q);
23421 39         121 $poor_types{'L'} = 1;
23422 39         99 $poor_next_types{'R'} = 1;
23423              
23424 39         191 @q = qw(eq ne le ge lt gt);
23425 39         328 @{poor_keywords}{@q} = (1) x scalar(@q);
23426 39         103138 @{poor_next_keywords}{@q} = (1) x scalar(@q);
23427             } ## end BEGIN
23428              
23429             sub examine_old_breakpoint {
23430              
23431 2495     2495 0 5729 my ( $self, $i_next_nonblank, $i_want_previous_break,
23432             $i_old_assignment_break )
23433             = @_;
23434              
23435             # Look at an old breakpoint and set/update certain flags:
23436              
23437             # Given indexes of three tokens in this batch:
23438             # $i_next_nonblank - index of the next nonblank token
23439             # $i_want_previous_break - we want a break before this index
23440             # $i_old_assignment_break - the index of an '=' or equivalent
23441             # Update:
23442             # $old_breakpoint_count - a counter to increment unless poor break
23443             # Update and return:
23444             # $i_want_previous_break
23445             # $i_old_assignment_break
23446              
23447             #-----------------------
23448             # Filter out poor breaks
23449             #-----------------------
23450             # Just return if this is a poor break and pretend it does not exist.
23451             # Otherwise, poor breaks made under stress can cause instability.
23452 2495         3646 my $poor_break;
23453 2495 100 33     5265 if ( $type eq 'k' ) { $poor_break ||= $poor_keywords{$token} }
  29         266  
23454 2466   66     8778 else { $poor_break ||= $poor_types{$type} }
23455              
23456 2495 100       5336 if ( $next_nonblank_type eq 'k' ) {
23457 150   33     749 $poor_break ||= $poor_next_keywords{$next_nonblank_token};
23458             }
23459 2345   66     7464 else { $poor_break ||= $poor_next_types{$next_nonblank_type} }
23460              
23461             # Also ignore any high stress level breaks; fixes b1395
23462 2495   100     9978 $poor_break ||= $levels_to_go[$i] >= $high_stress_level;
23463 2495 100       5075 if ($poor_break) { goto RETURN }
  6         29  
23464              
23465             #--------------------------------------------
23466             # Not a poor break, so continue to examine it
23467             #--------------------------------------------
23468 2489         3614 $old_breakpoint_count++;
23469 2489         3739 $i_line_end = $i;
23470 2489         3769 $i_line_start = $i_next_nonblank;
23471              
23472             #---------------------------------------
23473             # Do we want to break before this token?
23474             #---------------------------------------
23475              
23476             # Break before certain keywords if user broke there and
23477             # this is a 'safe' break point. The idea is to retain
23478             # any preferred breaks for sequential list operations,
23479             # like a schwartzian transform.
23480 2489 100       6236 if ($rOpts_break_at_old_keyword_breakpoints) {
23481 2487 50 100     6420 if (
      66        
      66        
23482             $next_nonblank_type eq 'k'
23483             && $is_keyword_returning_list{$next_nonblank_token}
23484             && ( $type =~ /^[=\)\]\}Riw]$/
23485             || $type eq 'k' && $is_keyword_returning_list{$token} )
23486             )
23487             {
23488              
23489             # we actually have to set this break next time through
23490             # the loop because if we are at a closing token (such
23491             # as '}') which forms a one-line block, this break might
23492             # get undone.
23493              
23494             # But do not do this at an '=' if:
23495             # - the user wants breaks before an equals (b434 b903)
23496             # - or -naws is set (can be unstable, see b1354)
23497             my $skip = $type eq '='
23498 12   66     71 && ( $want_break_before{$type}
23499             || !$rOpts_add_whitespace );
23500              
23501 12 50       44 $i_want_previous_break = $i
23502             unless ($skip);
23503              
23504             }
23505             }
23506              
23507             # Break before attributes if user broke there
23508 2489 100       5160 if ($rOpts_break_at_old_attribute_breakpoints) {
23509 2485 100       5479 if ( $next_nonblank_type eq 'A' ) {
23510 5         9 $i_want_previous_break = $i;
23511             }
23512             }
23513              
23514             #---------------------------------
23515             # Is this an old assignment break?
23516             #---------------------------------
23517 2489 100       6811 if ( $is_assignment{$type} ) {
    50          
23518 73         203 $i_old_assignment_break = $i;
23519             }
23520             elsif ( $is_assignment{$next_nonblank_type} ) {
23521 0         0 $i_old_assignment_break = $i_next_nonblank;
23522             }
23523             else {
23524             ## not old assignment break
23525             }
23526              
23527 2495         5579 RETURN:
23528             return ( $i_want_previous_break, $i_old_assignment_break );
23529             } ## end sub examine_old_breakpoint
23530              
23531             sub break_lists_type_sequence {
23532              
23533 6140     6140 0 11317 my ($self) = @_;
23534              
23535             # We have encountered a sequenced token while setting list breakpoints
23536              
23537             # if closing type, one of } ) ] :
23538 6140 100       13895 if ( $is_closing_sequence_token{$token} ) {
23539              
23540 2988 100       8130 if ( $type eq ':' ) {
23541 130         345 $i_last_colon = $i;
23542              
23543             # retain break at a ':' line break
23544 130 100 100     1257 if ( ( $i == $i_line_start || $i == $i_line_end )
      100        
      66        
23545             && $rOpts_break_at_old_ternary_breakpoints
23546             && $levels_to_go[$i] < $high_stress_level )
23547             {
23548              
23549 73         312 $self->set_forced_breakpoint($i);
23550              
23551             # Break at a previous '=', but only if it is before
23552             # the mating '?'. Mate_index test fixes b1287.
23553 73         213 my $ieq = $i_equals[$depth];
23554 73         161 my $mix = $mate_index_to_go[$i];
23555 73 100       277 if ( !defined($mix) ) { $mix = -1 }
  6         27  
23556 73 100 66     327 if ( $ieq > 0 && $ieq < $mix ) {
23557 17         95 $self->set_forced_breakpoint( $i_equals[$depth] );
23558 17         48 $i_equals[$depth] = -1;
23559             }
23560             }
23561             }
23562              
23563             # handle any postponed closing breakpoints
23564 2988 100       7673 if ( has_postponed_breakpoint($type_sequence) ) {
23565 731 100       2404 my $inc = ( $type eq ':' ) ? 0 : 1;
23566 731 100       1935 if ( $i >= $inc ) {
23567 266         1106 $self->set_forced_breakpoint( $i - $inc );
23568             }
23569             }
23570             }
23571              
23572             # must be opening token, one of { ( [ ?
23573             else {
23574              
23575             # set breaks at ?/: if they will get separated (and are
23576             # not a ?/: chain), or if the '?' is at the end of the
23577             # line
23578 3152 100       6531 if ( $token eq '?' ) {
23579 130         481 my $i_colon = $mate_index_to_go[$i];
23580 130 50 66     1152 if (
      66        
23581             !defined($i_colon) # the ':' is not in this batch
23582             || $i == 0 # this '?' is the first token of the line
23583             || $i == $max_index_to_go # or this '?' is the last token
23584             )
23585             {
23586              
23587             # don't break if # this has a side comment, and
23588             # don't break at a '?' if preceded by ':' on
23589             # this line of previous ?/: pair on this line.
23590             # This is an attempt to preserve a chain of ?/:
23591             # expressions (elsif2.t).
23592 12 100 66     109 if (
      100        
23593             (
23594             $i_last_colon < 0
23595             || $parent_seqno_to_go[$i_last_colon] !=
23596             $parent_seqno_to_go[$i]
23597             )
23598             && $tokens_to_go[$max_index_to_go] ne '#'
23599             )
23600             {
23601 8         33 $self->set_forced_breakpoint($i);
23602             }
23603 12         70 $self->set_closing_breakpoint($i);
23604             }
23605             }
23606              
23607             # must be one of { ( [
23608             else {
23609              
23610             # do requested -lp breaks at the OPENING token for BROKEN
23611             # blocks. NOTE: this can be done for both -lp and -xlp,
23612             # but only -xlp can really take advantage of this. So this
23613             # is currently restricted to -xlp to avoid excess changes to
23614             # existing -lp formatting.
23615 3022 100 100     7558 if ( $rOpts_extended_line_up_parentheses
23616             && !defined( $mate_index_to_go[$i] ) )
23617             {
23618             my $lp_object =
23619 26         101 $self->[_rlp_object_by_seqno_]->{$type_sequence};
23620 26 100       67 if ($lp_object) {
23621 13         46 my $K_begin_line = $lp_object->get_K_begin_line();
23622 13         26 my $i_begin_line = $K_begin_line - $K_to_go[0];
23623 13         41 $self->set_forced_lp_break( $i_begin_line, $i );
23624             }
23625             }
23626             }
23627             }
23628 6140         10015 return;
23629             } ## end sub break_lists_type_sequence
23630              
23631             sub break_lists_increasing_depth {
23632              
23633 3022     3022 0 5426 my ($self) = @_;
23634              
23635             #--------------------------------------------
23636             # prepare for a new list when depth increases
23637             # token $i is a '(','{', or '['
23638             #--------------------------------------------
23639              
23640             #----------------------------------------------------------
23641             # BEGIN initialize depth arrays
23642             # ... use the same order as sub check_for_new_minimum_depth
23643             #----------------------------------------------------------
23644 3022         6384 $type_sequence_stack[$depth] = $type_sequence;
23645              
23646 3022         4951 $override_cab3[$depth] = undef;
23647 3022 50 33     8170 if ( $rOpts_comma_arrow_breakpoints == 3 && $type_sequence ) {
23648             $override_cab3[$depth] =
23649 0         0 $self->[_roverride_cab3_]->{$type_sequence};
23650             }
23651              
23652 3022         4948 $breakpoint_stack[$depth] = $forced_breakpoint_count;
23653             $container_type[$depth] =
23654              
23655             # k => && || ? : .
23656 3022 100       8142 $is_container_label_type{$last_nonblank_type}
23657             ? $last_nonblank_token
23658             : EMPTY_STRING;
23659 3022         5008 $identifier_count_stack[$depth] = 0;
23660 3022         4807 $index_before_arrow[$depth] = -1;
23661 3022         4599 $interrupted_list[$depth] = 0;
23662 3022         4587 $item_count_stack[$depth] = 0;
23663 3022         5581 $last_nonblank_type[$depth] = $last_nonblank_type;
23664 3022         4951 $opening_structure_index_stack[$depth] = $i;
23665              
23666 3022         4649 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
23667 3022         5326 $comma_index[$depth] = undef;
23668 3022         4493 $last_comma_index[$depth] = undef;
23669 3022         4458 $last_dot_index[$depth] = undef;
23670 3022         4554 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
23671 3022         4410 $has_old_logical_breakpoints[$depth] = 0;
23672 3022         6437 $rand_or_list[$depth] = [];
23673 3022         5584 $rfor_semicolon_list[$depth] = [];
23674 3022         4824 $i_equals[$depth] = -1;
23675              
23676             # if line ends here then signal closing token to break
23677 3022 100 100     11000 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) {
23678 612         2151 $self->set_closing_breakpoint($i);
23679             }
23680              
23681             # Not all lists of values should be vertically aligned..
23682 3022   66     11422 $dont_align[$depth] =
23683              
23684             # code BLOCKS are handled at a higher level
23685             ##( $block_type ne EMPTY_STRING )
23686             $block_type
23687              
23688             # certain paren lists
23689             || ( $type eq '(' ) && (
23690              
23691             # it does not usually look good to align a list of
23692             # identifiers in a parameter list, as in:
23693             # my($var1, $var2, ...)
23694             # (This test should probably be refined, for now I'm just
23695             # testing for any keyword)
23696             ( $last_nonblank_type eq 'k' )
23697              
23698             # a trailing '(' usually indicates a non-list
23699             || ( $next_nonblank_type eq '(' )
23700             );
23701 3022         5036 $has_broken_sublist[$depth] = 0;
23702 3022         4919 $want_comma_break[$depth] = 0;
23703              
23704             #----------------------------
23705             # END initialize depth arrays
23706             #----------------------------
23707              
23708             # patch to outdent opening brace of long if/for/..
23709             # statements (like this one). See similar coding in
23710             # set_continuation breaks. We have also catch it here for
23711             # short line fragments which otherwise will not go through
23712             # break_long_lines.
23713 3022 50 100     9860 if (
      100        
      66        
      66        
      33        
23714             $block_type
23715              
23716             # if we have the ')' but not its '(' in this batch..
23717             && ( $last_nonblank_token eq ')' )
23718             && !defined( $mate_index_to_go[$i_last_nonblank_token] )
23719              
23720             # and user wants brace to left
23721             && !$rOpts_opening_brace_always_on_right
23722              
23723             && ( $type eq '{' ) # should be true
23724             && ( $token eq '{' ) # should be true
23725             )
23726             {
23727 4         21 $self->set_forced_breakpoint( $i - 1 );
23728             }
23729              
23730 3022         5049 return;
23731             } ## end sub break_lists_increasing_depth
23732              
23733             sub break_lists_decreasing_depth {
23734              
23735 2858     2858 0 6028 my ( $self, $rbond_strength_bias ) = @_;
23736              
23737             # We have arrived at a closing container token in sub break_lists:
23738             # the token at index $i is one of these: ')','}', ']'
23739             # A number of important breakpoints for this container can now be set
23740             # based on the information that we have collected. This includes:
23741             # - breaks at commas to format tables
23742             # - breaks at certain logical operators and other good breakpoints
23743             # - breaks at opening and closing containers if needed by selected
23744             # formatting styles
23745             # These breaks are made by calling sub 'set_forced_breakpoint'
23746              
23747 2858 100       7496 $self->check_for_new_minimum_depth( $depth, $parent_seqno_to_go[$i] )
23748             if ( $depth < $minimum_depth );
23749              
23750             # force all outer logical containers to break after we see on
23751             # old breakpoint
23752 2858   100     12185 $has_old_logical_breakpoints[$depth] ||=
23753             $has_old_logical_breakpoints[$current_depth];
23754              
23755             # Patch to break between ') {' if the paren list is broken.
23756             # There is similar logic in break_long_lines for
23757             # non-broken lists.
23758 2858 50 100     10552 if ( $token eq ')'
      100        
      66        
      66        
23759             && $next_nonblank_block_type
23760             && $interrupted_list[$current_depth]
23761             && $next_nonblank_type eq '{'
23762             && !$rOpts_opening_brace_always_on_right )
23763             {
23764 4         17 $self->set_forced_breakpoint($i);
23765             }
23766              
23767             #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
23768              
23769             #-----------------------------------------------------------------
23770             # Set breaks at commas to display a table of values if appropriate
23771             #-----------------------------------------------------------------
23772 2858         5531 my ( $bp_count, $do_not_break_apart ) = ( 0, 0 );
23773 2858 100       7425 ( $bp_count, $do_not_break_apart ) =
23774             $self->set_comma_breakpoints( $current_depth, $rbond_strength_bias )
23775             if ( $item_count_stack[$current_depth] );
23776              
23777             #-----------------------------------------------------------
23778             # Now set flags needed to decide if we should break open the
23779             # container ... This is a long rambling section which has
23780             # grown over time to handle all situations.
23781             #-----------------------------------------------------------
23782 2858         4732 my $i_opening = $opening_structure_index_stack[$current_depth];
23783 2858         4860 my $saw_opening_structure = ( $i_opening >= 0 );
23784 2858         4112 my $lp_object;
23785 2858 100 100     7501 if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
23786             $lp_object = $self->[_rlp_object_by_seqno_]
23787 279         801 ->{ $type_sequence_to_go[$i_opening] };
23788             }
23789              
23790             # this term is long if we had to break at interior commas..
23791 2858         4784 my $is_long_term = $bp_count > 0;
23792              
23793             # If this is a short container with one or more comma arrows,
23794             # then we will mark it as a long term to open it if requested.
23795             # $rOpts_comma_arrow_breakpoints =
23796             # 0 - open only if comma precedes closing brace
23797             # 1 - stable: except for one line blocks
23798             # 2 - try to form 1 line blocks
23799             # 3 - ignore =>
23800             # 4 - always open up if vt=0
23801             # 5 - stable: even for one line blocks if vt=0
23802              
23803 2858         4497 my $cab_flag = $rOpts_comma_arrow_breakpoints;
23804              
23805             # replace -cab=3 if overriden
23806 2858 50 33     7166 if ( $cab_flag == 3 && $type_sequence ) {
23807 0         0 my $test_cab = $self->[_roverride_cab3_]->{$type_sequence};
23808 0 0       0 if ( defined($test_cab) ) { $cab_flag = $test_cab }
  0         0  
23809             }
23810              
23811             # PATCH: Modify the -cab flag if we are not processing a list:
23812             # We only want the -cab flag to apply to list containers, so
23813             # for non-lists we use the default and stable -cab=5 value.
23814             # Fixes case b939a.
23815 2858 100 66     12708 if ( $type_sequence && !$self->[_ris_list_by_seqno_]->{$type_sequence} )
23816             {
23817 1928         3519 $cab_flag = 5;
23818             }
23819              
23820             # Ignore old breakpoints when under stress.
23821             # Fixes b1203 b1204 as well as b1197-b1200.
23822             # But not if -lp: fixes b1264, b1265. NOTE: rechecked with
23823             # b1264 to see if this check is still required at all, and
23824             # these still require a check, but at higher level beta+3
23825             # instead of beta: b1193 b780
23826 2858 100 100     13542 if ( $saw_opening_structure
      100        
23827             && !$lp_object
23828             && $levels_to_go[$i_opening] >= $high_stress_level )
23829             {
23830 29         53 $cab_flag = 2;
23831              
23832             # Do not break hash braces under stress (fixes b1238)
23833 29   100     130 $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
23834              
23835             # This option fixes b1235, b1237, b1240 with old and new
23836             # -lp, but formatting is nicer with next option.
23837             ## $is_long_term ||=
23838             ## $levels_to_go[$i_opening] > $stress_level_beta + 1;
23839              
23840             # This option fixes b1240 but not b1235, b1237 with new -lp,
23841             # but this gives better formatting than the previous option.
23842             # TODO: see if stress_level_alpha should also be considered
23843 29   100     80 $do_not_break_apart ||=
23844             $levels_to_go[$i_opening] > $stress_level_beta;
23845             }
23846              
23847 2858 100 100     19353 if ( !$is_long_term
      66        
      100        
      100        
23848             && $saw_opening_structure
23849             && $is_opening_token{ $tokens_to_go[$i_opening] }
23850             && $index_before_arrow[ $depth + 1 ] > 0
23851             && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } )
23852             {
23853 430   66     3525 $is_long_term =
23854             $cab_flag == 4
23855             || $cab_flag == 0 && $last_nonblank_token eq ','
23856             || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
23857             }
23858              
23859             # mark term as long if the length between opening and closing
23860             # parens exceeds allowed line length
23861 2858 100 100     9907 if ( !$is_long_term && $saw_opening_structure ) {
23862              
23863 1950         5340 my $i_opening_minus = $self->find_token_starting_list($i_opening);
23864              
23865 1950         5120 my $excess = $self->excess_line_length( $i_opening_minus, $i );
23866              
23867             # Use standard spaces for indentation of lists in -lp mode
23868             # if it gives a longer line length. This helps to avoid an
23869             # instability due to forming and breaking one-line blocks.
23870             # This fixes case b1314.
23871 1950         3891 my $indentation = $leading_spaces_to_go[$i_opening_minus];
23872 1950 100 100     4783 if ( ref($indentation)
23873             && $self->[_ris_broken_container_]->{$type_sequence} )
23874             {
23875 25         57 my $lp_spaces = $indentation->get_spaces();
23876 25         53 my $std_spaces = $indentation->get_standard_spaces();
23877 25         40 my $diff = $std_spaces - $lp_spaces;
23878 25 50       54 if ( $diff > 0 ) { $excess += $diff }
  0         0  
23879             }
23880              
23881 1950         3120 my $tol = $length_tol;
23882              
23883             # boost tol for an -lp container
23884 1950 50 100     4879 if (
      33        
      66        
23885             $lp_tol_boost
23886             && $lp_object
23887             && ( $rOpts_extended_continuation_indentation
23888             || !$self->[_ris_list_by_seqno_]->{$type_sequence} )
23889             )
23890             {
23891 25         44 $tol += $lp_tol_boost;
23892             }
23893              
23894             # Patch to avoid blinking with -bbxi=2 and -cab=2
23895             # in which variations in -ci cause unstable formatting
23896             # in edge cases. We just always add one ci level so that
23897             # the formatting is independent of the -BBX results.
23898             # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
23899             # b1161 b1166 b1167 b1168
23900 1950 50 66     5742 if ( !$ci_levels_to_go[$i_opening]
23901             && $self->[_rbreak_before_container_by_seqno_]->{$type_sequence}
23902             )
23903             {
23904 0         0 $tol += $rOpts_continuation_indentation;
23905             }
23906              
23907 1950         4103 $is_long_term = $excess + $tol > 0;
23908              
23909             }
23910              
23911             # We've set breaks after all comma-arrows. Now we have to
23912             # undo them if this can be a one-line block
23913             # (the only breakpoints set will be due to comma-arrows)
23914              
23915 2858 100 33     22615 if (
      66        
      66        
      100        
      100        
      100        
23916              
23917             # user doesn't require breaking after all comma-arrows
23918             ( $cab_flag != 0 ) && ( $cab_flag != 4 )
23919              
23920             # and if the opening structure is in this batch
23921             && $saw_opening_structure
23922              
23923             # and either on the same old line
23924             && (
23925             $old_breakpoint_count_stack[$current_depth] ==
23926             $last_old_breakpoint_count
23927              
23928             # or user wants to form long blocks with arrows
23929             || $cab_flag == 2
23930             )
23931              
23932             # and we made breakpoints between the opening and closing
23933             && ( $breakpoint_undo_stack[$current_depth] <
23934             $forced_breakpoint_undo_count )
23935              
23936             # and this block is short enough to fit on one line
23937             # Note: use < because need 1 more space for possible comma
23938             && !$is_long_term
23939              
23940             )
23941             {
23942 96         385 $self->undo_forced_breakpoint_stack(
23943             $breakpoint_undo_stack[$current_depth] );
23944             }
23945              
23946             # now see if we have any comma breakpoints left
23947 2858         5500 my $has_comma_breakpoints =
23948             ( $breakpoint_stack[$current_depth] != $forced_breakpoint_count );
23949              
23950             # update broken-sublist flag of the outer container
23951 2858   100     13440 $has_broken_sublist[$depth] =
23952             $has_broken_sublist[$depth]
23953             || $has_broken_sublist[$current_depth]
23954             || $is_long_term
23955             || $has_comma_breakpoints;
23956              
23957             # Having come to the closing ')', '}', or ']', now we have to decide
23958             # if we should 'open up' the structure by placing breaks at the
23959             # opening and closing containers. This is a tricky decision. Here
23960             # are some of the basic considerations:
23961             #
23962             # -If this is a BLOCK container, then any breakpoints will have
23963             # already been set (and according to user preferences), so we need do
23964             # nothing here.
23965             #
23966             # -If we have a comma-separated list for which we can align the list
23967             # items, then we need to do so because otherwise the vertical aligner
23968             # cannot currently do the alignment.
23969             #
23970             # -If this container does itself contain a container which has been
23971             # broken open, then it should be broken open to properly show the
23972             # structure.
23973             #
23974             # -If there is nothing to align, and no other reason to break apart,
23975             # then do not do it.
23976             #
23977             # We will not break open the parens of a long but 'simple' logical
23978             # expression. For example:
23979             #
23980             # This is an example of a simple logical expression and its formatting:
23981             #
23982             # if ( $bigwasteofspace1 && $bigwasteofspace2
23983             # || $bigwasteofspace3 && $bigwasteofspace4 )
23984             #
23985             # Most people would prefer this than the 'spacey' version:
23986             #
23987             # if (
23988             # $bigwasteofspace1 && $bigwasteofspace2
23989             # || $bigwasteofspace3 && $bigwasteofspace4
23990             # )
23991             #
23992             # To illustrate the rules for breaking logical expressions, consider:
23993             #
23994             # FULLY DENSE:
23995             # if ( $opt_excl
23996             # and ( exists $ids_excl_uc{$id_uc}
23997             # or grep $id_uc =~ /$_/, @ids_excl_uc ))
23998             #
23999             # This is on the verge of being difficult to read. The current
24000             # default is to open it up like this:
24001             #
24002             # DEFAULT:
24003             # if (
24004             # $opt_excl
24005             # and ( exists $ids_excl_uc{$id_uc}
24006             # or grep $id_uc =~ /$_/, @ids_excl_uc )
24007             # )
24008             #
24009             # This is a compromise which tries to avoid being too dense and to
24010             # spacey. A more spaced version would be:
24011             #
24012             # SPACEY:
24013             # if (
24014             # $opt_excl
24015             # and (
24016             # exists $ids_excl_uc{$id_uc}
24017             # or grep $id_uc =~ /$_/, @ids_excl_uc
24018             # )
24019             # )
24020             #
24021             # Some people might prefer the spacey version -- an option could be
24022             # added. The innermost expression contains a long block '( exists
24023             # $ids_... ')'.
24024             #
24025             # Here is how the logic goes: We will force a break at the 'or' that
24026             # the innermost expression contains, but we will not break apart its
24027             # opening and closing containers because (1) it contains no
24028             # multi-line sub-containers itself, and (2) there is no alignment to
24029             # be gained by breaking it open like this
24030             #
24031             # and (
24032             # exists $ids_excl_uc{$id_uc}
24033             # or grep $id_uc =~ /$_/, @ids_excl_uc
24034             # )
24035             #
24036             # (although this looks perfectly ok and might be good for long
24037             # expressions). The outer 'if' container, though, contains a broken
24038             # sub-container, so it will be broken open to avoid too much density.
24039             # Also, since it contains no 'or's, there will be a forced break at
24040             # its 'and'.
24041              
24042             # Handle the experimental flag --break-open-compact-parens
24043             # NOTE: This flag is not currently used and may eventually be removed.
24044             # If this flag is set, we will implement it by
24045             # pretending we did not see the opening structure, since in that case
24046             # parens always get opened up.
24047 2858 50 66     9431 if ( $saw_opening_structure
24048             && $rOpts_break_open_compact_parens )
24049             {
24050              
24051             # This parameter is a one-character flag, as follows:
24052             # '0' matches no parens -> break open NOT OK
24053             # '1' matches all parens -> break open OK
24054             # Other values are same as used by the weld-exclusion-list
24055 0         0 my $flag = $rOpts_break_open_compact_parens;
24056 0 0 0     0 if ( $flag eq '*'
24057             || $flag eq '1' )
24058             {
24059 0         0 $saw_opening_structure = 0;
24060             }
24061             else {
24062              
24063             # NOTE: $seqno will be equal to closure var $type_sequence here
24064 0         0 my $seqno = $type_sequence_to_go[$i_opening];
24065 0         0 $saw_opening_structure =
24066             !$self->match_paren_control_flag( $seqno, $flag );
24067             }
24068             }
24069              
24070             # Set some more flags telling something about this container..
24071 2858         4332 my $is_simple_logical_expression;
24072 2858 100 100     15728 if ( $item_count_stack[$current_depth] == 0
      100        
      100        
24073             && $saw_opening_structure
24074             && $tokens_to_go[$i_opening] eq '('
24075             && $is_logical_container{ $container_type[$current_depth] } )
24076             {
24077              
24078             # This seems to be a simple logical expression with
24079             # no existing breakpoints. Set a flag to prevent
24080             # opening it up.
24081 205 100       623 if ( !$has_comma_breakpoints ) {
24082 192         424 $is_simple_logical_expression = 1;
24083             }
24084              
24085             #---------------------------------------------------
24086             # This seems to be a simple logical expression with
24087             # breakpoints (broken sublists, for example). Break
24088             # at all 'or's and '||'s.
24089             #---------------------------------------------------
24090             else {
24091 13         58 $self->set_logical_breakpoints($current_depth);
24092             }
24093             }
24094              
24095             # break long terms at any C-style for semicolons (c154)
24096 2858 100 100     7204 if ( $is_long_term
24097 550         2136 && @{ $rfor_semicolon_list[$current_depth] } )
24098             {
24099 4         29 $self->set_for_semicolon_breakpoints($current_depth);
24100              
24101             # and open up a long 'for' or 'foreach' container to allow
24102             # leading term alignment unless -lp is used.
24103 4 100       37 $has_comma_breakpoints = 1 unless ($lp_object);
24104             }
24105              
24106             #----------------------------------------------------------------
24107             # FINALLY: Break open container according to the flags which have
24108             # been set.
24109             #----------------------------------------------------------------
24110 2858 100 100     22681 if (
    100 100        
    100 100        
      66        
24111              
24112             # breaks for code BLOCKS are handled at a higher level
24113             !$block_type
24114              
24115             # we do not need to break at the top level of an 'if'
24116             # type expression
24117             && !$is_simple_logical_expression
24118              
24119             ## modification to keep ': (' containers vertically tight;
24120             ## but probably better to let user set -vt=1 to avoid
24121             ## inconsistency with other paren types
24122             ## && ($container_type[$current_depth] ne ':')
24123              
24124             # otherwise, we require one of these reasons for breaking:
24125             && (
24126              
24127             # - this term has forced line breaks
24128             $has_comma_breakpoints
24129              
24130             # - the opening container is separated from this batch
24131             # for some reason (comment, blank line, code block)
24132             # - this is a non-paren container spanning multiple lines
24133             || !$saw_opening_structure
24134              
24135             # - this is a long block contained in another breakable
24136             # container
24137             || $is_long_term && !$self->is_in_block_by_i($i_opening)
24138             )
24139             )
24140             {
24141              
24142             # do special -lp breaks at the CLOSING token for INTACT
24143             # blocks (because we might not do them if the block does
24144             # not break open)
24145 682 100       2019 if ($lp_object) {
24146 96         321 my $K_begin_line = $lp_object->get_K_begin_line();
24147 96         268 my $i_begin_line = $K_begin_line - $K_to_go[0];
24148 96         334 $self->set_forced_lp_break( $i_begin_line, $i_opening );
24149             }
24150              
24151             # break after opening structure.
24152             # note: break before closing structure will be automatic
24153 682 50       1896 if ( $minimum_depth <= $current_depth ) {
24154              
24155 682 100       1843 if ( $i_opening >= 0 ) {
24156 485 50 66     2002 if ( !$do_not_break_apart
24157             && !is_unbreakable_container($current_depth) )
24158             {
24159 445         1636 $self->set_forced_breakpoint($i_opening);
24160              
24161             # Do not let brace types L/R use vertical tightness
24162             # flags to recombine if we have to break on length
24163             # because instability is possible if both vt and vtc
24164             # flags are set ... see issue b1444.
24165 445 0 100     2494 if ( $is_long_term
      66        
      33        
24166             && $types_to_go[$i_opening] eq 'L'
24167             && $opening_vertical_tightness{'{'}
24168             && $closing_vertical_tightness{'}'} )
24169             {
24170 0         0 my $seqno = $type_sequence_to_go[$i_opening];
24171 0 0       0 if ($seqno) {
24172 0         0 $self->[_rbreak_container_]->{$seqno} = 1;
24173             }
24174             }
24175             }
24176             }
24177              
24178             # break at ',' of lower depth level before opening token
24179 682 100       1958 if ( $last_comma_index[$depth] ) {
24180 107         279 $self->set_forced_breakpoint( $last_comma_index[$depth] );
24181             }
24182              
24183             # break at '.' of lower depth level before opening token
24184 682 100       1715 if ( $last_dot_index[$depth] ) {
24185 5         21 $self->set_forced_breakpoint( $last_dot_index[$depth] );
24186             }
24187              
24188             # break before opening structure if preceded by another
24189             # closing structure and a comma. This is normally
24190             # done by the previous closing brace, but not
24191             # if it was a one-line block.
24192 682 100       1959 if ( $i_opening > 2 ) {
24193 427 100       1425 my $i_prev =
24194             ( $types_to_go[ $i_opening - 1 ] eq 'b' )
24195             ? $i_opening - 2
24196             : $i_opening - 1;
24197              
24198 427         906 my $type_prev = $types_to_go[$i_prev];
24199 427         837 my $token_prev = $tokens_to_go[$i_prev];
24200 427 100 66     3745 if (
    100 100        
      66        
24201             $type_prev eq ','
24202             && ( $types_to_go[ $i_prev - 1 ] eq ')'
24203             || $types_to_go[ $i_prev - 1 ] eq '}' )
24204             )
24205             {
24206 11         36 $self->set_forced_breakpoint($i_prev);
24207             }
24208              
24209             # also break before something like ':(' or '?('
24210             # if appropriate.
24211             elsif ($type_prev =~ /^([k\:\?]|&&|\|\|)$/
24212             && $want_break_before{$token_prev} )
24213             {
24214 6         23 $self->set_forced_breakpoint($i_prev);
24215             }
24216             else {
24217             ## not a breakpoint
24218             }
24219             }
24220             }
24221              
24222             # break after comma following closing structure
24223 682 100       2280 if ( $types_to_go[ $i + 1 ] eq ',' ) {
24224 79         210 $self->set_forced_breakpoint( $i + 1 );
24225             }
24226              
24227             # break before an '=' following closing structure
24228 682 50 33     2492 if (
24229             $is_assignment{$next_nonblank_type}
24230             && ( $breakpoint_stack[$current_depth] !=
24231             $forced_breakpoint_count )
24232             )
24233             {
24234 0         0 $self->set_forced_breakpoint($i);
24235             }
24236              
24237             # break at any comma before the opening structure Added
24238             # for -lp, but seems to be good in general. It isn't
24239             # obvious how far back to look; the '5' below seems to
24240             # work well and will catch the comma in something like
24241             # push @list, myfunc( $param, $param, ..
24242              
24243 682         1283 my $icomma = $last_comma_index[$depth];
24244 682 100 100     2380 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
24245 25 50       74 if ( !$forced_breakpoint_to_go[$icomma] ) {
24246 0         0 $self->set_forced_breakpoint($icomma);
24247             }
24248             }
24249             }
24250              
24251             #-----------------------------------------------------------
24252             # Break open a logical container open if it was already open
24253             #-----------------------------------------------------------
24254             elsif ($is_simple_logical_expression
24255             && $has_old_logical_breakpoints[$current_depth] )
24256             {
24257 10         58 $self->set_logical_breakpoints($current_depth);
24258             }
24259              
24260             # Handle long container which does not get opened up
24261             elsif ($is_long_term) {
24262              
24263             # must set fake breakpoint to alert outer containers that
24264             # they are complex
24265 78         390 set_fake_breakpoint();
24266             }
24267             else {
24268             ## do not break open
24269             }
24270              
24271 2858         5566 return;
24272             } ## end sub break_lists_decreasing_depth
24273             } ## end closure break_lists
24274              
24275             my %is_kwiZ;
24276             my %is_key_type;
24277              
24278             BEGIN {
24279              
24280             # Added 'w' to fix b1172
24281 39     39   340 my @q = qw(k w i Z ->);
24282 39         272 @is_kwiZ{@q} = (1) x scalar(@q);
24283              
24284             # added = for b1211
24285 39         200 @q = qw<( [ { L R } ] ) = b>;
24286 39         133 push @q, ',';
24287 39         1383 @is_key_type{@q} = (1) x scalar(@q);
24288             } ## end BEGIN
24289              
24290 39     39   376 use constant DEBUG_FIND_START => 0;
  39         136  
  39         20209  
24291              
24292             sub find_token_starting_list {
24293              
24294             # When testing to see if a block will fit on one line, some
24295             # previous token(s) may also need to be on the line; particularly
24296             # if this is a sub call. So we will look back at least one
24297             # token.
24298 2265     2265 0 4526 my ( $self, $i_opening_paren ) = @_;
24299              
24300             # This will be the return index
24301 2265         3542 my $i_opening_minus = $i_opening_paren;
24302              
24303 2265 100       4981 if ( $i_opening_minus <= 0 ) {
24304 22         59 return $i_opening_minus;
24305             }
24306              
24307 2243         3699 my $im1 = $i_opening_paren - 1;
24308 2243         4765 my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
24309 2243 100 66     7893 if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
24310 1189         2194 $iprev_nb -= 1;
24311 1189         2310 $type_prev_nb = $types_to_go[$iprev_nb];
24312             }
24313              
24314 2243 100 66     9358 if ( $type_prev_nb eq ',' ) {
    100          
24315              
24316             # a previous comma is a good break point
24317             # $i_opening_minus = $i_opening_paren;
24318             }
24319              
24320             elsif (
24321             $tokens_to_go[$i_opening_paren] eq '('
24322              
24323             # non-parens added here to fix case b1186
24324             || $is_kwiZ{$type_prev_nb}
24325             )
24326             {
24327 1701         2734 $i_opening_minus = $im1;
24328              
24329             # Walk back to improve length estimate...
24330             # FIX for cases b1169 b1170 b1171: start walking back
24331             # at the previous nonblank. This makes the result insensitive
24332             # to the flag --space-function-paren, and similar.
24333             # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
24334 1701         7413 foreach my $j ( reverse( 0 .. $iprev_nb ) ) {
24335 3741 100       9029 if ( $is_key_type{ $types_to_go[$j] } ) {
24336              
24337             # fix for b1211
24338 1401 100       3650 if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
  106         317  
24339 1401         2480 last;
24340             }
24341 2340         3839 $i_opening_minus = $j;
24342             }
24343 1701 100       5547 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
  61         152  
24344             }
24345             else {
24346             ## previous token not special
24347             }
24348              
24349 2243         3215 DEBUG_FIND_START && print <<EOM;
24350             FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
24351             EOM
24352              
24353 2243         4687 return $i_opening_minus;
24354             } ## end sub find_token_starting_list
24355              
24356             { ## begin closure table_maker
24357              
24358             my %is_keyword_with_special_leading_term;
24359              
24360             BEGIN {
24361              
24362             # These keywords have prototypes which allow a special leading item
24363             # followed by a list
24364 39     39   364 my @q = qw(
24365             chmod
24366             formline
24367             grep
24368             join
24369             kill
24370             map
24371             pack
24372             printf
24373             push
24374             sprintf
24375             unshift
24376             );
24377 39         1800 @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
24378             } ## end BEGIN
24379              
24380 39     39   398 use constant DEBUG_SPARSE => 0;
  39         79  
  39         248086  
24381              
24382             sub table_maker {
24383              
24384             # Given a list of comma-separated items, set breakpoints at some of
24385             # the commas, if necessary, to make it easy to read.
24386             # This is done by making calls to 'set_forced_breakpoint'.
24387             # This is a complex routine because there are many special cases.
24388              
24389             # Returns: nothing
24390              
24391             # The numerous variables involved are contained three hashes:
24392             # $rhash_IN : For contents see the calling routine
24393             # $rhash_A: For contents see return from sub 'table_layout_A'
24394             # $rhash_B: For contents see return from sub 'table_layout_B'
24395              
24396 497     497 0 1256 my ( $self, $rhash_IN ) = @_;
24397              
24398             # Find lengths of all list items needed for calculating page layout
24399 497         1608 my $rhash_A = table_layout_A($rhash_IN);
24400 497 100       1562 return if ( !defined($rhash_A) );
24401              
24402             # Some variables received from caller...
24403 489         1016 my $i_closing_paren = $rhash_IN->{i_closing_paren};
24404 489         856 my $i_opening_paren = $rhash_IN->{i_opening_paren};
24405 489         989 my $has_broken_sublist = $rhash_IN->{has_broken_sublist};
24406 489         895 my $interrupted = $rhash_IN->{interrupted};
24407              
24408             #-----------------------------------------
24409             # Section A: Handle some special cases ...
24410             #-----------------------------------------
24411              
24412             #-------------------------------------------------------------
24413             # Special Case A1: Compound List Rule 1:
24414             # Break at (almost) every comma for a list containing a broken
24415             # sublist. This has higher priority than the Interrupted List
24416             # Rule.
24417             #-------------------------------------------------------------
24418 489 100       1293 if ($has_broken_sublist) {
24419              
24420 80         346 $self->apply_broken_sublist_rule( $rhash_A, $interrupted );
24421              
24422 80         353 return;
24423             }
24424              
24425             #--------------------------------------------------------------
24426             # Special Case A2: Interrupted List Rule:
24427             # A list is forced to use old breakpoints if it was interrupted
24428             # by side comments or blank lines, or requested by user.
24429             #--------------------------------------------------------------
24430 409 100 100     2786 if ( $rOpts_break_at_old_comma_breakpoints
      66        
24431             || $interrupted
24432             || $i_opening_paren < 0 )
24433             {
24434 94         209 my $i_first_comma = $rhash_A->{_i_first_comma};
24435 94         220 my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
24436 94         416 $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
24437 94         448 return;
24438             }
24439              
24440             #-----------------------------------------------------------------
24441             # Special Case A3: If it fits on one line, return and let the line
24442             # break logic decide if and where to break.
24443             #-----------------------------------------------------------------
24444              
24445             # The -bbxi=2 parameters can add an extra hidden level of indentation
24446             # so they need a tolerance to avoid instability. Fixes b1259, 1260.
24447 315         763 my $opening_token = $tokens_to_go[$i_opening_paren];
24448 315         612 my $tol = 0;
24449 315 0 33     962 if ( $break_before_container_types{$opening_token}
      0        
24450             && $container_indentation_options{$opening_token}
24451             && $container_indentation_options{$opening_token} == 2 )
24452             {
24453 0         0 $tol = $rOpts_indent_columns;
24454              
24455             # use greater of -ci and -i (fix for case b1334)
24456 0 0       0 if ( $tol < $rOpts_continuation_indentation ) {
24457 0         0 $tol = $rOpts_continuation_indentation;
24458             }
24459             }
24460              
24461             # Increase tol when -atc and -dtc are both used to allow for
24462             # possible loss in length on next pass due to a comma. Fixes b1455.
24463 315 100 100     1066 if ( $rOpts_delete_trailing_commas && $rOpts_add_trailing_commas ) {
24464 20         35 $tol += 1;
24465             }
24466              
24467 315         1141 my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
24468 315         1166 my $excess =
24469             $self->excess_line_length( $i_opening_minus, $i_closing_paren );
24470 315 100       1780 return if ( $excess + $tol <= 0 );
24471              
24472             #---------------------------------------
24473             # Section B: Handle a multiline list ...
24474             #---------------------------------------
24475              
24476 135         762 $self->break_multiline_list( $rhash_IN, $rhash_A, $i_opening_minus );
24477 135         623 return;
24478              
24479             } ## end sub table_maker
24480              
24481             sub apply_broken_sublist_rule {
24482              
24483 80     80 0 203 my ( $self, $rhash_A, $interrupted ) = @_;
24484              
24485             # Break at (almost) every comma for a list containing a broken
24486             # sublist.
24487              
24488 80         176 my $ritem_lengths = $rhash_A->{_ritem_lengths};
24489 80         154 my $ri_term_begin = $rhash_A->{_ri_term_begin};
24490 80         136 my $ri_term_end = $rhash_A->{_ri_term_end};
24491 80         135 my $ri_term_comma = $rhash_A->{_ri_term_comma};
24492 80         150 my $item_count = $rhash_A->{_item_count_A};
24493 80         142 my $i_first_comma = $rhash_A->{_i_first_comma};
24494 80         149 my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
24495              
24496             # Break at every comma except for a comma between two
24497             # simple, small terms. This prevents long vertical
24498             # columns of, say, just 0's.
24499 80         117 my $small_length = 10; # 2 + actual maximum length wanted
24500              
24501             # We'll insert a break in long runs of small terms to
24502             # allow alignment in uniform tables.
24503 80         133 my $skipped_count = 0;
24504 80         235 my $columns = table_columns_available($i_first_comma);
24505 80         229 my $fields = int( $columns / $small_length );
24506 80 50 33     270 if ( $rOpts_maximum_fields_per_table
24507             && $fields > $rOpts_maximum_fields_per_table )
24508             {
24509 0         0 $fields = $rOpts_maximum_fields_per_table;
24510             }
24511 80         164 my $max_skipped_count = $fields - 1;
24512              
24513 80         136 my $is_simple_last_term = 0;
24514 80         138 my $is_simple_next_term = 0;
24515 80         192 foreach my $j ( 0 .. $item_count ) {
24516 278         392 $is_simple_last_term = $is_simple_next_term;
24517 278         376 $is_simple_next_term = 0;
24518 278 100 100     997 if ( $j < $item_count
      100        
24519             && $ri_term_end->[$j] == $ri_term_begin->[$j]
24520             && $ritem_lengths->[$j] <= $small_length )
24521             {
24522 25         40 $is_simple_next_term = 1;
24523             }
24524 278 100       537 next if $j == 0;
24525 198 100 100     589 if ( $is_simple_last_term
      66        
24526             && $is_simple_next_term
24527             && $skipped_count < $max_skipped_count )
24528             {
24529 6         13 $skipped_count++;
24530             }
24531             else {
24532 192         283 $skipped_count = 0;
24533 192         320 my $i_tc = $ri_term_comma->[ $j - 1 ];
24534 192 100       444 last unless defined $i_tc;
24535 127         312 $self->set_forced_breakpoint($i_tc);
24536             }
24537             }
24538              
24539             # always break at the last comma if this list is
24540             # interrupted; we wouldn't want to leave a terminal '{', for
24541             # example.
24542 80 100       313 if ($interrupted) {
24543 8         45 $self->set_forced_breakpoint($i_true_last_comma);
24544             }
24545 80         180 return;
24546             } ## end sub apply_broken_sublist_rule
24547              
24548             sub set_emergency_comma_breakpoints {
24549              
24550             my (
24551              
24552 7     7 0 38 $self, #
24553              
24554             $number_of_fields_best,
24555             $rhash_IN,
24556             $comma_count,
24557             $i_first_comma,
24558              
24559             ) = @_;
24560              
24561             # The computed number of table fields is negative, so we have to make
24562             # an emergency fix.
24563              
24564 7         24 my $rcomma_index = $rhash_IN->{rcomma_index};
24565 7         21 my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
24566 7         19 my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
24567 7         23 my $must_break_open = $rhash_IN->{must_break_open};
24568              
24569             # are we an item contained in an outer list?
24570 7         28 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
24571              
24572             # In many cases, it may be best to not force a break if there is just
24573             # one comma, because the standard continuation break logic will do a
24574             # better job without it.
24575              
24576             # In the common case that all but one of the terms can fit
24577             # on a single line, it may look better not to break open the
24578             # containing parens. Consider, for example
24579              
24580             # $color =
24581             # join ( '/',
24582             # sort { $color_value{$::a} <=> $color_value{$::b}; }
24583             # keys %colors );
24584              
24585             # which will look like this with the container broken:
24586              
24587             # $color = join (
24588             # '/',
24589             # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
24590             # );
24591              
24592             # Here is an example of this rule for a long last term:
24593              
24594             # log_message( 0, 256, 128,
24595             # "Number of routes in adj-RIB-in to be considered: $peercount" );
24596              
24597             # And here is an example with a long first term:
24598              
24599             # $s = sprintf(
24600             # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
24601             # $r, $pu, $ps, $cu, $cs, $tt
24602             # )
24603             # if $style eq 'all';
24604              
24605 7         27 my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
24606              
24607 7         29 my $long_last_term = $self->excess_line_length( 0, $i_last_comma ) <= 0;
24608 7         49 my $long_first_term =
24609             $self->excess_line_length( $i_first_comma + 1, $max_index_to_go ) <=
24610             0;
24611              
24612             # break at every comma ...
24613 7 100 66     141 if (
    100 0        
    50 33        
      66        
24614              
24615             # if requested by user or is best looking
24616             $number_of_fields_best == 1
24617              
24618             # or if this is a sublist of a larger list
24619             || $in_hierarchical_list
24620              
24621             # or if multiple commas and we don't have a long first or last
24622             # term
24623             || ( $comma_count > 1
24624             && !( $long_last_term || $long_first_term ) )
24625             )
24626             {
24627 2         12 foreach ( 0 .. $comma_count - 1 ) {
24628 3         17 $self->set_forced_breakpoint( $rcomma_index->[$_] );
24629             }
24630             }
24631             elsif ($long_last_term) {
24632              
24633 2         12 $self->set_forced_breakpoint($i_last_comma);
24634 2 100       9 ${$rdo_not_break_apart} = 1 unless $must_break_open;
  1         2  
24635             }
24636             elsif ($long_first_term) {
24637              
24638 3         25 $self->set_forced_breakpoint($i_first_comma);
24639             }
24640             else {
24641              
24642             # let breaks be defined by default bond strength logic
24643             }
24644 7         27 return;
24645             } ## end sub set_emergency_comma_breakpoints
24646              
24647             sub break_multiline_list {
24648 135     135 0 488 my ( $self, $rhash_IN, $rhash_A, $i_opening_minus ) = @_;
24649              
24650             # We have a list spanning multiple lines and are trying
24651             # to decide the best way to set comma breakpoints.
24652              
24653             # Overriden variables
24654 135         347 my $item_count = $rhash_A->{_item_count_A};
24655 135         320 my $identifier_count = $rhash_A->{_identifier_count_A};
24656              
24657             # Derived variables:
24658 135         320 my $ritem_lengths = $rhash_A->{_ritem_lengths};
24659 135         281 my $ri_term_begin = $rhash_A->{_ri_term_begin};
24660 135         292 my $ri_term_end = $rhash_A->{_ri_term_end};
24661 135         280 my $ri_term_comma = $rhash_A->{_ri_term_comma};
24662 135         297 my $rmax_length = $rhash_A->{_rmax_length};
24663 135         281 my $comma_count = $rhash_A->{_comma_count};
24664 135         281 my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma};
24665 135         286 my $first_term_length = $rhash_A->{_first_term_length};
24666 135         297 my $i_first_comma = $rhash_A->{_i_first_comma};
24667 135         253 my $i_last_comma = $rhash_A->{_i_last_comma};
24668 135         282 my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
24669              
24670             # Variables received from caller
24671 135         296 my $i_opening_paren = $rhash_IN->{i_opening_paren};
24672 135         277 my $i_closing_paren = $rhash_IN->{i_closing_paren};
24673 135         325 my $rcomma_index = $rhash_IN->{rcomma_index};
24674 135         316 my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
24675 135         324 my $list_type = $rhash_IN->{list_type};
24676 135         274 my $interrupted = $rhash_IN->{interrupted};
24677 135         271 my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
24678 135         307 my $must_break_open = $rhash_IN->{must_break_open};
24679             ## NOTE: these input vars from caller use the values from rhash_A (see above):
24680             ## my $item_count = $rhash_IN->{item_count};
24681             ## my $identifier_count = $rhash_IN->{identifier_count};
24682              
24683             # NOTE: i_opening_paren changes value below so we need to get these here
24684 135         619 my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
24685 135         342 my $opening_token = $tokens_to_go[$i_opening_paren];
24686              
24687             #---------------------------------------------------------------
24688             # Section B1: Determine '$number_of_fields' = the best number of
24689             # fields to use if this is to be formatted as a table.
24690             #---------------------------------------------------------------
24691              
24692             # Now we know that this block spans multiple lines; we have to set
24693             # at least one breakpoint -- real or fake -- as a signal to break
24694             # open any outer containers.
24695 135         680 set_fake_breakpoint();
24696              
24697             # Set a flag indicating if we need to break open to keep -lp
24698             # items aligned. This is necessary if any of the list terms
24699             # exceeds the available space after the '('.
24700 135         308 my $need_lp_break_open = $must_break_open;
24701 135         334 my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
24702 135 100 100     582 if ( $is_lp_formatting && !$must_break_open ) {
24703 18         92 my $columns_if_unbroken =
24704             $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
24705             - total_line_length( $i_opening_minus, $i_opening_paren );
24706 18   100     159 $need_lp_break_open =
24707             ( $rmax_length->[0] > $columns_if_unbroken )
24708             || ( $rmax_length->[1] > $columns_if_unbroken )
24709             || ( $first_term_length > $columns_if_unbroken );
24710             }
24711              
24712 135         617 my $hash_B =
24713             $self->table_layout_B( $rhash_IN, $rhash_A, $is_lp_formatting );
24714 135 100       516 return if ( !defined($hash_B) );
24715              
24716             # Updated variables
24717 125         295 $i_first_comma = $hash_B->{_i_first_comma_B};
24718 125         249 $i_opening_paren = $hash_B->{_i_opening_paren_B};
24719 125         237 $item_count = $hash_B->{_item_count_B};
24720              
24721             # New variables
24722 125         265 my $columns = $hash_B->{_columns};
24723 125         243 my $formatted_columns = $hash_B->{_formatted_columns};
24724 125         260 my $formatted_lines = $hash_B->{_formatted_lines};
24725 125         246 my $max_width = $hash_B->{_max_width};
24726 125         252 my $new_identifier_count = $hash_B->{_new_identifier_count};
24727 125         222 my $number_of_fields = $hash_B->{_number_of_fields};
24728 125         245 my $odd_or_even = $hash_B->{_odd_or_even};
24729 125         239 my $packed_columns = $hash_B->{_packed_columns};
24730 125         234 my $packed_lines = $hash_B->{_packed_lines};
24731 125         226 my $pair_width = $hash_B->{_pair_width};
24732 125         228 my $ri_ragged_break_list = $hash_B->{_ri_ragged_break_list};
24733 125         285 my $use_separate_first_term = $hash_B->{_use_separate_first_term};
24734              
24735             # are we an item contained in an outer list?
24736 125         439 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
24737              
24738 125         268 my $unused_columns = $formatted_columns - $packed_columns;
24739              
24740             # set some empirical parameters to help decide if we should try to
24741             # align; high sparsity does not look good, especially with few lines
24742 125         326 my $sparsity = ($unused_columns) / ($formatted_columns);
24743 125 100       616 my $max_allowed_sparsity =
    100          
    100          
24744             ( $item_count < 3 ) ? 0.1
24745             : ( $packed_lines == 1 ) ? 0.15
24746             : ( $packed_lines == 2 ) ? 0.4
24747             : 0.7;
24748              
24749 125         235 my $two_line_word_wrap_ok;
24750 125 100       478 if ( $opening_token eq '(' ) {
24751              
24752             # default is to allow wrapping of short paren lists
24753 107         217 $two_line_word_wrap_ok = 1;
24754              
24755             # but turn off word wrap where requested
24756 107 50       346 if ($rOpts_break_open_compact_parens) {
24757              
24758             # This parameter is a one-character flag, as follows:
24759             # '0' matches no parens -> break open NOT OK -> word wrap OK
24760             # '1' matches all parens -> break open OK -> word wrap NOT OK
24761             # Other values are the same as used by the weld-exclusion-list
24762 0         0 my $flag = $rOpts_break_open_compact_parens;
24763 0 0 0     0 if ( $flag eq '*'
    0          
24764             || $flag eq '1' )
24765             {
24766 0         0 $two_line_word_wrap_ok = 0;
24767             }
24768             elsif ( $flag eq '0' ) {
24769 0         0 $two_line_word_wrap_ok = 1;
24770             }
24771             else {
24772 0         0 my $seqno = $type_sequence_to_go[$i_opening_paren];
24773 0         0 $two_line_word_wrap_ok =
24774             !$self->match_paren_control_flag( $seqno, $flag );
24775             }
24776             }
24777             }
24778              
24779             #-------------------------------------------------------------------
24780             # Section B2: Check for shortcut methods, which avoid treating
24781             # a list as a table for relatively small parenthesized lists. These
24782             # are usually easier to read if not formatted as tables.
24783             #-------------------------------------------------------------------
24784 125 100 100     979 if (
      100        
      100        
24785             $packed_lines <= 2 # probably can fit in 2 lines
24786             && $item_count < 9 # doesn't have too many items
24787             && $opening_is_in_block # not a sub-container
24788             && $two_line_word_wrap_ok # ok to wrap this paren list
24789             )
24790             {
24791              
24792             # Section B2A: Shortcut method 1: for -lp and just one comma:
24793             # This is a no-brainer, just break at the comma.
24794 55 100 100     273 if (
      66        
24795             $is_lp_formatting # -lp
24796             && $item_count == 2 # two items, one comma
24797             && !$must_break_open
24798             )
24799             {
24800 5         15 my $i_break = $rcomma_index->[0];
24801 5         26 $self->set_forced_breakpoint($i_break);
24802 5         12 ${$rdo_not_break_apart} = 1;
  5         39  
24803 5         32 return;
24804              
24805             }
24806              
24807             # Section B2B: Shortcut method 2 is for most small ragged lists
24808             # which might look best if not displayed as a table.
24809 50 100 100     488 if (
      100        
      100        
24810             ( $number_of_fields == 2 && $item_count == 3 )
24811             || (
24812             $new_identifier_count > 0 # isn't all quotes
24813             && $sparsity > 0.15
24814             ) # would be fairly spaced gaps if aligned
24815             )
24816             {
24817              
24818 26         150 my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
24819             $ri_ragged_break_list );
24820 26 100       82 ++$break_count if ($use_separate_first_term);
24821              
24822             # NOTE: we should really use the true break count here,
24823             # which can be greater if there are large terms and
24824             # little space, but usually this will work well enough.
24825 26 100       119 if ( !$must_break_open ) {
24826 23 100 66     115 if ( $break_count <= 1
      100        
24827             || ( $is_lp_formatting && !$need_lp_break_open ) )
24828             {
24829 22         51 ${$rdo_not_break_apart} = 1;
  22         51  
24830             }
24831             }
24832 26         149 return;
24833             }
24834              
24835             } ## end shortcut methods
24836              
24837             # debug stuff
24838 94         176 DEBUG_SPARSE && do {
24839              
24840             # How many spaces across the page will we fill?
24841             my $columns_per_line =
24842             ( int $number_of_fields / 2 ) * $pair_width +
24843             ( $number_of_fields % 2 ) * $max_width;
24844              
24845             print {*STDOUT}
24846             "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
24847              
24848             };
24849              
24850             #------------------------------------------------------------------
24851             # Section B3: Compound List Rule 2:
24852             # If this list is too long for one line, and it is an item of a
24853             # larger list, then we must format it, regardless of sparsity
24854             # (ian.t). One reason that we have to do this is to trigger
24855             # Compound List Rule 1, above, which causes breaks at all commas of
24856             # all outer lists. In this way, the structure will be properly
24857             # displayed.
24858             #------------------------------------------------------------------
24859              
24860             # Decide if this list is too long for one line unless broken
24861 94         332 my $total_columns = table_columns_available($i_opening_paren);
24862 94         360 my $too_long = $packed_columns > $total_columns;
24863              
24864             # For a paren list, include the length of the token just before the
24865             # '(' because this is likely a sub call, and we would have to
24866             # include the sub name on the same line as the list. This is still
24867             # imprecise, but not too bad. (steve.t)
24868 94 50 66     523 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
      66        
24869              
24870 1         7 $too_long = $self->excess_line_length( $i_opening_minus,
24871             $i_effective_last_comma + 1 ) > 0;
24872             }
24873              
24874             # TODO: For an item after a '=>', try to include the length of the
24875             # thing before the '=>'. This is crude and should be improved by
24876             # actually looking back token by token.
24877 94 0 33     432 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
      33        
24878 0         0 my $i_opening_minus_test = $i_opening_paren - 4;
24879 0 0       0 if ( $i_opening_minus >= 0 ) {
24880 0         0 $too_long = $self->excess_line_length( $i_opening_minus_test,
24881             $i_effective_last_comma + 1 ) > 0;
24882             }
24883             }
24884              
24885             # Always break lists contained in '[' and '{' if too long for 1 line,
24886             # and always break lists which are too long and part of a more complex
24887             # structure.
24888 94   100     549 my $must_break_open_container = $must_break_open
24889             || ( $too_long
24890             && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
24891              
24892             #--------------------------------------------------------------------
24893             # Section B4: A table will work here. But do not attempt to align
24894             # columns if this is a tiny table or it would be too spaced. It
24895             # seems that the more packed lines we have, the sparser the list that
24896             # can be allowed and still look ok.
24897             #--------------------------------------------------------------------
24898              
24899 94 100 66     994 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
      66        
      100        
24900             || ( $formatted_lines < 2 )
24901             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
24902             )
24903             {
24904             #----------------------------------------------------------------
24905             # Section B4A: too sparse: would not look good aligned in a table
24906             #----------------------------------------------------------------
24907              
24908             # use old breakpoints if this is a 'big' list
24909 12 50 33     65 if ( $packed_lines > 2 && $item_count > 10 ) {
24910 0         0 write_logfile_entry("List sparse: using old breakpoints\n");
24911 0         0 $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
24912             }
24913              
24914             # let the continuation logic handle it if 2 lines
24915             else {
24916              
24917 12         63 my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
24918             $ri_ragged_break_list );
24919 12 50       47 ++$break_count if ($use_separate_first_term);
24920              
24921 12 50       61 if ( !$must_break_open_container ) {
24922 0 0 0     0 if ( $break_count <= 1
      0        
24923             || ( $is_lp_formatting && !$need_lp_break_open ) )
24924             {
24925 0         0 ${$rdo_not_break_apart} = 1;
  0         0  
24926             }
24927             }
24928             }
24929 12         72 return;
24930             }
24931              
24932             #--------------------------------------------
24933             # Section B4B: Go ahead and format as a table
24934             #--------------------------------------------
24935 82         457 $self->write_formatted_table( $number_of_fields, $comma_count,
24936             $rcomma_index, $use_separate_first_term );
24937              
24938 82         484 return;
24939             } ## end sub break_multiline_list
24940              
24941             sub table_layout_A {
24942              
24943 497     497 0 1148 my ($rhash_IN) = @_;
24944              
24945             # Find lengths of all list items needed to calculate page layout
24946              
24947             # Returns:
24948             # - nothing if this list is empty, or
24949             # - a ref to a hash containing some derived parameters
24950              
24951 497         1191 my $i_opening_paren = $rhash_IN->{i_opening_paren};
24952 497         936 my $i_closing_paren = $rhash_IN->{i_closing_paren};
24953 497         903 my $identifier_count = $rhash_IN->{identifier_count};
24954 497         887 my $rcomma_index = $rhash_IN->{rcomma_index};
24955 497         872 my $item_count = $rhash_IN->{item_count};
24956              
24957             # nothing to do if no commas seen
24958 497 50       1355 return if ( $item_count < 1 );
24959              
24960 497         1002 my $i_first_comma = $rcomma_index->[0];
24961 497         1104 my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
24962 497         798 my $i_last_comma = $i_true_last_comma;
24963 497 100       1291 if ( $i_last_comma >= $max_index_to_go ) {
24964 21         46 $item_count -= 1;
24965 21 100       101 return if ( $item_count < 1 );
24966 13         33 $i_last_comma = $rcomma_index->[ $item_count - 1 ];
24967             }
24968              
24969 489         908 my $comma_count = $item_count;
24970              
24971 489         986 my $ritem_lengths = [];
24972 489         1033 my $ri_term_begin = [];
24973 489         972 my $ri_term_end = [];
24974 489         923 my $ri_term_comma = [];
24975              
24976 489         1161 my $rmax_length = [ 0, 0 ];
24977              
24978 489         912 my $i_prev_plus;
24979             my $first_term_length;
24980 489         810 my $i = $i_opening_paren;
24981 489         877 my $is_odd = 1;
24982              
24983 489         1648 foreach my $j ( 0 .. $comma_count - 1 ) {
24984 1662         2360 $is_odd = 1 - $is_odd;
24985 1662         2390 $i_prev_plus = $i + 1;
24986 1662         2416 $i = $rcomma_index->[$j];
24987              
24988 1662 100 66     6087 my $i_term_end =
24989             ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' )
24990             ? $i - 2
24991             : $i - 1;
24992 1662 100       3609 my $i_term_begin =
24993             ( $types_to_go[$i_prev_plus] eq 'b' )
24994             ? $i_prev_plus + 1
24995             : $i_prev_plus;
24996 1662         2319 push @{$ri_term_begin}, $i_term_begin;
  1662         3118  
24997 1662         2373 push @{$ri_term_end}, $i_term_end;
  1662         2728  
24998 1662         2290 push @{$ri_term_comma}, $i;
  1662         2751  
24999              
25000             # note: currently adding 2 to all lengths (for comma and space)
25001 1662         3475 my $length =
25002             2 + token_sequence_length( $i_term_begin, $i_term_end );
25003 1662         2403 push @{$ritem_lengths}, $length;
  1662         2903  
25004              
25005 1662 100       3617 if ( $j == 0 ) {
25006 489         1103 $first_term_length = $length;
25007             }
25008             else {
25009              
25010 1173 100       3035 if ( $length > $rmax_length->[$is_odd] ) {
25011 562         1243 $rmax_length->[$is_odd] = $length;
25012             }
25013             }
25014             }
25015              
25016             # now we have to make a distinction between the comma count and item
25017             # count, because the item count will be one greater than the comma
25018             # count if the last item is not terminated with a comma
25019 489 100       1771 my $i_b =
25020             ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
25021             ? $i_last_comma + 1
25022             : $i_last_comma;
25023 489 100       1467 my $i_e =
25024             ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
25025             ? $i_closing_paren - 2
25026             : $i_closing_paren - 1;
25027 489         850 my $i_effective_last_comma = $i_last_comma;
25028              
25029 489         1185 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
25030              
25031 489 100       1622 if ( $last_item_length > 0 ) {
25032              
25033             # add 2 to length because other lengths include a comma and a blank
25034 416         889 $last_item_length += 2;
25035 416         661 push @{$ritem_lengths}, $last_item_length;
  416         889  
25036 416         739 push @{$ri_term_begin}, $i_b + 1;
  416         861  
25037 416         733 push @{$ri_term_end}, $i_e;
  416         789  
25038 416         686 push @{$ri_term_comma}, undef;
  416         803  
25039              
25040 416         1047 my $i_odd = $item_count % 2;
25041              
25042 416 100       1209 if ( $last_item_length > $rmax_length->[$i_odd] ) {
25043 360         731 $rmax_length->[$i_odd] = $last_item_length;
25044             }
25045              
25046 416         715 $item_count++;
25047 416         729 $i_effective_last_comma = $i_e + 1;
25048              
25049 416 100       1838 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
25050 144         305 $identifier_count++;
25051             }
25052             }
25053              
25054             # be sure we do not extend beyond the current list length
25055 489 100       1438 if ( $i_effective_last_comma >= $max_index_to_go ) {
25056 50         119 $i_effective_last_comma = $max_index_to_go - 1;
25057             }
25058              
25059             # Return the hash of derived variables.
25060             return {
25061              
25062             # Updated variables
25063 489         5773 _item_count_A => $item_count,
25064             _identifier_count_A => $identifier_count,
25065              
25066             # New variables
25067             _ritem_lengths => $ritem_lengths,
25068             _ri_term_begin => $ri_term_begin,
25069             _ri_term_end => $ri_term_end,
25070             _ri_term_comma => $ri_term_comma,
25071             _rmax_length => $rmax_length,
25072             _comma_count => $comma_count,
25073             _i_effective_last_comma => $i_effective_last_comma,
25074             _first_term_length => $first_term_length,
25075             _i_first_comma => $i_first_comma,
25076             _i_last_comma => $i_last_comma,
25077             _i_true_last_comma => $i_true_last_comma,
25078             };
25079              
25080             } ## end sub table_layout_A
25081              
25082             sub table_layout_B {
25083              
25084 135     135 0 432 my ( $self, $rhash_IN, $rhash_A, $is_lp_formatting ) = @_;
25085              
25086             # Determine variables for the best table layout, including
25087             # the best number of fields.
25088              
25089             # Returns:
25090             # - nothing if nothing more to do
25091             # - a ref to a hash containg some derived parameters
25092              
25093             # Variables from caller
25094 135         344 my $i_opening_paren = $rhash_IN->{i_opening_paren};
25095 135         303 my $list_type = $rhash_IN->{list_type};
25096 135         351 my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
25097 135         288 my $rcomma_index = $rhash_IN->{rcomma_index};
25098 135         328 my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
25099              
25100             # Table size variables
25101 135         266 my $comma_count = $rhash_A->{_comma_count};
25102 135         285 my $first_term_length = $rhash_A->{_first_term_length};
25103 135         297 my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma};
25104 135         274 my $i_first_comma = $rhash_A->{_i_first_comma};
25105 135         280 my $identifier_count = $rhash_A->{_identifier_count_A};
25106 135         284 my $item_count = $rhash_A->{_item_count_A};
25107 135         287 my $ri_term_begin = $rhash_A->{_ri_term_begin};
25108 135         269 my $ri_term_comma = $rhash_A->{_ri_term_comma};
25109 135         295 my $ri_term_end = $rhash_A->{_ri_term_end};
25110 135         301 my $ritem_lengths = $rhash_A->{_ritem_lengths};
25111 135         261 my $rmax_length = $rhash_A->{_rmax_length};
25112              
25113             # Specify if the list must have an even number of fields or not.
25114             # It is generally safest to assume an even number, because the
25115             # list items might be a hash list. But if we can be sure that
25116             # it is not a hash, then we can allow an odd number for more
25117             # flexibility.
25118             # 1 = odd field count ok, 2 = want even count
25119 135         250 my $odd_or_even = 2;
25120 135 100 66     1138 if (
      100        
      66        
      66        
25121             $identifier_count >= $item_count - 1
25122             || $is_assignment{$next_nonblank_type}
25123             || ( $list_type
25124             && $list_type ne '=>'
25125             && $list_type !~ /^[\:\?]$/ )
25126             )
25127             {
25128 32         109 $odd_or_even = 1;
25129             }
25130              
25131             # do we have a long first term which should be
25132             # left on a line by itself?
25133 135   33     816 my $use_separate_first_term = (
25134             $odd_or_even == 1 # only if we can use 1 field/line
25135             && $item_count > 3 # need several items
25136             && $first_term_length >
25137             2 * $rmax_length->[0] - 2 # need long first term
25138             && $first_term_length >
25139             2 * $rmax_length->[1] - 2 # need long first term
25140             );
25141              
25142             # or do we know from the type of list that the first term should
25143             # be placed alone?
25144 135 50       428 if ( !$use_separate_first_term ) {
25145 135 100       611 if ( $is_keyword_with_special_leading_term{$list_type} ) {
25146 4         19 $use_separate_first_term = 1;
25147              
25148             # should the container be broken open?
25149 4 100 33     26 if ( $item_count < 3 ) {
    50          
25150 3 50       15 if ( $i_first_comma - $i_opening_paren < 4 ) {
25151 3         8 ${$rdo_not_break_apart} = 1;
  3         9  
25152             }
25153             }
25154             elsif ($first_term_length < 20
25155             && $i_first_comma - $i_opening_paren < 4 )
25156             {
25157 1         6 my $columns = table_columns_available($i_first_comma);
25158 1 50       6 if ( $first_term_length < $columns ) {
25159 1         5 ${$rdo_not_break_apart} = 1;
  1         4  
25160             }
25161             }
25162             else {
25163             ## ok
25164             }
25165             }
25166             }
25167              
25168             # if so,
25169 135 100       416 if ($use_separate_first_term) {
25170              
25171             # ..set a break and update starting values
25172 4         18 $self->set_forced_breakpoint($i_first_comma);
25173 4         13 $item_count--;
25174              
25175             #---------------------------------------------------------------
25176             # Section B1A: Stop if one item remains ($i_first_comma = undef)
25177             #---------------------------------------------------------------
25178             # Fix for b1442: use '$item_count' here instead of '$comma_count'
25179             # to make the result independent of any trailing comma.
25180 4 100       37 return if ( $item_count <= 1 );
25181              
25182 1         3 $i_opening_paren = $i_first_comma;
25183 1         3 $i_first_comma = $rcomma_index->[1];
25184 1         3 shift @{$ritem_lengths};
  1         2  
25185 1         2 shift @{$ri_term_begin};
  1         2  
25186 1         2 shift @{$ri_term_end};
  1         3  
25187 1         2 shift @{$ri_term_comma};
  1         2  
25188             }
25189              
25190             # if not, update the metrics to include the first term
25191             else {
25192 131 100       417 if ( $first_term_length > $rmax_length->[0] ) {
25193 44         112 $rmax_length->[0] = $first_term_length;
25194             }
25195             }
25196              
25197             # Field width parameters
25198 132         353 my $pair_width = ( $rmax_length->[0] + $rmax_length->[1] );
25199 132 100       414 my $max_width =
25200             ( $rmax_length->[0] > $rmax_length->[1] )
25201             ? $rmax_length->[0]
25202             : $rmax_length->[1];
25203              
25204             # Number of free columns across the page width for laying out tables
25205 132         555 my $columns = table_columns_available($i_first_comma);
25206              
25207             # Patch for b1210 and b1216-b1218 when -vmll is set. If we are unable
25208             # to break after an opening paren, then the maximum line length for the
25209             # first line could be less than the later lines. So we need to reduce
25210             # the line length. Normally, we will get a break after an opening
25211             # paren, but in some cases we might not.
25212 132 0 33     536 if ( $rOpts_variable_maximum_line_length
      33        
25213             && $tokens_to_go[$i_opening_paren] eq '('
25214 0         0 && @{$ri_term_begin} )
25215             {
25216 0         0 my $ib = $ri_term_begin->[0];
25217 0         0 my $type = $types_to_go[$ib];
25218              
25219             # So far, the only known instance of this problem is when
25220             # a bareword follows an opening paren with -vmll
25221 0 0       0 if ( $type eq 'w' ) {
25222              
25223             # If a line starts with paren+space+terms, then its max length
25224             # could be up to ci+2-i spaces less than if the term went out
25225             # on a line after the paren. So..
25226 0         0 my $tol_w = max( 0,
25227             2 + $rOpts_continuation_indentation -
25228             $rOpts_indent_columns );
25229 0         0 $columns = max( 0, $columns - $tol_w );
25230              
25231             ## Here is the original b1210 fix, but it failed on b1216-b1218
25232             ##my $columns2 = table_columns_available($i_opening_paren);
25233             ##$columns = min( $columns, $columns2 );
25234             }
25235             }
25236              
25237             # Estimated maximum number of fields which fit this space.
25238             # This will be our first guess:
25239 132         546 my $number_of_fields_max =
25240             maximum_number_of_fields( $columns, $odd_or_even, $max_width,
25241             $pair_width );
25242 132         266 my $number_of_fields = $number_of_fields_max;
25243              
25244             # Find the best-looking number of fields.
25245             # This will be our second guess, if possible.
25246 132         637 my ( $number_of_fields_best, $ri_ragged_break_list,
25247             $new_identifier_count )
25248             = $self->study_list_complexity( $ri_term_begin, $ri_term_end,
25249             $ritem_lengths, $max_width );
25250              
25251 132 100 100     995 if ( $number_of_fields_best != 0
    50 33        
25252             && $number_of_fields_best < $number_of_fields_max )
25253             {
25254 18         53 $number_of_fields = $number_of_fields_best;
25255             }
25256              
25257             # fix b1427
25258             elsif ($number_of_fields_best > 1
25259             && $number_of_fields_best > $number_of_fields_max )
25260             {
25261 0         0 $number_of_fields_best = $number_of_fields_max;
25262             }
25263             else {
25264             ## ok
25265             }
25266              
25267             # If we are crowded and the -lp option is being used, try
25268             # to undo some indentation
25269 132 100 100     652 if (
      100        
25270             $is_lp_formatting
25271             && (
25272             $number_of_fields == 0
25273             || ( $number_of_fields == 1
25274             && $number_of_fields != $number_of_fields_best )
25275             )
25276             )
25277             {
25278 16         86 ( $number_of_fields, $number_of_fields_best, $columns ) =
25279             $self->lp_table_fix(
25280              
25281             $columns,
25282             $i_first_comma,
25283             $max_width,
25284             $number_of_fields,
25285             $number_of_fields_best,
25286             $odd_or_even,
25287             $pair_width,
25288             $ritem_lengths,
25289              
25290             );
25291             }
25292              
25293             # try for one column if two won't work
25294 132 100       443 if ( $number_of_fields <= 0 ) {
25295 46         1095 $number_of_fields = int( $columns / $max_width );
25296             }
25297              
25298             # The user can place an upper bound on the number of fields,
25299             # which can be useful for doing maintenance on tables
25300 132 50 33     545 if ( $rOpts_maximum_fields_per_table
25301             && $number_of_fields > $rOpts_maximum_fields_per_table )
25302             {
25303 0         0 $number_of_fields = $rOpts_maximum_fields_per_table;
25304             }
25305              
25306             # How many columns (characters) and lines would this container take
25307             # if no additional whitespace were added?
25308 132         490 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
25309             $i_effective_last_comma + 1 );
25310 132 50       559 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
  0         0  
25311 132         451 my $packed_lines = 1 + int( $packed_columns / $columns );
25312              
25313             #-----------------------------------------------------------------
25314             # Section B1B: Stop here if we did not compute a positive number of
25315             # fields. In this case we just have to bail out.
25316             #-----------------------------------------------------------------
25317 132 100       575 if ( $number_of_fields <= 0 ) {
25318              
25319 7         70 $self->set_emergency_comma_breakpoints(
25320              
25321             $number_of_fields_best,
25322             $rhash_IN,
25323             $comma_count,
25324             $i_first_comma,
25325              
25326             );
25327 7         27 return;
25328             }
25329              
25330             #------------------------------------------------------------------
25331             # Section B1B: We have a tentative field count that seems to work.
25332             # Now we must look more closely to determine if a table layout will
25333             # actually look okay.
25334             #------------------------------------------------------------------
25335              
25336             # How many lines will this require?
25337 125         304 my $formatted_lines = $item_count / ($number_of_fields);
25338 125 100       463 if ( $formatted_lines != int $formatted_lines ) {
25339 38         135 $formatted_lines = 1 + int $formatted_lines;
25340             }
25341              
25342             # So far we've been trying to fill out to the right margin. But
25343             # compact tables are easier to read, so let's see if we can use fewer
25344             # fields without increasing the number of lines.
25345 125         472 $number_of_fields = compactify_table( $item_count, $number_of_fields,
25346             $formatted_lines, $odd_or_even );
25347              
25348 125         287 my $formatted_columns;
25349              
25350 125 100       407 if ( $number_of_fields > 1 ) {
25351 61         243 $formatted_columns =
25352             ( $pair_width * ( int( $item_count / 2 ) ) +
25353             ( $item_count % 2 ) * $max_width );
25354             }
25355             else {
25356 64         153 $formatted_columns = $max_width * $item_count;
25357             }
25358 125 100       381 if ( $formatted_columns < $packed_columns ) {
25359 7         19 $formatted_columns = $packed_columns;
25360             }
25361              
25362             # Construce hash_B:
25363             return {
25364              
25365             # Updated variables
25366 125         1905 _i_first_comma_B => $i_first_comma,
25367             _i_opening_paren_B => $i_opening_paren,
25368             _item_count_B => $item_count,
25369              
25370             # New variables
25371             _columns => $columns,
25372             _formatted_columns => $formatted_columns,
25373             _formatted_lines => $formatted_lines,
25374             _max_width => $max_width,
25375             _new_identifier_count => $new_identifier_count,
25376             _number_of_fields => $number_of_fields,
25377             _odd_or_even => $odd_or_even,
25378             _packed_columns => $packed_columns,
25379             _packed_lines => $packed_lines,
25380             _pair_width => $pair_width,
25381             _ri_ragged_break_list => $ri_ragged_break_list,
25382             _use_separate_first_term => $use_separate_first_term,
25383             };
25384             } ## end sub table_layout_B
25385              
25386             sub lp_table_fix {
25387              
25388             # try to undo some -lp indentation to improve table formatting
25389              
25390             my (
25391              
25392 16     16 0 70 $self, #
25393              
25394             $columns,
25395             $i_first_comma,
25396             $max_width,
25397             $number_of_fields,
25398             $number_of_fields_best,
25399             $odd_or_even,
25400             $pair_width,
25401             $ritem_lengths,
25402              
25403             ) = @_;
25404              
25405 16         79 my $available_spaces =
25406             $self->get_available_spaces_to_go($i_first_comma);
25407 16 100       69 if ( $available_spaces > 0 ) {
25408              
25409 9         35 my $spaces_wanted = $max_width - $columns; # for 1 field
25410              
25411 9 100       33 if ( $number_of_fields_best == 0 ) {
25412 5         23 $number_of_fields_best =
25413             get_maximum_fields_wanted($ritem_lengths);
25414             }
25415              
25416 9 100       35 if ( $number_of_fields_best != 1 ) {
25417 3         10 my $spaces_wanted_2 = 1 + $pair_width - $columns; # for 2 fields
25418 3 50       18 if ( $available_spaces > $spaces_wanted_2 ) {
25419 3         8 $spaces_wanted = $spaces_wanted_2;
25420             }
25421             }
25422              
25423 9 100       53 if ( $spaces_wanted > 0 ) {
25424 6         43 my $deleted_spaces =
25425             $self->reduce_lp_indentation( $i_first_comma,
25426             $spaces_wanted );
25427              
25428             # redo the math
25429 6 100       23 if ( $deleted_spaces > 0 ) {
25430 5         15 $columns = table_columns_available($i_first_comma);
25431 5         37 $number_of_fields =
25432             maximum_number_of_fields( $columns, $odd_or_even,
25433             $max_width, $pair_width );
25434              
25435 5 50 66     48 if ( $number_of_fields_best == 1
25436             && $number_of_fields >= 1 )
25437             {
25438 0         0 $number_of_fields = $number_of_fields_best;
25439             }
25440             }
25441             }
25442             }
25443 16         61 return ( $number_of_fields, $number_of_fields_best, $columns );
25444             } ## end sub lp_table_fix
25445              
25446             sub write_formatted_table {
25447              
25448             # Write a table of comma separated items with fixed number of fields
25449 82     82 0 312 my ( $self, $number_of_fields, $comma_count, $rcomma_index,
25450             $use_separate_first_term )
25451             = @_;
25452              
25453 82         551 write_logfile_entry(
25454             "List: auto formatting with $number_of_fields fields/row\n");
25455              
25456 82 50       387 my $j_first_break =
25457             $use_separate_first_term
25458             ? $number_of_fields
25459             : $number_of_fields - 1;
25460              
25461 82         184 my $j = $j_first_break;
25462 82         346 while ( $j < $comma_count ) {
25463 245         432 my $i_comma = $rcomma_index->[$j];
25464 245         809 $self->set_forced_breakpoint($i_comma);
25465 245         657 $j += $number_of_fields;
25466             }
25467 82         303 return;
25468             } ## end sub write_formatted_table
25469              
25470             } ## end closure set_comma_breakpoint_final
25471              
25472             sub study_list_complexity {
25473              
25474             # Look for complex tables which should be formatted with one term per line.
25475             # Returns the following:
25476             #
25477             # \@i_ragged_break_list = list of good breakpoints to avoid lines
25478             # which are hard to read
25479             # $number_of_fields_best = suggested number of fields based on
25480             # complexity; = 0 if any number may be used.
25481             #
25482 132     132 0 495 my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
25483 132         260 my $item_count = @{$ri_term_begin};
  132         290  
25484 132         272 my $complex_item_count = 0;
25485 132         264 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
25486 132         223 my $i_max = @{$ritem_lengths} - 1;
  132         330  
25487             ##my @item_complexity;
25488              
25489 132         242 my $i_last_last_break = -3;
25490 132         239 my $i_last_break = -2;
25491 132         246 my @i_ragged_break_list;
25492              
25493 132         308 my $definitely_complex = 30;
25494 132         261 my $definitely_simple = 12;
25495 132         284 my $quote_count = 0;
25496              
25497 132         373 for my $i ( 0 .. $i_max ) {
25498 938         1519 my $ib = $ri_term_begin->[$i];
25499 938         1393 my $ie = $ri_term_end->[$i];
25500              
25501             # define complexity: start with the actual term length
25502 938         1328 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
25503              
25504             ##TBD: join types here and check for variations
25505             ##my $str=join "", @tokens_to_go[$ib..$ie];
25506              
25507 938         1284 my $is_quote = 0;
25508 938 100       3085 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
    100          
25509 298         448 $is_quote = 1;
25510 298         411 $quote_count++;
25511             }
25512             elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
25513 36         82 $quote_count++;
25514             }
25515             else {
25516             ## ok
25517             }
25518              
25519 938 100       1937 if ( $ib eq $ie ) {
25520 727 100 100     2268 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
25521 50         99 $complex_item_count++;
25522 50         99 $weighted_length *= 2;
25523             }
25524             else {
25525             }
25526             }
25527             else {
25528 211 100   595   1783 if ( first { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
  595         1153  
25529 181         304 $complex_item_count++;
25530 181         305 $weighted_length *= 2;
25531             }
25532 211 100   2393   1192 if ( first { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
  2393         3280  
25533 24         55 $weighted_length += 4;
25534             }
25535             }
25536              
25537             # add weight for extra tokens.
25538 938         1744 $weighted_length += 2 * ( $ie - $ib );
25539              
25540             ## my $BUB = join '', @tokens_to_go[$ib..$ie];
25541             ## print "# COMPLEXITY:$weighted_length $BUB\n";
25542              
25543             ##push @item_complexity, $weighted_length;
25544              
25545             # now mark a ragged break after this item it if it is 'long and
25546             # complex':
25547 938 100 100     3029 if ( $weighted_length >= $definitely_complex ) {
    100 66        
25548              
25549             # if we broke after the previous term
25550             # then break before it too
25551 239 100 100     1483 if ( $i_last_break == $i - 1
      100        
25552             && $i > 1
25553             && $i_last_last_break != $i - 2 )
25554             {
25555              
25556             ## TODO: don't strand a small term
25557 21         54 pop @i_ragged_break_list;
25558 21         53 push @i_ragged_break_list, $i - 2;
25559 21         47 push @i_ragged_break_list, $i - 1;
25560             }
25561              
25562 239         500 push @i_ragged_break_list, $i;
25563 239         377 $i_last_last_break = $i_last_break;
25564 239         526 $i_last_break = $i;
25565             }
25566              
25567             # don't break before a small last term -- it will
25568             # not look good on a line by itself.
25569             elsif ($i == $i_max
25570             && $i_last_break == $i - 1
25571             && $weighted_length <= $definitely_simple )
25572             {
25573 11         38 pop @i_ragged_break_list;
25574             }
25575             else {
25576             ## ok
25577             }
25578             }
25579              
25580 132         441 my $identifier_count = $i_max + 1 - $quote_count;
25581              
25582             # Need more tuning here..
25583 132 100 100     1024 if ( $max_width > 12
      66        
25584             && $complex_item_count > $item_count / 2
25585             && $number_of_fields_best != 2 )
25586             {
25587 49         119 $number_of_fields_best = 1;
25588             }
25589              
25590 132         623 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
25591             } ## end sub study_list_complexity
25592              
25593             sub get_maximum_fields_wanted {
25594              
25595             # Not all tables look good with more than one field of items.
25596             # This routine looks at a table and decides if it should be
25597             # formatted with just one field or not.
25598             # This coding is still under development.
25599 5     5 0 16 my ($ritem_lengths) = @_;
25600              
25601 5         13 my $number_of_fields_best = 0;
25602              
25603             # For just a few items, we tentatively assume just 1 field.
25604 5         9 my $item_count = @{$ritem_lengths};
  5         11  
25605 5 100       25 if ( $item_count <= 5 ) {
25606 2         7 $number_of_fields_best = 1;
25607             }
25608              
25609             # For larger tables, look at it both ways and see what looks best
25610             else {
25611              
25612 3         7 my $is_odd = 1;
25613 3         8 my @max_length = ( 0, 0 );
25614 3         10 my @last_length_2 = ( undef, undef );
25615 3         7 my @first_length_2 = ( undef, undef );
25616 3         22 my $last_length = undef;
25617 3         7 my $total_variation_1 = 0;
25618 3         5 my $total_variation_2 = 0;
25619 3         8 my @total_variation_2 = ( 0, 0 );
25620              
25621 3         9 foreach my $j ( 0 .. $item_count - 1 ) {
25622              
25623 24         34 $is_odd = 1 - $is_odd;
25624 24         37 my $length = $ritem_lengths->[$j];
25625 24 100       47 if ( $length > $max_length[$is_odd] ) {
25626 9         18 $max_length[$is_odd] = $length;
25627             }
25628              
25629 24 100       43 if ( defined($last_length) ) {
25630 21         34 my $dl = abs( $length - $last_length );
25631 21         26 $total_variation_1 += $dl;
25632             }
25633 24         31 $last_length = $length;
25634              
25635 24         34 my $ll = $last_length_2[$is_odd];
25636 24 100       43 if ( defined($ll) ) {
25637 18         29 my $dl = abs( $length - $ll );
25638 18         28 $total_variation_2[$is_odd] += $dl;
25639             }
25640             else {
25641 6         10 $first_length_2[$is_odd] = $length;
25642             }
25643 24         49 $last_length_2[$is_odd] = $length;
25644             }
25645 3         15 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
25646              
25647 3 50       16 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
    50          
25648 3 50       24 if ( $total_variation_2 >= $factor * $total_variation_1 ) {
25649 0         0 $number_of_fields_best = 1;
25650             }
25651             }
25652 5         14 return ($number_of_fields_best);
25653             } ## end sub get_maximum_fields_wanted
25654              
25655             sub table_columns_available {
25656 312     312 0 656 my $i_first_comma = shift;
25657 312         1450 my $columns =
25658             $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
25659             leading_spaces_to_go($i_first_comma);
25660              
25661             # Patch: the vertical formatter does not line up lines whose lengths
25662             # exactly equal the available line length because of allowances
25663             # that must be made for side comments. Therefore, the number of
25664             # available columns is reduced by 1 character.
25665 312         647 $columns -= 1;
25666 312         743 return $columns;
25667             } ## end sub table_columns_available
25668              
25669             sub maximum_number_of_fields {
25670              
25671             # how many fields will fit in the available space?
25672 137     137 0 434 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
25673 137         461 my $max_pairs = int( $columns / $pair_width );
25674 137         308 my $number_of_fields = $max_pairs * 2;
25675 137 100 100     651 if ( $odd_or_even == 1
25676             && $max_pairs * $pair_width + $max_width <= $columns )
25677             {
25678 7         21 $number_of_fields++;
25679             }
25680 137         338 return $number_of_fields;
25681             } ## end sub maximum_number_of_fields
25682              
25683             sub compactify_table {
25684              
25685             # given a table with a certain number of fields and a certain number
25686             # of lines, see if reducing the number of fields will make it look
25687             # better.
25688 125     125 0 399 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
25689 125 100 66     632 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
25690              
25691 43         110 my $min_fields = $number_of_fields;
25692              
25693 43   66     327 while ($min_fields >= $odd_or_even
25694             && $min_fields * $formatted_lines >= $item_count )
25695             {
25696 53         120 $number_of_fields = $min_fields;
25697 53         219 $min_fields -= $odd_or_even;
25698             }
25699             }
25700 125         360 return $number_of_fields;
25701             } ## end sub compactify_table
25702              
25703             sub set_ragged_breakpoints {
25704              
25705             # Set breakpoints in a list that cannot be formatted nicely as a
25706             # table.
25707 38     38 0 132 my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
25708              
25709 38         128 my $break_count = 0;
25710 38         81 foreach ( @{$ri_ragged_break_list} ) {
  38         150  
25711 70         127 my $j = $ri_term_comma->[$_];
25712 70 100       229 if ($j) {
25713 38         133 $self->set_forced_breakpoint($j);
25714 38         91 $break_count++;
25715             }
25716             }
25717 38         141 return $break_count;
25718             } ## end sub set_ragged_breakpoints
25719              
25720             sub copy_old_breakpoints {
25721 94     94 0 235 my ( $self, $i_first_comma, $i_last_comma ) = @_;
25722              
25723             # We are formatting a list and have decided to make comma breaks
25724             # the same as in the input file.
25725 94         267 for my $i ( $i_first_comma .. $i_last_comma ) {
25726 1177 100       2090 if ( $old_breakpoint_to_go[$i] ) {
25727              
25728             # If the comma style is under certain controls, and if this is a
25729             # comma breakpoint with the comma is at the beginning of the next
25730             # line, then we must pass that index instead. This will allow sub
25731             # set_forced_breakpoints to check and follow the user settings. This
25732             # produces a uniform style and can prevent instability (b1422).
25733             #
25734             # The flag '$controlled_comma_style' will be set if the user
25735             # entered any of -wbb=',' -wba=',' -kbb=',' -kba=','. It is not
25736             # needed or set for the -boc flag.
25737 121         277 my $ibreak = $i;
25738 121 50 33     413 if ( $types_to_go[$ibreak] ne ',' && $controlled_comma_style ) {
25739 0         0 my $index = $inext_to_go[$ibreak];
25740 0 0 0     0 if ( $index > $ibreak && $types_to_go[$index] eq ',' ) {
25741 0         0 $ibreak = $index;
25742             }
25743             }
25744 121         312 $self->set_forced_breakpoint($ibreak);
25745             }
25746             }
25747 94         220 return;
25748             } ## end sub copy_old_breakpoints
25749              
25750             sub set_nobreaks {
25751 355     355 0 917 my ( $self, $i, $j ) = @_;
25752 355 50 33     2222 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
      33        
25753              
25754 355         603 0 && do {
25755             my ( $a, $b, $c ) = caller();
25756             print {*STDOUT}
25757             "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
25758             };
25759              
25760 355         1996 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
25761             }
25762              
25763             # shouldn't happen; non-critical error
25764             else {
25765 0         0 if (DEVEL_MODE) {
25766             my ( $a, $b, $c ) = caller();
25767             Fault(<<EOM);
25768             NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go
25769             EOM
25770             }
25771             }
25772 355         881 return;
25773             } ## end sub set_nobreaks
25774              
25775             ###############################################
25776             # CODE SECTION 12: Code for setting indentation
25777             ###############################################
25778              
25779             sub token_sequence_length {
25780              
25781             # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
25782 3943     3943 0 7357 my ( $ibeg, $iend ) = @_;
25783              
25784             # fix possible negative starting index
25785 3943 50       7871 if ( $ibeg < 0 ) { $ibeg = 0 }
  0         0  
25786              
25787             # returns 0 if index range is empty (some subs assume this)
25788 3943 100       7677 if ( $ibeg > $iend ) {
25789 74         224 return 0;
25790             }
25791              
25792 3869         8945 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
25793             } ## end sub token_sequence_length
25794              
25795             sub total_line_length {
25796              
25797             # return length of a line of tokens ($ibeg .. $iend)
25798 1861     1861 0 4652 my ( $ibeg, $iend ) = @_;
25799              
25800             # get the leading spaces on this line ...
25801 1861         3189 my $spaces = $leading_spaces_to_go[$ibeg];
25802 1861 100       4261 if ( ref($spaces) ) { $spaces = $spaces->get_spaces() }
  603         1187  
25803              
25804             # ... then add the net token length
25805 1861         4625 return $spaces + $summed_lengths_to_go[ $iend + 1 ] -
25806             $summed_lengths_to_go[$ibeg];
25807              
25808             } ## end sub total_line_length
25809              
25810             sub excess_line_length {
25811              
25812             # return number of characters by which a line of tokens ($ibeg..$iend)
25813             # exceeds the allowable line length.
25814             # NOTE: profiling shows that efficiency of this routine is essential.
25815              
25816 11578     11578 0 22454 my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
25817              
25818             # Start with the leading spaces on this line ...
25819 11578         18026 my $excess = $leading_spaces_to_go[$ibeg];
25820 11578 100       22557 if ( ref($excess) ) { $excess = $excess->get_spaces() }
  871         2162  
25821              
25822             # ... and include right weld lengths unless requested not to
25823 11578 100 100     23283 if ( $total_weld_count
      100        
25824             && $type_sequence_to_go[$iend]
25825             && !$ignore_right_weld )
25826             {
25827 231         571 my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
25828 231 100       579 $excess += $wr if defined($wr);
25829             }
25830              
25831             # ... then add the net token length, minus the maximum length
25832 11578         30224 return $excess +
25833             $summed_lengths_to_go[ $iend + 1 ] -
25834             $summed_lengths_to_go[$ibeg] -
25835             $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
25836              
25837             } ## end sub excess_line_length
25838              
25839             sub get_spaces {
25840              
25841             # return the number of leading spaces associated with an indentation
25842             # variable $indentation is either a constant number of spaces or an object
25843             # with a get_spaces method.
25844 1955     1955 0 3240 my $indentation = shift;
25845 1955 100       5765 return ref($indentation) ? $indentation->get_spaces() : $indentation;
25846             } ## end sub get_spaces
25847              
25848             sub get_recoverable_spaces {
25849              
25850             # return the number of spaces (+ means shift right, - means shift left)
25851             # that we would like to shift a group of lines with the same indentation
25852             # to get them to line up with their opening parens
25853 38     38 0 82 my $indentation = shift;
25854 38 50       159 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
25855             } ## end sub get_recoverable_spaces
25856              
25857             sub get_available_spaces_to_go {
25858              
25859 16     16 0 45 my ( $self, $ii ) = @_;
25860 16         35 my $item = $leading_spaces_to_go[$ii];
25861              
25862             # return the number of available leading spaces associated with an
25863             # indentation variable. $indentation is either a constant number of
25864             # spaces or an object with a get_available_spaces method.
25865 16 50       216 return ref($item) ? $item->get_available_spaces() : 0;
25866             } ## end sub get_available_spaces_to_go
25867              
25868             { ## begin closure set_lp_indentation
25869              
25870 39     39   395 use constant DEBUG_LP => 0;
  39         99  
  39         5424  
25871              
25872             # Stack of -lp index objects which survives between batches.
25873             my $rLP;
25874             my $max_lp_stack;
25875              
25876             # The predicted position of the next opening container which may start
25877             # an -lp indentation level. This survives between batches.
25878             my $lp_position_predictor;
25879              
25880 0         0 BEGIN {
25881              
25882             # Index names for the -lp stack variables.
25883             # Do not combine with other BEGIN blocks (c101).
25884              
25885 39     39   11535 my $i = 0;
25886             use constant {
25887 39         3787 _lp_ci_level_ => $i++,
25888             _lp_level_ => $i++,
25889             _lp_object_ => $i++,
25890             _lp_container_seqno_ => $i++,
25891             _lp_space_count_ => $i++,
25892 39     39   365 };
  39         98  
25893             } ## end BEGIN
25894              
25895             sub initialize_lp_vars {
25896              
25897             # initialize gnu variables for a new file;
25898             # must be called once at the start of a new file.
25899              
25900 561     561 0 1536 $lp_position_predictor = 0;
25901 561         1291 $max_lp_stack = 0;
25902              
25903             # we can turn off -lp if all levels will be at or above the cutoff
25904 561 100       2069 if ( $high_stress_level <= 1 ) {
25905 6         13 $rOpts_line_up_parentheses = 0;
25906 6         19 $rOpts_extended_line_up_parentheses = 0;
25907             }
25908              
25909             # fix for b1459: -naws adds stress for -xlp
25910 561 100 100     2445 if ( $high_stress_level <= 2 && !$rOpts_add_whitespace ) {
25911 6         14 $rOpts_extended_line_up_parentheses = 0;
25912             }
25913              
25914 561         2406 $rLP = [];
25915              
25916             # initialize the leading whitespace stack to negative levels
25917             # so that we can never run off the end of the stack
25918 561         1863 $rLP->[$max_lp_stack]->[_lp_ci_level_] = -1;
25919 561         1624 $rLP->[$max_lp_stack]->[_lp_level_] = -1;
25920 561         1409 $rLP->[$max_lp_stack]->[_lp_object_] = undef;
25921 561         1689 $rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT;
25922 561         1447 $rLP->[$max_lp_stack]->[_lp_space_count_] = 0;
25923              
25924 561         1134 return;
25925             } ## end sub initialize_lp_vars
25926              
25927             # hashes for efficient testing
25928             my %hash_test1;
25929             my %hash_test2;
25930             my %hash_test3;
25931              
25932             BEGIN {
25933 39     39   269 my @q = qw< } ) ] >;
25934 39         233 @hash_test1{@q} = (1) x scalar(@q);
25935 39         187 @q = qw(: ? f);
25936 39         118 push @q, ',';
25937 39         221 @hash_test2{@q} = (1) x scalar(@q);
25938 39         140 @q = qw( . || && );
25939 39         278174 @hash_test3{@q} = (1) x scalar(@q);
25940             } ## end BEGIN
25941              
25942             # shared variables, re-initialized for each batch
25943             my $rlp_object_list;
25944             my $max_lp_object_list;
25945             my %lp_comma_count;
25946             my %lp_arrow_count;
25947             my $space_count;
25948             my $current_level;
25949             my $current_ci_level;
25950             my $ii_begin_line;
25951             my $in_lp_mode;
25952             my $stack_changed;
25953             my $K_last_nonblank;
25954             my $last_nonblank_token;
25955             my $last_nonblank_type;
25956             my $last_last_nonblank_type;
25957              
25958             sub set_lp_indentation {
25959              
25960 302     302 0 600 my ($self) = @_;
25961              
25962             #------------------------------------------------------------------
25963             # Define the leading whitespace for all tokens in the current batch
25964             # when the -lp formatting is selected.
25965             #------------------------------------------------------------------
25966              
25967             # Returns number of tokens in this batch which have leading spaces
25968             # defined by an lp object:
25969 302         496 my $lp_object_count_this_batch = 0;
25970              
25971             # Safety check, should not be needed:
25972 302 50 33     1868 if ( !$rOpts_line_up_parentheses
      33        
25973             || !defined($max_index_to_go)
25974             || $max_index_to_go < 0 )
25975             {
25976 0         0 return $lp_object_count_this_batch;
25977             }
25978              
25979             # List of -lp indentation objects created in this batch
25980 302         792 $rlp_object_list = [];
25981 302         543 $max_lp_object_list = -1;
25982              
25983 302         664 %lp_comma_count = ();
25984 302         502 %lp_arrow_count = ();
25985 302         506 $space_count = undef;
25986 302         432 $current_level = undef;
25987 302         465 $current_ci_level = undef;
25988 302         455 $ii_begin_line = 0;
25989 302         458 $in_lp_mode = 0;
25990 302         451 $stack_changed = 1;
25991 302         441 $K_last_nonblank = undef;
25992 302         493 $last_nonblank_token = EMPTY_STRING;
25993 302         467 $last_nonblank_type = EMPTY_STRING;
25994 302         475 $last_last_nonblank_type = EMPTY_STRING;
25995              
25996 302         489 my %last_lp_equals = ();
25997              
25998 302         528 my $rLL = $self->[_rLL_];
25999 302         571 my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_];
26000              
26001 302         1995 my $imin = 0;
26002              
26003             # The 'starting_in_quote' flag means that the first token is the first
26004             # token of a line and it is also the continuation of some kind of
26005             # multi-line quote or pattern. It must have no added leading
26006             # whitespace, so we can skip it.
26007 302 100       675 if ($starting_in_quote) {
26008 2         3 $imin += 1;
26009             }
26010              
26011 302         558 my $Kpnb = $K_to_go[0] - 1;
26012 302 100 100     1456 if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
26013 210         392 $Kpnb -= 1;
26014             }
26015 302 100 66     1284 if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) {
26016 272         461 $K_last_nonblank = $Kpnb;
26017             }
26018              
26019 302 100       695 if ( defined($K_last_nonblank) ) {
26020 272         524 $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
26021 272         487 $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
26022             }
26023              
26024             #-----------------------------------
26025             # Loop over all tokens in this batch
26026             #-----------------------------------
26027 302         777 foreach my $ii ( $imin .. $max_index_to_go ) {
26028              
26029 5767         9354 my $type = $types_to_go[$ii];
26030 5767         8589 my $token = $tokens_to_go[$ii];
26031 5767         7934 my $level = $levels_to_go[$ii];
26032 5767         7489 my $ci_level = $ci_levels_to_go[$ii];
26033 5767         8024 my $total_depth = $nesting_depth_to_go[$ii];
26034              
26035             # get the top state from the stack if it has changed
26036 5767 100       9907 if ($stack_changed) {
26037 1757         2667 my $rLP_top = $rLP->[$max_lp_stack];
26038 1757         2565 my $lp_object = $rLP_top->[_lp_object_];
26039 1757 100       5042 if ($lp_object) {
26040             ( $space_count, $current_level, $current_ci_level ) =
26041 808         1124 @{ $lp_object->get_spaces_level_ci() };
  808         2270  
26042             }
26043             else {
26044 949         1391 $current_ci_level = $rLP_top->[_lp_ci_level_];
26045 949         1433 $current_level = $rLP_top->[_lp_level_];
26046 949         1363 $space_count = $rLP_top->[_lp_space_count_];
26047             }
26048 1757         3232 $stack_changed = 0;
26049             }
26050              
26051             #------------------------------------------------------------
26052             # Break at a previous '=' if necessary to control line length
26053             #------------------------------------------------------------
26054 5767 100 66     16837 if ( $type eq '{' || $type eq '(' ) {
26055 335         919 $lp_comma_count{ $total_depth + 1 } = 0;
26056 335         704 $lp_arrow_count{ $total_depth + 1 } = 0;
26057              
26058             # If we come to an opening token after an '=' token of some
26059             # type, see if it would be helpful to 'break' after the '=' to
26060             # save space
26061 335         641 my $ii_last_equals = $last_lp_equals{$total_depth};
26062 335 100       789 if ($ii_last_equals) {
26063 141         500 $self->lp_equals_break_check( $ii, $ii_last_equals );
26064             }
26065             }
26066              
26067             #------------------------
26068             # Handle decreasing depth
26069             #------------------------
26070             # Note that one token may have both decreasing and then increasing
26071             # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
26072             # in this example we would first go back to (1,0) then up to (2,0)
26073             # in a single call.
26074 5767 100 100     15737 if ( $level < $current_level || $ci_level < $current_ci_level ) {
26075 935         2485 $self->lp_decreasing_depth($ii);
26076             }
26077              
26078             #------------------------
26079             # handle increasing depth
26080             #------------------------
26081 5767 100 100     16248 if ( $level > $current_level || $ci_level > $current_ci_level ) {
26082 1485         3313 $self->lp_increasing_depth($ii);
26083             }
26084              
26085             #------------------
26086             # Handle all tokens
26087             #------------------
26088 5767 100       10612 if ( $type ne 'b' ) {
26089              
26090             # Count commas and look for non-list characters. Once we see a
26091             # non-list character, we give up and don't look for any more
26092             # commas.
26093 3772 100       10110 if ( $type eq '=>' ) {
    100          
    100          
26094 227         477 $lp_arrow_count{$total_depth}++;
26095              
26096             # remember '=>' like '=' for estimating breaks (but see
26097             # above note for b1035)
26098 227         457 $last_lp_equals{$total_depth} = $ii;
26099             }
26100              
26101             elsif ( $type eq ',' ) {
26102 615         1124 $lp_comma_count{$total_depth}++;
26103             }
26104              
26105             elsif ( $is_assignment{$type} ) {
26106 85         292 $last_lp_equals{$total_depth} = $ii;
26107             }
26108             else {
26109             ## not a special type
26110             }
26111              
26112             # this token might start a new line if ..
26113 3772 100 66     45150 if (
      66        
26114             $ii > $ii_begin_line
26115              
26116             && (
26117              
26118             # this is the first nonblank token of the line
26119             $ii == 1 && $types_to_go[0] eq 'b'
26120              
26121             # or previous character was one of these:
26122             # /^([\:\?\,f])$/
26123             || $hash_test2{$last_nonblank_type}
26124              
26125             # or previous character was opening and this is not
26126             # closing
26127             || ( $last_nonblank_type eq '{' && $type ne '}' )
26128             || ( $last_nonblank_type eq '(' and $type ne ')' )
26129              
26130             # or this token is one of these:
26131             # /^([\.]|\|\||\&\&)$/
26132             || $hash_test3{$type}
26133              
26134             # or this is a closing structure
26135             || ( $last_nonblank_type eq '}'
26136             && $last_nonblank_token eq $last_nonblank_type )
26137              
26138             # or previous token was keyword 'return'
26139             || (
26140             $last_nonblank_type eq 'k'
26141             && ( $last_nonblank_token eq 'return'
26142             && $type ne '{' )
26143             )
26144              
26145             # or starting a new line at certain keywords is fine
26146             || ( $type eq 'k'
26147             && $is_if_unless_and_or_last_next_redo_return{
26148             $token} )
26149              
26150             # or this is after an assignment after a closing
26151             # structure
26152             || (
26153             $is_assignment{$last_nonblank_type}
26154             && (
26155             # /^[\}\)\]]$/
26156             $hash_test1{$last_last_nonblank_type}
26157              
26158             # and it is significantly to the right
26159             || $lp_position_predictor > (
26160             $maximum_line_length_at_level[$level] -
26161             $rOpts_maximum_line_length / 2
26162             )
26163             )
26164             )
26165             )
26166             )
26167             {
26168 1057         2797 check_for_long_gnu_style_lines($ii);
26169 1057         1456 $ii_begin_line = $ii;
26170              
26171             # back up 1 token if we want to break before that type
26172             # otherwise, we may strand tokens like '?' or ':' on a line
26173 1057 50       2031 if ( $ii_begin_line > 0 ) {
26174             my $wbb =
26175             $last_nonblank_type eq 'k'
26176             ? $want_break_before{$last_nonblank_token}
26177 1057 100       2619 : $want_break_before{$last_nonblank_type};
26178 1057 100       2160 $ii_begin_line-- if ($wbb);
26179             }
26180             }
26181              
26182 3772         6144 $K_last_nonblank = $K_to_go[$ii];
26183 3772         5435 $last_last_nonblank_type = $last_nonblank_type;
26184 3772         4975 $last_nonblank_type = $type;
26185 3772         5200 $last_nonblank_token = $token;
26186              
26187             } ## end if ( $type ne 'b' )
26188              
26189             # remember the predicted position of this token on the output line
26190 5767 100       9562 if ( $ii > $ii_begin_line ) {
26191              
26192             ## NOTE: this is a critical loop - the following call has been
26193             ## expanded for about 2x speedup:
26194             ## $lp_position_predictor =
26195             ## total_line_length( $ii_begin_line, $ii );
26196              
26197 4414         6246 my $indentation = $leading_spaces_to_go[$ii_begin_line];
26198 4414 100       8420 if ( ref($indentation) ) {
26199 2746         6254 $indentation = $indentation->get_spaces();
26200             }
26201             $lp_position_predictor =
26202 4414         7976 $indentation +
26203             $summed_lengths_to_go[ $ii + 1 ] -
26204             $summed_lengths_to_go[$ii_begin_line];
26205             }
26206             else {
26207 1353         2078 $lp_position_predictor =
26208             $space_count + $token_lengths_to_go[$ii];
26209             }
26210              
26211             # Store the indentation object for this token.
26212             # This allows us to manipulate the leading whitespace
26213             # (in case we have to reduce indentation to fit a line) without
26214             # having to change any token values.
26215              
26216             #---------------------------------------------------------------
26217             # replace leading whitespace with indentation objects where used
26218             #---------------------------------------------------------------
26219 5767 100       11969 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
26220 3398         4494 $lp_object_count_this_batch++;
26221 3398         4672 my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
26222 3398         4709 $leading_spaces_to_go[$ii] = $lp_object;
26223 3398 100 66     12489 if ( $max_lp_stack > 0
      100        
26224             && $ci_level
26225             && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
26226             {
26227 1379         2974 $reduced_spaces_to_go[$ii] =
26228             $rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
26229             }
26230             else {
26231 2019         3651 $reduced_spaces_to_go[$ii] = $lp_object;
26232             }
26233             }
26234             } ## end loop over all tokens in this batch
26235              
26236             undo_incomplete_lp_indentation()
26237 302 100       1181 if ( !$rOpts_extended_line_up_parentheses );
26238              
26239 302         1226 return $lp_object_count_this_batch;
26240             } ## end sub set_lp_indentation
26241              
26242             sub lp_equals_break_check {
26243              
26244 141     141 0 355 my ( $self, $ii, $ii_last_equals ) = @_;
26245              
26246             # If we come to an opening token after an '=' token of some
26247             # type, see if it would be helpful to 'break' after the '=' to
26248             # save space.
26249              
26250             # Given:
26251             # $ii = index of an opening token in the output batch
26252             # $ii_begin_line = index of token starting next output line
26253             # Update:
26254             # $lp_position_predictor - updated position predictor
26255             # $ii_begin_line = updated starting token index
26256              
26257             # Skip an empty set of parens, such as after channel():
26258             # my $exchange = $self->_channel()->exchange(
26259             # This fixes issues b1318 b1322 b1323 b1328
26260 141         234 my $is_empty_container;
26261 141 100 66     648 if ( $ii_last_equals && $ii < $max_index_to_go ) {
26262 132         323 my $seqno = $type_sequence_to_go[$ii];
26263 132         251 my $inext_nb = $ii + 1;
26264 132 100       427 $inext_nb++
26265             if ( $types_to_go[$inext_nb] eq 'b' );
26266 132         261 my $seqno_nb = $type_sequence_to_go[$inext_nb];
26267 132   100     637 $is_empty_container = $seqno && $seqno_nb && $seqno_nb == $seqno;
26268             }
26269              
26270 141 100 66     861 if ( $ii_last_equals
      66        
26271             && $ii_last_equals > $ii_begin_line
26272             && !$is_empty_container )
26273             {
26274              
26275 104         230 my $seqno = $type_sequence_to_go[$ii];
26276              
26277             # find the position if we break at the '='
26278 104         182 my $i_test = $ii_last_equals;
26279              
26280             # Fix for issue b1229, check if want break before this token
26281             # Fix for issue b1356, if i_test is a blank, the leading spaces may
26282             # be incorrect (if it was an interline blank).
26283             # Fix for issue b1357 .. b1370, i_test must be prev nonblank
26284             # ( the ci value for blanks can vary )
26285             # See also case b223
26286             # Fix for issue b1371-b1374 : all of these and the above are fixed
26287             # by simply backing up one index and setting the leading spaces of
26288             # a blank equal to that of the equals.
26289 104 50       481 if ( $want_break_before{ $types_to_go[$i_test] } ) {
    50          
26290 0         0 $i_test -= 1;
26291 0 0       0 $leading_spaces_to_go[$i_test] =
26292             $leading_spaces_to_go[$ii_last_equals]
26293             if ( $types_to_go[$i_test] eq 'b' );
26294             }
26295 104         186 elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
26296             else {
26297             ## ok
26298             }
26299              
26300 104         314 my $test_position = total_line_length( $i_test, $ii );
26301 104         255 my $mll = $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
26302              
26303             #------------------------------------------------------
26304             # Break if structure will reach the maximum line length
26305             #------------------------------------------------------
26306              
26307             # Historically, -lp just used one-half line length here
26308 104         227 my $len_increase = $rOpts_maximum_line_length / 2;
26309              
26310             # For -xlp, we can also use the pre-computed lengths
26311 104         232 my $min_len = $self->[_rcollapsed_length_by_seqno_]->{$seqno};
26312 104 100 100     353 if ( $min_len && $min_len > $len_increase ) {
26313 2         7 $len_increase = $min_len;
26314             }
26315              
26316 104 100 66     1440 if (
      100        
      100        
      33        
      66        
      66        
26317              
26318             # if we might exceed the maximum line length
26319             $lp_position_predictor + $len_increase > $mll
26320              
26321             # if a -bbx flag WANTS a break before this opening token
26322             || ( $seqno
26323             && $self->[_rbreak_before_container_by_seqno_]->{$seqno} )
26324              
26325             # or we are beyond the 1/4 point and there was an old
26326             # break at an assignment (not '=>') [fix for b1035]
26327             || (
26328             $lp_position_predictor >
26329             $mll - $rOpts_maximum_line_length * 3 / 4
26330             && $types_to_go[$ii_last_equals] ne '=>'
26331             && (
26332             $old_breakpoint_to_go[$ii_last_equals]
26333             || ( $ii_last_equals > 0
26334             && $old_breakpoint_to_go[ $ii_last_equals - 1 ] )
26335             || ( $ii_last_equals > 1
26336             && $types_to_go[ $ii_last_equals - 1 ] eq 'b'
26337             && $old_breakpoint_to_go[ $ii_last_equals - 2 ] )
26338             )
26339             )
26340             )
26341             {
26342              
26343             # then make the switch -- note that we do not set a
26344             # real breakpoint here because we may not really need
26345             # one; sub break_lists will do that if necessary.
26346              
26347 16         62 my $Kc = $self->[_K_closing_container_]->{$seqno};
26348 16 100 66     111 if (
      100        
26349              
26350             # For -lp, only if the closing token is in this
26351             # batch (c117). Otherwise it cannot be done by sub
26352             # break_lists.
26353             defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
26354              
26355             # For -xlp, we only need one nonblank token after
26356             # the opening token.
26357             || $rOpts_extended_line_up_parentheses
26358             )
26359             {
26360 15         32 $ii_begin_line = $i_test + 1;
26361 15         35 $lp_position_predictor = $test_position;
26362              
26363             #--------------------------------------------------
26364             # Fix for an opening container terminating a batch:
26365             #--------------------------------------------------
26366             # To get alignment of a -lp container with its
26367             # contents, we have to put a break after $i_test.
26368             # For $ii<$max_index_to_go, this will be done by
26369             # sub break_lists based on the indentation object.
26370             # But for $ii=$max_index_to_go, the indentation
26371             # object for this seqno will not be created until
26372             # the next batch, so we have to set a break at
26373             # $i_test right now in order to get one.
26374 15 0 66     80 if ( $ii == $max_index_to_go
      33        
      33        
      0        
26375             && !$block_type_to_go[$ii]
26376             && $types_to_go[$ii] eq '{'
26377             && $seqno
26378             && !$self->[_ris_excluded_lp_container_]->{$seqno} )
26379             {
26380 0         0 $self->set_forced_lp_break( $ii_begin_line, $ii );
26381             }
26382             }
26383             }
26384             }
26385 141         306 return;
26386             } ## end sub lp_equals_break_check
26387              
26388             sub lp_decreasing_depth {
26389 935     935 0 1746 my ( $self, $ii ) = @_;
26390              
26391 935         1644 my $rLL = $self->[_rLL_];
26392              
26393 935         1474 my $level = $levels_to_go[$ii];
26394 935         1439 my $ci_level = $ci_levels_to_go[$ii];
26395              
26396             # loop to find the first entry at or completely below this level
26397 935         1363 while (1) {
26398              
26399             # Be sure we have not hit the stack bottom - should never
26400             # happen because only negative levels can get here, and
26401             # $level was forced to be positive above.
26402 1064 50       3248 if ( !$max_lp_stack ) {
26403              
26404             # non-fatal, just keep going except in DEVEL_MODE
26405 0         0 if (DEVEL_MODE) {
26406             Fault(<<EOM);
26407             program bug with -lp: stack_error. level=$level; ci_level=$ci_level; rerun with -nlp
26408             EOM
26409             }
26410 0         0 last;
26411             }
26412              
26413             # save index of token which closes this level
26414 1064 100       2312 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
26415 608         1137 my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
26416              
26417 608         1832 $lp_object->set_closed($ii);
26418              
26419 608         899 my $comma_count = 0;
26420 608         889 my $arrow_count = 0;
26421 608         977 my $type = $types_to_go[$ii];
26422 608 100 66     1909 if ( $type eq '}' || $type eq ')' ) {
26423 340         567 my $total_depth = $nesting_depth_to_go[$ii];
26424 340         639 $comma_count = $lp_comma_count{$total_depth};
26425 340         589 $arrow_count = $lp_arrow_count{$total_depth};
26426 340 100       768 $comma_count = 0 unless $comma_count;
26427 340 100       737 $arrow_count = 0 unless $arrow_count;
26428             }
26429              
26430 608         1751 $lp_object->set_comma_count($comma_count);
26431 608         1637 $lp_object->set_arrow_count($arrow_count);
26432              
26433             # Undo any extra indentation if we saw no commas
26434 608         1422 my $available_spaces = $lp_object->get_available_spaces();
26435 608         1418 my $K_start = $lp_object->get_K_begin_line();
26436              
26437 608 100 100     2515 if ( $available_spaces > 0
      100        
      100        
26438             && $K_start >= $K_to_go[0]
26439             && ( $comma_count <= 0 || $arrow_count > 0 ) )
26440             {
26441              
26442 62         261 my $i = $lp_object->get_lp_item_index();
26443              
26444             # Safety check for a valid stack index. It
26445             # should be ok because we just checked that the
26446             # index K of the token associated with this
26447             # indentation is in this batch.
26448 62 50 33     307 if ( $i < 0 || $i > $max_lp_object_list ) {
26449 0         0 my $KK = $K_to_go[$ii];
26450 0         0 my $lno = $rLL->[$KK]->[_LINE_INDEX_];
26451 0         0 DEVEL_MODE && Fault(<<EOM);
26452             Program bug with -lp near line $lno. Stack index i=$i should be >=0 and <= max=$max_lp_object_list
26453             EOM
26454 0         0 last;
26455             }
26456              
26457 62 100       182 if ( $arrow_count == 0 ) {
26458 36         148 $rlp_object_list->[$i]
26459             ->permanently_decrease_available_spaces(
26460             $available_spaces);
26461             }
26462             else {
26463 26         101 $rlp_object_list->[$i]
26464             ->tentatively_decrease_available_spaces(
26465             $available_spaces);
26466             }
26467 62         210 foreach my $j ( $i + 1 .. $max_lp_object_list ) {
26468 310         652 $rlp_object_list->[$j]
26469             ->decrease_SPACES($available_spaces);
26470             }
26471             }
26472             }
26473              
26474             # go down one level
26475 1064         1596 --$max_lp_stack;
26476              
26477 1064         1618 my $rLP_top = $rLP->[$max_lp_stack];
26478 1064         1590 my $ci_lev = $rLP_top->[_lp_ci_level_];
26479 1064         1572 my $lev = $rLP_top->[_lp_level_];
26480 1064         1497 my $spaces = $rLP_top->[_lp_space_count_];
26481 1064 100       2151 if ( $rLP_top->[_lp_object_] ) {
26482 498         733 my $lp_obj = $rLP_top->[_lp_object_];
26483             ( $spaces, $lev, $ci_lev ) =
26484 498         659 @{ $lp_obj->get_spaces_level_ci() };
  498         1058  
26485             }
26486              
26487             # stop when we reach a level at or below the current
26488             # level
26489 1064 100 66     3933 if ( $lev <= $level && $ci_lev <= $ci_level ) {
26490 935         1445 $space_count = $spaces;
26491 935         1251 $current_level = $lev;
26492 935         1245 $current_ci_level = $ci_lev;
26493 935         1646 last;
26494             }
26495             }
26496 935         1574 return;
26497             } ## end sub lp_decreasing_depth
26498              
26499             sub lp_increasing_depth {
26500 1485     1485 0 2549 my ( $self, $ii ) = @_;
26501              
26502 1485         2331 my $rLL = $self->[_rLL_];
26503              
26504 1485         2418 my $type = $types_to_go[$ii];
26505 1485         2234 my $level = $levels_to_go[$ii];
26506 1485         2059 my $ci_level = $ci_levels_to_go[$ii];
26507              
26508 1485         2004 $stack_changed = 1;
26509              
26510             # Compute the standard incremental whitespace. This will be
26511             # the minimum incremental whitespace that will be used. This
26512             # choice results in a smooth transition between the gnu-style
26513             # and the standard style.
26514 1485         2577 my $standard_increment =
26515             ( $level - $current_level ) * $rOpts_indent_columns +
26516             ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
26517              
26518             # Now we have to define how much extra incremental space
26519             # ("$available_space") we want. This extra space will be
26520             # reduced as necessary when long lines are encountered or when
26521             # it becomes clear that we do not have a good list.
26522 1485         2066 my $available_spaces = 0;
26523 1485         2010 my $align_seqno = 0;
26524 1485         3089 my $K_extra_space;
26525              
26526             my $last_nonblank_seqno;
26527 1485         0 my $last_nonblank_block_type;
26528 1485 100       2889 if ( defined($K_last_nonblank) ) {
26529 1455         4725 $last_nonblank_seqno = $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
26530             $last_nonblank_block_type =
26531             $last_nonblank_seqno
26532 1455 100       3141 ? $self->[_rblock_type_of_seqno_]->{$last_nonblank_seqno}
26533             : undef;
26534             }
26535              
26536 1485         2318 $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
26537              
26538             #-----------------------------------------------
26539             # Initialize indentation spaces on empty stack..
26540             #-----------------------------------------------
26541 1485 100 100     8810 if ( $max_lp_stack == 0 ) {
    100 66        
      100        
      100        
      66        
      66        
26542 31         77 $space_count = $level * $rOpts_indent_columns;
26543             }
26544              
26545             #----------------------------------------
26546             # Add the standard space increment if ...
26547             #----------------------------------------
26548             elsif (
26549              
26550             # if this is a BLOCK, add the standard increment
26551             $last_nonblank_block_type
26552              
26553             # or if this is not a sequenced item
26554             || !$last_nonblank_seqno
26555              
26556             # or this container is excluded by user rules
26557             # or contains here-docs or multiline qw text
26558             || defined($last_nonblank_seqno)
26559             && $self->[_ris_excluded_lp_container_]->{$last_nonblank_seqno}
26560              
26561             # or if last nonblank token was not structural indentation
26562             || $last_nonblank_type ne '{'
26563              
26564             # and do not start -lp under stress .. fixes b1244, b1255
26565             || !$in_lp_mode && $level >= $high_stress_level
26566              
26567             )
26568             {
26569              
26570             # If we have entered lp mode, use the top lp object to get
26571             # the current indentation spaces because it may have
26572             # changed. Fixes b1285, b1286.
26573 1189 100       2749 if ($in_lp_mode) {
26574 509         1369 $space_count = $in_lp_mode->get_spaces();
26575             }
26576 1189         1841 $space_count += $standard_increment;
26577             }
26578              
26579             #---------------------------------------------------------------
26580             # -lp mode: try to use space to the first non-blank level change
26581             #---------------------------------------------------------------
26582             else {
26583              
26584             # see how much space we have available
26585 265         449 my $test_space_count = $lp_position_predictor;
26586 265         399 my $excess = 0;
26587             my $min_len =
26588 265         462 $self->[_rcollapsed_length_by_seqno_]->{$last_nonblank_seqno};
26589 265         454 my $next_opening_too_far;
26590              
26591 265 100       587 if ( defined($min_len) ) {
26592 54         117 $excess =
26593             $test_space_count +
26594             $min_len -
26595             $maximum_line_length_at_level[$level];
26596 54 100       110 if ( $excess > 0 ) {
26597 3         13 $test_space_count -= $excess;
26598              
26599             # will the next opening token be a long way out?
26600 3         9 $next_opening_too_far =
26601             $lp_position_predictor + $excess >
26602             $maximum_line_length_at_level[$level];
26603             }
26604             }
26605              
26606 265         463 my $rLP_top = $rLP->[$max_lp_stack];
26607 265         460 my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
26608 265 100       597 if ( $rLP_top->[_lp_object_] ) {
26609 148         413 $min_gnu_indentation = $rLP_top->[_lp_object_]->get_spaces();
26610             }
26611 265         423 $available_spaces = $test_space_count - $min_gnu_indentation;
26612              
26613             # Do not startup -lp indentation mode if no space ...
26614             # ... or if it puts the opening far to the right
26615 265 50 33     989 if ( !$in_lp_mode
      66        
26616             && ( $available_spaces <= 0 || $next_opening_too_far ) )
26617             {
26618 0         0 $space_count += $standard_increment;
26619 0         0 $available_spaces = 0;
26620             }
26621              
26622             # Use -lp mode
26623             else {
26624 265         396 $space_count = $test_space_count;
26625              
26626 265         400 $in_lp_mode = 1;
26627 265 100       635 if ( $available_spaces >= $standard_increment ) {
    100          
    50          
26628 202         317 $min_gnu_indentation += $standard_increment;
26629             }
26630             elsif ( $available_spaces > 1 ) {
26631 41         89 $min_gnu_indentation += $available_spaces + 1;
26632              
26633             # The "+1" space can cause mis-alignment if there is no
26634             # blank space between the opening paren and the next
26635             # nonblank token (i.e., -pt=2) and the container does not
26636             # get broken open. So we will mark this token for later
26637             # space removal by sub 'xlp_tweak' if this container
26638             # remains intact (issue git #106).
26639 41 100 66     450 if (
      66        
      33        
      66        
26640             $type ne 'b'
26641              
26642             # Skip if the maximum line length is exceeded here
26643             && $excess <= 0
26644              
26645             # This is only for level changes, not ci level changes.
26646             # But note: this test is here out of caution but I have
26647             # not found a case where it is actually necessary.
26648             && $is_opening_token{$last_nonblank_token}
26649              
26650             # Be sure we are at consecutive nonblanks. This test
26651             # should be true, but it guards against future coding
26652             # changes to level values assigned to blank spaces.
26653             && $ii > 0
26654             && $types_to_go[ $ii - 1 ] ne 'b'
26655              
26656             )
26657             {
26658 8         26 $K_extra_space = $K_to_go[$ii];
26659             }
26660             }
26661             elsif ( $is_opening_token{$last_nonblank_token} ) {
26662 22 100       70 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
26663 13         37 $min_gnu_indentation += 2;
26664             }
26665             else {
26666 9         18 $min_gnu_indentation += 1;
26667             }
26668             }
26669             else {
26670 0         0 $min_gnu_indentation += $standard_increment;
26671             }
26672 265         421 $available_spaces = $space_count - $min_gnu_indentation;
26673              
26674 265 100       573 if ( $available_spaces < 0 ) {
26675 54         82 $space_count = $min_gnu_indentation;
26676 54         77 $available_spaces = 0;
26677             }
26678 265         599 $align_seqno = $last_nonblank_seqno;
26679             }
26680             }
26681              
26682             #-------------------------------------------
26683             # update the state, but not on a blank token
26684             #-------------------------------------------
26685 1485 100       3037 if ( $type ne 'b' ) {
26686              
26687 1122 100       2317 if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
26688 498         1516 $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
26689 498         706 $in_lp_mode = 1;
26690             }
26691              
26692             #----------------------------------------
26693             # Create indentation object if in lp-mode
26694             #----------------------------------------
26695 1122         1638 ++$max_lp_stack;
26696 1122         1601 my $lp_object;
26697 1122 100       2160 if ($in_lp_mode) {
26698              
26699             # A negative level implies not to store the item in the
26700             # item_list
26701 608         1003 my $lp_item_index = 0;
26702 608 50       1333 if ( $level >= 0 ) {
26703 608         959 $lp_item_index = ++$max_lp_object_list;
26704             }
26705              
26706 608         886 my $K_begin_line = 0;
26707 608 50 33     2141 if ( $ii_begin_line >= 0
26708             && $ii_begin_line <= $max_index_to_go )
26709             {
26710 608         998 $K_begin_line = $K_to_go[$ii_begin_line];
26711             }
26712              
26713             # Minor Fix: when creating indentation at a side
26714             # comment we don't know what the space to the actual
26715             # next code token will be. We will allow a space for
26716             # sub correct_lp to move it in if necessary.
26717 608 100 100     1520 if ( $type eq '#'
      66        
26718             && $max_index_to_go > 0
26719             && $align_seqno )
26720             {
26721 2         5 $available_spaces += 1;
26722             }
26723              
26724 608         1009 my $standard_spaces = $leading_spaces_to_go[$ii];
26725 608         2180 $lp_object = Perl::Tidy::IndentationItem->new(
26726             spaces => $space_count,
26727             level => $level,
26728             ci_level => $ci_level,
26729             available_spaces => $available_spaces,
26730             lp_item_index => $lp_item_index,
26731             align_seqno => $align_seqno,
26732             K_begin_line => $K_begin_line,
26733             standard_spaces => $standard_spaces,
26734             K_extra_space => $K_extra_space,
26735             );
26736              
26737 608         948 DEBUG_LP && do {
26738             my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
26739             my $token = $tokens_to_go[$ii];
26740             print {*STDOUT} <<EOM;
26741             DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
26742             EOM
26743             };
26744              
26745 608 50       1469 if ( $level >= 0 ) {
26746 608         1095 $rlp_object_list->[$max_lp_object_list] = $lp_object;
26747             }
26748              
26749 608 100 66     2141 if ( $is_opening_token{$last_nonblank_token}
26750             && $last_nonblank_seqno )
26751             {
26752 259         768 $self->[_rlp_object_by_seqno_]->{$last_nonblank_seqno} =
26753             $lp_object;
26754             }
26755             }
26756              
26757             #------------------------------------
26758             # Store this indentation on the stack
26759             #------------------------------------
26760 1122         1972 $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
26761 1122         1760 $rLP->[$max_lp_stack]->[_lp_level_] = $level;
26762 1122         1762 $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object;
26763 1122         1892 $rLP->[$max_lp_stack]->[_lp_container_seqno_] =
26764             $last_nonblank_seqno;
26765 1122         1742 $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
26766              
26767             # If the opening paren is beyond the half-line length, then
26768             # we will use the minimum (standard) indentation. This will
26769             # help avoid problems associated with running out of space
26770             # near the end of a line. As a result, in deeply nested
26771             # lists, there will be some indentations which are limited
26772             # to this minimum standard indentation. But the most deeply
26773             # nested container will still probably be able to shift its
26774             # parameters to the right for proper alignment, so in most
26775             # cases this will not be noticeable.
26776 1122 100 66     3065 if ( $available_spaces > 0 && $lp_object ) {
26777 169         470 my $halfway =
26778             $maximum_line_length_at_level[$level] -
26779             $rOpts_maximum_line_length / 2;
26780 169 100       540 $lp_object->tentatively_decrease_available_spaces(
26781             $available_spaces)
26782             if ( $space_count > $halfway );
26783             }
26784             }
26785 1485         2650 return;
26786             } ## end sub lp_increasing_depth
26787              
26788             sub check_for_long_gnu_style_lines {
26789              
26790             # look at the current estimated maximum line length, and
26791             # remove some whitespace if it exceeds the desired maximum
26792 1057     1057 0 2008 my ($ii_to_go) = @_;
26793              
26794             # nothing can be done if no stack items defined for this line
26795 1057 100       2133 return if ( $max_lp_object_list < 0 );
26796              
26797             # See if we have exceeded the maximum desired line length ..
26798             # keep 2 extra free because they are needed in some cases
26799             # (result of trial-and-error testing)
26800 815         1231 my $tol = 2;
26801              
26802             # But reduce tol to 0 at a terminal comma; fixes b1432
26803 815 100 66     2094 if ( $tokens_to_go[$ii_to_go] eq ','
26804             && $ii_to_go < $max_index_to_go )
26805             {
26806 32         90 my $in = $ii_to_go + 1;
26807 32 50 33     186 if ( $types_to_go[$in] eq 'b' && $in < $max_index_to_go ) { $in++ }
  32         58  
26808 32 100       152 if ( $is_closing_token{ $tokens_to_go[$in] } ) {
26809 7         33 $tol = 0;
26810             }
26811             }
26812              
26813 815         1563 my $spaces_needed =
26814             $lp_position_predictor -
26815             $maximum_line_length_at_level[ $levels_to_go[$ii_to_go] ] +
26816             $tol;
26817              
26818 815 100       1799 return if ( $spaces_needed <= 0 );
26819              
26820             # We are over the limit, so try to remove a requested number of
26821             # spaces from leading whitespace. We are only allowed to remove
26822             # from whitespace items created on this batch, since others have
26823             # already been used and cannot be undone.
26824 2         16 my @candidates = ();
26825              
26826             # loop over all whitespace items created for the current batch
26827 2         6 foreach my $i ( 0 .. $max_lp_object_list ) {
26828 200         262 my $item = $rlp_object_list->[$i];
26829              
26830             # item must still be open to be a candidate (otherwise it
26831             # cannot influence the current token)
26832 200 100       342 next if ( $item->get_closed() >= 0 );
26833              
26834 13         27 my $available_spaces = $item->get_available_spaces();
26835              
26836 13 100       27 if ( $available_spaces > 0 ) {
26837 8         19 push( @candidates, [ $i, $available_spaces ] );
26838             }
26839             }
26840              
26841 2 50       7 return unless (@candidates);
26842              
26843             # sort by available whitespace so that we can remove whitespace
26844             # from the maximum available first.
26845             @candidates =
26846 2 50       19 sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @candidates;
  10         31  
26847              
26848             # keep removing whitespace until we are done or have no more
26849 2         6 foreach my $candidate (@candidates) {
26850 2         6 my ( $i, $available_spaces ) = @{$candidate};
  2         7  
26851 2 50       8 my $deleted_spaces =
26852             ( $available_spaces > $spaces_needed )
26853             ? $spaces_needed
26854             : $available_spaces;
26855              
26856             # remove the incremental space from this item
26857 2         9 $rlp_object_list->[$i]->decrease_available_spaces($deleted_spaces);
26858              
26859 2         5 my $i_debug = $i;
26860              
26861             # update the leading whitespace of this item and all items
26862             # that came after it
26863 2         18 $i -= 1;
26864 2         12 while ( ++$i <= $max_lp_object_list ) {
26865              
26866 200         352 my $old_spaces = $rlp_object_list->[$i]->get_spaces();
26867 200 50       311 if ( $old_spaces >= $deleted_spaces ) {
26868 200         332 $rlp_object_list->[$i]->decrease_SPACES($deleted_spaces);
26869             }
26870              
26871             # shouldn't happen except for code bug:
26872             else {
26873             # non-fatal, keep going except in DEVEL_MODE
26874 0         0 if (DEVEL_MODE) {
26875             my $level = $rlp_object_list->[$i_debug]->get_level();
26876             my $ci_level =
26877             $rlp_object_list->[$i_debug]->get_ci_level();
26878             my $old_level = $rlp_object_list->[$i]->get_level();
26879             my $old_ci_level =
26880             $rlp_object_list->[$i]->get_ci_level();
26881             Fault(<<EOM);
26882             program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level
26883             EOM
26884             }
26885             }
26886             }
26887 2         7 $lp_position_predictor -= $deleted_spaces;
26888 2         3 $spaces_needed -= $deleted_spaces;
26889 2 50       17 last if ( $spaces_needed <= 0 );
26890             }
26891 2         9 return;
26892             } ## end sub check_for_long_gnu_style_lines
26893              
26894             sub undo_incomplete_lp_indentation {
26895              
26896             #------------------------------------------------------------------
26897             # Undo indentation for all incomplete -lp indentation levels of the
26898             # current batch unless -xlp is set.
26899             #------------------------------------------------------------------
26900              
26901             # This routine is called once after each output stream batch is
26902             # finished to undo indentation for all incomplete -lp indentation
26903             # levels. If this routine is called then comments and blank lines will
26904             # disrupt this indentation style. In older versions of perltidy this
26905             # was always done because it could cause problems otherwise, but recent
26906             # improvements allow fairly good results to be obtained by skipping
26907             # this step with the -xlp flag.
26908              
26909             # nothing to do if no stack items defined for this line
26910 229 100   229 0 595 return if ( $max_lp_object_list < 0 );
26911              
26912             # loop over all whitespace items created for the current batch
26913 83         305 foreach my $i ( 0 .. $max_lp_object_list ) {
26914 527         853 my $item = $rlp_object_list->[$i];
26915              
26916             # only look for open items
26917 527 100       1210 next if ( $item->get_closed() >= 0 );
26918              
26919             # Tentatively remove all of the available space
26920             # (The vertical aligner will try to get it back later)
26921 19         67 my $available_spaces = $item->get_available_spaces();
26922 19 100       64 if ( $available_spaces > 0 ) {
26923              
26924             # delete incremental space for this item
26925 9         49 $rlp_object_list->[$i]
26926             ->tentatively_decrease_available_spaces($available_spaces);
26927              
26928             # Reduce the total indentation space of any nodes that follow
26929             # Note that any such nodes must necessarily be dependents
26930             # of this node.
26931 9         33 foreach ( $i + 1 .. $max_lp_object_list ) {
26932 17         44 $rlp_object_list->[$_]->decrease_SPACES($available_spaces);
26933             }
26934             }
26935             }
26936 83         181 return;
26937             } ## end sub undo_incomplete_lp_indentation
26938             } ## end closure set_lp_indentation
26939              
26940             #----------------------------------------------------------------------
26941             # sub to set a requested break before an opening container in -lp mode.
26942             #----------------------------------------------------------------------
26943             sub set_forced_lp_break {
26944              
26945 109     109 0 298 my ( $self, $i_begin_line, $i_opening ) = @_;
26946              
26947             # Given:
26948             # $i_begin_line = index of break in the _to_go arrays
26949             # $i_opening = index of the opening container
26950              
26951             # Set any requested break at a token before this opening container
26952             # token. This is often an '=' or '=>' but can also be things like
26953             # '.', ',', 'return'. It was defined by sub set_lp_indentation.
26954              
26955             # Important:
26956             # For intact containers, call this at the closing token.
26957             # For broken containers, call this at the opening token.
26958             # This will avoid needless breaks when it turns out that the
26959             # container does not actually get broken. This isn't known until
26960             # the closing container for intact blocks.
26961              
26962             return
26963 109 50 33     473 if ( $i_begin_line < 0
26964             || $i_begin_line > $max_index_to_go );
26965              
26966             # Handle request to put a break break immediately before this token.
26967             # We may not want to do that since we are also breaking after it.
26968 109 100       276 if ( $i_begin_line == $i_opening ) {
26969              
26970             # The following rules should be reviewed. We may want to always
26971             # allow the break. If we do not do the break, the indentation
26972             # may be off.
26973              
26974             # RULE: don't break before it unless it is welded to a qw.
26975             # This works well, but we may want to relax this to allow
26976             # breaks in additional cases.
26977             return
26978 18 50       93 if ( !$self->[_rK_weld_right_]->{ $K_to_go[$i_opening] } );
26979 0 0       0 return unless ( $types_to_go[$max_index_to_go] eq 'q' );
26980             }
26981              
26982             # Only break for breakpoints at the same
26983             # indentation level as the opening paren
26984 91         192 my $test1 = $nesting_depth_to_go[$i_opening];
26985 91         186 my $test2 = $nesting_depth_to_go[$i_begin_line];
26986 91 100       218 return if ( $test2 != $test1 );
26987              
26988             # Back up at a blank (fixes case b932)
26989 90         190 my $ibr = $i_begin_line - 1;
26990 90 100 66     392 if ( $ibr > 0
26991             && $types_to_go[$ibr] eq 'b' )
26992             {
26993 44         72 $ibr--;
26994             }
26995 90 100       234 if ( $ibr >= 0 ) {
26996 44         106 my $i_nonblank = $self->set_forced_breakpoint($ibr);
26997              
26998             # Crude patch to prevent sub recombine_breakpoints from undoing
26999             # this break, especially after an '='. It will leave old
27000             # breakpoints alone. See c098/x045 for some examples.
27001 44 100       120 if ( defined($i_nonblank) ) {
27002 33         55 $old_breakpoint_to_go[$i_nonblank] = 1;
27003             }
27004             }
27005 90         191 return;
27006             } ## end sub set_forced_lp_break
27007              
27008             sub reduce_lp_indentation {
27009              
27010             # reduce the leading whitespace at token $i if possible by $spaces_needed
27011             # (a large value of $spaces_needed will remove all excess space)
27012             # NOTE: to be called from break_lists only for a sequence of tokens
27013             # contained between opening and closing parens/braces/brackets
27014              
27015 6     6 0 21 my ( $self, $i, $spaces_wanted ) = @_;
27016 6         11 my $deleted_spaces = 0;
27017              
27018 6         17 my $item = $leading_spaces_to_go[$i];
27019 6         20 my $available_spaces = $item->get_available_spaces();
27020              
27021 6 100 66     55 if (
      33        
27022             $available_spaces > 0
27023             && ( ( $spaces_wanted <= $available_spaces )
27024             || !$item->get_have_child() )
27025             )
27026             {
27027              
27028             # we'll remove these spaces, but mark them as recoverable
27029 5         26 $deleted_spaces =
27030             $item->tentatively_decrease_available_spaces($spaces_wanted);
27031             }
27032              
27033 6         27 return $deleted_spaces;
27034             } ## end sub reduce_lp_indentation
27035              
27036             ###########################################################
27037             # CODE SECTION 13: Preparing batches for vertical alignment
27038             ###########################################################
27039              
27040             sub check_convey_batch_input {
27041              
27042             # Check for valid input to sub convey_batch_to_vertical_aligner. An
27043             # error here would most likely be due to an error in the calling
27044             # routine 'sub grind_batch_of_CODE'.
27045 0     0 0 0 my ( $self, $ri_first, $ri_last ) = @_;
27046              
27047 0 0 0     0 if ( !defined($ri_first) || !defined($ri_last) ) {
27048 0         0 Fault(<<EOM);
27049             Undefined line ranges ri_first and/r ri_last
27050             EOM
27051             }
27052              
27053 0         0 my $nmax = @{$ri_first} - 1;
  0         0  
27054 0         0 my $nmax_check = @{$ri_last} - 1;
  0         0  
27055 0 0 0     0 if ( $nmax < 0 || $nmax_check < 0 || $nmax != $nmax_check ) {
      0        
27056 0         0 Fault(<<EOM);
27057             Line range index error: nmax=$nmax but nmax_check=$nmax_check
27058             These should be equal and >=0
27059             EOM
27060             }
27061 0         0 my ( $ibeg, $iend );
27062 0         0 foreach my $n ( 0 .. $nmax ) {
27063 0         0 my $ibeg_m = $ibeg;
27064 0         0 my $iend_m = $iend;
27065 0         0 $ibeg = $ri_first->[$n];
27066 0         0 $iend = $ri_last->[$n];
27067 0 0 0     0 if ( $ibeg < 0 || $iend < $ibeg || $iend > $max_index_to_go ) {
      0        
27068 0         0 Fault(<<EOM);
27069             Bad line range at line index $n of $nmax: ibeg=$ibeg, iend=$iend
27070             These should have iend >= ibeg and be in the range (0..$max_index_to_go)
27071             EOM
27072             }
27073 0 0       0 next if ( $n == 0 );
27074 0 0       0 if ( $ibeg <= $iend_m ) {
27075 0         0 Fault(<<EOM);
27076             Line ranges overlap: iend=$iend_m at line $n-1 but ibeg=$ibeg for line $n
27077             EOM
27078             }
27079             }
27080 0         0 return;
27081             } ## end sub check_convey_batch_input
27082              
27083             sub convey_batch_to_vertical_aligner {
27084              
27085 4561     4561 0 8892 my ($self) = @_;
27086              
27087             # This routine receives a batch of code for which the final line breaks
27088             # have been defined. Here we prepare the lines for passing to the vertical
27089             # aligner. We do the following tasks:
27090             # - mark certain vertical alignment tokens, such as '=', in each line
27091             # - make final indentation adjustments
27092             # - do logical padding: insert extra blank spaces to help display certain
27093             # logical constructions
27094             # - send the line to the vertical aligner
27095              
27096 4561         8098 my $rLL = $self->[_rLL_];
27097 4561         7403 my $Klimit = $self->[_Klimit_];
27098 4561         7443 my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
27099 4561         7133 my $this_batch = $self->[_this_batch_];
27100              
27101 4561         7552 my $do_not_pad = $this_batch->[_do_not_pad_];
27102 4561         7073 my $starting_in_quote = $this_batch->[_starting_in_quote_];
27103 4561         7209 my $ending_in_quote = $this_batch->[_ending_in_quote_];
27104 4561         7118 my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
27105 4561         7180 my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
27106 4561         6991 my $ri_first = $this_batch->[_ri_first_];
27107 4561         6644 my $ri_last = $this_batch->[_ri_last_];
27108              
27109 4561         6275 $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE);
27110              
27111 4561         6265 my $n_last_line = @{$ri_first} - 1;
  4561         8463  
27112              
27113 4561         7380 my $ibeg_next = $ri_first->[0];
27114 4561         7587 my $iend_next = $ri_last->[0];
27115              
27116 4561         8134 my $type_beg_next = $types_to_go[$ibeg_next];
27117 4561         7054 my $type_end_next = $types_to_go[$iend_next];
27118 4561         7704 my $token_beg_next = $tokens_to_go[$ibeg_next];
27119              
27120 4561         8896 my $rindentation_list = [0]; # ref to indentations for each line
27121 4561         7692 my ( $cscw_block_comment, $closing_side_comment, $is_block_comment );
27122              
27123 4561 100 100     13940 if ( !$max_index_to_go && $type_beg_next eq '#' ) {
27124 632         1246 $is_block_comment = 1;
27125             }
27126              
27127 4561 100       9299 if ($rOpts_closing_side_comments) {
27128 61         205 ( $closing_side_comment, $cscw_block_comment ) =
27129             $self->add_closing_side_comment( $ri_first, $ri_last );
27130             }
27131              
27132 4561 100 100     16292 if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation ) {
27133 829         4430 $self->undo_ci( $ri_first, $ri_last,
27134             $this_batch->[_rix_seqno_controlling_ci_] );
27135             }
27136              
27137             # for multi-line batches ...
27138 4561 100       10409 if ( $n_last_line > 0 ) {
27139              
27140             # flush before a long if statement to avoid unwanted alignment
27141             $self->flush_vertical_aligner()
27142             if ( $type_beg_next eq 'k'
27143 754 100 100     3397 && $is_if_unless{$token_beg_next} );
27144              
27145 754 100       4254 $self->set_logical_padding( $ri_first, $ri_last, $starting_in_quote )
27146             if ($rOpts_logical_padding);
27147              
27148 754 100       2561 $self->xlp_tweak( $ri_first, $ri_last )
27149             if ($rOpts_extended_line_up_parentheses);
27150             }
27151              
27152 4561         8042 if (DEVEL_MODE) { $self->check_batch_summed_lengths() }
27153              
27154             # ----------------------------------------------------------
27155             # define the vertical alignments for all lines of this batch
27156             # ----------------------------------------------------------
27157 4561         8312 my $rline_alignments;
27158              
27159 4561 100       9074 if ( !$max_index_to_go ) {
27160              
27161             # Optional shortcut for single token ...
27162             # = [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
27163 1288         5877 $rline_alignments = [
27164             [
27165             [],
27166             [ $tokens_to_go[0] ],
27167             [ $types_to_go[0] ],
27168             [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ],
27169             ]
27170             ];
27171             }
27172             else {
27173 3273         10998 $rline_alignments =
27174             $self->make_vertical_alignments( $ri_first, $ri_last );
27175             }
27176              
27177             # ----------------------------------------------
27178             # loop to send each line to the vertical aligner
27179             # ----------------------------------------------
27180 4561         9295 my ( $type_beg, $type_end, $token_beg, $ljump );
27181              
27182 4561         10258 for my $n ( 0 .. $n_last_line ) {
27183              
27184             # ----------------------------------------------------------------
27185             # This hash will hold the args for vertical alignment of this line
27186             # We will populate it as we go.
27187             # ----------------------------------------------------------------
27188 7384         13177 my $rvao_args = {};
27189              
27190 7384         12190 my $type_beg_last = $type_beg;
27191 7384         11097 my $type_end_last = $type_end;
27192              
27193 7384         12187 my $ibeg = $ibeg_next;
27194 7384         10805 my $iend = $iend_next;
27195 7384         12418 my $Kbeg = $K_to_go[$ibeg];
27196 7384         11154 my $Kend = $K_to_go[$iend];
27197              
27198 7384         11268 $type_beg = $type_beg_next;
27199 7384         10634 $type_end = $type_end_next;
27200 7384         11728 $token_beg = $token_beg_next;
27201              
27202             # ---------------------------------------------------
27203             # Define the check value 'Kend' to send for this line
27204             # ---------------------------------------------------
27205             # The 'Kend' value is an integer for checking that lines come out of
27206             # the far end of the pipeline in the right order. It increases
27207             # linearly along the token stream. But we only send ending K values of
27208             # non-comments down the pipeline. This is equivalent to checking that
27209             # the last CODE_type is blank or equal to 'VER'. See also sub
27210             # resync_lines_and_tokens for related coding. Note that
27211             # '$batch_CODE_type' is the code type of the line to which the ending
27212             # token belongs.
27213 7384 100 100     20656 my $Kend_code =
27214             $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
27215              
27216             # Get some vars on line [n+1], if any,
27217             # and define $ljump = level jump needed by 'sub get_final_indentation'
27218 7384 100 100     25942 if ( $n < $n_last_line ) {
    100          
27219 2823         6151 $ibeg_next = $ri_first->[ $n + 1 ];
27220 2823         5239 $iend_next = $ri_last->[ $n + 1 ];
27221              
27222 2823         5084 $type_beg_next = $types_to_go[$ibeg_next];
27223 2823         4614 $type_end_next = $types_to_go[$iend_next];
27224 2823         4574 $token_beg_next = $tokens_to_go[$ibeg_next];
27225              
27226 2823         4539 my $Kbeg_next = $K_to_go[$ibeg_next];
27227 2823         9719 $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
27228             }
27229             elsif ( !$is_block_comment && $Kend < $Klimit ) {
27230              
27231             # Patch for git #51, a bare closing qw paren was not outdented
27232             # if the flag '-nodelete-old-newlines is set
27233             # Note that we are just looking ahead for the next nonblank
27234             # character. We could scan past an arbitrary number of block
27235             # comments or hanging side comments by calling K_next_code, but it
27236             # could add significant run time with very little to be gained.
27237 3385         5948 my $Kbeg_next = $Kend + 1;
27238 3385 100 100     17164 if ( $Kbeg_next < $Klimit
27239             && $rLL->[$Kbeg_next]->[_TYPE_] eq 'b' )
27240             {
27241 2860         4592 $Kbeg_next += 1;
27242             }
27243             $ljump =
27244 3385         8149 $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
27245             }
27246             else {
27247 1176         2679 $ljump = 0;
27248             }
27249              
27250             # ---------------------------------------------
27251             # get the vertical alignment info for this line
27252             # ---------------------------------------------
27253              
27254             # The lines are broken into fields which can be spaced by the vertical
27255             # to achieve vertical alignment. These fields are the actual text
27256             # which will be output, so from here on no more changes can be made to
27257             # the text.
27258 7384         12341 my $rline_alignment = $rline_alignments->[$n];
27259             my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
27260 7384         10742 @{$rline_alignment};
  7384         16185  
27261              
27262             # Programming check: (shouldn't happen)
27263             # The number of tokens which separate the fields must always be
27264             # one less than the number of fields. If this is not true then
27265             # an error has been introduced in sub make_alignment_patterns.
27266 7384         11871 if (DEVEL_MODE) {
27267             if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
27268             my $nt = @{$rtokens};
27269             my $nf = @{$rfields};
27270             my $msg = <<EOM;
27271             Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
27272             The number of tokens = $nt should be one less than number of fields: $nf
27273             EOM
27274             Fault($msg);
27275             }
27276             }
27277              
27278             # --------------------------------------
27279             # get the final indentation of this line
27280             # --------------------------------------
27281             my (
27282              
27283 7384         22366 $indentation,
27284             $lev,
27285             $level_end,
27286             $i_terminal,
27287             $is_outdented_line,
27288              
27289             ) = $self->get_final_indentation(
27290              
27291             $ibeg,
27292             $iend,
27293             $rfields,
27294             $rpatterns,
27295             $ri_first,
27296             $ri_last,
27297             $rindentation_list,
27298             $ljump,
27299             $starting_in_quote,
27300             $is_static_block_comment,
27301              
27302             );
27303              
27304             # --------------------------------
27305             # define flag 'outdent_long_lines'
27306             # --------------------------------
27307 7384 100 100     36725 if (
      100        
      100        
      100        
27308             # we will allow outdenting of long lines..
27309             # which are long quotes, if allowed
27310             ( $type_beg eq 'Q' && $rOpts_outdent_long_quotes )
27311              
27312             # which are long block comments, if allowed
27313             || (
27314             $type_beg eq '#'
27315             && $rOpts_outdent_long_comments
27316              
27317             # but not if this is a static block comment
27318             && !$is_static_block_comment
27319             )
27320             )
27321             {
27322 884         2601 $rvao_args->{outdent_long_lines} = 1;
27323              
27324             # convert -lp indentation objects to spaces to allow outdenting
27325 884 100       2422 if ( ref($indentation) ) {
27326 14         55 $indentation = $indentation->get_spaces();
27327             }
27328             }
27329              
27330             # --------------------------------------------------
27331             # define flags 'break_alignment_before' and '_after'
27332             # --------------------------------------------------
27333              
27334             # These flags tell the vertical aligner to stop alignment before or
27335             # after this line.
27336 7384 100 100     28661 if ($is_outdented_line) {
    100          
    100          
27337 26         185 $rvao_args->{break_alignment_before} = 1;
27338 26         77 $rvao_args->{break_alignment_after} = 1;
27339             }
27340             elsif ($do_not_pad) {
27341 50         168 $rvao_args->{break_alignment_before} = 1;
27342             }
27343              
27344             # flush at an 'if' which follows a line with (1) terminal semicolon
27345             # or (2) terminal block_type which is not an 'if'. This prevents
27346             # unwanted alignment between the lines.
27347             elsif ( $type_beg eq 'k' && $token_beg eq 'if' ) {
27348 136         417 my $type_m = 'b';
27349 136         277 my $block_type_m;
27350              
27351 136 100       509 if ( $Kbeg > 0 ) {
27352 105         275 my $Km = $Kbeg - 1;
27353 105         301 $type_m = $rLL->[$Km]->[_TYPE_];
27354 105 100 66     636 if ( $type_m eq 'b' && $Km > 0 ) {
27355 93         212 $Km -= 1;
27356 93         248 $type_m = $rLL->[$Km]->[_TYPE_];
27357             }
27358 105 100 100     601 if ( $type_m eq '#' && $Km > 0 ) {
27359 23         79 $Km -= 1;
27360 23         58 $type_m = $rLL->[$Km]->[_TYPE_];
27361 23 100 66     121 if ( $type_m eq 'b' && $Km > 0 ) {
27362 9         20 $Km -= 1;
27363 9         19 $type_m = $rLL->[$Km]->[_TYPE_];
27364             }
27365             }
27366              
27367 105         287 my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_];
27368 105 100       355 if ($seqno_m) {
27369 44         206 $block_type_m = $self->[_rblock_type_of_seqno_]->{$seqno_m};
27370             }
27371             }
27372              
27373             # break after anything that is not if-like
27374 136 50 100     1440 if (
      100        
      66        
      66        
      33        
      100        
27375             $type_m eq ';'
27376             || ( $type_m eq '}'
27377             && $block_type_m
27378             && $block_type_m ne 'if'
27379             && $block_type_m ne 'unless'
27380             && $block_type_m ne 'elsif'
27381             && $block_type_m ne 'else' )
27382             )
27383             {
27384 35         125 $rvao_args->{break_alignment_before} = 1;
27385             }
27386             }
27387             else {
27388             ## ok - do not need to break vertical alignment here
27389             }
27390              
27391             # ----------------------------------
27392             # define 'rvertical_tightness_flags'
27393             # ----------------------------------
27394             # These flags tell the vertical aligner if/when to combine consecutive
27395             # lines, based on the user input parameters.
27396             $rvao_args->{rvertical_tightness_flags} =
27397 7384 100 100     27729 $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
27398             $ri_first, $ri_last, $ending_in_quote, $closing_side_comment )
27399             unless ( $is_block_comment
27400             || $self->[_no_vertical_tightness_flags_] );
27401              
27402             # ----------------------------------
27403             # define 'is_terminal_ternary' flag
27404             # ----------------------------------
27405              
27406             # This flag is set at the final ':' of a ternary chain to request
27407             # vertical alignment of the final term. Here is a slightly complex
27408             # example:
27409             #
27410             # $self->{_text} = (
27411             # !$section ? ''
27412             # : $type eq 'item' ? "the $section entry"
27413             # : "the section on $section"
27414             # )
27415             # . (
27416             # $page
27417             # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
27418             # : ' elsewhere in this document'
27419             # );
27420             #
27421 7384 100 100     30180 if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
      100        
27422              
27423 97         250 my $is_terminal_ternary = 0;
27424 97 100       379 my $last_leading_type = $n > 0 ? $type_beg_last : ':';
27425 97         233 my $terminal_type = $types_to_go[$i_terminal];
27426 97 100 100     633 if ( $terminal_type ne ';'
      66        
27427             && $n_last_line > $n
27428             && $level_end == $lev )
27429             {
27430 61         124 my $Kbeg_next = $K_to_go[$ibeg_next];
27431 61         130 $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
27432 61         130 $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
27433             }
27434 97 100 100     720 if (
      100        
27435             $last_leading_type eq ':'
27436             && ( ( $terminal_type eq ';' && $level_end <= $lev )
27437             || ( $terminal_type ne ':' && $level_end < $lev ) )
27438             )
27439             {
27440              
27441             # the terminal term must not contain any ternary terms, as in
27442             # my $ECHO = (
27443             # $Is_MSWin32 ? ".\\echo$$"
27444             # : $Is_MacOS ? ":echo$$"
27445             # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
27446             # );
27447 16         89 $is_terminal_ternary = 1;
27448              
27449 16         64 my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
27450 16   100     130 while ( defined($KP) && $KP <= $Kend ) {
27451 6         16 my $type_KP = $rLL->[$KP]->[_TYPE_];
27452 6 50 33     33 if ( $type_KP eq '?' || $type_KP eq ':' ) {
27453 0         0 $is_terminal_ternary = 0;
27454 0         0 last;
27455             }
27456 6         19 $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
27457             }
27458             }
27459 97         270 $rvao_args->{is_terminal_ternary} = $is_terminal_ternary;
27460             }
27461              
27462             # -------------------------------------------------
27463             # add any new closing side comment to the last line
27464             # -------------------------------------------------
27465 7384 50 66     16603 if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
  9   66     40  
27466              
27467 9         34 $rfields->[-1] .= " $closing_side_comment";
27468              
27469             # NOTE: Patch for csc. We can just use 1 for the length of the csc
27470             # because its length should not be a limiting factor from here on.
27471 9         18 $rfield_lengths->[-1] += 2;
27472              
27473             # repack
27474 9         22 $rline_alignment =
27475             [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
27476             }
27477              
27478             # ------------------------
27479             # define flag 'list_seqno'
27480             # ------------------------
27481              
27482             # This flag indicates if this line is contained in a multi-line list
27483 7384 100       14381 if ( !$is_block_comment ) {
27484 6752         12629 my $parent_seqno = $parent_seqno_to_go[$ibeg];
27485 6752         16975 $rvao_args->{list_seqno} = $ris_list_by_seqno->{$parent_seqno};
27486             }
27487              
27488             # The alignment tokens have been marked with nesting_depths, so we need
27489             # to pass nesting depths to the vertical aligner. They remain invariant
27490             # under all formatting operations. Previously, level values were sent
27491             # to the aligner. But they can be altered in welding and other
27492             # operations, and this can lead to alignment errors.
27493 7384         12303 my $nesting_depth_beg = $nesting_depth_to_go[$ibeg];
27494 7384         11745 my $nesting_depth_end = $nesting_depth_to_go[$iend];
27495              
27496             # A quirk in the definition of nesting depths is that the closing token
27497             # has the same depth as internal tokens. The vertical aligner is
27498             # programmed to expect them to have the lower depth, so we fix this.
27499 7384 100       19652 if ( $is_closing_type{ $types_to_go[$ibeg] } ) { $nesting_depth_beg-- }
  1238         2490  
27500 7384 100       15958 if ( $is_closing_type{ $types_to_go[$iend] } ) { $nesting_depth_end-- }
  1019         1908  
27501              
27502             # Adjust nesting depths to keep -lp indentation for qw lists. This is
27503             # required because qw lists contained in brackets do not get nesting
27504             # depths, but the vertical aligner is watching nesting depth changes to
27505             # decide if a -lp block is intact. Without this patch, qw lists
27506             # enclosed in angle brackets will not get the correct -lp indentation.
27507              
27508             # Looking for line with isolated qw ...
27509 7384 50 100     18001 if ( $rOpts_line_up_parentheses
      66        
27510             && $type_beg eq 'q'
27511             && $ibeg == $iend )
27512             {
27513              
27514             # ... which is part of a multiline qw
27515 0         0 my $Km = $self->K_previous_nonblank($Kbeg);
27516 0         0 my $Kp = $self->K_next_nonblank($Kbeg);
27517 0 0 0     0 if ( defined($Km) && $rLL->[$Km]->[_TYPE_] eq 'q'
      0        
      0        
27518             || defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' )
27519             {
27520 0         0 $nesting_depth_beg++;
27521 0         0 $nesting_depth_end++;
27522             }
27523             }
27524              
27525             # ---------------------------------
27526             # define flag 'forget_side_comment'
27527             # ---------------------------------
27528              
27529             # This flag tells the vertical aligner to reset the side comment
27530             # location if we are entering a new block from level 0. This is
27531             # intended to keep side comments from drifting too far to the right.
27532 7384 100 100     19661 if ( $block_type_to_go[$i_terminal]
27533             && $nesting_depth_end > $nesting_depth_beg )
27534             {
27535             $rvao_args->{forget_side_comment} =
27536 59         231 !$self->[_radjusted_levels_]->[$Kbeg];
27537             }
27538              
27539             # -----------------------------------
27540             # Store the remaining non-flag values
27541             # -----------------------------------
27542 7384         14180 $rvao_args->{Kend} = $Kend_code;
27543 7384         13879 $rvao_args->{ci_level} = $ci_levels_to_go[$ibeg];
27544 7384         12576 $rvao_args->{indentation} = $indentation;
27545 7384         14084 $rvao_args->{level_end} = $nesting_depth_end;
27546 7384         13964 $rvao_args->{level} = $nesting_depth_beg;
27547 7384         14197 $rvao_args->{rline_alignment} = $rline_alignment;
27548             $rvao_args->{maximum_line_length} =
27549 7384         21197 $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
27550              
27551             # --------------------------------------
27552             # send this line to the vertical aligner
27553             # --------------------------------------
27554 7384         12712 my $vao = $self->[_vertical_aligner_object_];
27555 7384         33287 $vao->valign_input($rvao_args);
27556              
27557 7384         31647 $do_not_pad = 0;
27558              
27559             } ## end of loop to output each line
27560              
27561             # Set flag indicating if the last line ends in an opening
27562             # token and is very short, so that a blank line is not
27563             # needed if the subsequent line is a comment.
27564             # Examples of what we are looking for:
27565             # {
27566             # && (
27567             # BEGIN {
27568             # default {
27569             # sub {
27570             $self->[_last_output_short_opening_token_]
27571              
27572             # line ends in opening token
27573             # /^[\{\(\[L]$/
27574 4561   66     19206 = $is_opening_type{$type_end}
27575              
27576             # and either
27577             && (
27578             # line has either single opening token
27579             $iend_next == $ibeg_next
27580              
27581             # or is a single token followed by opening token.
27582             # Note that sub identifiers have blanks like 'sub doit'
27583             # $token_beg !~ /\s+/
27584             || ( $iend_next - $ibeg_next <= 2 && index( $token_beg, SPACE ) < 0 )
27585             )
27586              
27587             # and limit total to 10 character widths
27588             && token_sequence_length( $ibeg_next, $iend_next ) <= 10;
27589              
27590             # remember indentation of lines containing opening containers for
27591             # later use by sub get_final_indentation
27592 4561 100 100     22271 $self->save_opening_indentation( $ri_first, $ri_last,
27593             $rindentation_list, $this_batch->[_runmatched_opening_indexes_] )
27594             if ( $this_batch->[_runmatched_opening_indexes_]
27595             || $types_to_go[$max_index_to_go] eq 'q' );
27596              
27597             # output any new -cscw block comment
27598 4561 50       9712 if ($cscw_block_comment) {
27599 0         0 $self->flush_vertical_aligner();
27600 0         0 my $file_writer_object = $self->[_file_writer_object_];
27601 0         0 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
27602             }
27603 4561         20644 return;
27604             } ## end sub convey_batch_to_vertical_aligner
27605              
27606             sub check_batch_summed_lengths {
27607              
27608 0     0 0 0 my ( $self, $msg ) = @_;
27609 0 0       0 $msg = EMPTY_STRING unless defined($msg);
27610 0         0 my $rLL = $self->[_rLL_];
27611              
27612             # Verify that the summed lengths are correct. We want to be sure that
27613             # errors have not been introduced by programming changes. Summed lengths
27614             # are defined in sub store_token. Operations like padding and unmasking
27615             # semicolons can change token lengths, but those operations are expected to
27616             # update the summed lengths when they make changes. So the summed lengths
27617             # should always be correct.
27618 0         0 foreach my $i ( 0 .. $max_index_to_go ) {
27619 0         0 my $len_by_sum =
27620             $summed_lengths_to_go[ $i + 1 ] - $summed_lengths_to_go[$i];
27621 0         0 my $len_tok_i = $token_lengths_to_go[$i];
27622 0         0 my $KK = $K_to_go[$i];
27623 0         0 my $len_tok_K;
27624              
27625             # For --indent-only, there is not always agreement between
27626             # token lengths in _rLL_ and token_lengths_to_go, so skip that check.
27627 0 0 0     0 if ( defined($KK) && !$rOpts_indent_only ) {
27628 0         0 $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_];
27629             }
27630 0 0 0     0 if ( $len_by_sum != $len_tok_i
      0        
27631             || defined($len_tok_K) && $len_by_sum != $len_tok_K )
27632             {
27633 0 0       0 my $lno = defined($KK) ? $rLL->[$KK]->[_LINE_INDEX_] + 1 : "undef";
27634 0 0       0 $KK = 'undef' unless defined($KK);
27635 0         0 my $tok = $tokens_to_go[$i];
27636 0         0 my $type = $types_to_go[$i];
27637 0         0 Fault(<<EOM);
27638             Summed lengths are appear to be incorrect. $msg
27639             lengths disagree: token length by sum=$len_by_sum but token_length_to_go[$i] = $len_tok_i and rLL->[$KK]->[_TOKEN_LENGTH_]=$len_tok_K
27640             near line $lno starting with '$tokens_to_go[0]..' at token i=$i K=$KK token_type='$type' token='$tok'
27641             EOM
27642             }
27643             }
27644 0         0 return;
27645             } ## end sub check_batch_summed_lengths
27646              
27647             { ## begin closure set_vertical_alignment_markers
27648             my %is_vertical_alignment_type;
27649             my %is_not_vertical_alignment_token;
27650             my %is_vertical_alignment_keyword;
27651             my %is_terminal_alignment_type;
27652             my %is_low_level_alignment_token;
27653              
27654             BEGIN {
27655              
27656 39     39   243 my @q;
27657              
27658             # Replaced =~ and // in the list. // had been removed in RT 119588
27659 39         278 @q = qw#
27660             = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
27661             { ? : => && || ~~ !~~ =~ !~ // <=> ->
27662             #;
27663 39         556 @is_vertical_alignment_type{@q} = (1) x scalar(@q);
27664              
27665             # These 'tokens' are not aligned. We need this to remove [
27666             # from the above list because it has type ='{'
27667 39         193 @q = qw([);
27668 39         91 @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
27669              
27670             # these are the only types aligned at a line end
27671 39         108 @q = qw(&& || =>);
27672 39         239 @is_terminal_alignment_type{@q} = (1) x scalar(@q);
27673              
27674             # these tokens only align at line level
27675 39         105 @q = ( '{', '(' );
27676 39         131 @is_low_level_alignment_token{@q} = (1) x scalar(@q);
27677              
27678             # eq and ne were removed from this list to improve alignment chances
27679 39         124 @q = qw(if unless and or err for foreach while until);
27680 39         119193 @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
27681             } ## end BEGIN
27682              
27683             my $ralignment_type_to_go;
27684             my $ralignment_counts;
27685             my $ralignment_hash_by_line;
27686              
27687             sub set_vertical_alignment_markers {
27688              
27689 3270     3270 0 6387 my ( $self, $ri_first, $ri_last ) = @_;
27690              
27691             #----------------------------------------------------------------------
27692             # This routine looks at output lines for certain tokens which can serve
27693             # as vertical alignment markers (such as an '=').
27694             #----------------------------------------------------------------------
27695              
27696             # Input parameters:
27697             # $ri_first = ref to list of starting line indexes in _to_go arrays
27698             # $ri_last = ref to list of ending line indexes in _to_go arrays
27699              
27700             # Method: We look at each token $i in this output batch and set
27701             # $ralignment_type_to_go->[$i] equal to those tokens at which we would
27702             # accept vertical alignment.
27703              
27704             # Initialize closure (and return) variables:
27705 3270         8617 $ralignment_type_to_go = [];
27706 3270         6528 $ralignment_counts = [];
27707 3270         9177 $ralignment_hash_by_line = [];
27708              
27709             # NOTE: closing side comments can insert up to 2 additional tokens
27710             # beyond the original $max_index_to_go, so we need to check ri_last for
27711             # the last index.
27712 3270         4935 my $max_line = @{$ri_first} - 1;
  3270         6224  
27713 3270         6179 my $max_i = $ri_last->[$max_line];
27714 3270 50       7441 if ( $max_i < $max_index_to_go ) { $max_i = $max_index_to_go }
  0         0  
27715              
27716             # -----------------------------------------------------------------
27717             # Shortcut:
27718             # - no alignments if there is only 1 token.
27719             # - and nothing to do if we aren't allowed to change whitespace.
27720             # -----------------------------------------------------------------
27721 3270 100 66     12494 if ( $max_i <= 0 || !$rOpts_add_whitespace ) {
27722 87         339 goto RETURN;
27723             }
27724              
27725             # -------------------------------
27726             # First handle any side comment.
27727             # -------------------------------
27728 3183         5180 my $i_terminal = $max_i;
27729 3183 100       7558 if ( $types_to_go[$max_i] eq '#' ) {
27730              
27731             # We know $max_i > 0 if we get here.
27732 343         734 $i_terminal -= 1;
27733 343 50 33     2005 if ( $i_terminal > 0 && $types_to_go[$i_terminal] eq 'b' ) {
27734 343         611 $i_terminal -= 1;
27735             }
27736              
27737 343         711 my $token = $tokens_to_go[$max_i];
27738 343         605 my $KK = $K_to_go[$max_i];
27739              
27740             # Do not align various special side comments
27741             my $do_not_align = (
27742              
27743             # it is any specially marked side comment
27744             ( defined($KK) && $self->[_rspecial_side_comment_type_]->{$KK} )
27745              
27746             # or it is a static side comment
27747 343   100     4118 || ( $rOpts->{'static-side-comments'}
27748             && $token =~ /$static_side_comment_pattern/ )
27749              
27750             # or a closing side comment
27751             || ( $types_to_go[$i_terminal] eq '}'
27752             && $tokens_to_go[$i_terminal] eq '}'
27753             && $token =~ /$closing_side_comment_prefix_pattern/ )
27754             );
27755              
27756             # - For the specific combination -vc -nvsc, we put all side comments
27757             # at fixed locations. Note that we will lose hanging side comment
27758             # alignments. Otherwise, hsc's can move to strange locations.
27759             # - For -nvc -nvsc we make all side comments vertical alignments
27760             # because the vertical aligner will check for -nvsc and be able
27761             # to reduce the final padding to the side comments for long lines.
27762             # and keep hanging side comments aligned.
27763 343 100 100     1811 if ( !$do_not_align
      100        
27764             && !$rOpts_valign_side_comments
27765             && $rOpts_valign_code )
27766             {
27767              
27768 8         10 $do_not_align = 1;
27769 8         17 my $ipad = $max_i - 1;
27770 8 50       19 if ( $types_to_go[$ipad] eq 'b' ) {
27771             my $pad_spaces =
27772 8         17 $rOpts->{'minimum-space-to-comment'} -
27773             $token_lengths_to_go[$ipad];
27774 8         22 $self->pad_token( $ipad, $pad_spaces );
27775             }
27776             }
27777              
27778 343 100       881 if ( !$do_not_align ) {
27779 325         838 $ralignment_type_to_go->[$max_i] = '#';
27780 325         1089 $ralignment_hash_by_line->[$max_line]->{$max_i} = '#';
27781 325         817 $ralignment_counts->[$max_line]++;
27782             }
27783             }
27784              
27785             # ----------------------------------------------
27786             # Nothing more to do on this line if -nvc is set
27787             # ----------------------------------------------
27788 3183 100       6897 if ( !$rOpts_valign_code ) {
27789 17         58 goto RETURN;
27790             }
27791              
27792             # -------------------------------------
27793             # Loop over each line of this batch ...
27794             # -------------------------------------
27795              
27796 3166         7448 foreach my $line ( 0 .. $max_line ) {
27797              
27798 5809         9121 my $ibeg = $ri_first->[$line];
27799 5809         8772 my $iend = $ri_last->[$line];
27800              
27801 5809 100       11321 next if ( $iend <= $ibeg );
27802              
27803             # back up before any side comment
27804 5397 100       10396 if ( $iend > $i_terminal ) { $iend = $i_terminal }
  326         581  
27805              
27806             #----------------------------------
27807             # Loop over all tokens on this line
27808             #----------------------------------
27809 5397         12124 $self->set_vertical_alignment_markers_token_loop( $line, $ibeg,
27810             $iend );
27811             }
27812              
27813             RETURN:
27814 3270         9771 return ( $ralignment_type_to_go, $ralignment_counts,
27815             $ralignment_hash_by_line );
27816             } ## end sub set_vertical_alignment_markers
27817              
27818             sub set_vertical_alignment_markers_token_loop {
27819 5397     5397 0 11825 my ( $self, $line, $ibeg, $iend ) = @_;
27820              
27821             # Set vertical alignment markers for the tokens on one line
27822             # of the current output batch. This is done by updating the
27823             # three closure variables:
27824             # $ralignment_type_to_go
27825             # $ralignment_counts
27826             # $ralignment_hash_by_line
27827              
27828             # Input parameters:
27829             # $line = index of this line in the current batch
27830             # $ibeg, $iend = index range of tokens to check in the _to_go arrays
27831              
27832 5397         8982 my $level_beg = $levels_to_go[$ibeg];
27833 5397         8680 my $token_beg = $tokens_to_go[$ibeg];
27834 5397         8523 my $type_beg = $types_to_go[$ibeg];
27835 5397   100     22777 my $type_beg_special_char =
27836             ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' );
27837              
27838 5397         8519 my $last_vertical_alignment_BEFORE_index = -1;
27839 5397         8054 my $vert_last_nonblank_type = $type_beg;
27840 5397         7927 my $vert_last_nonblank_token = $token_beg;
27841              
27842             # ----------------------------------------------------------------
27843             # Initialization code merged from 'sub delete_needless_alignments'
27844             # ----------------------------------------------------------------
27845 5397         7676 my $i_good_paren = -1;
27846 5397         7862 my $i_elsif_close = $ibeg - 1;
27847 5397         7968 my $i_elsif_open = $iend + 1;
27848 5397         7718 my @imatch_list;
27849 5397 100       11384 if ( $type_beg eq 'k' ) {
27850              
27851             # Initialization for paren patch: mark a location of a paren we
27852             # should keep, such as one following something like a leading
27853             # 'if', 'elsif',
27854 1651         3017 $i_good_paren = $ibeg + 1;
27855 1651 100       4420 if ( $types_to_go[$i_good_paren] eq 'b' ) {
27856 1516         2537 $i_good_paren++;
27857             }
27858              
27859             # Initialization for 'elsif' patch: remember the paren range of
27860             # an elsif, and do not make alignments within them because this
27861             # can cause loss of padding and overall brace alignment in the
27862             # vertical aligner.
27863 1651 50 66     4766 if ( $token_beg eq 'elsif'
      66        
27864             && $i_good_paren < $iend
27865             && $tokens_to_go[$i_good_paren] eq '(' )
27866             {
27867 23         58 $i_elsif_open = $i_good_paren;
27868 23         49 $i_elsif_close = $mate_index_to_go[$i_good_paren];
27869 23 50       85 if ( !defined($i_elsif_close) ) { $i_elsif_close = -1 }
  0         0  
27870             }
27871             } ## end if ( $type_beg eq 'k' )
27872              
27873             # --------------------------------------------
27874             # Loop over each token in this output line ...
27875             # --------------------------------------------
27876 5397         11318 foreach my $i ( $ibeg + 1 .. $iend ) {
27877              
27878 43300 100       83409 next if ( $types_to_go[$i] eq 'b' );
27879              
27880 27577         37764 my $type = $types_to_go[$i];
27881 27577         38795 my $token = $tokens_to_go[$i];
27882 27577         37463 my $alignment_type = EMPTY_STRING;
27883              
27884             # ----------------------------------------------
27885             # Check for 'paren patch' : Remove excess parens
27886             # ----------------------------------------------
27887              
27888             # Excess alignment of parens can prevent other good alignments.
27889             # For example, note the parens in the first two rows of the
27890             # following snippet. They would normally get marked for
27891             # alignment and aligned as follows:
27892              
27893             # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
27894             # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
27895             # my $img = new Gimp::Image( $w, $h, RGB );
27896              
27897             # This causes unnecessary paren alignment and prevents the
27898             # third equals from aligning. If we remove the unwanted
27899             # alignments we get:
27900              
27901             # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
27902             # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
27903             # my $img = new Gimp::Image( $w, $h, RGB );
27904              
27905             # A rule for doing this which works well is to remove alignment
27906             # of parens whose containers do not contain other aligning
27907             # tokens, with the exception that we always keep alignment of
27908             # the first opening paren on a line (for things like 'if' and
27909             # 'elsif' statements).
27910 27577 100 100     54340 if ( $token eq ')' && @imatch_list ) {
27911              
27912             # undo the corresponding opening paren if:
27913             # - it is at the top of the stack
27914             # - and not the first overall opening paren
27915             # - does not follow a leading keyword on this line
27916 977         2247 my $imate = $mate_index_to_go[$i];
27917 977 50       2993 if ( !defined($imate) ) { $imate = -1 }
  0         0  
27918 977 100 100     3956 if ( $imatch_list[-1] eq $imate
      100        
      100        
27919             && ( $ibeg > 1 || @imatch_list > 1 )
27920             && $imate > $i_good_paren )
27921             {
27922 54 50       157 if ( $ralignment_type_to_go->[$imate] ) {
27923 54         108 $ralignment_type_to_go->[$imate] = EMPTY_STRING;
27924 54         99 $ralignment_counts->[$line]--;
27925 54         160 delete $ralignment_hash_by_line->[$line]->{$imate};
27926             }
27927 54         101 pop @imatch_list;
27928             }
27929             }
27930              
27931             # do not align tokens at lower level than start of line
27932             # except for side comments
27933 27577 100       48091 if ( $levels_to_go[$i] < $level_beg ) {
27934 157         404 next;
27935             }
27936              
27937             #--------------------------------------------------------
27938             # First see if we want to align BEFORE this token
27939             #--------------------------------------------------------
27940              
27941             # The first possible token that we can align before
27942             # is index 2 because: 1) it doesn't normally make sense to
27943             # align before the first token and 2) the second
27944             # token must be a blank if we are to align before
27945             # the third
27946 27420 100 100     93280 if ( $i < $ibeg + 2 ) { }
    100          
    100          
    100          
    100          
    100          
27947              
27948             # must follow a blank token
27949             elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
27950              
27951             # otherwise, do not align two in a row to create a
27952             # blank field
27953             elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
27954              
27955             # align before one of these keywords
27956             # (within a line, since $i>1)
27957             elsif ( $type eq 'k' ) {
27958              
27959             # /^(if|unless|and|or|eq|ne)$/
27960 629 100       2401 if ( $is_vertical_alignment_keyword{$token} ) {
27961 136         297 $alignment_type = $token;
27962              
27963             # Align postfix 'unless' and 'if' if requested (git #116)
27964             # These are the only equivalent keywords. For equivalent
27965             # token types see '%operator_map'.
27966 136 100 100     593 if ( $token eq 'unless' && $rOpts_valign_if_unless ) {
27967 2         6 $alignment_type = 'if';
27968             }
27969             }
27970             }
27971              
27972             # align qw in a 'use' statement (issue git #93)
27973             elsif ( $type eq 'q' ) {
27974 68 100 100     449 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' ) {
27975 34         74 $alignment_type = $type;
27976             }
27977             }
27978              
27979             # align before one of these types..
27980             elsif ( $is_vertical_alignment_type{$type}
27981             && !$is_not_vertical_alignment_token{$token} )
27982             {
27983 4018         6692 $alignment_type = $token;
27984              
27985             # Do not align a terminal token. Although it might
27986             # occasionally look ok to do this, this has been found to be
27987             # a good general rule. The main problems are:
27988             # (1) that the terminal token (such as an = or :) might get
27989             # moved far to the right where it is hard to see because
27990             # nothing follows it, and
27991             # (2) doing so may prevent other good alignments.
27992             # Current exceptions are && and || and =>
27993 4018 100       8424 if ( $i == $iend ) {
27994             $alignment_type = EMPTY_STRING
27995 595 100       2819 unless ( $is_terminal_alignment_type{$type} );
27996             }
27997              
27998             # Do not align leading ': (' or '. ('. This would prevent
27999             # alignment in something like the following:
28000             # $extra_space .=
28001             # ( $input_line_number < 10 ) ? " "
28002             # : ( $input_line_number < 100 ) ? " "
28003             # : "";
28004             # or
28005             # $code =
28006             # ( $case_matters ? $accessor : " lc($accessor) " )
28007             # . ( $yesno ? " eq " : " ne " )
28008              
28009             # Also, do not align a ( following a leading ? so we can
28010             # align something like this:
28011             # $converter{$_}->{ushortok} =
28012             # $PDL::IO::Pic::biggrays
28013             # ? ( m/GIF/ ? 0 : 1 )
28014             # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
28015 4018 100 100     9237 if ( $type_beg_special_char
      66        
28016             && $i == $ibeg + 2
28017             && $types_to_go[ $i - 1 ] eq 'b' )
28018             {
28019 36         79 $alignment_type = EMPTY_STRING;
28020             }
28021              
28022             # Certain tokens only align at the same level as the
28023             # initial line level
28024 4018 100 100     11917 if ( $is_low_level_alignment_token{$token}
28025             && $levels_to_go[$i] != $level_beg )
28026             {
28027 124         324 $alignment_type = EMPTY_STRING;
28028             }
28029              
28030 4018 100       8513 if ( $token eq '(' ) {
28031              
28032             # For a paren after keyword, only align if-like parens,
28033             # such as:
28034             # if ( $a ) { &a }
28035             # elsif ( $b ) { &b }
28036             # ^-------------------aligned parens
28037 573 100 100     2880 if ( $vert_last_nonblank_type eq 'k'
28038             && !$is_if_unless_elsif{$vert_last_nonblank_token} )
28039             {
28040 171         382 $alignment_type = EMPTY_STRING;
28041             }
28042              
28043             # Do not align a spaced-function-paren if requested.
28044             # Issue git #53, #73.
28045 573 100       1538 if ( !$rOpts_function_paren_vertical_alignment ) {
28046 7         14 my $seqno = $type_sequence_to_go[$i];
28047             $alignment_type = EMPTY_STRING
28048 7 50       24 if ( $self->[_ris_function_call_paren_]->{$seqno} );
28049             }
28050              
28051             # make () align with qw in a 'use' statement (git #93)
28052 573 100 66     2051 if ( $tokens_to_go[0] eq 'use'
      66        
      66        
28053             && $types_to_go[0] eq 'k'
28054             && defined( $mate_index_to_go[$i] )
28055             && $mate_index_to_go[$i] == $i + 1 )
28056             {
28057 15         37 $alignment_type = 'q';
28058              
28059             ## Note on discussion git #101. We could make this
28060             ## a separate type '()' to separate it from qw's:
28061             ## $alignment_type =
28062             ## $rOpts_valign_empty_parens_with_qw ? 'q' : '()';
28063             }
28064             }
28065              
28066             # be sure the alignment tokens are unique
28067             # This experiment didn't work well: reason not determined
28068             # if ($token ne $type) {$alignment_type .= $type}
28069             }
28070             else {
28071             ## not a special type
28072             }
28073              
28074             # NOTE: This is deactivated because it causes the previous
28075             # if/elsif alignment to fail
28076             #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
28077             #{ $alignment_type = $type; }
28078              
28079 27420 100       41824 if ($alignment_type) {
28080 3303         5125 $last_vertical_alignment_BEFORE_index = $i;
28081             }
28082              
28083             #--------------------------------------------------------
28084             # Next see if we want to align AFTER the previous nonblank
28085             #--------------------------------------------------------
28086              
28087             # We want to line up ',' and interior ';' tokens, with the added
28088             # space AFTER these tokens. (Note: interior ';' is included
28089             # because it may occur in short blocks).
28090             else {
28091 24117 100 100     77932 if (
      100        
      100        
28092              
28093             # previous token IS one of these:
28094             (
28095             $vert_last_nonblank_type eq ','
28096             || $vert_last_nonblank_type eq ';'
28097             )
28098              
28099             # and it follows a blank
28100             && $types_to_go[ $i - 1 ] eq 'b'
28101              
28102             # and it's NOT one of these
28103             && !$is_closing_token{$type}
28104              
28105             # then go ahead and align
28106             )
28107              
28108             {
28109 1802         2919 $alignment_type = $vert_last_nonblank_type;
28110             }
28111             }
28112              
28113             #-----------------------
28114             # Set the alignment type
28115             #-----------------------
28116 27420 100       45744 if ($alignment_type) {
28117              
28118             # but do not align the opening brace of an anonymous sub
28119 5105 100 100     22749 if ( $token eq '{'
    100 100        
    100 100        
28120             && $block_type_to_go[$i]
28121             && $matches_ASUB{ $block_type_to_go[$i] } )
28122             {
28123              
28124             }
28125              
28126             # and do not make alignments within 'elsif' parens
28127             elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
28128              
28129             }
28130              
28131             # and ignore any tokens which have leading padded spaces
28132             # example: perl527/lop.t
28133             elsif ( substr( $alignment_type, 0, 1 ) eq SPACE ) {
28134              
28135             }
28136              
28137             else {
28138 5020         12714 $ralignment_type_to_go->[$i] = $alignment_type;
28139 5020         16305 $ralignment_hash_by_line->[$line]->{$i} = $alignment_type;
28140 5020         8534 $ralignment_counts->[$line]++;
28141 5020         9590 push @imatch_list, $i;
28142             }
28143             }
28144              
28145 27420         36781 $vert_last_nonblank_type = $type;
28146 27420         44253 $vert_last_nonblank_token = $token;
28147             }
28148 5397         12587 return;
28149             } ## end sub set_vertical_alignment_markers_token_loop
28150              
28151             } ## end closure set_vertical_alignment_markers
28152              
28153             sub make_vertical_alignments {
28154 3273     3273 0 7090 my ( $self, $ri_first, $ri_last ) = @_;
28155              
28156             #----------------------------
28157             # Shortcut for a single token
28158             #----------------------------
28159 3273 50       7733 if ( $max_index_to_go == 0 ) {
28160 0 0 0     0 if ( @{$ri_first} == 1 && $ri_last->[0] == 0 ) {
  0         0  
28161 0         0 my $rtokens = [];
28162 0         0 my $rfields = [ $tokens_to_go[0] ];
28163 0         0 my $rpatterns = [ $types_to_go[0] ];
28164 0         0 my $rfield_lengths =
28165             [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ];
28166 0         0 return [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
28167             }
28168              
28169             # Strange line packing, not fatal but should not happen
28170             else {
28171              
28172 0         0 if (DEVEL_MODE) {
28173             my $max_line = @{$ri_first} - 1;
28174             my $ibeg = $ri_first->[0];
28175             my $iend = $ri_last->[0];
28176             my $tok_b = $tokens_to_go[$ibeg];
28177             my $tok_e = $tokens_to_go[$iend];
28178             my $type_b = $types_to_go[$ibeg];
28179             my $type_e = $types_to_go[$iend];
28180             Fault(
28181             "Strange..max_index=0 but nlines=$max_line ibeg=$ibeg tok=$tok_b type=$type_b iend=$iend tok=$tok_e type=$type_e; please check\n"
28182             );
28183             }
28184             }
28185             }
28186              
28187             #---------------------------------------------------------
28188             # Step 1: Define the alignment tokens for the entire batch
28189             #---------------------------------------------------------
28190 3273         5722 my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line );
28191              
28192             # We only need to make this call if vertical alignment of code is
28193             # requested or if a line might have a side comment.
28194 3273 100 100     8397 if ( $rOpts_valign_code
28195             || $types_to_go[$max_index_to_go] eq '#' )
28196             {
28197 3270         9476 ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line )
28198             = $self->set_vertical_alignment_markers( $ri_first, $ri_last );
28199             }
28200              
28201             #----------------------------------------------
28202             # Step 2: Break each line into alignment fields
28203             #----------------------------------------------
28204 3273         7168 my $rline_alignments = [];
28205 3273         5187 my $max_line = @{$ri_first} - 1;
  3273         6645  
28206 3273         6966 foreach my $line ( 0 .. $max_line ) {
28207              
28208 6096         10013 my $ibeg = $ri_first->[$line];
28209 6096         9143 my $iend = $ri_last->[$line];
28210              
28211 6096         19599 my $rtok_fld_pat_len = $self->make_alignment_patterns(
28212             $ibeg, $iend, $ralignment_type_to_go,
28213             $ralignment_counts->[$line],
28214             $ralignment_hash_by_line->[$line]
28215             );
28216 6096         11253 push @{$rline_alignments}, $rtok_fld_pat_len;
  6096         15035  
28217             }
28218 3273         7581 return $rline_alignments;
28219             } ## end sub make_vertical_alignments
28220              
28221             sub get_seqno {
28222              
28223             # get opening and closing sequence numbers of a token for the vertical
28224             # aligner. Assign qw quotes a value to allow qw opening and closing tokens
28225             # to be treated somewhat like opening and closing tokens for stacking
28226             # tokens by the vertical aligner.
28227 18     18 0 44 my ( $self, $ii, $ending_in_quote ) = @_;
28228              
28229 18         31 my $rLL = $self->[_rLL_];
28230              
28231 18         32 my $KK = $K_to_go[$ii];
28232 18         30 my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
28233              
28234 18 50       45 if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
28235 18         27 my $SEQ_QW = -1;
28236 18         30 my $token = $rLL->[$KK]->[_TOKEN_];
28237 18 100       41 if ( $ii > 0 ) {
28238 2 50       12 $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
28239             }
28240             else {
28241 16 100       38 if ( !$ending_in_quote ) {
28242 6 100       31 $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
28243             }
28244             }
28245             }
28246 18         40 return ($seqno);
28247             } ## end sub get_seqno
28248              
28249             {
28250             my %undo_extended_ci;
28251              
28252             sub initialize_undo_ci {
28253 561     561 0 1497 %undo_extended_ci = ();
28254 561         1015 return;
28255             }
28256              
28257             sub undo_ci {
28258              
28259             # Undo continuation indentation in certain sequences
28260 829     829 0 3495 my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
28261 829         1809 my ( $line_1, $line_2, $lev_last );
28262 829         1349 my $max_line = @{$ri_first} - 1;
  829         1805  
28263              
28264 829         1758 my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
28265              
28266             # Prepare a list of controlling indexes for each line if required.
28267             # This is used for efficient processing below. Note: this is
28268             # critical for speed. In the initial implementation I just looped
28269             # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
28270             # found that this routine was causing a huge run time in large lists.
28271             # On a very large list test case, this new coding dropped the run time
28272             # of this routine from 30 seconds to 169 milliseconds.
28273 829         1391 my @i_controlling_ci;
28274 829 100 66     2625 if ( $rix_seqno_controlling_ci && @{$rix_seqno_controlling_ci} ) {
  40         166  
28275 40         79 my @tmp = reverse @{$rix_seqno_controlling_ci};
  40         115  
28276 40         81 my $ix_next = pop @tmp;
28277 40         101 foreach my $line ( 0 .. $max_line ) {
28278 98         178 my $iend = $ri_last->[$line];
28279 98   100     347 while ( defined($ix_next) && $ix_next <= $iend ) {
28280 120         169 push @{ $i_controlling_ci[$line] }, $ix_next;
  120         238  
28281 120         390 $ix_next = pop @tmp;
28282             }
28283             }
28284             }
28285              
28286             # Loop over all lines of the batch ...
28287              
28288             # Workaround originally created for problem c007, in which the
28289             # combination -lp -xci could produce a "Program bug" message in unusual
28290             # circumstances.
28291 829         1488 my $skip_SECTION_1;
28292 829 100 100     2761 if ( $rOpts_line_up_parentheses
28293             && $rOpts_extended_continuation_indentation )
28294             {
28295              
28296             # Only set this flag if -lp is actually used here
28297 71         158 foreach my $line ( 0 .. $max_line ) {
28298 85         144 my $ibeg = $ri_first->[$line];
28299 85 100       212 if ( ref( $leading_spaces_to_go[$ibeg] ) ) {
28300 19         31 $skip_SECTION_1 = 1;
28301 19         44 last;
28302             }
28303             }
28304             }
28305              
28306 829         2400 foreach my $line ( 0 .. $max_line ) {
28307              
28308 3652         5353 my $ibeg = $ri_first->[$line];
28309 3652         5121 my $iend = $ri_last->[$line];
28310 3652         6993 my $lev = $levels_to_go[$ibeg];
28311              
28312             #-----------------------------------
28313             # SECTION 1: Undo needless common CI
28314             #-----------------------------------
28315              
28316             # We are looking at leading tokens and looking for a sequence all
28317             # at the same level and all at a higher level than enclosing lines.
28318              
28319             # For example, we can undo continuation indentation in sort/map/grep
28320             # chains
28321              
28322             # my $dat1 = pack( "n*",
28323             # map { $_, $lookup->{$_} }
28324             # sort { $a <=> $b }
28325             # grep { $lookup->{$_} ne $default } keys %$lookup );
28326              
28327             # to become
28328              
28329             # my $dat1 = pack( "n*",
28330             # map { $_, $lookup->{$_} }
28331             # sort { $a <=> $b }
28332             # grep { $lookup->{$_} ne $default } keys %$lookup );
28333              
28334 3652 100 100     10597 if ( $line > 0 && !$skip_SECTION_1 ) {
28335              
28336             # if we have started a chain..
28337 2807 100       4770 if ($line_1) {
28338              
28339             # see if it continues..
28340 11 100       52 if ( $lev == $lev_last ) {
    50          
28341 8 100 66     50 if ( $types_to_go[$ibeg] eq 'k'
28342             && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
28343             {
28344              
28345             # chain continues...
28346             # check for chain ending at end of a statement
28347 6   33     21 my $is_semicolon_terminated = (
28348             $line == $max_line
28349             && (
28350             $types_to_go[$iend] eq ';'
28351              
28352             # with possible side comment
28353             || ( $types_to_go[$iend] eq '#'
28354             && $iend - $ibeg >= 2
28355             && $types_to_go[ $iend - 2 ] eq ';'
28356             && $types_to_go[ $iend - 1 ] eq 'b' )
28357             )
28358             );
28359              
28360 6 50       20 $line_2 = $line
28361             if ($is_semicolon_terminated);
28362             }
28363             else {
28364              
28365             # kill chain
28366 2         8 $line_1 = undef;
28367             }
28368             }
28369             elsif ( $lev < $lev_last ) {
28370              
28371             # chain ends with previous line
28372 3         7 $line_2 = $line - 1;
28373             }
28374             else { ## ( $lev > $lev_last )
28375              
28376             # kill chain
28377 0         0 $line_1 = undef;
28378             }
28379              
28380             # undo the continuation indentation if a chain ends
28381 11 100 66     49 if ( defined($line_2) && defined($line_1) ) {
28382 3         11 my $continuation_line_count = $line_2 - $line_1 + 1;
28383 3 50       18 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
  3         11  
28384             = (0) x ($continuation_line_count)
28385             if ( $continuation_line_count >= 0 );
28386 3         8 @leading_spaces_to_go[ @{$ri_first}
28387             [ $line_1 .. $line_2 ] ] =
28388 3         11 @reduced_spaces_to_go[ @{$ri_first}
  3         9  
28389             [ $line_1 .. $line_2 ] ];
28390 3         9 $line_1 = undef;
28391             }
28392             }
28393              
28394             # not in a chain yet..
28395             else {
28396              
28397             # look for start of a new sort/map/grep chain
28398 2796 100       5543 if ( $lev > $lev_last ) {
28399 687 100 100     2798 if ( $types_to_go[$ibeg] eq 'k'
28400             && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
28401             {
28402 10         25 $line_1 = $line;
28403             }
28404             }
28405             }
28406             }
28407              
28408             #-------------------------------------
28409             # SECTION 2: Undo ci at cuddled blocks
28410             #-------------------------------------
28411              
28412             # Note that sub get_final_indentation will be called later to
28413             # actually do this, but for now we will tentatively mark cuddled
28414             # lines with ci=0 so that the the -xci loop which follows will be
28415             # correct at cuddles.
28416 3652 100 100     10362 if (
28417             $types_to_go[$ibeg] eq '}'
28418             && ( $nesting_depth_to_go[$iend] + 1 ==
28419             $nesting_depth_to_go[$ibeg] )
28420             )
28421             {
28422 450         1180 my $terminal_type = $types_to_go[$iend];
28423 450 100 66     1771 if ( $terminal_type eq '#' && $iend > $ibeg ) {
28424 6         21 $terminal_type = $types_to_go[ $iend - 1 ];
28425 6 50 33     29 if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
28426 0         0 $terminal_type = $types_to_go[ $iend - 2 ];
28427             }
28428             }
28429              
28430             # Patch for rt144979, part 2. Coordinated with part 1.
28431             # Skip cuddled braces.
28432 450         965 my $seqno_beg = $type_sequence_to_go[$ibeg];
28433             my $is_cuddled_closing_brace = $seqno_beg
28434 450   66     1866 && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg};
28435              
28436 450 100 100     1579 if ( $terminal_type eq '{' && !$is_cuddled_closing_brace ) {
28437 13         40 $ci_levels_to_go[$ibeg] = 0;
28438             }
28439             }
28440              
28441             #--------------------------------------------------------
28442             # SECTION 3: Undo ci set by sub extended_ci if not needed
28443             #--------------------------------------------------------
28444              
28445             # Undo the ci of the leading token if its controlling token
28446             # went out on a previous line without ci
28447 3652 100       7179 if ( $ci_levels_to_go[$ibeg] ) {
28448 1318         2556 my $Kbeg = $K_to_go[$ibeg];
28449 1318         2411 my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
28450 1318 100 100     3152 if ( $seqno && $undo_extended_ci{$seqno} ) {
28451              
28452             # but do not undo ci set by the -lp flag
28453 50 100       147 if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
28454 36         62 $ci_levels_to_go[$ibeg] = 0;
28455 36         77 $leading_spaces_to_go[$ibeg] =
28456             $reduced_spaces_to_go[$ibeg];
28457             }
28458             }
28459             }
28460              
28461             # Flag any controlling opening tokens in lines without ci. This
28462             # will be used later in the above if statement to undo the ci which
28463             # they added. The array i_controlling_ci[$line] was prepared at
28464             # the top of this routine.
28465 3652 100 100     10173 if ( !$ci_levels_to_go[$ibeg]
28466             && defined( $i_controlling_ci[$line] ) )
28467             {
28468 27         44 foreach my $i ( @{ $i_controlling_ci[$line] } ) {
  27         99  
28469 60         107 my $seqno = $type_sequence_to_go[$i];
28470 60         137 $undo_extended_ci{$seqno} = 1;
28471             }
28472             }
28473              
28474 3652         6428 $lev_last = $lev;
28475             }
28476              
28477 829         2201 return;
28478             } ## end sub undo_ci
28479             }
28480              
28481             { ## begin closure set_logical_padding
28482             my %is_math_op;
28483              
28484             BEGIN {
28485              
28486 39     39   283 my @q = qw( + - * / );
28487 39         97236 @is_math_op{@q} = (1) x scalar(@q);
28488             }
28489              
28490             sub set_logical_padding {
28491              
28492             # Look at a batch of lines and see if extra padding can improve the
28493             # alignment when there are certain leading operators. Here is an
28494             # example, in which some extra space is introduced before
28495             # '( $year' to make it line up with the subsequent lines:
28496             #
28497             # if ( ( $Year < 1601 )
28498             # || ( $Year > 2899 )
28499             # || ( $EndYear < 1601 )
28500             # || ( $EndYear > 2899 ) )
28501             # {
28502             # &Error_OutOfRange;
28503             # }
28504             #
28505 750     750 0 2054 my ( $self, $ri_first, $ri_last, $starting_in_quote ) = @_;
28506 750         1223 my $max_line = @{$ri_first} - 1;
  750         1612  
28507              
28508 750         1858 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
28509             $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
28510              
28511             # Patch to produce padding in the first line of short code blocks.
28512             # This is part of an update to fix cases b562 .. b983.
28513             # This is needed to compensate for a change which was made in 'sub
28514             # starting_one_line_block' to prevent blinkers. Previously, that sub
28515             # would not look at the total block size and rely on sub
28516             # break_long_lines to break up long blocks. Consequently, the
28517             # first line of those batches would end in the opening block brace of a
28518             # sort/map/grep/eval block. When this was changed to immediately check
28519             # for blocks which were too long, the opening block brace would go out
28520             # in a single batch, and the block contents would go out as the next
28521             # batch. This caused the logic in this routine which decides if the
28522             # first line should be padded to be incorrect. To fix this, we set a
28523             # flag if the previous batch ended in an opening sort/map/grep/eval
28524             # block brace, and use it to adjust the logic to compensate.
28525              
28526             # For example, the following would have previously been a single batch
28527             # but now is two batches. We want to pad the line starting in '$dir':
28528             # my (@indices) = # batch n-1 (prev batch n)
28529             # sort { # batch n-1 (prev batch n)
28530             # $dir eq 'left' # batch n
28531             # ? $cells[$a] <=> $cells[$b] # batch n
28532             # : $cells[$b] <=> $cells[$a]; # batch n
28533             # } ( 0 .. $#cells ); # batch n
28534              
28535 750         1474 my $rLL = $self->[_rLL_];
28536 750         1407 my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
28537              
28538 750         1303 my $is_short_block;
28539 750 100       2240 if ( $K_to_go[0] > 0 ) {
28540 633         1309 my $Kp = $K_to_go[0] - 1;
28541 633 100 100     3447 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
28542 590         1134 $Kp -= 1;
28543             }
28544 633 100 100     3312 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
28545 194         472 $Kp -= 1;
28546 194 100 100     1165 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
28547 25         67 $Kp -= 1;
28548             }
28549             }
28550 633         2249 my $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
28551 633 100       1760 if ($seqno) {
28552 125         458 my $block_type = $rblock_type_of_seqno->{$seqno};
28553 125 100       477 if ($block_type) {
28554 94         264 $is_short_block = $is_sort_map_grep_eval{$block_type};
28555 94   66     482 $is_short_block ||= $want_one_line_block{$block_type};
28556             }
28557             }
28558             }
28559              
28560             # looking at each line of this batch..
28561 750         2368 foreach my $line ( 0 .. $max_line - 1 ) {
28562              
28563             # see if the next line begins with a logical operator
28564 2811         4233 $ibeg = $ri_first->[$line];
28565 2811         4058 $iend = $ri_last->[$line];
28566 2811         4399 $ibeg_next = $ri_first->[ $line + 1 ];
28567 2811         4523 $tok_next = $tokens_to_go[$ibeg_next];
28568 2811         4143 $type_next = $types_to_go[$ibeg_next];
28569              
28570             $has_leading_op_next = ( $tok_next =~ /^\w/ )
28571             ? $is_chain_operator{$tok_next} # + - * / : ? && ||
28572 2811 100       8366 : $is_chain_operator{$type_next}; # and, or
28573              
28574 2811 100       5842 next unless ($has_leading_op_next);
28575              
28576             # next line must not be at lesser depth
28577             next
28578 322 100       992 if ( $nesting_depth_to_go[$ibeg] >
28579             $nesting_depth_to_go[$ibeg_next] );
28580              
28581             # identify the token in this line to be padded on the left
28582 287         531 $ipad = undef;
28583              
28584             # handle lines at same depth...
28585 287 100       818 if ( $nesting_depth_to_go[$ibeg] ==
28586             $nesting_depth_to_go[$ibeg_next] )
28587             {
28588              
28589             # if this is not first line of the batch ...
28590 265 100       679 if ( $line > 0 ) {
28591              
28592             # and we have leading operator..
28593 237 100       575 next if $has_leading_op;
28594              
28595             # Introduce padding if..
28596             # 1. the previous line is at lesser depth, or
28597             # 2. the previous line ends in an assignment
28598             # 3. the previous line ends in a 'return'
28599             # 4. the previous line ends in a comma
28600             # Example 1: previous line at lesser depth
28601             # if ( ( $Year < 1601 ) # <- we are here but
28602             # || ( $Year > 2899 ) # list has not yet
28603             # || ( $EndYear < 1601 ) # collapsed vertically
28604             # || ( $EndYear > 2899 ) )
28605             # {
28606             #
28607             # Example 2: previous line ending in assignment:
28608             # $leapyear =
28609             # $year % 4 ? 0 # <- We are here
28610             # : $year % 100 ? 1
28611             # : $year % 400 ? 0
28612             # : 1;
28613             #
28614             # Example 3: previous line ending in comma:
28615             # push @expr,
28616             # /test/ ? undef
28617             # : eval($_) ? 1
28618             # : eval($_) ? 1
28619             # : 0;
28620              
28621             # be sure levels agree (never indent after an indented 'if')
28622             next
28623 78 50       391 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
28624              
28625             # allow padding on first line after a comma but only if:
28626             # (1) this is line 2 and
28627             # (2) there are at more than three lines and
28628             # (3) lines 3 and 4 have the same leading operator
28629             # These rules try to prevent padding within a long
28630             # comma-separated list.
28631 78         165 my $ok_comma;
28632 78 50 66     407 if ( $types_to_go[$iendm] eq ','
      33        
28633             && $line == 1
28634             && $max_line > 2 )
28635             {
28636 0         0 my $ibeg_next_next = $ri_first->[ $line + 2 ];
28637 0         0 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
28638 0         0 $ok_comma = $tok_next_next eq $tok_next;
28639             }
28640              
28641             my $ok_pad = (
28642 78   100     764 $is_assignment{ $types_to_go[$iendm] }
28643             || $ok_comma
28644             || ( $nesting_depth_to_go[$ibegm] <
28645             $nesting_depth_to_go[$ibeg] )
28646             || ( $types_to_go[$iendm] eq 'k'
28647             && $tokens_to_go[$iendm] eq 'return' )
28648             );
28649 78 100       307 next if ( !$ok_pad );
28650              
28651             # we will add padding before the first token
28652 56         154 $ipad = $ibeg;
28653             }
28654              
28655             # for first line of the batch..
28656             else {
28657              
28658             # WARNING: Never indent if first line is starting in a
28659             # continued quote, which would change the quote.
28660 28 50       102 next if $starting_in_quote;
28661              
28662             # if this is text after closing '}'
28663             # then look for an interior token to pad
28664 28 50       161 if ( $types_to_go[$ibeg] eq '}' ) {
    100          
28665              
28666             }
28667              
28668             # otherwise, we might pad if it looks really good
28669             elsif ($is_short_block) {
28670 2         9 $ipad = $ibeg;
28671             }
28672             else {
28673              
28674             # we might pad token $ibeg, so be sure that it
28675             # is at the same depth as the next line.
28676             next
28677 26 50       103 if ( $nesting_depth_to_go[$ibeg] !=
28678             $nesting_depth_to_go[$ibeg_next] );
28679              
28680             # We can pad on line 1 of a statement if at least 3
28681             # lines will be aligned. Otherwise, it
28682             # can look very confusing.
28683              
28684             # We have to be careful not to pad if there are too few
28685             # lines. The current rule is:
28686             # (1) in general we require at least 3 consecutive lines
28687             # with the same leading chain operator token,
28688             # (2) but an exception is that we only require two lines
28689             # with leading colons if there are no more lines. For example,
28690             # the first $i in the following snippet would get padding
28691             # by the second rule:
28692             #
28693             # $i == 1 ? ( "First", "Color" )
28694             # : $i == 2 ? ( "Then", "Rarity" )
28695             # : ( "Then", "Name" );
28696              
28697 26 100       99 next if ( $max_line <= 1 );
28698              
28699 10         50 my $leading_token = $tokens_to_go[$ibeg_next];
28700 10         24 my $tokens_differ;
28701              
28702             # never indent line 1 of a '.' series because
28703             # previous line is most likely at same level.
28704             # TODO: we should also look at the leading_spaces
28705             # of the last output line and skip if it is same
28706             # as this line.
28707 10 100       41 next if ( $leading_token eq '.' );
28708              
28709 7         20 my $count = 1;
28710 7         26 foreach my $l ( 2 .. 3 ) {
28711 11 50       33 last if ( $line + $l > $max_line );
28712 11         22 $count++;
28713 11         26 my $ibeg_next_next = $ri_first->[ $line + $l ];
28714             next
28715 11 100       33 if ( $tokens_to_go[$ibeg_next_next] eq
28716             $leading_token );
28717 4         11 $tokens_differ = 1;
28718 4         11 last;
28719             }
28720 7 100       27 next if ($tokens_differ);
28721 3 50 33     16 next if ( $count < 3 && $leading_token ne ':' );
28722 3         9 $ipad = $ibeg;
28723             }
28724             }
28725             }
28726              
28727             # find interior token to pad if necessary
28728 83 100       317 if ( !defined($ipad) ) {
28729              
28730 22         73 foreach my $i ( $ibeg .. $iend - 1 ) {
28731              
28732             # find any unclosed container
28733             next
28734 61 100 66     318 if ( !$type_sequence_to_go[$i]
      66        
28735             || !defined( $mate_index_to_go[$i] )
28736             || $mate_index_to_go[$i] <= $iend );
28737              
28738             # find next nonblank token to pad
28739 22         76 $ipad = $inext_to_go[$i];
28740 22 50       87 last if $ipad;
28741             }
28742 22 50 33     131 last if ( !$ipad || $ipad > $iend );
28743             }
28744              
28745             # We cannot pad the first leading token of a file because
28746             # it could cause a bug in which the starting indentation
28747             # level is guessed incorrectly each time the code is run
28748             # though perltidy, thus causing the code to march off to
28749             # the right. For example, the following snippet would have
28750             # this problem:
28751              
28752             ## ov_method mycan( $package, '(""' ), $package
28753             ## or ov_method mycan( $package, '(0+' ), $package
28754             ## or ov_method mycan( $package, '(bool' ), $package
28755             ## or ov_method mycan( $package, '(nomethod' ), $package;
28756              
28757             # If this snippet is within a block this won't happen
28758             # unless the user just processes the snippet alone within
28759             # an editor. In that case either the user will see and
28760             # fix the problem or it will be corrected next time the
28761             # entire file is processed with perltidy.
28762 83         214 my $this_batch = $self->[_this_batch_];
28763 83         197 my $peak_batch_size = $this_batch->[_peak_batch_size_];
28764 83 50 66     391 next if ( $ipad == 0 && $peak_batch_size <= 1 );
28765              
28766             # next line must not be at greater depth
28767 83         247 my $iend_next = $ri_last->[ $line + 1 ];
28768             next
28769 83 100       337 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
28770             $nesting_depth_to_go[$ipad] );
28771              
28772             # lines must be somewhat similar to be padded..
28773 77         182 my $inext_next = $inext_to_go[$ibeg_next];
28774 77         207 my $type = $types_to_go[$ipad];
28775              
28776             # see if there are multiple continuation lines
28777 77         159 my $logical_continuation_lines = 1;
28778 77 100       271 if ( $line + 2 <= $max_line ) {
28779 71         163 my $leading_token = $tokens_to_go[$ibeg_next];
28780 71         1158 my $ibeg_next_next = $ri_first->[ $line + 2 ];
28781 71 100 66     431 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
28782             && $nesting_depth_to_go[$ibeg_next] eq
28783             $nesting_depth_to_go[$ibeg_next_next] )
28784             {
28785 42         97 $logical_continuation_lines++;
28786             }
28787             }
28788              
28789             # see if leading types match
28790 77         223 my $types_match = $types_to_go[$inext_next] eq $type;
28791 77         157 my $matches_without_bang;
28792              
28793             # if first line has leading ! then compare the following token
28794 77 100 100     392 if ( !$types_match && $type eq '!' ) {
28795 4         17 $types_match = $matches_without_bang =
28796             $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
28797             }
28798 77 100 100     781 if (
      100        
      100        
      100        
      100        
28799              
28800             # either we have multiple continuation lines to follow
28801             # and we are not padding the first token
28802             (
28803             $logical_continuation_lines > 1
28804             && ( $ipad > 0 || $is_short_block )
28805             )
28806              
28807             # or..
28808             || (
28809              
28810             # types must match
28811             $types_match
28812              
28813             # and keywords must match if keyword
28814             && !(
28815             $type eq 'k'
28816             && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
28817             )
28818             )
28819             )
28820             {
28821              
28822             #----------------------begin special checks--------------
28823             #
28824             # SPECIAL CHECK 1:
28825             # A check is needed before we can make the pad.
28826             # If we are in a list with some long items, we want each
28827             # item to stand out. So in the following example, the
28828             # first line beginning with '$casefold->' would look good
28829             # padded to align with the next line, but then it
28830             # would be indented more than the last line, so we
28831             # won't do it.
28832             #
28833             # ok(
28834             # $casefold->{code} eq '0041'
28835             # && $casefold->{status} eq 'C'
28836             # && $casefold->{mapping} eq '0061',
28837             # 'casefold 0x41'
28838             # );
28839             #
28840             # Note:
28841             # It would be faster, and almost as good, to use a comma
28842             # count, and not pad if comma_count > 1 and the previous
28843             # line did not end with a comma.
28844             #
28845 56         136 my $ok_to_pad = 1;
28846              
28847 56         160 my $ibg = $ri_first->[ $line + 1 ];
28848 56         138 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
28849              
28850             # just use simplified formula for leading spaces to avoid
28851             # needless sub calls
28852 56         165 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
28853              
28854             # look at each line beyond the next ..
28855 56         138 my $l = $line + 1;
28856 56         176 foreach my $ltest ( $line + 2 .. $max_line ) {
28857 171         254 $l = $ltest;
28858 171         261 my $ibeg_t = $ri_first->[$l];
28859              
28860             # quit looking at the end of this container
28861             last
28862 171 100 100     675 if ( $nesting_depth_to_go[ $ibeg_t + 1 ] < $depth )
28863             || ( $nesting_depth_to_go[$ibeg_t] < $depth );
28864              
28865             # cannot do the pad if a later line would be
28866             # outdented more
28867 152 100       426 if ( $levels_to_go[$ibeg_t] + $ci_levels_to_go[$ibeg_t] <
28868             $lsp )
28869             {
28870 2         8 $ok_to_pad = 0;
28871 2         5 last;
28872             }
28873             }
28874              
28875             # don't pad if we end in a broken list
28876 56 100       346 if ( $l == $max_line ) {
28877 41         130 my $i2 = $ri_last->[$l];
28878 41 100       187 if ( $types_to_go[$i2] eq '#' ) {
28879 1         4 my $i1 = $ri_first->[$l];
28880 1 50       5 next if terminal_type_i( $i1, $i2 ) eq ',';
28881             }
28882             }
28883              
28884             # SPECIAL CHECK 2:
28885             # a minus may introduce a quoted variable, and we will
28886             # add the pad only if this line begins with a bare word,
28887             # such as for the word 'Button' here:
28888             # [
28889             # Button => "Print letter \"~$_\"",
28890             # -command => [ sub { print "$_[0]\n" }, $_ ],
28891             # -accelerator => "Meta+$_"
28892             # ];
28893             #
28894             # On the other hand, if 'Button' is quoted, it looks best
28895             # not to pad:
28896             # [
28897             # 'Button' => "Print letter \"~$_\"",
28898             # -command => [ sub { print "$_[0]\n" }, $_ ],
28899             # -accelerator => "Meta+$_"
28900             # ];
28901 56 50       228 if ( $types_to_go[$ibeg_next] eq 'm' ) {
28902 0 0       0 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
28903             }
28904              
28905 56 100       178 next unless $ok_to_pad;
28906              
28907             #----------------------end special check---------------
28908              
28909 54         280 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
28910 54         218 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
28911 54         146 $pad_spaces = $length_2 - $length_1;
28912              
28913             # If the first line has a leading ! and the second does
28914             # not, then remove one space to try to align the next
28915             # leading characters, which are often the same. For example:
28916             # if ( !$ts
28917             # || $ts == $self->Holder
28918             # || $self->Holder->Type eq "Arena" )
28919             #
28920             # This usually helps readability, but if there are subsequent
28921             # ! operators things will still get messed up. For example:
28922             #
28923             # if ( !exists $Net::DNS::typesbyname{$qtype}
28924             # && exists $Net::DNS::classesbyname{$qtype}
28925             # && !exists $Net::DNS::classesbyname{$qclass}
28926             # && exists $Net::DNS::typesbyname{$qclass} )
28927             # We can't fix that.
28928 54 100       183 if ($matches_without_bang) { $pad_spaces-- }
  4         8  
28929              
28930             # make sure this won't change if -lp is used
28931 54         152 my $indentation_1 = $leading_spaces_to_go[$ibeg];
28932 54 50 33     206 if ( ref($indentation_1)
28933             && $indentation_1->get_recoverable_spaces() == 0 )
28934             {
28935 0         0 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
28936 0 0 0     0 if ( ref($indentation_2)
28937             && $indentation_2->get_recoverable_spaces() != 0 )
28938             {
28939 0         0 $pad_spaces = 0;
28940             }
28941             }
28942              
28943             # we might be able to handle a pad of -1 by removing a blank
28944             # token
28945 54 100       180 if ( $pad_spaces < 0 ) {
28946              
28947             # Deactivated for -kpit due to conflict. This block deletes
28948             # a space in an attempt to improve alignment in some cases,
28949             # but it may conflict with user spacing requests. For now
28950             # it is just deactivated if the -kpit option is used.
28951 5 100       32 if ( $pad_spaces == -1 ) {
28952 3 100 33     36 if ( $ipad > $ibeg
      66        
28953             && $types_to_go[ $ipad - 1 ] eq 'b'
28954             && !%keyword_paren_inner_tightness )
28955             {
28956 2         11 $self->pad_token( $ipad - 1, $pad_spaces );
28957             }
28958             }
28959 5         20 $pad_spaces = 0;
28960             }
28961              
28962             # now apply any padding for alignment
28963 54 100 66     329 if ( $ipad >= 0 && $pad_spaces ) {
28964              
28965 47         140 my $length_t = total_line_length( $ibeg, $iend );
28966 47 50       288 if ( $pad_spaces + $length_t <=
28967             $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
28968             {
28969 47         222 $self->pad_token( $ipad, $pad_spaces );
28970             }
28971             }
28972             }
28973             }
28974             continue {
28975 2811         4032 $iendm = $iend;
28976 2811         3806 $ibegm = $ibeg;
28977 2811         4596 $has_leading_op = $has_leading_op_next;
28978             } ## end of loop over lines
28979 750         2020 return;
28980             } ## end sub set_logical_padding
28981             } ## end closure set_logical_padding
28982              
28983             sub pad_token {
28984              
28985             # insert $pad_spaces before token number $ipad
28986 57     57 0 179 my ( $self, $ipad, $pad_spaces ) = @_;
28987 57         143 my $rLL = $self->[_rLL_];
28988 57         121 my $KK = $K_to_go[$ipad];
28989 57         152 my $tok = $rLL->[$KK]->[_TOKEN_];
28990 57         142 my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
28991              
28992 57 100 33     225 if ( $pad_spaces > 0 ) {
    50          
    50          
28993 55         238 $tok = SPACE x $pad_spaces . $tok;
28994 55         144 $tok_len += $pad_spaces;
28995             }
28996             elsif ( $pad_spaces == 0 ) {
28997 0         0 return;
28998             }
28999             elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq SPACE ) {
29000 2         4 $tok = EMPTY_STRING;
29001 2         3 $tok_len = 0;
29002             }
29003             else {
29004              
29005             # shouldn't happen
29006 0         0 DEVEL_MODE
29007             && Fault("unexpected request for pad spaces = $pad_spaces\n");
29008 0         0 return;
29009             }
29010              
29011 57         178 $tok = $rLL->[$KK]->[_TOKEN_] = $tok;
29012 57         144 $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
29013              
29014 57         125 $token_lengths_to_go[$ipad] += $pad_spaces;
29015 57         121 $tokens_to_go[$ipad] = $tok;
29016              
29017 57         175 foreach my $i ( $ipad .. $max_index_to_go ) {
29018 3019         4078 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
29019             }
29020 57         213 return;
29021             } ## end sub pad_token
29022              
29023             sub xlp_tweak {
29024              
29025             # Remove one indentation space from unbroken containers marked with
29026             # 'K_extra_space'. These are mostly two-line lists with short names
29027             # formatted with -xlp -pt=2.
29028             #
29029             # Before this fix (extra space in line 2):
29030             # is($module->VERSION, $expected,
29031             # "$main_module->VERSION matches $module->VERSION ($expected)");
29032             #
29033             # After this fix:
29034             # is($module->VERSION, $expected,
29035             # "$main_module->VERSION matches $module->VERSION ($expected)");
29036             #
29037             # Notes:
29038             # - This fixes issue git #106
29039             # - This must be called after 'set_logical_padding'.
29040             # - This is currently only applied to -xlp. It would also work for -lp
29041             # but that style is essentially frozen.
29042              
29043 33     33 0 68 my ( $self, $ri_first, $ri_last ) = @_;
29044              
29045             # Must be 2 or more lines
29046 33 50       48 return if ( @{$ri_first} <= 1 );
  33         94  
29047              
29048             # Pull indentation object from start of second line
29049 33         63 my $ibeg_1 = $ri_first->[1];
29050 33         53 my $lp_object = $leading_spaces_to_go[$ibeg_1];
29051 33 100       84 return if ( !ref($lp_object) );
29052              
29053             # This only applies to an indentation object with a marked token
29054 28         91 my $K_extra_space = $lp_object->get_K_extra_space();
29055 28 100       74 return unless ($K_extra_space);
29056              
29057             # Look for the marked token within the first line of this batch
29058 3         7 my $ibeg_0 = $ri_first->[0];
29059 3         10 my $iend_0 = $ri_last->[0];
29060 3         7 my $ii = $ibeg_0 + $K_extra_space - $K_to_go[$ibeg_0];
29061 3 50 33     15 return if ( $ii <= $ibeg_0 || $ii > $iend_0 );
29062              
29063             # Skip padded tokens, they have already been aligned
29064 3         7 my $tok = $tokens_to_go[$ii];
29065 3 100       13 return if ( substr( $tok, 0, 1 ) eq SPACE );
29066              
29067             # Skip 'if'-like statements, this does not improve them
29068             return
29069             if ( $types_to_go[$ibeg_0] eq 'k'
29070 2 50 66     16 && $is_if_unless_elsif{ $tokens_to_go[$ibeg_0] } );
29071              
29072             # Looks okay, reduce indentation by 1 space if possible
29073 2         9 my $spaces = $lp_object->get_spaces();
29074 2 50       8 if ( $spaces > 0 ) {
29075 2         7 $lp_object->decrease_SPACES(1);
29076             }
29077              
29078 2         5 return;
29079             } ## end sub xlp_tweak
29080              
29081             { ## begin closure make_alignment_patterns
29082              
29083             my %keyword_map;
29084             my %operator_map;
29085             my %is_w_n_C;
29086             my %is_my_local_our;
29087             my %is_kwU;
29088             my %is_use_like;
29089             my %is_binary_type;
29090             my %is_binary_keyword;
29091             my %name_map;
29092              
29093             BEGIN {
29094              
29095             # Note: %block_type_map is now global to enable the -gal=s option
29096              
29097             # Map certain keywords to the same 'if' class to align
29098             # long if/elsif sequences. [elsif.pl]. But note that this is
29099             # only for purposes of making the patterns, not alignment tokens.
29100             # The only possible equivalent alignment tokens are 'if' and 'unless',
29101             # and this is handled earlier under control of $rOpts_valign_if_unless
29102             # to avoid making this a global hash.
29103 39     39   516 %keyword_map = (
29104             'unless' => 'if',
29105             'else' => 'if',
29106             'elsif' => 'if',
29107             'when' => 'given',
29108             'default' => 'given',
29109             'case' => 'switch',
29110              
29111             # treat an 'undef' similar to numbers and quotes
29112             'undef' => 'Q',
29113             );
29114              
29115             # Map certain operators to the same class for alignment.
29116             # Note that this map is for the alignment tokens, not the patterns.
29117             # We could have placed 'unless' => 'if' here, but since that is
29118             # under control of $rOpts_valign_if_unless, it is handled elsewhere.
29119 39         172 %operator_map = (
29120             '!~' => '=~',
29121             '+=' => '+=',
29122             '-=' => '+=',
29123             '*=' => '+=',
29124             '/=' => '+=',
29125             );
29126              
29127 39         155 %is_w_n_C = (
29128             'w' => 1,
29129             'n' => 1,
29130             'C' => 1,
29131             );
29132              
29133             # leading keywords which to skip for efficiency when making parenless
29134             # container names
29135 39         114 my @q = qw( my local our return );
29136 39         205 @{is_my_local_our}{@q} = (1) x scalar(@q);
29137              
29138             # leading keywords where we should just join one token to form
29139             # parenless name
29140 39         123 @q = qw( use );
29141 39         135 @{is_use_like}{@q} = (1) x scalar(@q);
29142              
29143             # leading token types which may be used to make a container name
29144 39         108 @q = qw( k w U );
29145 39         193 @{is_kwU}{@q} = (1) x scalar(@q);
29146              
29147             # token types which prevent using leading word as a container name
29148 39         384 @q = qw(
29149             x / : % . | ^ < = > || >= != *= => !~ == && |= .= -= =~ += <=
29150             %= ^= x= ~~ ** << /= &= // >> ~. &. |. ^.
29151             **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
29152             );
29153 39         169 push @q, ',';
29154 39         1303 @{is_binary_type}{@q} = (1) x scalar(@q);
29155              
29156             # token keywords which prevent using leading word as a container name
29157 39         242 @q = qw(and or err eq ne cmp);
29158 39         175 @is_binary_keyword{@q} = (1) x scalar(@q);
29159              
29160             # Some common function calls whose args can be aligned. These do not
29161             # give good alignments if the lengths differ significantly.
29162 39         327489 %name_map = (
29163             'unlike' => 'like',
29164             'isnt' => 'is',
29165             ##'is_deeply' => 'is', # poor; names lengths too different
29166             );
29167              
29168             } ## end BEGIN
29169              
29170             sub make_alignment_patterns {
29171              
29172 6096     6096 0 17465 my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
29173             $ralignment_hash )
29174             = @_;
29175              
29176             #------------------------------------------------------------------
29177             # This sub creates arrays of vertical alignment info for one output
29178             # line.
29179             #------------------------------------------------------------------
29180              
29181             # Input parameters:
29182             # $ibeg, $iend - index range of this line in the _to_go arrays
29183             # $ralignment_type_to_go - alignment type of tokens, like '=', if any
29184             # $alignment_count - number of alignment tokens in the line
29185             # $ralignment_hash - this contains all of the alignments for this
29186             # line. It is not yet used but is available for future coding in
29187             # case there is a need to do a preliminary scan of alignment tokens.
29188              
29189             # The arrays which are created contain strings that can be tested by
29190             # the vertical aligner to see if consecutive lines can be aligned
29191             # vertically.
29192             #
29193             # The four arrays are indexed on the vertical
29194             # alignment fields and are:
29195             # @tokens - a list of any vertical alignment tokens for this line.
29196             # These are tokens, such as '=' '&&' '#' etc which
29197             # we want to might align vertically. These are
29198             # decorated with various information such as
29199             # nesting depth to prevent unwanted vertical
29200             # alignment matches.
29201             # @fields - the actual text of the line between the vertical alignment
29202             # tokens.
29203             # @patterns - a modified list of token types, one for each alignment
29204             # field. These should normally each match before alignment is
29205             # allowed, even when the alignment tokens match.
29206             # @field_lengths - the display width of each field
29207              
29208 6096         8923 if (DEVEL_MODE) {
29209             my $new_count = 0;
29210             if ( defined($ralignment_hash) ) {
29211             $new_count = keys %{$ralignment_hash};
29212             }
29213             my $old_count = $alignment_count;
29214             $old_count = 0 unless ($old_count);
29215             if ( $new_count != $old_count ) {
29216             my $K = $K_to_go[$ibeg];
29217             my $rLL = $self->[_rLL_];
29218             my $lnl = $rLL->[$K]->[_LINE_INDEX_];
29219             Fault(
29220             "alignment hash token count gives count=$new_count but old count is $old_count near line=$lnl\n"
29221             );
29222             }
29223             }
29224              
29225             # -------------------------------------
29226             # Shortcut for lines without alignments
29227             # -------------------------------------
29228 6096 100       13000 if ( !$alignment_count ) {
29229 3086         5760 my $rtokens = [];
29230 3086         8248 my $rfield_lengths =
29231             [ $summed_lengths_to_go[ $iend + 1 ] -
29232             $summed_lengths_to_go[$ibeg] ];
29233 3086         5210 my $rpatterns;
29234             my $rfields;
29235 3086 100       6481 if ( $ibeg == $iend ) {
29236 597         1828 $rfields = [ $tokens_to_go[$ibeg] ];
29237 597         1548 $rpatterns = [ $types_to_go[$ibeg] ];
29238             }
29239             else {
29240 2489         11161 $rfields =
29241             [ join( EMPTY_STRING, @tokens_to_go[ $ibeg .. $iend ] ) ];
29242 2489         8296 $rpatterns =
29243             [ join( EMPTY_STRING, @types_to_go[ $ibeg .. $iend ] ) ];
29244             }
29245 3086         9161 return [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
29246             }
29247              
29248 3010         5262 my $i_start = $ibeg;
29249 3010         4902 my $depth = 0;
29250 3010         4663 my $i_depth_prev = $i_start;
29251 3010         4628 my $depth_prev = $depth;
29252 3010         7307 my %container_name = ( 0 => EMPTY_STRING );
29253 3010         4723 my $saw_exclamation_mark = 0;
29254              
29255 3010         4885 my @tokens = ();
29256 3010         4824 my @fields = ();
29257 3010         4476 my @patterns = ();
29258 3010         4685 my @field_lengths = ();
29259              
29260             #-------------------------------------------------------------
29261             # Make a container name for any uncontained commas, issue c089
29262             #-------------------------------------------------------------
29263             # This is a generalization of the fix for rt136416 which was a
29264             # specialized patch just for 'use Module' statements.
29265             # We restrict this to semicolon-terminated statements; that way
29266             # we know that the top level commas are not in a list container.
29267 3010 100 100     10471 if ( $ibeg == 0 && $iend == $max_index_to_go ) {
29268 1567         2617 my $iterm = $max_index_to_go;
29269 1567 100       4070 if ( $types_to_go[$iterm] eq '#' ) {
29270 289         965 $iterm = iprev_to_go($iterm);
29271             }
29272              
29273             # Alignment lines ending like '=> sub {'; fixes issue c093
29274 1567         3111 my $term_type_ok = $types_to_go[$iterm] eq ';';
29275 1567   66     6383 $term_type_ok ||=
      100        
29276             $tokens_to_go[$iterm] eq '{' && $block_type_to_go[$iterm];
29277              
29278 1567 100 100     11601 if ( $iterm > $ibeg
      100        
      66        
29279             && $term_type_ok
29280             && !$is_my_local_our{ $tokens_to_go[$ibeg] }
29281             && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] )
29282             {
29283 846         2597 $container_name{'0'} =
29284             make_uncontained_comma_name( $iterm, $ibeg, $iend );
29285             }
29286             }
29287              
29288             #--------------------------------
29289             # Begin main loop over all tokens
29290             #--------------------------------
29291 3010         4973 my $j = 0; # field index
29292              
29293 3010         6307 $patterns[0] = EMPTY_STRING;
29294 3010         4614 my %token_count;
29295 3010         6698 for my $i ( $ibeg .. $iend ) {
29296              
29297             #-------------------------------------------------------------
29298             # Part 1: keep track of containers balanced on this line only.
29299             #-------------------------------------------------------------
29300             # These are used below to prevent unwanted cross-line alignments.
29301             # Unbalanced containers already avoid aligning across
29302             # container boundaries.
29303 36984         52727 my $type = $types_to_go[$i];
29304 36984 100       62226 if ( $type_sequence_to_go[$i] ) {
29305 5302         8583 my $token = $tokens_to_go[$i];
29306 5302 100       13321 if ( $is_opening_token{$token} ) {
    100          
29307              
29308             # if container is balanced on this line...
29309 2716         4758 my $i_mate = $mate_index_to_go[$i];
29310 2716 100       6332 if ( !defined($i_mate) ) { $i_mate = -1 }
  302         698  
29311 2716 100 100     9910 if ( $i_mate > $i && $i_mate <= $iend ) {
29312 2193         3575 $i_depth_prev = $i;
29313 2193         3441 $depth_prev = $depth;
29314 2193         3281 $depth++;
29315              
29316             # Append the previous token name to make the container name
29317             # more unique. This name will also be given to any commas
29318             # within this container, and it helps avoid undesirable
29319             # alignments of different types of containers.
29320              
29321             # Containers beginning with { and [ are given those names
29322             # for uniqueness. That way commas in different containers
29323             # will not match. Here is an example of what this prevents:
29324             # a => [ 1, 2, 3 ],
29325             # b => { b1 => 4, b2 => 5 },
29326             # Here is another example of what we avoid by labeling the
29327             # commas properly:
29328              
29329             # is_d( [ $a, $a ], [ $b, $c ] );
29330             # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
29331             # is_d( [ \$a, \$a ], [ \$b, \$c ] );
29332              
29333 2193 100       6580 my $name =
29334             $token eq '(' ? $self->make_paren_name($i) : $token;
29335              
29336             # name cannot be '.', so change to something else if so
29337 2193 100       5096 if ( $name eq '.' ) { $name = 'dot' }
  1         3  
29338              
29339 2193         5802 $container_name{$depth} = "+" . $name;
29340              
29341             # Make the container name even more unique if necessary.
29342             # If we are not vertically aligning this opening paren,
29343             # append a character count to avoid bad alignment since
29344             # it usually looks bad to align commas within containers
29345             # for which the opening parens do not align. Here
29346             # is an example very BAD alignment of commas (because
29347             # the atan2 functions are not all aligned):
29348             # $XY =
29349             # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
29350             # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
29351             # $X * atan2( $X, 1 ) -
29352             # $Y * atan2( $Y, 1 );
29353             #
29354             # On the other hand, it is usually okay to align commas
29355             # if opening parens align, such as:
29356             # glVertex3d( $cx + $s * $xs, $cy, $z );
29357             # glVertex3d( $cx, $cy + $s * $ys, $z );
29358             # glVertex3d( $cx - $s * $xs, $cy, $z );
29359             # glVertex3d( $cx, $cy - $s * $ys, $z );
29360             #
29361             # To distinguish between these situations, we append
29362             # the length of the line from the previous matching
29363             # token, or beginning of line, to the function name.
29364             # This will allow the vertical aligner to reject
29365             # undesirable matches.
29366              
29367             # if we are not aligning on this paren...
29368 2193 100       5251 if ( !$ralignment_type_to_go->[$i] ) {
29369              
29370             # Add the length to the name ...
29371 1663         3294 my $len = $summed_lengths_to_go[$i] -
29372             $summed_lengths_to_go[$i_start];
29373              
29374             # Do not include the length of any '!'. Otherwise,
29375             # commas in the following line will not match:
29376             # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
29377             # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
29378 1663 100       3756 if ($saw_exclamation_mark) { $len -= 1 }
  36         81  
29379              
29380             # For first token, use distance from start of line
29381             # but subtract off the indentation due to level.
29382             # Otherwise, results could vary with indentation.
29383 1663 100       3869 if ( $i_start == $ibeg ) {
29384 728         2130 $len +=
29385             leading_spaces_to_go($ibeg) -
29386             $levels_to_go[$i_start] *
29387             $rOpts_indent_columns;
29388             }
29389 1663 50       3750 if ( $len < 0 ) { $len = 0 }
  0         0  
29390              
29391             # tack this length onto the container name to try
29392             # to make a unique token name
29393 1663         3871 $container_name{$depth} .= "-" . $len;
29394             } ## end if ( !$ralignment_type_to_go...)
29395             } ## end if ( $i_mate > $i && $i_mate...)
29396             } ## end if ( $is_opening_token...)
29397              
29398             elsif ( $is_closing_token{$token} ) {
29399 2318         3867 $i_depth_prev = $i;
29400 2318         3543 $depth_prev = $depth;
29401 2318 100       5757 $depth-- if $depth > 0;
29402             }
29403             else {
29404             ## must be ternary
29405             }
29406             } ## end if ( $type_sequence_to_go...)
29407              
29408             #------------------------------------------------------------
29409             # Part 2: if we find a new synchronization token, we are done
29410             # with a field
29411             #------------------------------------------------------------
29412 36984 100 100     100579 if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
29413              
29414 5291         10231 my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
29415              
29416             # map similar items
29417 5291         9226 my $tok_map = $operator_map{$tok};
29418 5291 100       9732 $tok = $tok_map if ($tok_map);
29419              
29420             # make separators in different nesting depths unique
29421             # by appending the nesting depth digit.
29422 5291 100       10606 if ( $raw_tok ne '#' ) {
29423 4966         10081 $tok .= "$nesting_depth_to_go[$i]";
29424             }
29425              
29426             # also decorate commas with any container name to avoid
29427             # unwanted cross-line alignments.
29428 5291 100 100     16291 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
29429              
29430             # If we are at an opening token which increased depth, we have
29431             # to use the name from the previous depth.
29432 2739 100       5746 my $depth_last = $i == $i_depth_prev ? $depth_prev : $depth;
29433 2739 100       5272 my $depth_p =
29434             ( $depth_last < $depth ? $depth_last : $depth );
29435 2739 100       5894 if ( $container_name{$depth_p} ) {
29436 1404         2592 $tok .= $container_name{$depth_p};
29437             }
29438             }
29439              
29440             # Patch to avoid aligning leading and trailing if, unless.
29441             # Mark trailing if, unless statements with container names.
29442             # This makes them different from leading if, unless which
29443             # are not so marked at present. If we ever need to name
29444             # them too, we could use ci to distinguish them.
29445             # Example problem to avoid:
29446             # return ( 2, "DBERROR" )
29447             # if ( $retval == 2 );
29448             # if ( scalar @_ ) {
29449             # my ( $a, $b, $c, $d, $e, $f ) = @_;
29450             # }
29451 5291 100       10490 if ( $raw_tok eq '(' ) {
29452 209 100 100     1092 if ( $ci_levels_to_go[$ibeg]
29453             && $container_name{$depth} =~ /^\+(if|unless)/ )
29454             {
29455 1         3 $tok .= $container_name{$depth};
29456             }
29457             }
29458              
29459             # Decorate block braces with block types to avoid
29460             # unwanted alignments such as the following:
29461             # foreach ( @{$routput_array} ) { $fh->print($_) }
29462             # eval { $fh->close() };
29463 5291 100 100     11165 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
29464 238         603 my $block_type = $block_type_to_go[$i];
29465              
29466             # map certain related block types to allow
29467             # else blocks to align
29468             $block_type = $block_type_map{$block_type}
29469 238 100       879 if ( defined( $block_type_map{$block_type} ) );
29470              
29471             # remove sub names to allow one-line sub braces to align
29472             # regardless of name
29473 238 100       4316 if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
  45         136  
29474              
29475             # allow all control-type blocks to align
29476 238 100       1173 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
  12         26  
29477              
29478 238         556 $tok .= $block_type;
29479              
29480             # Avoid aligning opening braces across leading ci level
29481             # changes by marking block type with _ci (issue c224)
29482 238 100       713 if ( $ci_levels_to_go[$ibeg] ) { $tok .= '_1' }
  24         69  
29483             }
29484              
29485             # Mark multiple copies of certain tokens with the copy number
29486             # This will allow the aligner to decide if they are matched.
29487             # For now, only do this for equals. For example, the two
29488             # equals on the next line will be labeled '=0' and '=0.2'.
29489             # Later, the '=0.2' will be ignored in alignment because it
29490             # has no match.
29491              
29492             # $| = $debug = 1 if $opt_d;
29493             # $full_index = 1 if $opt_i;
29494              
29495 5291 100 100     16949 if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
29496 2012         5098 $token_count{$tok}++;
29497 2012 100       4933 if ( $token_count{$tok} > 1 ) {
29498 193         677 $tok .= '.' . $token_count{$tok};
29499             }
29500             }
29501              
29502             # concatenate the text of the consecutive tokens to form
29503             # the field
29504 5291         21344 push( @fields,
29505             join( EMPTY_STRING, @tokens_to_go[ $i_start .. $i - 1 ] ) );
29506              
29507 5291         11803 push @field_lengths,
29508             $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
29509              
29510             # store the alignment token for this field
29511 5291         10271 push( @tokens, $tok );
29512              
29513             # get ready for the next batch
29514 5291         7997 $i_start = $i;
29515 5291         7383 $saw_exclamation_mark = 0;
29516 5291         7105 $j++;
29517 5291         9775 $patterns[$j] = EMPTY_STRING;
29518             } ## end if ( new synchronization token
29519              
29520             #-----------------------------------------------
29521             # Part 3: continue accumulating the next pattern
29522             #-----------------------------------------------
29523              
29524             # for keywords we have to use the actual text
29525 36984 100       88705 if ( $type eq 'k' ) {
    100          
    100          
    100          
29526              
29527 1839         3659 my $tok_fix = $tokens_to_go[$i];
29528              
29529             # but map certain keywords to a common string to allow
29530             # alignment.
29531             $tok_fix = $keyword_map{$tok_fix}
29532 1839 100       5976 if ( defined( $keyword_map{$tok_fix} ) );
29533 1839         4318 $patterns[$j] .= $tok_fix;
29534             }
29535              
29536             elsif ( $type eq 'b' ) {
29537 13173         21507 $patterns[$j] .= $type;
29538             }
29539              
29540             # Mark most things before arrows as a quote to
29541             # get them to line up. Testfile: mixed.pl.
29542              
29543             # handle $type =~ /^[wnC]$/
29544             elsif ( $is_w_n_C{$type} ) {
29545              
29546 2626         4746 my $type_fix = $type;
29547              
29548 2626 100       6445 if ( $i < $iend - 1 ) {
29549 2335         4264 my $next_type = $types_to_go[ $i + 1 ];
29550 2335 100       5191 my $i_next_nonblank =
29551             ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
29552              
29553 2335 100       5378 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
29554 789         1454 $type_fix = 'Q';
29555              
29556             # Patch to ignore leading minus before words,
29557             # by changing pattern 'mQ' into just 'Q',
29558             # so that we can align things like this:
29559             # Button => "Print letter \"~$_\"",
29560             # -command => [ sub { print "$_[0]\n" }, $_ ],
29561 789 100       1956 if ( $patterns[$j] eq 'm' ) {
29562 212         490 $patterns[$j] = EMPTY_STRING;
29563             }
29564             }
29565             }
29566              
29567             # Convert a bareword within braces into a quote for
29568             # matching. This will allow alignment of expressions like
29569             # this:
29570             # local ( $SIG{'INT'} ) = IGNORE;
29571             # local ( $SIG{ALRM} ) = 'POSTMAN';
29572 2626 100 100     11528 if ( $type eq 'w'
      100        
      100        
      66        
29573             && $i > $ibeg
29574             && $i < $iend
29575             && $types_to_go[ $i - 1 ] eq 'L'
29576             && $types_to_go[ $i + 1 ] eq 'R' )
29577             {
29578 68         158 $type_fix = 'Q';
29579             }
29580              
29581             # patch to make numbers and quotes align
29582 2626 100       5652 if ( $type eq 'n' ) { $type_fix = 'Q' }
  1399         2299  
29583              
29584 2626         5271 $patterns[$j] .= $type_fix;
29585             } ## end elsif ( $is_w_n_C{$type} )
29586              
29587             # ignore any ! in patterns
29588             elsif ( $type eq '!' ) {
29589 43         141 $saw_exclamation_mark = 1;
29590             }
29591              
29592             # everything else
29593             else {
29594 19303         30553 $patterns[$j] .= $type;
29595              
29596             # remove any zero-level name at first fat comma
29597 19303 100 100     54037 if ( $depth == 0 && $type eq '=>' ) {
29598 613         1591 $container_name{$depth} = EMPTY_STRING;
29599             }
29600             }
29601              
29602             } ## end for my $i ( $ibeg .. $iend)
29603              
29604             #---------------------------------------------------------------
29605             # End of main loop .. join text of tokens to make the last field
29606             #---------------------------------------------------------------
29607 3010         12515 push( @fields,
29608             join( EMPTY_STRING, @tokens_to_go[ $i_start .. $iend ] ) );
29609 3010         7579 push @field_lengths,
29610             $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
29611              
29612 3010         16666 return [ \@tokens, \@fields, \@patterns, \@field_lengths ];
29613             } ## end sub make_alignment_patterns
29614              
29615             sub make_uncontained_comma_name {
29616 846     846 0 2037 my ( $iterm, $ibeg, $iend ) = @_;
29617              
29618             # Make a container name by combining all leading barewords,
29619             # keywords and functions.
29620 846         1492 my $name = EMPTY_STRING;
29621 846         1390 my $count = 0;
29622 846         2312 my $count_max;
29623             my $iname_end;
29624 846         0 my $ilast_blank;
29625 846         2038 for ( $ibeg .. $iterm ) {
29626 1673         2811 my $type = $types_to_go[$_];
29627              
29628 1673 100       3645 if ( $type eq 'b' ) {
29629 383         718 $ilast_blank = $_;
29630 383         822 next;
29631             }
29632              
29633 1290         2212 my $token = $tokens_to_go[$_];
29634              
29635             # Give up if we find an opening paren, binary operator or
29636             # comma within or after the proposed container name.
29637 1290 100 100     8329 if ( $token eq '('
      100        
      100        
29638             || $is_binary_type{$type}
29639             || $type eq 'k' && $is_binary_keyword{$token} )
29640             {
29641 192         454 $name = EMPTY_STRING;
29642 192         487 last;
29643             }
29644              
29645             # The container name is only built of certain types:
29646 1098 100       3051 last if ( !$is_kwU{$type} );
29647              
29648             # Normally it is made of one word, but two words for 'use'
29649 486 100 66     1904 if ( $count == 0 ) {
    100          
29650 380 100 100     1927 if ( $type eq 'k'
29651             && $is_use_like{ $tokens_to_go[$_] } )
29652             {
29653 65         262 $count_max = 2;
29654             }
29655             else {
29656 315         656 $count_max = 1;
29657             }
29658             }
29659             elsif ( defined($count_max) && $count >= $count_max ) {
29660 42         137 last;
29661             }
29662             else {
29663             ## continue
29664             }
29665              
29666 444 50       1360 if ( defined( $name_map{$token} ) ) {
29667 0         0 $token = $name_map{$token};
29668             }
29669              
29670 444         1114 $name .= SPACE . $token;
29671 444         788 $iname_end = $_;
29672 444         826 $count++;
29673             }
29674              
29675             # Require a space after the container name token(s)
29676 846 100 66     3632 if ( $name
      100        
29677             && defined($ilast_blank)
29678             && $ilast_blank > $iname_end )
29679             {
29680 206         601 $name = substr( $name, 1 );
29681             }
29682 846         2482 return $name;
29683             } ## end sub make_uncontained_comma_name
29684              
29685             } ## end closure make_alignment_patterns
29686              
29687             sub make_paren_name {
29688 962     962 0 2225 my ( $self, $i ) = @_;
29689              
29690             # The token at index $i is a '('.
29691             # Create an alignment name for it to avoid incorrect alignments.
29692              
29693             # Start with the name of the previous nonblank token...
29694 962         1755 my $name = EMPTY_STRING;
29695 962         1711 my $im = $i - 1;
29696 962 100       2339 return EMPTY_STRING if ( $im < 0 );
29697 943 100       2726 if ( $types_to_go[$im] eq 'b' ) { $im--; }
  499         967  
29698 943 50       2233 return EMPTY_STRING if ( $im < 0 );
29699 943         1935 $name = $tokens_to_go[$im];
29700              
29701             # Prepend any sub name to an isolated -> to avoid unwanted alignments
29702             # [test case is test8/penco.pl]
29703 943 100       2396 if ( $name eq '->' ) {
29704 5         12 $im--;
29705 5 50 33     40 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
29706 5         17 $name = $tokens_to_go[$im] . $name;
29707             }
29708             }
29709              
29710             # Finally, remove any leading arrows
29711 943 50       2938 if ( substr( $name, 0, 2 ) eq '->' ) {
29712 0         0 $name = substr( $name, 2 );
29713             }
29714 943         2522 return $name;
29715             } ## end sub make_paren_name
29716              
29717             { ## begin closure get_final_indentation
29718              
29719             my ( $last_indentation_written, $last_unadjusted_indentation,
29720             $last_leading_token );
29721              
29722             sub initialize_get_final_indentation {
29723 561     561 0 1353 $last_indentation_written = 0;
29724 561         1307 $last_unadjusted_indentation = 0;
29725 561         1282 $last_leading_token = EMPTY_STRING;
29726 561         1063 return;
29727             } ## end sub initialize_get_final_indentation
29728              
29729             sub get_final_indentation {
29730              
29731             my (
29732 7384     7384 0 18351 $self, #
29733              
29734             $ibeg,
29735             $iend,
29736             $rfields,
29737             $rpatterns,
29738             $ri_first,
29739             $ri_last,
29740             $rindentation_list,
29741             $level_jump,
29742             $starting_in_quote,
29743             $is_static_block_comment,
29744              
29745             ) = @_;
29746              
29747             #--------------------------------------------------------------
29748             # This routine makes any necessary adjustments to get the final
29749             # indentation of a line in the Formatter.
29750             #--------------------------------------------------------------
29751              
29752             # It starts with the basic indentation which has been defined for the
29753             # leading token, and then takes into account any options that the user
29754             # has set regarding special indenting and outdenting.
29755              
29756             # This routine has to resolve a number of complex interacting issues,
29757             # including:
29758             # 1. The various -cti=n type flags, which contain the desired change in
29759             # indentation for lines ending in commas and semicolons, should be
29760             # followed,
29761             # 2. qw quotes require special processing and do not fit perfectly
29762             # with normal containers,
29763             # 3. formatting with -wn can complicate things, especially with qw
29764             # quotes,
29765             # 4. formatting with the -lp option is complicated, and does not
29766             # work well with qw quotes and with -wn formatting.
29767             # 5. a number of special situations, such as 'cuddled' formatting.
29768             # 6. This routine is mainly concerned with outdenting closing tokens
29769             # but note that there is some overlap with the functions of sub
29770             # undo_ci, which was processed earlier, so care has to be taken to
29771             # keep them coordinated.
29772              
29773             # Find the last code token of this line
29774 7384         10723 my $i_terminal = $iend;
29775 7384         12809 my $terminal_type = $types_to_go[$iend];
29776 7384 100 100     21367 if ( $terminal_type eq '#' && $i_terminal > $ibeg ) {
29777 364         807 $i_terminal -= 1;
29778 364         1166 $terminal_type = $types_to_go[$i_terminal];
29779 364 100 66     1933 if ( $terminal_type eq 'b' && $i_terminal > $ibeg ) {
29780 350         638 $i_terminal -= 1;
29781 350         634 $terminal_type = $types_to_go[$i_terminal];
29782             }
29783             }
29784              
29785 7384         10609 my $is_outdented_line;
29786              
29787 7384         11986 my $type_beg = $types_to_go[$ibeg];
29788 7384         13417 my $token_beg = $tokens_to_go[$ibeg];
29789 7384         11587 my $level_beg = $levels_to_go[$ibeg];
29790 7384         11356 my $block_type_beg = $block_type_to_go[$ibeg];
29791 7384         11661 my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
29792 7384         11418 my $seqno_beg = $type_sequence_to_go[$ibeg];
29793 7384         12934 my $is_closing_type_beg = $is_closing_type{$type_beg};
29794              
29795             # QW INDENTATION PATCH 3:
29796 7384         11626 my $seqno_qw_closing;
29797 7384 100 100     18235 if ( $type_beg eq 'q' && $ibeg == 0 ) {
29798 204         477 my $KK = $K_to_go[$ibeg];
29799             $seqno_qw_closing =
29800 204         487 $self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
29801             }
29802              
29803 7384   100     23778 my $is_semicolon_terminated = $terminal_type eq ';'
29804             && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
29805             || $seqno_qw_closing );
29806              
29807             # NOTE: A future improvement would be to make it semicolon terminated
29808             # even if it does not have a semicolon but is followed by a closing
29809             # block brace. This would undo ci even for something like the
29810             # following, in which the final paren does not have a semicolon because
29811             # it is a possible weld location:
29812              
29813             # if ($BOLD_MATH) {
29814             # (
29815             # $labels, $comment,
29816             # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
29817             # )
29818             # }
29819             #
29820              
29821             # MOJO patch: Set a flag if this lines begins with ')->'
29822 7384   100     23706 my $leading_paren_arrow = (
29823             $is_closing_type_beg
29824             && $token_beg eq ')'
29825             && (
29826             ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
29827             || ( $ibeg < $i_terminal - 1
29828             && $types_to_go[ $ibeg + 1 ] eq 'b'
29829             && $types_to_go[ $ibeg + 2 ] eq '->' )
29830             )
29831             );
29832              
29833             #---------------------------------------------------------
29834             # Section 1: set a flag and a default indentation
29835             #
29836             # Most lines are indented according to the initial token.
29837             # But it is common to outdent to the level just after the
29838             # terminal token in certain cases...
29839             # adjust_indentation flag:
29840             # 0 - do not adjust
29841             # 1 - outdent
29842             # 2 - vertically align with opening token
29843             # 3 - indent
29844             #---------------------------------------------------------
29845              
29846 7384         10854 my $adjust_indentation = 0;
29847 7384         10749 my $default_adjust_indentation = 0;
29848              
29849             # Parameters needed for option 2, aligning with opening token:
29850             my (
29851 7384         12213 $opening_indentation, $opening_offset,
29852             $is_leading, $opening_exists
29853             );
29854              
29855             #-------------------------------------
29856             # Section 1A:
29857             # if line starts with a sequenced item
29858             #-------------------------------------
29859 7384 100 100     35679 if ( $seqno_beg || $seqno_qw_closing ) {
    50 66        
      33        
29860              
29861             # This can be tedious so we let a sub do it
29862             (
29863 1981         7693 $adjust_indentation,
29864             $default_adjust_indentation,
29865             $opening_indentation,
29866             $opening_offset,
29867             $is_leading,
29868             $opening_exists,
29869              
29870             ) = $self->get_closing_token_indentation(
29871              
29872             $ibeg,
29873             $iend,
29874             $ri_first,
29875             $ri_last,
29876             $rindentation_list,
29877             $level_jump,
29878             $i_terminal,
29879             $is_semicolon_terminated,
29880             $seqno_qw_closing,
29881              
29882             );
29883             }
29884              
29885             #--------------------------------------------------------
29886             # Section 1B:
29887             # if at ');', '};', '>;', and '];' of a terminal qw quote
29888             #--------------------------------------------------------
29889             elsif (
29890             substr( $rpatterns->[0], 0, 2 ) eq 'qb'
29891             && substr( $rfields->[0], -1, 1 ) eq ';'
29892             ## $rpatterns->[0] =~ /^qb*;$/
29893             && $rfields->[0] =~ /^([\)\}\]\>]);$/
29894             )
29895             {
29896 0 0       0 if ( $closing_token_indentation{$1} == 0 ) {
29897 0         0 $adjust_indentation = 1;
29898             }
29899             else {
29900 0         0 $adjust_indentation = 3;
29901             }
29902             }
29903             else {
29904             ## ok
29905             }
29906              
29907             #---------------------------------------------------------
29908             # Section 2: set indentation according to flag set above
29909             #
29910             # Select the indentation object to define leading
29911             # whitespace. If we are outdenting something like '} } );'
29912             # then we want to use one level below the last token
29913             # ($i_terminal) in order to get it to fully outdent through
29914             # all levels.
29915             #---------------------------------------------------------
29916 7384         13569 my $indentation;
29917             my $lev;
29918 7384         12251 my $level_end = $levels_to_go[$iend];
29919              
29920             #------------------------------------
29921             # Section 2A: adjust_indentation == 0
29922             # No change in indentation
29923             #------------------------------------
29924 7384 100       15524 if ( $adjust_indentation == 0 ) {
    100          
    100          
29925 6415         9339 $indentation = $leading_spaces_beg;
29926 6415         9477 $lev = $level_beg;
29927             }
29928              
29929             #-------------------------------------------------------------------
29930             # Section 2B: adjust_indentation == 1
29931             # Change the indentation to be that of a different token on the line
29932             #-------------------------------------------------------------------
29933             elsif ( $adjust_indentation == 1 ) {
29934              
29935             # Previously, the indentation of the terminal token was used:
29936             # OLD CODING:
29937             # $indentation = $reduced_spaces_to_go[$i_terminal];
29938             # $lev = $levels_to_go[$i_terminal];
29939              
29940             # Generalization for MOJO patch:
29941             # Use the lowest level indentation of the tokens on the line.
29942             # For example, here we can use the indentation of the ending ';':
29943             # } until ($selection > 0 and $selection < 10); # ok to use ';'
29944             # But this will not outdent if we use the terminal indentation:
29945             # )->then( sub { # use indentation of the ->, not the {
29946             # Warning: reduced_spaces_to_go[] may be a reference, do not
29947             # do numerical checks with it
29948              
29949 863         1476 my $i_ind = $ibeg;
29950 863         1701 $indentation = $reduced_spaces_to_go[$i_ind];
29951 863         1518 $lev = $levels_to_go[$i_ind];
29952 863         2383 while ( $i_ind < $i_terminal ) {
29953 1195         1705 $i_ind++;
29954 1195 100       3173 if ( $levels_to_go[$i_ind] < $lev ) {
29955 2         6 $indentation = $reduced_spaces_to_go[$i_ind];
29956 2         6 $lev = $levels_to_go[$i_ind];
29957             }
29958             }
29959             }
29960              
29961             #--------------------------------------------------------------
29962             # Section 2C: adjust_indentation == 2
29963             # Handle indented closing token which aligns with opening token
29964             #--------------------------------------------------------------
29965             elsif ( $adjust_indentation == 2 ) {
29966              
29967             # handle option to align closing token with opening token
29968 88         185 $lev = $level_beg;
29969              
29970             # calculate spaces needed to align with opening token
29971 88         441 my $space_count =
29972             get_spaces($opening_indentation) + $opening_offset;
29973              
29974             # Indent less than the previous line.
29975             #
29976             # Problem: For -lp we don't exactly know what it was if there
29977             # were recoverable spaces sent to the aligner. A good solution
29978             # would be to force a flush of the vertical alignment buffer, so
29979             # that we would know. For now, this rule is used for -lp:
29980             #
29981             # When the last line did not start with a closing token we will
29982             # be optimistic that the aligner will recover everything wanted.
29983             #
29984             # This rule will prevent us from breaking a hierarchy of closing
29985             # tokens, and in a worst case will leave a closing paren too far
29986             # indented, but this is better than frequently leaving it not
29987             # indented enough.
29988 88         242 my $last_spaces = get_spaces($last_indentation_written);
29989              
29990 88 100 100     877 if ( ref($last_indentation_written)
29991             && !$is_closing_token{$last_leading_token} )
29992             {
29993 38         138 $last_spaces +=
29994             get_recoverable_spaces($last_indentation_written);
29995             }
29996              
29997             # reset the indentation to the new space count if it works
29998             # only options are all or none: nothing in-between looks good
29999 88         182 $lev = $level_beg;
30000              
30001 88         206 my $diff = $last_spaces - $space_count;
30002 88 100       316 if ( $diff > 0 ) {
30003 49         107 $indentation = $space_count;
30004             }
30005             else {
30006              
30007             # We need to fix things ... but there is no good way to do it.
30008             # The best solution is for the user to use a longer maximum
30009             # line length. We could get a smooth variation if we just move
30010             # the paren in using
30011             # $space_count -= ( 1 - $diff );
30012             # But unfortunately this can give a rather unbalanced look.
30013              
30014             # For -xlp we currently allow a tolerance of one indentation
30015             # level and then revert to a simpler default. This will jump
30016             # suddenly but keeps a balanced look.
30017 39 50 66     313 if ( $rOpts_extended_line_up_parentheses
    100 33        
    50          
30018             && $diff >= -$rOpts_indent_columns
30019             && $space_count > $leading_spaces_beg )
30020             {
30021 0         0 $indentation = $space_count;
30022             }
30023              
30024             # Otherwise revert to defaults
30025             elsif ( $default_adjust_indentation == 0 ) {
30026 37         99 $indentation = $leading_spaces_beg;
30027             }
30028             elsif ( $default_adjust_indentation == 1 ) {
30029 2         6 $indentation = $reduced_spaces_to_go[$i_terminal];
30030 2         8 $lev = $levels_to_go[$i_terminal];
30031             }
30032             else {
30033             ## ok - maybe default_adjust_indentation > 1 ?
30034             }
30035             }
30036             }
30037              
30038             #-------------------------------------------------------------
30039             # Section 2D: adjust_indentation == 3
30040             # Full indentation of closing tokens (-icb and -icp or -cti=2)
30041             #-------------------------------------------------------------
30042             else {
30043              
30044             # handle -icb (indented closing code block braces)
30045             # Updated method for indented block braces: indent one full level if
30046             # there is no continuation indentation. This will occur for major
30047             # structures such as sub, if, else, but not for things like map
30048             # blocks.
30049             #
30050             # Note: only code blocks without continuation indentation are
30051             # handled here (if, else, unless, ..). In the following snippet,
30052             # the terminal brace of the sort block will have continuation
30053             # indentation as shown so it will not be handled by the coding
30054             # here. We would have to undo the continuation indentation to do
30055             # this, but it probably looks ok as is. This is a possible future
30056             # update for semicolon terminated lines.
30057             #
30058             # if ($sortby eq 'date' or $sortby eq 'size') {
30059             # @files = sort {
30060             # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
30061             # or $a cmp $b
30062             # } @files;
30063             # }
30064             #
30065 18 100 100     75 if ( $block_type_beg
30066             && $ci_levels_to_go[$i_terminal] == 0 )
30067             {
30068 6         20 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
30069 6         11 $indentation = $spaces + $rOpts_indent_columns;
30070              
30071             # NOTE: for -lp we could create a new indentation object, but
30072             # there is probably no need to do it
30073             }
30074              
30075             # handle -icp and any -icb block braces which fall through above
30076             # test such as the 'sort' block mentioned above.
30077             else {
30078              
30079             # There are currently two ways to handle -icp...
30080             # One way is to use the indentation of the previous line:
30081             # $indentation = $last_indentation_written;
30082              
30083             # The other way is to use the indentation that the previous line
30084             # would have had if it hadn't been adjusted:
30085 12         35 $indentation = $last_unadjusted_indentation;
30086              
30087             # Current method: use the minimum of the two. This avoids
30088             # inconsistent indentation.
30089 12 100       39 if ( get_spaces($last_indentation_written) <
30090             get_spaces($indentation) )
30091             {
30092 1         13 $indentation = $last_indentation_written;
30093             }
30094             }
30095              
30096             # use previous indentation but use own level
30097             # to cause list to be flushed properly
30098 18         34 $lev = $level_beg;
30099             }
30100              
30101             #-------------------------------------------------------------
30102             # Remember indentation except for multi-line quotes, which get
30103             # no indentation
30104             #-------------------------------------------------------------
30105 7384 100 100     22644 if ( !( $ibeg == 0 && $starting_in_quote ) ) {
30106 7365         10743 $last_indentation_written = $indentation;
30107 7365         10621 $last_unadjusted_indentation = $leading_spaces_beg;
30108 7365         11026 $last_leading_token = $token_beg;
30109              
30110             # Patch to make a line which is the end of a qw quote work with the
30111             # -lp option. Make $token_beg look like a closing token as some
30112             # type even if it is not. This variable will become
30113             # $last_leading_token at the end of this loop. Then, if the -lp
30114             # style is selected, and the next line is also a
30115             # closing token, it will not get more indentation than this line.
30116             # We need to do this because qw quotes (at present) only get
30117             # continuation indentation, not one level of indentation, so we
30118             # need to turn off the -lp indentation.
30119              
30120             # ... a picture is worth a thousand words:
30121              
30122             # perltidy -wn -gnu (Without this patch):
30123             # ok(defined(
30124             # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
30125             # 2981014)])
30126             # ));
30127              
30128             # perltidy -wn -gnu (With this patch):
30129             # ok(defined(
30130             # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
30131             # 2981014)])
30132             # ));
30133 7365 100 100     14403 if ( $seqno_qw_closing
      100        
30134             && ( length($token_beg) > 1 || $token_beg eq '>' ) )
30135             {
30136 4         13 $last_leading_token = ')';
30137             }
30138             }
30139              
30140             #---------------------------------------------------------------------
30141             # Rule: lines with leading closing tokens should not be outdented more
30142             # than the line which contained the corresponding opening token.
30143             #---------------------------------------------------------------------
30144              
30145             # Updated per bug report in alex_bug.pl: we must not
30146             # mess with the indentation of closing logical braces, so
30147             # we must treat something like '} else {' as if it were
30148             # an isolated brace
30149             my $is_isolated_block_brace = $block_type_beg
30150             && ( $i_terminal == $ibeg
30151 7384   100     17380 || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg}
30152             );
30153              
30154             # only do this for a ':; which is aligned with its leading '?'
30155 7384   100     16485 my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
30156              
30157 7384 100 100     24038 if (
      100        
      100        
30158             defined($opening_indentation)
30159             && !$leading_paren_arrow # MOJO patch
30160             && !$is_isolated_block_brace
30161             && !$is_unaligned_colon
30162             )
30163             {
30164 823 100       2565 if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
30165 48         166 $indentation = $opening_indentation;
30166             }
30167             }
30168              
30169             #----------------------------------------------------
30170             # remember the indentation of each line of this batch
30171             #----------------------------------------------------
30172 7384         10840 push @{$rindentation_list}, $indentation;
  7384         16885  
30173              
30174             #---------------------------------------------
30175             # outdent lines with certain leading tokens...
30176             #---------------------------------------------
30177 7384 100 100     46272 if (
      100        
30178              
30179             # must be first word of this batch
30180             $ibeg == 0
30181              
30182             # and ...
30183             && (
30184              
30185             # certain leading keywords if requested
30186             $rOpts_outdent_keywords
30187             && $type_beg eq 'k'
30188             && $outdent_keyword{$token_beg}
30189              
30190             # or labels if requested
30191             || $rOpts_outdent_labels && $type_beg eq 'J'
30192              
30193             # or static block comments if requested
30194             || $is_static_block_comment
30195             && $rOpts_outdent_static_block_comments
30196             )
30197             )
30198             {
30199 32         154 my $space_count = leading_spaces_to_go($ibeg);
30200 32 100       151 if ( $space_count > 0 ) {
30201 26         60 $space_count -= $rOpts_continuation_indentation;
30202 26         52 $is_outdented_line = 1;
30203 26 50       87 if ( $space_count < 0 ) { $space_count = 0 }
  0         0  
30204              
30205             # do not promote a spaced static block comment to non-spaced;
30206             # this is not normally necessary but could be for some
30207             # unusual user inputs (such as -ci = -i)
30208 26 50 66     125 if ( $type_beg eq '#' && $space_count == 0 ) {
30209 0         0 $space_count = 1;
30210             }
30211              
30212 26         56 $indentation = $space_count;
30213             }
30214             }
30215              
30216             return (
30217              
30218 7384         29784 $indentation,
30219             $lev,
30220             $level_end,
30221             $i_terminal,
30222             $is_outdented_line,
30223              
30224             );
30225             } ## end sub get_final_indentation
30226              
30227             sub get_closing_token_indentation {
30228              
30229             # Determine indentation adjustment for a line with a leading closing
30230             # token - i.e. one of these: ) ] } :
30231              
30232             my (
30233 1981     1981 0 5816 $self, #
30234              
30235             $ibeg,
30236             $iend,
30237             $ri_first,
30238             $ri_last,
30239             $rindentation_list,
30240             $level_jump,
30241             $i_terminal,
30242             $is_semicolon_terminated,
30243             $seqno_qw_closing,
30244              
30245             ) = @_;
30246              
30247 1981         3107 my $adjust_indentation = 0;
30248 1981         3007 my $default_adjust_indentation = $adjust_indentation;
30249 1981         3476 my $terminal_type = $types_to_go[$i_terminal];
30250              
30251 1981         3200 my $type_beg = $types_to_go[$ibeg];
30252 1981         3311 my $token_beg = $tokens_to_go[$ibeg];
30253 1981         3179 my $level_beg = $levels_to_go[$ibeg];
30254 1981         3254 my $block_type_beg = $block_type_to_go[$ibeg];
30255 1981         3063 my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
30256 1981         3170 my $seqno_beg = $type_sequence_to_go[$ibeg];
30257 1981         3384 my $is_closing_type_beg = $is_closing_type{$type_beg};
30258              
30259             my (
30260 1981         3530 $opening_indentation, $opening_offset,
30261             $is_leading, $opening_exists
30262             );
30263              
30264             # Honor any flag to reduce -ci set by the -bbxi=n option
30265 1981 100 100     7993 if ( $seqno_beg && $self->[_rwant_reduced_ci_]->{$seqno_beg} ) {
30266              
30267             # if this is an opening, it must be alone on the line ...
30268 4 50 66     15 if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
    0          
30269 4         8 $adjust_indentation = 1;
30270             }
30271              
30272             # ... or a single welded unit (fix for b1173)
30273             elsif ($total_weld_count) {
30274 0         0 my $K_beg = $K_to_go[$ibeg];
30275 0         0 my $Kterm = $K_to_go[$i_terminal];
30276 0         0 my $Kterm_test = $self->[_rK_weld_left_]->{$Kterm};
30277 0 0 0     0 if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
30278 0         0 $Kterm = $Kterm_test;
30279             }
30280 0 0       0 if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
  0         0  
30281             }
30282             else {
30283             ## ok
30284             }
30285             }
30286              
30287 1981         3398 my $ris_bli_container = $self->[_ris_bli_container_];
30288 1981 100       4598 my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
30289              
30290             # Update the $is_bli flag as we go. It is initially 1.
30291             # We note seeing a leading opening brace by setting it to 2.
30292             # If we get to the closing brace without seeing the opening then we
30293             # turn it off. This occurs if the opening brace did not get output
30294             # at the start of a line, so we will then indent the closing brace
30295             # in the default way.
30296 1981 100 100     4870 if ( $is_bli_beg && $is_bli_beg == 1 ) {
30297 21         48 my $K_opening_container = $self->[_K_opening_container_];
30298 21         49 my $K_opening = $K_opening_container->{$seqno_beg};
30299 21         45 my $K_beg = $K_to_go[$ibeg];
30300 21 50       62 if ( $K_beg eq $K_opening ) {
30301 21         52 $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
30302             }
30303 0         0 else { $is_bli_beg = 0 }
30304             }
30305              
30306             # QW PATCH for the combination -lp -wn
30307             # For -lp formatting use $ibeg_weld_fix to get around the problem
30308             # that with -lp type formatting the opening and closing tokens to not
30309             # have sequence numbers.
30310 1981         3207 my $ibeg_weld_fix = $ibeg;
30311 1981 100 100     5311 if ( $seqno_qw_closing && $total_weld_count ) {
30312 8         37 my $i_plus = $inext_to_go[$ibeg];
30313 8 50       33 if ( $i_plus <= $max_index_to_go ) {
30314 8         21 my $K_plus = $K_to_go[$i_plus];
30315 8 100       36 if ( defined( $self->[_rK_weld_left_]->{$K_plus} ) ) {
30316 6         15 $ibeg_weld_fix = $i_plus;
30317             }
30318             }
30319             }
30320              
30321             # if we are at a closing token of some type..
30322 1981 100 100     7250 if ( $is_closing_type_beg || $seqno_qw_closing ) {
    100          
30323              
30324 1270         2513 my $K_beg = $K_to_go[$ibeg];
30325              
30326             # get the indentation of the line containing the corresponding
30327             # opening token
30328             (
30329 1270         4135 $opening_indentation, $opening_offset,
30330             $is_leading, $opening_exists
30331             )
30332             = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
30333             $ri_last, $rindentation_list, $seqno_qw_closing );
30334              
30335             # Patch for rt144979, part 1. Coordinated with part 2.
30336             # Do not undo ci for a cuddled closing brace control; it
30337             # needs to be treated exactly the same ci as an isolated
30338             # closing brace.
30339             my $is_cuddled_closing_brace = $seqno_beg
30340 1270   100     5674 && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg};
30341              
30342             # First set the default behavior:
30343 1270 100 66     15120 if (
      100        
      100        
      100        
      66        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
30344              
30345             # default behavior is to outdent closing lines
30346             # of the form: "); }; ]; )->xxx;"
30347             $is_semicolon_terminated
30348              
30349             # and 'cuddled parens' of the form: ")->pack(". Bug fix for RT
30350             # #123749]: the TYPES here were incorrectly ')' and '('. The
30351             # corrected TYPES are '}' and '{'. But skip a cuddled block.
30352             || (
30353             $terminal_type eq '{'
30354             && $type_beg eq '}'
30355             && ( $nesting_depth_to_go[$iend] + 1 ==
30356             $nesting_depth_to_go[$ibeg] )
30357             && !$is_cuddled_closing_brace
30358             )
30359              
30360             # remove continuation indentation for any line like
30361             # } ... {
30362             # or without ending '{' and unbalanced, such as
30363             # such as '}->{$operator}'
30364             || (
30365             $type_beg eq '}'
30366              
30367             && ( $types_to_go[$iend] eq '{'
30368             || $levels_to_go[$iend] < $level_beg )
30369              
30370             # but not if a cuddled block
30371             && !$is_cuddled_closing_brace
30372             )
30373              
30374             # and when the next line is at a lower indentation level...
30375              
30376             # PATCH #1: and only if the style allows undoing continuation
30377             # for all closing token types. We should really wait until
30378             # the indentation of the next line is known and then make
30379             # a decision, but that would require another pass.
30380              
30381             # PATCH #2: and not if this token is under -xci control
30382             || ( $level_jump < 0
30383             && !$some_closing_token_indentation
30384             && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} )
30385              
30386             # Patch for -wn=2, multiple welded closing tokens
30387             || ( $i_terminal > $ibeg
30388             && $is_closing_type{ $types_to_go[$iend] } )
30389              
30390             # Alternate Patch for git #51, isolated closing qw token not
30391             # outdented if no-delete-old-newlines is set. This works, but
30392             # a more general patch elsewhere fixes the real problem: ljump.
30393             # || ( $seqno_qw_closing && $ibeg == $i_terminal )
30394              
30395             )
30396             {
30397 863         1587 $adjust_indentation = 1;
30398             }
30399              
30400             # outdent something like '),'
30401 1270 100 100     3892 if (
30402             $terminal_type eq ','
30403              
30404             # Removed this constraint for -wn
30405             # OLD: allow just one character before the comma
30406             # && $i_terminal == $ibeg + 1
30407              
30408             # require LIST environment; otherwise, we may outdent too much -
30409             # this can happen in calls without parentheses (overload.t);
30410             && $self->is_in_list_by_i($i_terminal)
30411             )
30412             {
30413 87         204 $adjust_indentation = 1;
30414             }
30415              
30416             # undo continuation indentation of a terminal closing token if
30417             # it is the last token before a level decrease. This will allow
30418             # a closing token to line up with its opening counterpart, and
30419             # avoids an indentation jump larger than 1 level.
30420 1270         2269 my $rLL = $self->[_rLL_];
30421 1270         2449 my $Klimit = $self->[_Klimit_];
30422 1270 100 100     7523 if ( $i_terminal == $ibeg
      66        
      100        
30423             && $is_closing_type_beg
30424             && defined($K_beg)
30425             && $K_beg < $Klimit )
30426             {
30427 527         1509 my $K_plus = $K_beg + 1;
30428 527         1354 my $type_plus = $rLL->[$K_plus]->[_TYPE_];
30429              
30430 527 100 100     2197 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
30431 474         1310 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
30432             }
30433              
30434 527 100 100     1968 if ( $type_plus eq '#' && $K_plus < $Klimit ) {
30435 49         146 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
30436 49 100 66     257 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
30437 42         138 $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
30438             }
30439              
30440             # Note: we have skipped past just one comment (perhaps a
30441             # side comment). There could be more, and we could easily
30442             # skip past all the rest with the following code, or with a
30443             # while loop. It would be rare to have to do this, and
30444             # those block comments would still be indented, so it would
30445             # to leave them indented. So it seems best to just stop at
30446             # a maximum of one comment.
30447             ##if ($type_plus eq '#') {
30448             ## $K_plus = $self->K_next_code($K_plus);
30449             ##}
30450             }
30451              
30452 527 100 66     2254 if ( !$is_bli_beg && defined($K_plus) ) {
30453 513         1013 my $lev = $level_beg;
30454 513         984 my $level_next = $rLL->[$K_plus]->[_LEVEL_];
30455              
30456             # and do not undo ci if it was set by the -xci option
30457             $adjust_indentation = 1
30458             if ( $level_next < $lev
30459 513 100 100     2198 && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} );
30460             }
30461              
30462             # Patch for RT #96101, in which closing brace of anonymous subs
30463             # was not outdented. We should look ahead and see if there is
30464             # a level decrease at the next token (i.e., a closing token),
30465             # but right now we do not have that information. For now
30466             # we see if we are in a list, and this works well.
30467             # See test files 'sub*.t' for good test cases.
30468 527 100 100     3138 if ( !$rOpts_indent_closing_brace
      100        
      100        
30469             && $block_type_beg
30470             && $self->[_ris_asub_block_]->{$seqno_beg}
30471             && $self->is_in_list_by_i($i_terminal) )
30472             {
30473             (
30474 18         83 $opening_indentation, $opening_offset,
30475             $is_leading, $opening_exists
30476             )
30477             = $self->get_opening_indentation( $ibeg, $ri_first,
30478             $ri_last, $rindentation_list );
30479 18         91 my $indentation = $leading_spaces_beg;
30480 18 100 66     175 if ( defined($opening_indentation)
30481             && get_spaces($indentation) >
30482             get_spaces($opening_indentation) )
30483             {
30484 14         57 $adjust_indentation = 1;
30485             }
30486             }
30487             }
30488              
30489             # YVES patch 1 of 2:
30490             # Undo ci of line with leading closing eval brace,
30491             # but not beyond the indentation of the line with
30492             # the opening brace.
30493 1270 100 100     5022 if ( $block_type_beg
      66        
      100        
30494             && $block_type_beg eq 'eval'
30495             && !ref($leading_spaces_beg)
30496             && !$rOpts_indent_closing_brace )
30497             {
30498             (
30499 30         120 $opening_indentation, $opening_offset,
30500             $is_leading, $opening_exists
30501             )
30502             = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
30503             $rindentation_list );
30504 30         100 my $indentation = $leading_spaces_beg;
30505 30 100 66     203 if ( defined($opening_indentation)
30506             && get_spaces($indentation) >
30507             get_spaces($opening_indentation) )
30508             {
30509 24         69 $adjust_indentation = 1;
30510             }
30511             }
30512              
30513             # patch for issue git #40: -bli setting has priority
30514 1270 100       2761 $adjust_indentation = 0 if ($is_bli_beg);
30515              
30516 1270         2051 $default_adjust_indentation = $adjust_indentation;
30517              
30518             # Now modify default behavior according to user request:
30519             # handle option to indent non-blocks of the form ); }; ];
30520             # But don't do special indentation to something like ')->pack('
30521 1270 100       2830 if ( !$block_type_beg ) {
30522              
30523             # Note that logical padding has already been applied, so we may
30524             # need to remove some spaces to get a valid hash key.
30525 672         2227 my $tok = $token_beg;
30526 672         1738 my $cti = $closing_token_indentation{$tok};
30527              
30528             # Fix the value of 'cti' for an isolated non-welded closing qw
30529             # delimiter.
30530 672 100 100     2134 if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
30531              
30532             # A quote delimiter which is not a container will not have
30533             # a cti value defined. In this case use the style of a
30534             # paren. For example
30535             # my @fars = (
30536             # qw<
30537             # far
30538             # farfar
30539             # farfars-far
30540             # >,
30541             # );
30542 26 100 100     175 if ( !defined($cti) && length($tok) == 1 ) {
30543              
30544             # something other than ')', '}', ']' ; use flag for ')'
30545 3         10 $cti = $closing_token_indentation{')'};
30546              
30547             # But for now, do not outdent non-container qw
30548             # delimiters because it would would change existing
30549             # formatting.
30550 3 50       15 if ( $tok ne '>' ) { $cti = 3 }
  3         7  
30551             }
30552              
30553             # A non-welded closing qw cannot currently use -cti=1
30554             # because that option requires a sequence number to find
30555             # the opening indentation, and qw quote delimiters are not
30556             # sequenced items.
30557 26 50 66     181 if ( defined($cti) && $cti == 1 ) { $cti = 0 }
  0         0  
30558             }
30559              
30560 672 100       3414 if ( !defined($cti) ) {
    100          
    100          
    100          
30561              
30562             # $cti may not be defined for several reasons.
30563             # -padding may have been applied so the character
30564             # has a length > 1
30565             # - we may have welded to a closing quote token.
30566             # Here is an example (perltidy -wn):
30567             # __PACKAGE__->load_components( qw(
30568             # > Core
30569             # >
30570             # > ) );
30571 3         10 $adjust_indentation = 0;
30572              
30573             }
30574             elsif ( $cti == 1 ) {
30575 43 100 100     182 if ( $i_terminal <= $ibeg + 1
30576             || $is_semicolon_terminated )
30577             {
30578 42         129 $adjust_indentation = 2;
30579             }
30580             else {
30581 1         4 $adjust_indentation = 0;
30582             }
30583             }
30584             elsif ( $cti == 2 ) {
30585 3 50       12 if ($is_semicolon_terminated) {
30586 3         6 $adjust_indentation = 3;
30587             }
30588             else {
30589 0         0 $adjust_indentation = 0;
30590             }
30591             }
30592             elsif ( $cti == 3 ) {
30593 3         8 $adjust_indentation = 3;
30594             }
30595             else {
30596             ## cti == 0
30597             }
30598             }
30599              
30600             # handle option to indent blocks
30601             else {
30602 598 50 66     1627 if (
      66        
30603             $rOpts_indent_closing_brace
30604             && (
30605             $i_terminal == $ibeg # isolated terminal '}'
30606             || $is_semicolon_terminated
30607             )
30608             ) # } xxxx ;
30609             {
30610 12         23 $adjust_indentation = 3;
30611             }
30612             }
30613             } ## end if ( $is_closing_type_beg || $seqno_qw_closing )
30614              
30615             # if line begins with a ':', align it with any
30616             # previous line leading with corresponding ?
30617             elsif ( $type_beg eq ':' ) {
30618             (
30619 93         479 $opening_indentation, $opening_offset,
30620             $is_leading, $opening_exists
30621             )
30622             = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
30623             $rindentation_list );
30624 93 100       353 if ($is_leading) { $adjust_indentation = 2; }
  46         91  
30625             }
30626             else {
30627             # not a closing type
30628             }
30629              
30630             return (
30631              
30632 1981         7722 $adjust_indentation,
30633             $default_adjust_indentation,
30634             $opening_indentation,
30635             $opening_offset,
30636             $is_leading,
30637             $opening_exists,
30638              
30639             );
30640             } ## end sub get_closing_token_indentation
30641             } ## end closure get_final_indentation
30642              
30643             sub get_opening_indentation {
30644              
30645             # get the indentation of the line which output the opening token
30646             # corresponding to a given closing token in the current output batch.
30647             #
30648             # given:
30649             # $i_closing - index in this line of a closing token ')' '}' or ']'
30650             #
30651             # $ri_first - reference to list of the first index $i for each output
30652             # line in this batch
30653             # $ri_last - reference to list of the last index $i for each output line
30654             # in this batch
30655             # $rindentation_list - reference to a list containing the indentation
30656             # used for each line.
30657             # $qw_seqno - optional sequence number to use if normal seqno not defined
30658             # (NOTE: would be more general to just look this up from index i)
30659             #
30660             # return:
30661             # -the indentation of the line which contained the opening token
30662             # which matches the token at index $i_opening
30663             # -and its offset (number of columns) from the start of the line
30664             #
30665 1411     1411 0 3490 my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
30666             = @_;
30667              
30668             # first, see if the opening token is in the current batch
30669 1411         2663 my $i_opening = $mate_index_to_go[$i_closing];
30670 1411         2448 my ( $indent, $offset, $is_leading, $exists );
30671 1411         2268 $exists = 1;
30672 1411 100 66     4925 if ( defined($i_opening) && $i_opening >= 0 ) {
30673              
30674             # it is..look up the indentation
30675 550         2053 ( $indent, $offset, $is_leading ) =
30676             lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
30677             $rindentation_list );
30678             }
30679              
30680             # if not, it should have been stored in the hash by a previous batch
30681             else {
30682 861         1659 my $seqno = $type_sequence_to_go[$i_closing];
30683 861 100       1895 $seqno = $qw_seqno unless ($seqno);
30684 861         2213 ( $indent, $offset, $is_leading, $exists ) =
30685             get_saved_opening_indentation($seqno);
30686             }
30687 1411         5009 return ( $indent, $offset, $is_leading, $exists );
30688             } ## end sub get_opening_indentation
30689              
30690             sub examine_vertical_tightness_flags {
30691 561     561 0 1937 my ($self) = @_;
30692              
30693             # For efficiency, we will set a flag to skip all calls to sub
30694             # 'set_vertical_tightness_flags' if vertical tightness is not possible with
30695             # the user input parameters. If vertical tightness is possible, we will
30696             # simply leave the flag undefined and return.
30697              
30698             # Vertical tightness is never possible with --freeze-whitespace
30699 561 100       2000 if ($rOpts_freeze_whitespace) {
30700 3         12 $self->[_no_vertical_tightness_flags_] = 1;
30701 3         6 return;
30702             }
30703              
30704             # This sub is coordinated with sub set_vertical_tightness_flags.
30705             # The Section numbers in the following comments are the sections
30706             # in sub set_vertical_tightness_flags:
30707              
30708             # Examine controls for Section 1a:
30709 558 100       1697 return if ($rOpts_line_up_parentheses);
30710              
30711 527         2563 foreach my $key ( keys %opening_vertical_tightness ) {
30712 3104 100       7313 return if ( $opening_vertical_tightness{$key} );
30713             }
30714              
30715             # Examine controls for Section 1b:
30716 515         3050 foreach my $key ( keys %closing_vertical_tightness ) {
30717 3045 100       7442 return if ( $closing_vertical_tightness{$key} );
30718             }
30719              
30720             # Examine controls for Section 1c:
30721 506         2827 foreach my $key ( keys %opening_token_right ) {
30722 1514 100       4100 return if ( $opening_token_right{$key} );
30723             }
30724              
30725             # Examine controls for Section 1d:
30726 504         2187 foreach my $key ( keys %stack_opening_token ) {
30727 1510 100       3956 return if ( $stack_opening_token{$key} );
30728             }
30729 503         2057 foreach my $key ( keys %stack_closing_token ) {
30730 1509 50       3861 return if ( $stack_closing_token{$key} );
30731             }
30732              
30733             # Examine controls for Section 2:
30734 503 100       1999 return if ($rOpts_block_brace_vertical_tightness);
30735              
30736             # Examine controls for Section 3:
30737 501 100       1796 return if ($rOpts_stack_closing_block_brace);
30738              
30739             # None of the controls used for vertical tightness are set, so
30740             # we can skip all calls to sub set_vertical_tightness_flags
30741 499         1509 $self->[_no_vertical_tightness_flags_] = 1;
30742 499         1006 return;
30743             } ## end sub examine_vertical_tightness_flags
30744              
30745             sub set_vertical_tightness_flags {
30746              
30747 1308     1308 0 3307 my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
30748             $ending_in_quote, $closing_side_comment )
30749             = @_;
30750              
30751             # Define vertical tightness controls for the nth line of a batch.
30752             # Note: do not call this sub for a block comment or if
30753             # $rOpts_freeze_whitespace is set.
30754              
30755             # These parameters are passed to the vertical aligner to indicated
30756             # if we should combine this line with the next line to achieve the
30757             # desired vertical tightness. This was previously an array but
30758             # has been converted to a hash:
30759              
30760             # old hash Meaning
30761             # index key
30762             #
30763             # 0 _vt_type: 1=opening non-block 2=closing non-block
30764             # 3=opening block brace 4=closing block brace
30765             #
30766             # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
30767             # 1b _vt_closing_flag: spaces of padding to use if closing
30768             # 2 _vt_seqno: sequence number of container
30769             # 3 _vt_valid flag: do not append if this flag is false. Will be
30770             # true if appropriate -vt flag is set. Otherwise, Will be
30771             # made true only for 2 line container in parens with -lp
30772             # 4 _vt_seqno_beg: sequence number of first token of line
30773             # 5 _vt_seqno_end: sequence number of last token of line
30774             # 6 _vt_min_lines: min number of lines for joining opening cache,
30775             # 0=no constraint
30776             # 7 _vt_max_lines: max number of lines for joining opening cache,
30777             # 0=no constraint
30778              
30779             # The vertical tightness mechanism can add whitespace, so whitespace can
30780             # continually increase if we allowed it when the -fws flag is set.
30781             # See case b499 for an example.
30782              
30783             # Define these values...
30784 1308         2088 my $vt_type = 0;
30785 1308         1983 my $vt_opening_flag = 0;
30786 1308         1993 my $vt_closing_flag = 0;
30787 1308         1976 my $vt_seqno = 0;
30788 1308         1812 my $vt_valid_flag = 0;
30789 1308         1897 my $vt_seqno_beg = 0;
30790 1308         1997 my $vt_seqno_end = 0;
30791 1308         1910 my $vt_min_lines = 0;
30792 1308         2018 my $vt_max_lines = 0;
30793              
30794             # Uses these global parameters:
30795             # $rOpts_block_brace_tightness
30796             # $rOpts_block_brace_vertical_tightness
30797             # $rOpts_stack_closing_block_brace
30798             # $rOpts_line_up_parentheses
30799             # %opening_vertical_tightness
30800             # %closing_vertical_tightness
30801             # %opening_token_right
30802             # %stack_closing_token
30803             # %stack_opening_token
30804              
30805             #--------------------------------------------------------------
30806             # Vertical Tightness Flags Section 1:
30807             # Handle Lines 1 .. n-1 but not the last line
30808             # For non-BLOCK tokens, we will need to examine the next line
30809             # too, so we won't consider the last line.
30810             #--------------------------------------------------------------
30811 1308 100 100     5952 if ( $n < $n_last_line ) {
    100 100        
    100 66        
      33        
      100        
      66        
      33        
      66        
      33        
30812              
30813             #--------------------------------------------------------------
30814             # Vertical Tightness Flags Section 1a:
30815             # Look for Type 1, last token of this line is a non-block opening token
30816             #--------------------------------------------------------------
30817 801         1475 my $ibeg_next = $ri_first->[ $n + 1 ];
30818 801         1527 my $token_end = $tokens_to_go[$iend];
30819 801         1309 my $iend_next = $ri_last->[ $n + 1 ];
30820              
30821 801 100 100     5369 if (
      100        
      100        
      100        
30822             $type_sequence_to_go[$iend]
30823             && !$block_type_to_go[$iend]
30824             && $is_opening_token{$token_end}
30825             && (
30826             $opening_vertical_tightness{$token_end} > 0
30827              
30828             # allow 2-line method call to be closed up
30829             || ( $rOpts_line_up_parentheses
30830             && $token_end eq '('
30831             && $self->[_rlp_object_by_seqno_]
30832             ->{ $type_sequence_to_go[$iend] }
30833             && $iend > $ibeg
30834             && $types_to_go[ $iend - 1 ] ne 'b' )
30835             )
30836             )
30837             {
30838             # avoid multiple jumps in nesting depth in one line if
30839             # requested
30840 74         172 my $ovt = $opening_vertical_tightness{$token_end};
30841              
30842             # Turn off the -vt flag if the next line ends in a weld.
30843             # This avoids an instability with one-line welds (fixes b1183).
30844 74         149 my $type_end_next = $types_to_go[$iend_next];
30845             $ovt = 0
30846             if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] }
30847 74 0 33     262 && $is_closing_type{$type_end_next} );
30848              
30849             # The flag '_rbreak_container_' avoids conflict of -bom and -pt=1
30850             # or -pt=2; fixes b1270. See similar patch above for $cvt.
30851 74         158 my $seqno = $type_sequence_to_go[$iend];
30852 74 50 66     323 if ( $ovt
      66        
30853             && $seqno
30854             && $self->[_rbreak_container_]->{$seqno} )
30855             {
30856 0         0 $ovt = 0;
30857             }
30858              
30859             # The flag '_rmax_vertical_tightness_' avoids welding conflicts.
30860 74 50       211 if ( defined( $self->[_rmax_vertical_tightness_]->{$seqno} ) ) {
30861             $ovt =
30862 0         0 min( $ovt, $self->[_rmax_vertical_tightness_]->{$seqno} );
30863             }
30864              
30865 74 100 100     420 if (
30866             $ovt >= 2
30867             || ( $nesting_depth_to_go[ $iend_next + 1 ] ==
30868             $nesting_depth_to_go[$ibeg_next] )
30869             )
30870             {
30871              
30872             # If -vt flag has not been set, mark this as invalid
30873             # and aligner will validate it if it sees the closing paren
30874             # within 2 lines.
30875 60         115 my $valid_flag = $ovt;
30876              
30877 60         121 $vt_type = 1;
30878 60         106 $vt_opening_flag = $ovt;
30879 60         114 $vt_seqno = $type_sequence_to_go[$iend];
30880 60         126 $vt_valid_flag = $valid_flag;
30881             }
30882             }
30883              
30884             #--------------------------------------------------------------
30885             # Vertical Tightness Flags Section 1b:
30886             # Look for Type 2, first token of next line is a non-block closing
30887             # token .. and be sure this line does not have a side comment
30888             #--------------------------------------------------------------
30889 801         1460 my $token_next = $tokens_to_go[$ibeg_next];
30890 801 100 100     4033 if ( $type_sequence_to_go[$ibeg_next]
      100        
      66        
30891             && !$block_type_to_go[$ibeg_next]
30892             && $is_closing_token{$token_next}
30893             && $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen!
30894             {
30895 197         553 my $cvt = $closing_vertical_tightness{$token_next};
30896              
30897             # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303
30898             # See similar patch above for $ovt.
30899 197         402 my $seqno = $type_sequence_to_go[$ibeg_next];
30900 197 50 66     599 if ( $cvt && $self->[_rbreak_container_]->{$seqno} ) {
30901 0         0 $cvt = 0;
30902             }
30903              
30904             # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
30905             # otherwise. Added for rt136417.
30906 197 100       559 if ( $cvt == 3 ) {
30907 2 100       8 $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
30908             }
30909              
30910             # The unusual combination -pvtc=2 -dws -naws can be unstable.
30911             # This fixes b1282, b1283. This can be moved to set_options.
30912 197 50 66     679 if ( $cvt == 2
      33        
30913             && $rOpts_delete_old_whitespace
30914             && !$rOpts_add_whitespace )
30915             {
30916 0         0 $cvt = 1;
30917             }
30918              
30919             # Fix for b1379, b1380, b1381, b1382, b1384 part 2,
30920             # instability with adding and deleting trailing commas:
30921             # Reducing -cvt=2 to =1 fixes stability for -wtc=b in b1379,1380.
30922             # Reducing -cvt>0 to =0 fixes stability for -wtc=b in b1381,1382.
30923             # Reducing -cvt>0 to =0 fixes stability for -wtc=m in b1384
30924 197 100 100     566 if ( $cvt
30925             && $self->[_ris_bare_trailing_comma_by_seqno_]->{$seqno} )
30926             {
30927 10         24 $cvt = 0;
30928             }
30929              
30930 197 100 100     1286 if (
      100        
30931              
30932             # Never append a trailing line like ')->pack(' because it
30933             # will throw off later alignment. So this line must start at a
30934             # deeper level than the next line (fix1 for welding, git #45).
30935             (
30936             $nesting_depth_to_go[$ibeg_next] >=
30937             $nesting_depth_to_go[ $iend_next + 1 ] + 1
30938             )
30939             && (
30940             $cvt == 2
30941             || (
30942             !$self->is_in_list_by_i($ibeg_next)
30943             && (
30944             $cvt == 1
30945              
30946             # allow closing up 2-line method calls
30947             || ( $rOpts_line_up_parentheses
30948             && $token_next eq ')'
30949             && $type_sequence_to_go[$ibeg_next]
30950             && $self->[_rlp_object_by_seqno_]
30951             ->{ $type_sequence_to_go[$ibeg_next] } )
30952             )
30953             )
30954             )
30955             )
30956             {
30957              
30958             # decide which trailing closing tokens to append..
30959 76         172 my $ok = 0;
30960 76 100 100     481 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
  25         66  
30961             else {
30962 51         288 my $str = join( EMPTY_STRING,
30963             @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
30964              
30965             # append closing token if followed by comment or ';'
30966             # or another closing token (fix2 for welding, git #45)
30967 51 100       359 if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
  50         128  
30968             }
30969              
30970 76 100       199 if ($ok) {
30971 75         134 my $valid_flag = $cvt;
30972 75         139 my $min_lines = 0;
30973 75         151 my $max_lines = 0;
30974              
30975             # Fix for b1187 and b1188: Blinking can occur if we allow
30976             # welded tokens to re-form into one-line blocks during
30977             # vertical alignment when -lp used. So for this case we
30978             # set the minimum number of lines to be 1 instead of 0.
30979             # The maximum should be 1 if -vtc is not used. If -vtc is
30980             # used, we turn the valid
30981             # flag off and set the maximum to 0. This is equivalent to
30982             # using a large number.
30983 75         182 my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next];
30984 75 50 100     369 if ( $rOpts_line_up_parentheses
      66        
      66        
      33        
30985             && $total_weld_count
30986             && $seqno_ibeg_next
30987             && $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next}
30988             && $self->is_welded_at_seqno($seqno_ibeg_next) )
30989             {
30990 0         0 $min_lines = 1;
30991 0 0       0 $max_lines = $cvt ? 0 : 1;
30992 0         0 $valid_flag = 0;
30993             }
30994              
30995 75         152 $vt_type = 2;
30996 75 100       260 $vt_closing_flag = $tightness{$token_next} == 2 ? 0 : 1;
30997 75         143 $vt_seqno = $type_sequence_to_go[$ibeg_next];
30998 75         127 $vt_valid_flag = $valid_flag;
30999 75         118 $vt_min_lines = $min_lines;
31000 75         145 $vt_max_lines = $max_lines;
31001             }
31002             }
31003             }
31004              
31005             #--------------------------------------------------------------
31006             # Vertical Tightness Flags Section 1c:
31007             # Implement the Opening Token Right flag (Type 2)..
31008             # If requested, move an isolated trailing opening token to the end of
31009             # the previous line which ended in a comma. We could do this
31010             # in sub recombine_breakpoints but that would cause problems
31011             # with -lp formatting. The problem is that indentation will
31012             # quickly move far to the right in nested expressions. By
31013             # doing it after indentation has been set, we avoid changes
31014             # to the indentation. Actual movement of the token takes place
31015             # in sub valign_output_step_B.
31016              
31017             # Note added 4 May 2021: the man page suggests that the -otr flags
31018             # are mainly for opening tokens following commas. But this seems
31019             # to have been generalized long ago to include other situations.
31020             # I checked the coding back to 2012 and it is essentially the same
31021             # as here, so it is best to leave this unchanged for now.
31022             #--------------------------------------------------------------
31023 801 50 66     2746 if (
      66        
      33        
      33        
      33        
      33        
      0        
      33        
      33        
      33        
31024             $opening_token_right{ $tokens_to_go[$ibeg_next] }
31025              
31026             # previous line is not opening
31027             # (use -sot to combine with it)
31028             && !$is_opening_token{$token_end}
31029              
31030             # previous line ended in one of these
31031             # (add other cases if necessary; '=>' and '.' are not necessary
31032             && !$block_type_to_go[$ibeg_next]
31033              
31034             # this is a line with just an opening token
31035             && ( $iend_next == $ibeg_next
31036             || $iend_next == $ibeg_next + 2
31037             && $types_to_go[$iend_next] eq '#' )
31038              
31039             # Fix for case b1060 when both -baoo and -otr are set:
31040             # to avoid blinking, honor the -baoo flag over the -otr flag.
31041             && $token_end ne '||' && $token_end ne '&&'
31042              
31043             # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
31044             # Generalized from '=' to $is_assignment to fix b1375.
31045             && !(
31046             $is_assignment{ $types_to_go[$iend] }
31047             && $rOpts_line_up_parentheses
31048             && $type_sequence_to_go[$ibeg_next]
31049             && $self->[_rlp_object_by_seqno_]
31050             ->{ $type_sequence_to_go[$ibeg_next] }
31051             )
31052              
31053             # looks bad if we align vertically with the wrong container
31054             && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
31055              
31056             # give -kba priority over -otr (b1445)
31057             && !$self->[_rbreak_after_Klast_]->{ $K_to_go[$iend] }
31058             )
31059             {
31060 2 50       19 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
31061              
31062 2         7 $vt_type = 2;
31063 2         4 $vt_closing_flag = $spaces;
31064 2         6 $vt_seqno = $type_sequence_to_go[$ibeg_next];
31065 2         5 $vt_valid_flag = 1;
31066             }
31067              
31068             #--------------------------------------------------------------
31069             # Vertical Tightness Flags Section 1d:
31070             # Stacking of opening and closing tokens (Type 2)
31071             #--------------------------------------------------------------
31072 801         1200 my $stackable;
31073 801         1354 my $token_beg_next = $tokens_to_go[$ibeg_next];
31074              
31075             # patch to make something like 'qw(' behave like an opening paren
31076             # (aran.t)
31077 801 100       1806 if ( $types_to_go[$ibeg_next] eq 'q' ) {
31078 1 50       8 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
31079 1         3 $token_beg_next = $1;
31080             }
31081             }
31082              
31083 801 100 100     4782 if ( $is_closing_token{$token_end}
    100 66        
31084             && $is_closing_token{$token_beg_next} )
31085             {
31086              
31087             # avoid instability of combo -bom and -sct; b1179
31088 70         174 my $seq_next = $type_sequence_to_go[$ibeg_next];
31089             $stackable = $stack_closing_token{$token_beg_next}
31090             unless ( $block_type_to_go[$ibeg_next]
31091 70 50 33     495 || $seq_next && $self->[_rbreak_container_]->{$seq_next} );
      33        
31092             }
31093             elsif ($is_opening_token{$token_end}
31094             && $is_opening_token{$token_beg_next} )
31095             {
31096 41 50       144 $stackable = $stack_opening_token{$token_beg_next}
31097             unless ( $block_type_to_go[$ibeg_next] )
31098             ; # shouldn't happen; just checking
31099             }
31100             else {
31101             ## not stackable
31102             }
31103              
31104 801 100       1833 if ($stackable) {
31105              
31106 6         11 my $is_semicolon_terminated;
31107 6 100       23 if ( $n + 1 == $n_last_line ) {
31108 5         26 my ( $terminal_type, $i_terminal ) =
31109             terminal_type_i( $ibeg_next, $iend_next );
31110 5   66     29 $is_semicolon_terminated = $terminal_type eq ';'
31111             && $nesting_depth_to_go[$iend_next] <
31112             $nesting_depth_to_go[$ibeg_next];
31113             }
31114              
31115             # this must be a line with just an opening token
31116             # or end in a semicolon
31117 6 50 0     29 if (
      33        
      66        
31118             $is_semicolon_terminated
31119             || ( $iend_next == $ibeg_next
31120             || $iend_next == $ibeg_next + 2
31121             && $types_to_go[$iend_next] eq '#' )
31122             )
31123             {
31124 6 100       21 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
31125              
31126 6         11 $vt_type = 2;
31127 6         10 $vt_closing_flag = $spaces;
31128 6         14 $vt_seqno = $type_sequence_to_go[$ibeg_next];
31129 6         15 $vt_valid_flag = 1;
31130              
31131             }
31132             }
31133             }
31134              
31135             #--------------------------------------------------------------
31136             # Vertical Tightness Flags Section 2:
31137             # Handle type 3, opening block braces on last line of the batch
31138             # Check for a last line with isolated opening BLOCK curly
31139             #--------------------------------------------------------------
31140             elsif ($rOpts_block_brace_vertical_tightness
31141             && $ibeg eq $iend
31142             && $types_to_go[$iend] eq '{'
31143             && $block_type_to_go[$iend]
31144             && $block_type_to_go[$iend] =~
31145             /$block_brace_vertical_tightness_pattern/ )
31146             {
31147 11         28 $vt_type = 3;
31148 11         23 $vt_opening_flag = $rOpts_block_brace_vertical_tightness;
31149 11         20 $vt_seqno = 0;
31150 11         18 $vt_valid_flag = 1;
31151             }
31152              
31153             #--------------------------------------------------------------
31154             # Vertical Tightness Flags Section 3:
31155             # Handle type 4, a closing block brace on the last line of the batch Check
31156             # for a last line with isolated closing BLOCK curly
31157             # Patch: added a check for any new closing side comment which the
31158             # -csc option may generate. If it exists, there will be a side comment
31159             # so we cannot combine with a brace on the next line. This issue
31160             # occurs for the combination -scbb and -csc is used.
31161             #--------------------------------------------------------------
31162             elsif ($rOpts_stack_closing_block_brace
31163             && $ibeg eq $iend
31164             && $block_type_to_go[$iend]
31165             && $types_to_go[$iend] eq '}'
31166             && ( !$closing_side_comment || $n < $n_last_line ) )
31167             {
31168 5 50       18 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
31169              
31170 5         10 $vt_type = 4;
31171 5         9 $vt_closing_flag = $spaces;
31172 5         11 $vt_seqno = $type_sequence_to_go[$iend];
31173 5         10 $vt_valid_flag = 1;
31174              
31175             }
31176             else {
31177             ## none of the above
31178             }
31179              
31180             # get the sequence numbers of the ends of this line
31181 1308         2338 $vt_seqno_beg = $type_sequence_to_go[$ibeg];
31182 1308 100       2744 if ( !$vt_seqno_beg ) {
31183 886 100       1938 if ( $types_to_go[$ibeg] eq 'q' ) {
31184 11         49 $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
31185             }
31186 875         1600 else { $vt_seqno_beg = EMPTY_STRING }
31187             }
31188              
31189 1308         2136 $vt_seqno_end = $type_sequence_to_go[$iend];
31190 1308 100       2699 if ( !$vt_seqno_end ) {
31191 853 100       1919 if ( $types_to_go[$iend] eq 'q' ) {
31192 7         24 $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote );
31193             }
31194 846         1444 else { $vt_seqno_end = EMPTY_STRING }
31195             }
31196              
31197 1308 100       2959 if ( !defined($vt_seqno) ) { $vt_seqno = EMPTY_STRING }
  1         3  
31198              
31199 1308         9790 my $rvertical_tightness_flags = {
31200             _vt_type => $vt_type,
31201             _vt_opening_flag => $vt_opening_flag,
31202             _vt_closing_flag => $vt_closing_flag,
31203             _vt_seqno => $vt_seqno,
31204             _vt_valid_flag => $vt_valid_flag,
31205             _vt_seqno_beg => $vt_seqno_beg,
31206             _vt_seqno_end => $vt_seqno_end,
31207             _vt_min_lines => $vt_min_lines,
31208             _vt_max_lines => $vt_max_lines,
31209             };
31210              
31211 1308         4121 return ($rvertical_tightness_flags);
31212             } ## end sub set_vertical_tightness_flags
31213              
31214             ##########################################################
31215             # CODE SECTION 14: Code for creating closing side comments
31216             ##########################################################
31217              
31218             { ## begin closure accumulate_csc_text
31219              
31220             # These routines are called once per batch when the --closing-side-comments flag
31221             # has been set.
31222              
31223             my %block_leading_text;
31224             my %block_opening_line_number;
31225             my $csc_new_statement_ok;
31226             my $csc_last_label;
31227             my %csc_block_label;
31228             my $accumulating_text_for_block;
31229             my $leading_block_text;
31230             my $rleading_block_if_elsif_text;
31231             my $leading_block_text_level;
31232             my $leading_block_text_length_exceeded;
31233             my $leading_block_text_line_length;
31234             my $leading_block_text_line_number;
31235              
31236             sub initialize_csc_vars {
31237 561     561 0 1639 %block_leading_text = ();
31238 561         1256 %block_opening_line_number = ();
31239 561         1269 $csc_new_statement_ok = 1;
31240 561         1294 $csc_last_label = EMPTY_STRING;
31241 561         1292 %csc_block_label = ();
31242 561         1519 $rleading_block_if_elsif_text = [];
31243 561         1207 $accumulating_text_for_block = EMPTY_STRING;
31244 561         2575 reset_block_text_accumulator();
31245 561         973 return;
31246             } ## end sub initialize_csc_vars
31247              
31248             sub reset_block_text_accumulator {
31249              
31250             # save text after 'if' and 'elsif' to append after 'else'
31251 570 100   570 0 2475 if ($accumulating_text_for_block) {
31252              
31253             ## ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
31254 9 100       41 if ( $is_if_elsif{$accumulating_text_for_block} ) {
31255 5         14 push @{$rleading_block_if_elsif_text}, $leading_block_text;
  5         19  
31256             }
31257             }
31258 570         1353 $accumulating_text_for_block = EMPTY_STRING;
31259 570         1295 $leading_block_text = EMPTY_STRING;
31260 570         1185 $leading_block_text_level = 0;
31261 570         1063 $leading_block_text_length_exceeded = 0;
31262 570         1240 $leading_block_text_line_number = 0;
31263 570         1147 $leading_block_text_line_length = 0;
31264 570         1166 return;
31265             } ## end sub reset_block_text_accumulator
31266              
31267             sub set_block_text_accumulator {
31268 9     9 0 26 my ( $self, $i ) = @_;
31269 9         22 $accumulating_text_for_block = $tokens_to_go[$i];
31270 9 100       36 if ( $accumulating_text_for_block !~ /^els/ ) {
31271 7         19 $rleading_block_if_elsif_text = [];
31272             }
31273 9         19 $leading_block_text = EMPTY_STRING;
31274 9         24 $leading_block_text_level = $levels_to_go[$i];
31275 9         25 $leading_block_text_line_number = $self->get_output_line_number();
31276 9         21 $leading_block_text_length_exceeded = 0;
31277              
31278             # this will contain the column number of the last character
31279             # of the closing side comment
31280             $leading_block_text_line_length =
31281             length($csc_last_label) +
31282             length($accumulating_text_for_block) +
31283 9         35 length( $rOpts->{'closing-side-comment-prefix'} ) +
31284             $leading_block_text_level * $rOpts_indent_columns + 3;
31285 9         23 return;
31286             } ## end sub set_block_text_accumulator
31287              
31288             sub accumulate_block_text {
31289 708     708 0 1088 my ( $self, $i ) = @_;
31290              
31291             # accumulate leading text for -csc, ignoring any side comments
31292 708 50 66     1486 if ( $accumulating_text_for_block
      66        
31293             && !$leading_block_text_length_exceeded
31294             && $types_to_go[$i] ne '#' )
31295             {
31296              
31297 92         127 my $added_length = $token_lengths_to_go[$i];
31298 92 50       164 $added_length += 1 if $i == 0;
31299 92         127 my $new_line_length =
31300             $leading_block_text_line_length + $added_length;
31301              
31302             # we can add this text if we don't exceed some limits..
31303 92 100 33     408 if (
    50 66        
      33        
      66        
      100        
31304              
31305             # we must not have already exceeded the text length limit
31306             length($leading_block_text) <
31307             $rOpts_closing_side_comment_maximum_text
31308              
31309             # and either:
31310             # the new total line length must be below the line length limit
31311             # or the new length must be below the text length limit
31312             # (ie, we may allow one token to exceed the text length limit)
31313             && (
31314             $new_line_length <
31315             $maximum_line_length_at_level[$leading_block_text_level]
31316              
31317             || length($leading_block_text) + $added_length <
31318             $rOpts_closing_side_comment_maximum_text
31319             )
31320              
31321             # UNLESS: we are adding a closing paren before the brace we seek.
31322             # This is an attempt to avoid situations where the ... to be
31323             # added are longer than the omitted right paren, as in:
31324              
31325             # foreach my $item (@a_rather_long_variable_name_here) {
31326             # &whatever;
31327             # } ## end foreach my $item (@a_rather_long_variable_name_here...
31328              
31329             || (
31330             $tokens_to_go[$i] eq ')'
31331             && (
31332             (
31333             $i + 1 <= $max_index_to_go
31334             && $block_type_to_go[ $i + 1 ]
31335             && $block_type_to_go[ $i + 1 ] eq
31336             $accumulating_text_for_block
31337             )
31338             || ( $i + 2 <= $max_index_to_go
31339             && $block_type_to_go[ $i + 2 ]
31340             && $block_type_to_go[ $i + 2 ] eq
31341             $accumulating_text_for_block )
31342             )
31343             )
31344             )
31345             {
31346              
31347             # add an extra space at each newline
31348 89 50 33     183 if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
31349 0         0 $leading_block_text .= SPACE;
31350             }
31351              
31352             # add the token text
31353 89         140 $leading_block_text .= $tokens_to_go[$i];
31354 89         125 $leading_block_text_line_length = $new_line_length;
31355             }
31356              
31357             # show that text was truncated if necessary
31358             elsif ( $types_to_go[$i] ne 'b' ) {
31359 0         0 $leading_block_text_length_exceeded = 1;
31360 0         0 $leading_block_text .= '...';
31361             }
31362             else {
31363             ## ok
31364             }
31365             }
31366 708         1270 return;
31367             } ## end sub accumulate_block_text
31368              
31369             sub accumulate_csc_text {
31370              
31371 61     61 0 108 my ($self) = @_;
31372              
31373             # called once per output buffer when -csc is used. Accumulates
31374             # the text placed after certain closing block braces.
31375             # Defines and returns the following for this buffer:
31376              
31377 61         114 my $block_leading_text =
31378             EMPTY_STRING; # the leading text of the last '}'
31379 61         97 my $rblock_leading_if_elsif_text;
31380 61         97 my $i_block_leading_text =
31381             -1; # index of token owning block_leading_text
31382 61         87 my $block_line_count = 100; # how many lines the block spans
31383 61         137 my $terminal_type = 'b'; # type of last nonblank token
31384 61         96 my $i_terminal = 0; # index of last nonblank token
31385 61         105 my $terminal_block_type = EMPTY_STRING;
31386              
31387             # update most recent statement label
31388 61 50       155 $csc_last_label = EMPTY_STRING unless ($csc_last_label);
31389 61 50       155 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
  0         0  
31390 61         103 my $block_label = $csc_last_label;
31391              
31392             # Loop over all tokens of this batch
31393 61         134 for my $i ( 0 .. $max_index_to_go ) {
31394 717         1059 my $type = $types_to_go[$i];
31395 717         994 my $block_type = $block_type_to_go[$i];
31396 717         1034 my $token = $tokens_to_go[$i];
31397 717 100       1298 $block_type = EMPTY_STRING unless ($block_type);
31398              
31399             # remember last nonblank token type
31400 717 100 100     2104 if ( $type ne '#' && $type ne 'b' ) {
31401 463         621 $terminal_type = $type;
31402 463         606 $terminal_block_type = $block_type;
31403 463         610 $i_terminal = $i;
31404             }
31405              
31406 717         981 my $type_sequence = $type_sequence_to_go[$i];
31407 717 100 66     1383 if ( $block_type && $type_sequence ) {
31408              
31409 34 100       114 if ( $token eq '}' ) {
    50          
31410              
31411             # restore any leading text saved when we entered this block
31412 17 100       57 if ( defined( $block_leading_text{$type_sequence} ) ) {
31413             ( $block_leading_text, $rblock_leading_if_elsif_text )
31414 9         18 = @{ $block_leading_text{$type_sequence} };
  9         33  
31415 9         17 $i_block_leading_text = $i;
31416 9         30 delete $block_leading_text{$type_sequence};
31417 9         27 $rleading_block_if_elsif_text =
31418             $rblock_leading_if_elsif_text;
31419             }
31420              
31421 17 50       54 if ( defined( $csc_block_label{$type_sequence} ) ) {
31422 17         35 $block_label = $csc_block_label{$type_sequence};
31423 17         39 delete $csc_block_label{$type_sequence};
31424             }
31425              
31426             # if we run into a '}' then we probably started accumulating
31427             # at something like a trailing 'if' clause..no harm done.
31428 17 50 33     56 if ( $accumulating_text_for_block
31429             && $levels_to_go[$i] <= $leading_block_text_level )
31430             {
31431 0         0 my $lev = $levels_to_go[$i];
31432 0         0 reset_block_text_accumulator();
31433             }
31434              
31435 17 50       42 if ( defined( $block_opening_line_number{$type_sequence} ) )
31436             {
31437 17         61 my $output_line_number =
31438             $self->get_output_line_number();
31439             $block_line_count =
31440             $output_line_number -
31441 17         47 $block_opening_line_number{$type_sequence} + 1;
31442 17         39 delete $block_opening_line_number{$type_sequence};
31443             }
31444             else {
31445              
31446             # Error: block opening line undefined for this line..
31447             # This shouldn't be possible, but it is not a
31448             # significant problem.
31449             }
31450             }
31451              
31452             elsif ( $token eq '{' ) {
31453              
31454 17         62 my $line_number = $self->get_output_line_number();
31455 17         46 $block_opening_line_number{$type_sequence} = $line_number;
31456              
31457             # set a label for this block, except for
31458             # a bare block which already has the label
31459             # A label can only be used on the next {
31460 17 50       72 if ( $block_type =~ /:$/ ) {
31461 0         0 $csc_last_label = EMPTY_STRING;
31462             }
31463 17         38 $csc_block_label{$type_sequence} = $csc_last_label;
31464 17         33 $csc_last_label = EMPTY_STRING;
31465              
31466 17 100 66     83 if ( $accumulating_text_for_block
31467             && $levels_to_go[$i] == $leading_block_text_level )
31468             {
31469              
31470 9 50       39 if ( $accumulating_text_for_block eq $block_type ) {
31471              
31472             # save any leading text before we enter this block
31473 9         34 $block_leading_text{$type_sequence} = [
31474             $leading_block_text,
31475             $rleading_block_if_elsif_text
31476             ];
31477 9         23 $block_opening_line_number{$type_sequence} =
31478             $leading_block_text_line_number;
31479 9         35 reset_block_text_accumulator();
31480             }
31481             else {
31482              
31483             # shouldn't happen, but not a serious error.
31484             # We were accumulating -csc text for block type
31485             # $accumulating_text_for_block and unexpectedly
31486             # encountered a '{' for block type $block_type.
31487             }
31488             }
31489             }
31490             else {
31491             ## should not get here
31492 0         0 DEVEL_MODE
31493             && Fault("token=$token should be '{' or '}' for block\n");
31494             }
31495             }
31496              
31497 717 100 100     2286 if ( $type eq 'k'
      100        
      100        
31498             && $csc_new_statement_ok
31499             && $is_if_elsif_else_unless_while_until_for_foreach{$token}
31500             && $token =~ /$closing_side_comment_list_pattern/ )
31501             {
31502 9         45 $self->set_block_text_accumulator($i);
31503             }
31504             else {
31505              
31506             # note: ignoring type 'q' because of tricks being played
31507             # with 'q' for hanging side comments
31508 708 100 100     2244 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
      66        
31509 454   100     1525 $csc_new_statement_ok =
31510             ( $block_type || $type eq 'J' || $type eq ';' );
31511             }
31512 708 50 66     1604 if ( $type eq ';'
      33        
31513             && $accumulating_text_for_block
31514             && $levels_to_go[$i] == $leading_block_text_level )
31515             {
31516 0         0 reset_block_text_accumulator();
31517             }
31518             else {
31519 708         1141 $self->accumulate_block_text($i);
31520             }
31521             }
31522             }
31523              
31524             # Treat an 'else' block specially by adding preceding 'if' and
31525             # 'elsif' text. Otherwise, the 'end else' is not helpful,
31526             # especially for cuddled-else formatting.
31527 61 100 100     225 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
31528 2         26 $block_leading_text =
31529             $self->make_else_csc_text( $i_terminal, $terminal_block_type,
31530             $block_leading_text, $rblock_leading_if_elsif_text );
31531             }
31532              
31533             # if this line ends in a label then remember it for the next pass
31534 61         108 $csc_last_label = EMPTY_STRING;
31535 61 50       163 if ( $terminal_type eq 'J' ) {
31536 0         0 $csc_last_label = $tokens_to_go[$i_terminal];
31537             }
31538              
31539 61         280 return ( $terminal_type, $i_terminal, $i_block_leading_text,
31540             $block_leading_text, $block_line_count, $block_label );
31541             } ## end sub accumulate_csc_text
31542              
31543             sub make_else_csc_text {
31544              
31545             # create additional -csc text for an 'else' and optionally 'elsif',
31546             # depending on the value of switch
31547             #
31548             # = 0 add 'if' text to trailing else
31549             # = 1 same as 0 plus:
31550             # add 'if' to 'elsif's if can fit in line length
31551             # add last 'elsif' to trailing else if can fit in one line
31552             # = 2 same as 1 but do not check if exceed line length
31553             #
31554             # $rif_elsif_text = a reference to a list of all previous closing
31555             # side comments created for this if block
31556             #
31557 2     2 0 13 my ( $self, $i_terminal, $block_type, $block_leading_text,
31558             $rif_elsif_text )
31559             = @_;
31560 2         5 my $csc_text = $block_leading_text;
31561              
31562 2 50 33     15 if ( $block_type eq 'elsif'
31563             && $rOpts_closing_side_comment_else_flag == 0 )
31564             {
31565 0         0 return $csc_text;
31566             }
31567              
31568 2         4 my $count = @{$rif_elsif_text};
  2         7  
31569 2 50       9 return $csc_text unless ($count);
31570              
31571 2         9 my $if_text = '[ if' . $rif_elsif_text->[0];
31572              
31573             # always show the leading 'if' text on 'else'
31574 2 50       9 if ( $block_type eq 'else' ) {
31575 2         9 $csc_text .= $if_text;
31576             }
31577              
31578             # see if that's all
31579 2 50       8 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
31580 2         7 return $csc_text;
31581             }
31582              
31583 0         0 my $last_elsif_text = EMPTY_STRING;
31584 0 0       0 if ( $count > 1 ) {
31585 0         0 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
31586 0 0       0 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
  0         0  
31587             }
31588              
31589             # tentatively append one more item
31590 0         0 my $saved_text = $csc_text;
31591 0 0       0 if ( $block_type eq 'else' ) {
31592 0         0 $csc_text .= $last_elsif_text;
31593             }
31594             else {
31595 0         0 $csc_text .= SPACE . $if_text;
31596             }
31597              
31598             # all done if no length checks requested
31599 0 0       0 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
31600 0         0 return $csc_text;
31601             }
31602              
31603             # undo it if line length exceeded
31604             my $length =
31605             length($csc_text) +
31606             length($block_type) +
31607 0         0 length( $rOpts->{'closing-side-comment-prefix'} ) +
31608             $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
31609 0 0       0 if (
31610             $length > $maximum_line_length_at_level[$leading_block_text_level] )
31611             {
31612 0         0 $csc_text = $saved_text;
31613             }
31614 0         0 return $csc_text;
31615             } ## end sub make_else_csc_text
31616             } ## end closure accumulate_csc_text
31617              
31618             { ## begin closure balance_csc_text
31619              
31620             # Some additional routines for handling the --closing-side-comments option
31621              
31622             my %matching_char;
31623              
31624             BEGIN {
31625 39     39   94464 %matching_char = (
31626             '{' => '}',
31627             '(' => ')',
31628             '[' => ']',
31629             '}' => '{',
31630             ')' => '(',
31631             ']' => '[',
31632             );
31633             } ## end BEGIN
31634              
31635             sub balance_csc_text {
31636              
31637             # Append characters to balance a closing side comment so that editors
31638             # such as vim can correctly jump through code.
31639             # Simple Example:
31640             # input = ## end foreach my $foo ( sort { $b ...
31641             # output = ## end foreach my $foo ( sort { $b ...})
31642              
31643             # NOTE: This routine does not currently filter out structures within
31644             # quoted text because the bounce algorithms in text editors do not
31645             # necessarily do this either (a version of vim was checked and
31646             # did not do this).
31647              
31648             # Some complex examples which will cause trouble for some editors:
31649             # while ( $mask_string =~ /\{[^{]*?\}/g ) {
31650             # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
31651             # if ( $1 eq '{' ) {
31652             # test file test1/braces.pl has many such examples.
31653              
31654 6     6 0 15 my ($csc) = @_;
31655              
31656             # loop to examine characters one-by-one, RIGHT to LEFT and
31657             # build a balancing ending, LEFT to RIGHT.
31658 6         25 foreach my $pos ( reverse( 0 .. length($csc) - 1 ) ) {
31659              
31660 171         241 my $char = substr( $csc, $pos, 1 );
31661              
31662             # ignore everything except structural characters
31663 171 100       322 next unless ( $matching_char{$char} );
31664              
31665             # pop most recently appended character
31666 7         14 my $top = chop($csc);
31667              
31668             # push it back plus the mate to the newest character
31669             # unless they balance each other.
31670 7 100       22 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
31671             }
31672              
31673             # return the balanced string
31674 6         24 return $csc;
31675             } ## end sub balance_csc_text
31676             } ## end closure balance_csc_text
31677              
31678             sub add_closing_side_comment {
31679              
31680 61     61 0 173 my ( $self, $ri_first, $ri_last ) = @_;
31681 61         117 my $rLL = $self->[_rLL_];
31682              
31683             # add closing side comments after closing block braces if -csc used
31684 61         108 my ( $closing_side_comment, $cscw_block_comment );
31685              
31686             #---------------------------------------------------------------
31687             # Step 1: loop through all tokens of this line to accumulate
31688             # the text needed to create the closing side comments. Also see
31689             # how the line ends.
31690             #---------------------------------------------------------------
31691              
31692 61         171 my ( $terminal_type, $i_terminal, $i_block_leading_text,
31693             $block_leading_text, $block_line_count, $block_label )
31694             = $self->accumulate_csc_text();
31695              
31696             #---------------------------------------------------------------
31697             # Step 2: make the closing side comment if this ends a block
31698             #---------------------------------------------------------------
31699 61         155 my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
31700              
31701             # if this line might end in a block closure..
31702 61 50 66     639 if (
      66        
      33        
      66        
      66        
      66        
      33        
      33        
31703             $terminal_type eq '}'
31704              
31705             # Fix 1 for c091, this is only for blocks
31706             && $block_type_to_go[$i_terminal]
31707              
31708             # ..and either
31709             && (
31710              
31711             # the block is long enough
31712             ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
31713              
31714             # or there is an existing comment to check
31715             || ( $have_side_comment
31716             && $rOpts->{'closing-side-comment-warnings'} )
31717             )
31718              
31719             # .. and if this is one of the types of interest
31720             && $block_type_to_go[$i_terminal] =~
31721             /$closing_side_comment_list_pattern/
31722              
31723             # .. but not an anonymous sub
31724             # These are not normally of interest, and their closing braces are
31725             # often followed by commas or semicolons anyway. This also avoids
31726             # possible erratic output due to line numbering inconsistencies
31727             # in the cases where their closing braces terminate a line.
31728             && $block_type_to_go[$i_terminal] ne 'sub'
31729              
31730             # ..and the corresponding opening brace must is not in this batch
31731             # (because we do not need to tag one-line blocks, although this
31732             # should also be caught with a positive -csci value)
31733             && !defined( $mate_index_to_go[$i_terminal] )
31734              
31735             # ..and either
31736             && (
31737              
31738             # this is the last token (line doesn't have a side comment)
31739             !$have_side_comment
31740              
31741             # or the old side comment is a closing side comment
31742             || $tokens_to_go[$max_index_to_go] =~
31743             /$closing_side_comment_prefix_pattern/
31744             )
31745             )
31746             {
31747              
31748             # then make the closing side comment text
31749 9 50       34 if ($block_label) { $block_label .= SPACE }
  0         0  
31750 9         38 my $token =
31751             "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
31752              
31753             # append any extra descriptive text collected above
31754 9 100       29 if ( $i_block_leading_text == $i_terminal ) {
31755 5         11 $token .= $block_leading_text;
31756             }
31757              
31758             $token = balance_csc_text($token)
31759 9 100       44 if $rOpts->{'closing-side-comments-balanced'};
31760              
31761 9         78 $token =~ s/\s*$//; # trim any trailing whitespace
31762              
31763             # handle case of existing closing side comment
31764 9 50       30 if ($have_side_comment) {
31765              
31766             # warn if requested and tokens differ significantly
31767 0 0       0 if ( $rOpts->{'closing-side-comment-warnings'} ) {
31768 0         0 my $old_csc = $tokens_to_go[$max_index_to_go];
31769 0         0 my $new_csc = $token;
31770 0         0 $new_csc =~ s/\s+//g; # trim all whitespace
31771 0         0 $old_csc =~ s/\s+//g; # trim all whitespace
31772 0         0 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
31773 0         0 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
31774              
31775             # trim trailing '...'
31776 0         0 my $new_trailing_dots = $new_csc =~ s/\.\.\.$//;
31777 0         0 $old_csc =~ s/\.\.\.\s*$//;
31778              
31779             # Patch to handle multiple closing side comments at
31780             # else and elsif's. These have become too complicated
31781             # to check, so if we see an indication of
31782             # '[ if' or '[ # elsif', then assume they were made
31783             # by perltidy.
31784 0 0       0 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
    0          
31785 0 0       0 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
  0         0  
31786             }
31787             elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
31788 0 0       0 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
  0         0  
31789             }
31790             else {
31791             ## ok: neither else or elsif
31792             }
31793              
31794             # if old comment is contained in new comment,
31795             # only compare the common part.
31796 0 0       0 if ( length($new_csc) > length($old_csc) ) {
31797 0         0 $new_csc = substr( $new_csc, 0, length($old_csc) );
31798             }
31799              
31800             # if the new comment is shorter and has been limited,
31801             # only compare the common part.
31802 0 0 0     0 if ( length($new_csc) < length($old_csc)
31803             && $new_trailing_dots )
31804             {
31805 0         0 $old_csc = substr( $old_csc, 0, length($new_csc) );
31806             }
31807              
31808             # any remaining difference?
31809 0 0       0 if ( $new_csc ne $old_csc ) {
    0          
31810              
31811             # just leave the old comment if we are below the threshold
31812             # for creating side comments
31813 0 0       0 if ( $block_line_count <
31814             $rOpts->{'closing-side-comment-interval'} )
31815             {
31816 0         0 $token = undef;
31817             }
31818              
31819             # otherwise we'll make a note of it
31820             else {
31821              
31822 0         0 my $msg_line_number;
31823 0         0 my $K = $K_to_go[$i_terminal];
31824 0 0       0 if ( defined($K) ) {
31825 0         0 $msg_line_number = $rLL->[$K]->[_LINE_INDEX_] + 1;
31826             }
31827             warning(
31828 0         0 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n",
31829             $msg_line_number
31830             );
31831              
31832             # save the old side comment in a new trailing block
31833             # comment
31834 0         0 my $timestamp = EMPTY_STRING;
31835 0 0       0 if ( $rOpts->{'timestamp'} ) {
31836 0         0 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
31837 0         0 $year += 1900;
31838 0         0 $month += 1;
31839 0         0 $timestamp = "$year-$month-$day";
31840             }
31841             $cscw_block_comment =
31842 0         0 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
31843             }
31844             }
31845              
31846             # No differences.. we can safely delete old comment if we
31847             # are below the threshold
31848             elsif ( $block_line_count <
31849             $rOpts->{'closing-side-comment-interval'} )
31850             {
31851             # Since the line breaks have already been set, we have
31852             # to remove the token from the _to_go array and also
31853             # from the line range (this fixes issue c081).
31854             # Note that we can only get here if -cscw has been set
31855             # because otherwise the old comment is already deleted.
31856 0         0 $token = undef;
31857 0         0 my $ibeg = $ri_first->[-1];
31858 0         0 my $iend = $ri_last->[-1];
31859 0 0 0     0 if ( $iend > $ibeg
      0        
31860             && $iend == $max_index_to_go
31861             && $types_to_go[$max_index_to_go] eq '#' )
31862             {
31863 0         0 $iend--;
31864 0         0 $max_index_to_go--;
31865 0 0 0     0 if ( $iend > $ibeg
31866             && $types_to_go[$max_index_to_go] eq 'b' )
31867             {
31868 0         0 $iend--;
31869 0         0 $max_index_to_go--;
31870             }
31871 0         0 $ri_last->[-1] = $iend;
31872             }
31873             }
31874             else {
31875             ## above threshold, cannot delete
31876             }
31877             }
31878              
31879             # switch to the new csc (unless we deleted it!)
31880 0 0       0 if ($token) {
31881              
31882 0         0 my $len_tok = length($token); # NOTE: length no longer important
31883 0         0 my $added_len =
31884             $len_tok - $token_lengths_to_go[$max_index_to_go];
31885              
31886 0         0 $tokens_to_go[$max_index_to_go] = $token;
31887 0         0 $token_lengths_to_go[$max_index_to_go] = $len_tok;
31888 0         0 my $K = $K_to_go[$max_index_to_go];
31889 0         0 $rLL->[$K]->[_TOKEN_] = $token;
31890 0         0 $rLL->[$K]->[_TOKEN_LENGTH_] = $len_tok;
31891 0         0 $summed_lengths_to_go[ $max_index_to_go + 1 ] += $added_len;
31892             }
31893             }
31894              
31895             # handle case of NO existing closing side comment
31896             else {
31897              
31898             # To avoid inserting a new token in the token arrays, we
31899             # will just return the new side comment so that it can be
31900             # inserted just before it is needed in the call to the
31901             # vertical aligner.
31902 9         21 $closing_side_comment = $token;
31903             }
31904             }
31905 61         179 return ( $closing_side_comment, $cscw_block_comment );
31906             } ## end sub add_closing_side_comment
31907              
31908             ############################
31909             # CODE SECTION 15: Summarize
31910             ############################
31911              
31912             sub wrapup {
31913              
31914             # This is the last routine called when a file is formatted.
31915             # Flush buffer and write any informative messages
31916 561     561 0 2188 my ( $self, $severe_error ) = @_;
31917              
31918 561         2546 $self->flush();
31919 561         2163 my $file_writer_object = $self->[_file_writer_object_];
31920 561         3605 $file_writer_object->decrement_output_line_number()
31921             ; # fix up line number since it was incremented
31922 561         2643 we_are_at_the_last_line();
31923              
31924 561         1451 my $max_depth = $self->[_maximum_BLOCK_level_];
31925 561         1402 my $at_line = $self->[_maximum_BLOCK_level_at_line_];
31926 561         3682 write_logfile_entry(
31927             "Maximum leading structural depth is $max_depth in input at line $at_line\n"
31928             );
31929              
31930 561         1772 my $added_semicolon_count = $self->[_added_semicolon_count_];
31931 561         1666 my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
31932 561         1564 my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
31933              
31934 561 100       2293 if ( $added_semicolon_count > 0 ) {
31935 16 100       135 my $first = ( $added_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
31936 16 100       70 my $what =
31937             ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
31938 16         92 write_logfile_entry("$added_semicolon_count $what added:\n");
31939 16         144 write_logfile_entry(
31940             " $first at input line $first_added_semicolon_at\n");
31941              
31942 16 100       94 if ( $added_semicolon_count > 1 ) {
31943 3         23 write_logfile_entry(
31944             " Last at input line $last_added_semicolon_at\n");
31945             }
31946 16         113 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
31947 16         77 write_logfile_entry("\n");
31948             }
31949              
31950 561         1565 my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
31951 561         1229 my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
31952 561         1279 my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
31953 561 100       2047 if ( $deleted_semicolon_count > 0 ) {
31954 2 50       12 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
31955 2 50       13 my $what =
31956             ( $deleted_semicolon_count > 1 )
31957             ? "semicolons were"
31958             : "semicolon was";
31959 2         16 write_logfile_entry(
31960             "$deleted_semicolon_count unnecessary $what deleted:\n");
31961 2         37 write_logfile_entry(
31962             " $first at input line $first_deleted_semicolon_at\n");
31963              
31964 2 50       13 if ( $deleted_semicolon_count > 1 ) {
31965 2         11 write_logfile_entry(
31966             " Last at input line $last_deleted_semicolon_at\n");
31967             }
31968 2         13 write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
31969 2         7 write_logfile_entry("\n");
31970             }
31971              
31972 561         1394 my $embedded_tab_count = $self->[_embedded_tab_count_];
31973 561         1179 my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
31974 561         1365 my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
31975 561 50       2079 if ( $embedded_tab_count > 0 ) {
31976 0 0       0 my $first = ( $embedded_tab_count > 1 ) ? "First" : EMPTY_STRING;
31977 0 0       0 my $what =
31978             ( $embedded_tab_count > 1 )
31979             ? "quotes or patterns"
31980             : "quote or pattern";
31981 0         0 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
31982 0         0 write_logfile_entry(
31983             "This means the display of this script could vary with device or software\n"
31984             );
31985 0         0 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
31986              
31987 0 0       0 if ( $embedded_tab_count > 1 ) {
31988 0         0 write_logfile_entry(
31989             " Last at input line $last_embedded_tab_at\n");
31990             }
31991 0         0 write_logfile_entry("\n");
31992             }
31993              
31994 561         1340 my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
31995 561         1288 my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
31996 561         1219 my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
31997 561         1239 my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
31998              
31999 561 50       1734 if ($first_tabbing_disagreement) {
32000 0         0 write_logfile_entry(
32001             "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
32002             );
32003             }
32004              
32005 561         1392 my $first_btd = $self->[_first_brace_tabbing_disagreement_];
32006 561 50       1767 if ($first_btd) {
32007 0         0 my $msg =
32008             "First closing brace indentation disagreement started at input line $first_btd\n";
32009 0         0 write_logfile_entry($msg);
32010              
32011             # leave a hint in the .ERR file if there was a brace error
32012 0 0       0 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
  0         0  
32013             }
32014              
32015 561         1418 my $in_btd = $self->[_in_brace_tabbing_disagreement_];
32016 561 50       1866 if ($in_btd) {
32017 0         0 my $msg =
32018             "Ending with brace indentation disagreement which started at input line $in_btd\n";
32019 0         0 write_logfile_entry($msg);
32020              
32021             # leave a hint in the .ERR file if there was a brace error
32022 0 0       0 if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
  0         0  
32023             }
32024              
32025 561 50       1823 if ($in_tabbing_disagreement) {
32026 0         0 my $msg =
32027             "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
32028 0         0 write_logfile_entry($msg);
32029             }
32030             else {
32031              
32032 561 50       1483 if ($last_tabbing_disagreement) {
32033              
32034 0         0 write_logfile_entry(
32035             "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
32036             );
32037             }
32038             else {
32039 561         1337 write_logfile_entry("No indentation disagreement seen\n");
32040             }
32041             }
32042              
32043 561 50       3732 if ($first_tabbing_disagreement) {
32044 0         0 write_logfile_entry(
32045             "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
32046             );
32047             }
32048 561         2265 write_logfile_entry("\n");
32049              
32050 561         2444 my $vao = $self->[_vertical_aligner_object_];
32051 561         5300 $vao->report_anything_unusual();
32052              
32053 561         3269 $file_writer_object->report_line_length_errors();
32054              
32055             # Define the formatter self-check for convergence.
32056             $self->[_converged_] =
32057             $severe_error
32058             || $file_writer_object->get_convergence_check()
32059 561   100     5492 || $rOpts->{'indent-only'};
32060              
32061 561         1758 return;
32062             } ## end sub wrapup
32063              
32064             } ## end package Perl::Tidy::Formatter
32065             1;