File Coverage

blib/lib/Perl/Tidy/Tokenizer.pm
Criterion Covered Total %
statement 2672 3695 72.3
branch 1246 1914 65.1
condition 749 1193 62.7
subroutine 168 202 83.1
pod 0 147 0.0
total 4835 7151 67.6


line stmt bran cond sub pod time code
1             #####################################################################
2             #
3             # Perl::Tidy::Tokenizer reads a source and breaks it into a stream of tokens
4             #
5             # Usage Outline:
6             #
7             # STEP 1: initialize or re-initialize Tokenizer with user options
8             # Perl::Tidy::Tokenizer::check_options($rOpts);
9             #
10             # STEP 2: create a tokenizer for a specific input source object
11             # my $tokenizer = Perl::Tidy::Tokenizer->new(
12             # source_object => $source,
13             # ...
14             # );
15             #
16             # STEP 3: get and process each tokenized 'line' (a hash ref of token info)
17             # while ( my $line = $tokenizer->get_line() ) {
18             # $formatter->write_line($line);
19             # }
20             #
21             # STEP 4: report errors
22             # my $severe_error = $tokenizer->report_tokenization_errors();
23             #
24             # The source object can be a STRING ref, an ARRAY ref, or an object with a
25             # get_line() method which supplies one line (a character string) perl call.
26             #
27             # NOTE: This is not a real class. Only one tokenizer my be used.
28             #
29             ########################################################################
30              
31             package Perl::Tidy::Tokenizer;
32 44     44   262 use strict;
  44         90  
  44         1514  
33 44     44   177 use warnings;
  44         72  
  44         1886  
34 44     44   166 use English qw( -no_match_vars );
  44         69  
  44         233  
35              
36             our $VERSION = '20260204';
37              
38 44     44   13408 use Carp;
  44         83  
  44         2210  
39              
40 44     44   183 use constant DEVEL_MODE => 0;
  44         73  
  44         2208  
41 44     44   171 use constant DEBUG_GUESS_MODE => 0;
  44         72  
  44         1818  
42 44     44   218 use constant EMPTY_STRING => q{};
  44         74  
  44         1600  
43 44     44   172 use constant SPACE => q{ };
  44         113  
  44         1662  
44 44     44   168 use constant COMMA => q{,};
  44         63  
  44         1662  
45 44     44   177 use constant BACKSLASH => q{\\};
  44         82  
  44         2557  
46              
47             { #<<< A non-indenting brace to contain all lexical variables
48              
49             # List of hash keys to prevent -duk from listing them.
50             # (note the backtick in this list)
51             my @unique_hash_keys_uu = qw( ` RPerl _rtype_sequence _ending_in_quote );
52              
53             # Parent sequence number of tree of containers; must be 1
54 44     44   203 use constant SEQ_ROOT => 1;
  44         94  
  44         1667  
55              
56             # Defaults for guessing old indentation
57 44     44   197 use constant INDENT_COLUMNS_DEFAULT => 4;
  44         86  
  44         1443  
58 44     44   147 use constant TAB_SIZE_DEFAULT => 8;
  44         69  
  44         1409  
59              
60             # Decimal values of some ascii characters for quick checks
61 44     44   147 use constant ORD_TAB => 9;
  44         81  
  44         1304  
62 44     44   171 use constant ORD_SPACE => 32;
  44         63  
  44         1345  
63 44     44   190 use constant ORD_PRINTABLE_MIN => 33;
  44         63  
  44         1830  
64 44     44   198 use constant ORD_PRINTABLE_MAX => 126;
  44         64  
  44         7960  
65              
66             # GLOBAL VARIABLES which change during tokenization:
67             # These could also be stored in $self but it is more convenient and
68             # efficient to make them global lexical variables.
69             # INITIALIZER: sub prepare_for_a_new_file
70             my (
71              
72             $brace_depth,
73             $context,
74             $current_package,
75             $last_nonblank_block_type,
76             $last_nonblank_token,
77             $last_nonblank_type,
78             $next_sequence_number,
79             $paren_depth,
80             $rbrace_context,
81             $rbrace_package,
82             $rbrace_structural_type,
83             $rbrace_type,
84             $rcurrent_depth,
85             $rcurrent_sequence_number,
86             $ris_lexical_sub,
87             $rdepth_array,
88             $ris_block_function,
89             $ris_block_list_function,
90             $ris_constant,
91             $ris_user_function,
92             $rnested_statement_type,
93             $rnested_ternary_flag,
94             $rparen_semicolon_count,
95             $rparen_vars,
96             $rparen_type,
97             $rsaw_function_definition,
98             $rsaw_use_module,
99             $rsquare_bracket_structural_type,
100             $rsquare_bracket_type,
101             $rstarting_line_of_current_depth,
102             $rtotal_depth,
103             $ruser_function_prototype,
104             $square_bracket_depth,
105             $statement_type,
106             $total_depth,
107             );
108              
109             my (
110              
111             # GLOBAL CONSTANTS for routines in this package,
112             # INITIALIZER: BEGIN block.
113             %can_start_digraph,
114             %expecting_operator_token,
115             %expecting_operator_types,
116             %expecting_term_token,
117             %expecting_term_types,
118             %is_block_operator,
119             %is_digraph,
120             %is_file_test_operator,
121             %is_if_elsif_unless,
122             %is_if_elsif_unless_case_when,
123             %is_indirect_object_taker,
124             %is_keyword_rejecting_question_as_pattern_delimiter,
125             %is_keyword_rejecting_slash_as_pattern_delimiter,
126             %is_keyword_taking_list,
127             %is_keyword_taking_optional_arg,
128             %is_q_qq_qw_qx_qr_s_y_tr_m,
129             %is_q_qq_qx_qr_s_y_tr_m,
130             %quote_modifiers,
131             %is_semicolon_or_t,
132             %is_sort_map_grep,
133             %is_sort_map_grep_eval_do_sub,
134             %is_tetragraph,
135             %is_trigraph,
136             %is_valid_token_type,
137             %other_line_endings,
138             %is_binary_operator_type,
139             %is_binary_keyword,
140             %is_binary_or_unary_operator_type,
141             %is_binary_or_unary_keyword,
142             %is_not_a_TERM_producer_type,
143             @closing_brace_names,
144             @opening_brace_names,
145              
146             # GLOBAL CONSTANT hash lookup table of operator expected values
147             # INITIALIZER: BEGIN block
148             %op_expected_table,
149              
150             # GLOBAL VARIABLES which are constant after being configured.
151             # INITIALIZER: BEGIN block and modified by sub check_options
152             %is_code_block_token,
153             %is_zero_continuation_block_type,
154             %is_keyword,
155             %is_TERM_keyword,
156             %is_my_our_state,
157             %is_package,
158             %matching_end_token,
159              
160             # INITIALIZER: sub check_options
161             $code_skipping_pattern_begin,
162             $code_skipping_pattern_end,
163             $format_skipping_pattern_begin,
164             $format_skipping_pattern_end,
165              
166             $rOpts_code_skipping,
167             $rOpts_code_skipping_begin,
168             $rOpts_format_skipping,
169             $rOpts_format_skipping_begin,
170             $rOpts_format_skipping_end,
171             $rOpts_starting_indentation_level,
172             $rOpts_indent_columns,
173             $rOpts_look_for_hash_bang,
174             $rOpts_look_for_autoloader,
175             $rOpts_look_for_selfloader,
176             $rOpts_trim_qw,
177             $rOpts_extended_syntax,
178             $rOpts_continuation_indentation,
179             $rOpts_outdent_labels,
180             $rOpts_maximum_level_errors,
181             $rOpts_maximum_unexpected_errors,
182             $rOpts_indent_closing_brace,
183             $rOpts_non_indenting_braces,
184             $rOpts_non_indenting_brace_prefix,
185             $rOpts_whitespace_cycle,
186              
187             $tabsize,
188             %is_END_DATA_format_sub,
189             %is_grep_alias,
190             %is_sub,
191             $guess_if_method,
192             );
193              
194             # possible values of operator_expected()
195 44     44   183 use constant TERM => -1;
  44         84  
  44         1656  
196 44     44   155 use constant UNKNOWN => 0;
  44         93  
  44         1378  
197 44     44   190 use constant OPERATOR => 1;
  44         78  
  44         1556  
198              
199             # possible values of context
200 44     44   165 use constant SCALAR_CONTEXT => -1;
  44         55  
  44         1420  
201 44     44   182 use constant UNKNOWN_CONTEXT => 0;
  44         72  
  44         1420  
202 44     44   169 use constant LIST_CONTEXT => 1;
  44         83  
  44         1505  
203              
204             # Maximum number of little messages; probably need not be changed.
205 44     44   159 use constant MAX_NAG_MESSAGES => 6;
  44         63  
  44         7876  
206              
207 0         0 BEGIN {
208              
209             # Array index names for $self.
210             # Do not combine with other BEGIN blocks (c101).
211 44     44   299169 my $i = 0;
212             use constant {
213 44         13664 _rhere_target_list_ => $i++,
214             _in_here_doc_ => $i++,
215             _here_doc_target_ => $i++,
216             _here_quote_character_ => $i++,
217             _in_data_ => $i++,
218             _in_end_ => $i++,
219             _in_format_ => $i++,
220             _in_error_ => $i++,
221             _do_not_format_ => $i++,
222             _warning_count_ => $i++,
223             _html_tag_count_ => $i++,
224             _in_pod_ => $i++,
225             _in_code_skipping_ => $i++,
226             _in_format_skipping_ => $i++,
227             _rformat_skipping_list_ => $i++,
228             _in_attribute_list_ => $i++,
229             _in_quote_ => $i++,
230             _quote_target_ => $i++,
231             _line_start_quote_ => $i++,
232             _starting_level_ => $i++,
233             _know_starting_level_ => $i++,
234             _last_line_number_ => $i++,
235             _saw_perl_dash_P_ => $i++,
236             _saw_perl_dash_w_ => $i++,
237             _saw_use_strict_ => $i++,
238             _saw_brace_error_ => $i++,
239             _hit_bug_ => $i++,
240             _look_for_autoloader_ => $i++,
241             _look_for_selfloader_ => $i++,
242             _saw_autoloader_ => $i++,
243             _saw_selfloader_ => $i++,
244             _saw_hash_bang_ => $i++,
245             _saw_end_ => $i++,
246             _saw_data_ => $i++,
247             _saw_negative_indentation_ => $i++,
248             _started_tokenizing_ => $i++,
249             _debugger_object_ => $i++,
250             _diagnostics_object_ => $i++,
251             _logger_object_ => $i++,
252             _save_logfile_ => $i++,
253             _unexpected_error_count_ => $i++,
254             _started_looking_for_here_target_at_ => $i++,
255             _nearly_matched_here_target_at_ => $i++,
256             _line_of_text_ => $i++,
257             _rlower_case_labels_at_ => $i++,
258             _maximum_level_ => $i++,
259             _true_brace_error_count_ => $i++,
260             _rOpts_ => $i++,
261             _rinput_lines_ => $i++,
262             _input_line_index_next_ => $i++,
263             _rtrimmed_input_lines_ => $i++,
264             _rclosing_brace_indentation_hash_ => $i++,
265             _show_indentation_table_ => $i++,
266             _rnon_indenting_brace_stack_ => $i++,
267             _rbareword_info_ => $i++,
268 44     44   273 };
  44         99  
269             } ## end BEGIN
270              
271             { ## closure for subs to count instances
272              
273             # methods to count instances
274             my $_count = 0;
275 0     0 0 0 sub get_count { return $_count; }
276 649     649   1924 sub _increment_count { return ++$_count }
277 649     649   1059 sub _decrement_count { return --$_count }
278             }
279              
280             sub DESTROY {
281 649     649   1250 my $self = shift;
282 649         1996 _decrement_count();
283 649         15288 return;
284             }
285              
286             sub AUTOLOAD {
287              
288             # Catch any undefined sub calls so that we are sure to get
289             # some diagnostic information. This sub should never be called
290             # except for a programming error.
291 0     0   0 our $AUTOLOAD;
292 0 0       0 return if ( $AUTOLOAD =~ /\bDESTROY$/ );
293 0         0 my ( $pkg, $fname, $lno ) = caller();
294 0         0 my $my_package = __PACKAGE__;
295 0         0 print {*STDERR} <<EOM;
  0         0  
296             ======================================================================
297             Error detected in package '$my_package', version $VERSION
298             Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
299             Called from package: '$pkg'
300             Called from File '$fname' at line '$lno'
301             This error is probably due to a recent programming change
302             ======================================================================
303             EOM
304 0         0 exit 1;
305             } ## end sub AUTOLOAD
306              
307             sub Die {
308 0     0 0 0 my ($msg) = @_;
309 0         0 Perl::Tidy::Die($msg);
310 0         0 croak "unexpected return from Perl::Tidy::Die";
311             }
312              
313             sub Warn {
314 0     0 0 0 my ($msg) = @_;
315 0         0 Perl::Tidy::Warn($msg);
316 0         0 return;
317             }
318              
319             sub Fault {
320 0     0 0 0 my ($msg) = @_;
321              
322             # This routine is called for errors that really should not occur
323             # except if there has been a bug introduced by a recent program change.
324             # Please add comments at calls to Fault to explain why the call
325             # should not occur, and where to look to fix it.
326 0         0 my ( $package0_uu, $filename0_uu, $line0, $subroutine0_uu ) = caller(0);
327 0         0 my ( $package1_uu, $filename1, $line1, $subroutine1 ) = caller(1);
328 0         0 my ( $package2_uu, $filename2_uu, $line2_uu, $subroutine2 ) = caller(2);
329 0         0 my $pkg = __PACKAGE__;
330 0         0 my $input_stream_name = Perl::Tidy::get_input_stream_name();
331              
332 0         0 Die(<<EOM);
333             ==============================================================================
334             While operating on input stream with name: '$input_stream_name'
335             A fault was detected at line $line0 of sub '$subroutine1'
336             in file '$filename1'
337             which was called from line $line1 of sub '$subroutine2'
338             Message: '$msg'
339             This is probably an error introduced by a recent programming change.
340             $pkg reports VERSION='$VERSION'.
341             ==============================================================================
342             EOM
343              
344 0         0 croak "unexpected return from sub Die";
345             } ## end sub Fault
346              
347             sub bad_pattern {
348 2588     2588 0 3758 my ($pattern) = @_;
349              
350             # Return true if a regex pattern has an error
351             # Note: Formatter.pm also has a copy of this
352 2588         3217 my $regex_uu = eval { qr/$pattern/ };
  2588         85460  
353 2588         8153 return $EVAL_ERROR;
354             } ## end sub bad_pattern
355              
356             sub make_skipping_pattern {
357 2588     2588 0 4501 my ( $rOpts, $opt_name, $default ) = @_;
358              
359             # Make regex patterns for the format-skipping and code-skipping options
360 2588         3970 my $param = $rOpts->{$opt_name};
361 2588 100       4217 if ( !$param ) { $param = $default }
  2584         3375  
362 2588         6117 $param =~ s/^\s+//;
363 2588 50       5913 if ( $param !~ /^#/ ) {
364 0         0 Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
365             }
366              
367             # Note that the ending \s will match a newline
368 2588         3926 my $pattern = '^\s*' . $param . '\s';
369 2588 50       4586 if ( bad_pattern($pattern) ) {
370 0         0 Die(
371             "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
372             );
373             }
374 2588         4718 return $pattern;
375             } ## end sub make_skipping_pattern
376              
377             sub check_options {
378              
379             # Check and pre-process tokenizer parameters
380 647     647 0 1332 my $rOpts = shift;
381              
382 647         1768 %is_sub = ();
383 647         1647 $is_sub{'sub'} = 1;
384              
385 647         3816 %is_END_DATA_format_sub = (
386             '__END__' => 1,
387             '__DATA__' => 1,
388             'format' => 1,
389             'sub' => 1,
390             );
391              
392             # Install any aliases to 'sub'
393 647 100       1928 if ( $rOpts->{'sub-alias-list'} ) {
394              
395             # Note that any 'sub-alias-list' has been preprocessed to
396             # be a trimmed, space-separated list which includes 'sub'
397             # for example, it might be 'sub method fun'
398 3         17 my @sub_alias_list = split /\s+/, $rOpts->{'sub-alias-list'};
399 3         8 foreach my $word (@sub_alias_list) {
400 11         20 $is_sub{$word} = 1;
401 11         23 $is_END_DATA_format_sub{$word} = 1;
402             }
403             }
404              
405             # Set global flag to say if we have to guess if bareword 'method' is
406             # a sub when 'method' is in %is_sub. This will be true unless:
407             # (1) the user entered 'method' as sub alias, or
408             # (2) the user set --use-feature=class
409             # In these two cases we can assume that 'method' is a sub alias.
410 647         1128 $guess_if_method = 1;
411 647 100       1840 if ( $is_sub{'method'} ) { $guess_if_method = 0 }
  2         4  
412              
413             #------------------------------------------------
414             # Update hash values for any -use-feature options
415             #------------------------------------------------
416              
417 647         1051 my $use_feature_class = 1;
418              
419 647         1292 my $str = $rOpts->{'use-feature'};
420 647 50 33     1943 if ( defined($str) && length($str) ) {
421 0         0 $str =~ s/^\s+//;
422 0         0 $str =~ s/\s+$//;
423 0 0       0 if ( !length($str) ) {
    0          
    0          
424             ## all spaces
425             }
426             elsif ( $str =~ /\bnoclass\b/ ) {
427 0         0 $use_feature_class = 0;
428             }
429             elsif ( $str =~ /\bclass\b/ ) {
430 0         0 $guess_if_method = 0;
431             }
432             else {
433             # At present, only 'class' and 'noclass' are valid strings
434             # This is just a Warn, for testing, but will eventually be Die
435 0         0 Warn(
436             "Unexpected text in --use-feature: expecting 'class' or 'noclass'\n"
437             );
438             }
439             }
440              
441             # These are the main updates for this option. There are additional
442             # changes elsewhere, usually indicated with a comment 'rt145706'
443              
444             # Update hash values for use_feature=class, added for rt145706
445             # see 'perlclass.pod'
446              
447             # IMPORTANT: We are changing global hash values initially set in a BEGIN
448             # block. Values must be defined (true or false) for each of these new
449             # words whether true or false. Otherwise, programs using the module which
450             # change options between runs (such as test code) will have
451             # incorrect settings and fail.
452              
453             # There are 4 new keywords:
454              
455             # 'class' - treated specially as generalization of 'package'
456             # Note: we must not set 'class' to be a keyword to avoid problems
457             # with older uses.
458 647         1707 $is_package{'class'} = $use_feature_class;
459              
460             # 'method' - treated like sub using the sub-alias-list option
461             # Note: we must not set 'method' to be a keyword to avoid problems
462             # with older uses.
463 647 50       1558 if ($use_feature_class) {
464 647         1322 $is_sub{'method'} = 1;
465 647         1215 $is_END_DATA_format_sub{'method'} = 1;
466             }
467              
468             # 'field' - added as a keyword, and works like 'my'
469             # Setting zero_continuation_block_type allows inclusion in table of level
470             # differences in case of a missing or extra brace (see sub wrapup).
471 647         1586 $is_keyword{'field'} = $use_feature_class;
472 647         1268 $is_my_our_state{'field'} = $use_feature_class;
473 647         1358 $is_zero_continuation_block_type{'field'} = $use_feature_class;
474              
475             # 'ADJUST' - added as a keyword and works like 'BEGIN'
476             # See update git #182 for 'ADJUST :params'
477             # Setting zero_continuation_block_type allows inclusion in table of level
478             # differences in case of a missing or extra brace (see sub wrapup).
479 647         1336 $is_keyword{'ADJUST'} = $use_feature_class;
480 647         1280 $is_code_block_token{'ADJUST'} = $use_feature_class;
481 647         1255 $is_zero_continuation_block_type{'ADJUST'} = $use_feature_class;
482              
483 647         1857 %is_grep_alias = ();
484 647 50       1874 if ( $rOpts->{'grep-alias-list'} ) {
485              
486             # Note that 'grep-alias-list' has been preprocessed to be a trimmed,
487             # space-separated list
488 647         2766 my @q = split /\s+/, $rOpts->{'grep-alias-list'};
489 647         4116 $is_grep_alias{$_} = 1 for @q;
490             }
491              
492 647         1334 $rOpts_starting_indentation_level = $rOpts->{'starting-indentation-level'};
493 647         1306 $rOpts_indent_columns = $rOpts->{'indent-columns'};
494 647         1141 $rOpts_look_for_hash_bang = $rOpts->{'look-for-hash-bang'};
495 647         1266 $rOpts_look_for_autoloader = $rOpts->{'look-for-autoloader'};
496 647         1134 $rOpts_look_for_selfloader = $rOpts->{'look-for-selfloader'};
497 647         1171 $rOpts_trim_qw = $rOpts->{'trim-qw'};
498 647         1072 $rOpts_extended_syntax = $rOpts->{'extended-syntax'};
499 647         1120 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
500 647         1066 $rOpts_outdent_labels = $rOpts->{'outdent-labels'};
501 647         1108 $rOpts_maximum_level_errors = $rOpts->{'maximum-level-errors'};
502 647         1157 $rOpts_maximum_unexpected_errors = $rOpts->{'maximum-unexpected-errors'};
503 647         1181 $rOpts_code_skipping = $rOpts->{'code-skipping'};
504 647         1018 $rOpts_code_skipping_begin = $rOpts->{'code-skipping-begin'};
505 647         1034 $rOpts_format_skipping = $rOpts->{'format-skipping'};
506 647         1123 $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
507 647         1167 $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
508 647         1004 $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
509 647         1135 $rOpts_non_indenting_braces = $rOpts->{'non-indenting-braces'};
510 647         1152 $rOpts_non_indenting_brace_prefix = $rOpts->{'non-indenting-brace-prefix'};
511 647         1077 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
512              
513             # In the Tokenizer, --indent-columns is just used for guessing old
514             # indentation, and must be positive. If -i=0 is used for this run (which
515             # is possible) we'll just guess that the old run used 4 spaces per level.
516 647 100       1565 if ( !$rOpts_indent_columns ) {
517 12         23 $rOpts_indent_columns = INDENT_COLUMNS_DEFAULT;
518             }
519              
520             # Define $tabsize, the number of spaces per tab for use in
521             # guessing the indentation of source lines with leading tabs.
522             # Assume same as for this run if tabs are used, otherwise assume
523             # a default value, typically 8
524             $tabsize =
525             $rOpts->{'entab-leading-whitespace'}
526             ? $rOpts->{'entab-leading-whitespace'}
527             : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'}
528 647 50       2564 : $rOpts->{'default-tabsize'};
    100          
529 647 50       1507 if ( !$tabsize ) { $tabsize = TAB_SIZE_DEFAULT }
  0         0  
530              
531             $code_skipping_pattern_begin =
532 647         2184 make_skipping_pattern( $rOpts, 'code-skipping-begin', '#<<V' );
533 647         1747 $code_skipping_pattern_end =
534             make_skipping_pattern( $rOpts, 'code-skipping-end', '#>>V' );
535              
536 647         1899 $format_skipping_pattern_begin =
537             make_skipping_pattern( $rOpts, 'format-skipping-begin', '#<<<' );
538 647         1663 $format_skipping_pattern_end =
539             make_skipping_pattern( $rOpts, 'format-skipping-end', '#>>>' );
540              
541 647         1825 return;
542             } ## end sub check_options
543              
544             sub new {
545              
546 649     649 0 2539 my ( $class, @arglist ) = @_;
547 649 50       1801 if ( @arglist % 2 ) { croak "Odd number of items in arg hash list\n" }
  0         0  
548              
549 649         5029 my %defaults = (
550             source_object => undef,
551             debugger_object => undef,
552             diagnostics_object => undef,
553             logger_object => undef,
554             starting_level => undef,
555             starting_line_number => 1,
556             rOpts => {},
557             );
558 649         3499 my %args = ( %defaults, @arglist );
559              
560             # we are given an object with a get_line() method to supply source lines
561 649         1616 my $source_object = $args{source_object};
562 649         1113 my $rOpts = $args{rOpts};
563              
564             # Check call args
565 649 50       1582 if ( !defined($source_object) ) {
566 0         0 Die(
567             "Perl::Tidy::Tokenizer::new called without a 'source_object' parameter\n"
568             );
569             }
570 649 50       2872 if ( !ref($source_object) ) {
571 0         0 Die(<<EOM);
572             sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference;
573             'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method
574             EOM
575             }
576              
577 649         1110 my $logger_object = $args{logger_object};
578              
579             # Tokenizer state data is as follows:
580             # _rhere_target_list_ reference to list of here-doc targets
581             # _here_doc_target_ the target string for a here document
582             # _here_quote_character_ the type of here-doc quoting (" ' ` or none)
583             # to determine if interpolation is done
584             # _quote_target_ character we seek if chasing a quote
585             # _line_start_quote_ line where we started looking for a long quote
586             # _in_here_doc_ flag indicating if we are in a here-doc
587             # _in_pod_ flag set if we are in pod documentation
588             # _in_code_skipping_ flag set if we are in a code skipping section
589             # _in_format_skipping_ flag set if we are in a format skipping section
590             # _in_error_ flag set if we saw severe error (binary in script)
591             # _do_not_format_ flag set if formatting should be skipped
592             # _warning_count_ number of calls to logger sub warning
593             # _html_tag_count_ number of apparent html tags seen (indicates html)
594             # _in_data_ flag set if we are in __DATA__ section
595             # _in_end_ flag set if we are in __END__ section
596             # _in_format_ flag set if we are in a format description
597             # _in_attribute_list_ flag telling if we are looking for attributes
598             # _in_quote_ flag telling if we are chasing a quote
599             # _starting_level_ indentation level of first line
600             # _diagnostics_object_ place to write debugging information
601             # _unexpected_error_count_ error count used to limit output
602             # _lower_case_labels_at_ line numbers where lower case labels seen
603             # _hit_bug_ program bug detected
604              
605 649         1020 my $self = [];
606 649         1389 $self->[_rhere_target_list_] = [];
607 649         1340 $self->[_in_here_doc_] = 0;
608 649         1489 $self->[_here_doc_target_] = EMPTY_STRING;
609 649         1115 $self->[_here_quote_character_] = EMPTY_STRING;
610 649         1221 $self->[_in_data_] = 0;
611 649         1431 $self->[_in_end_] = 0;
612 649         1225 $self->[_in_format_] = 0;
613 649         1174 $self->[_in_error_] = 0;
614 649         1262 $self->[_do_not_format_] = 0;
615 649         1030 $self->[_warning_count_] = 0;
616 649         1229 $self->[_html_tag_count_] = 0;
617 649         1040 $self->[_in_pod_] = 0;
618             $self->[_in_code_skipping_] =
619 649   33     1996 $rOpts->{'code-skipping-from-start'} && $rOpts_code_skipping;
620 649         1077 $self->[_in_format_skipping_] = 0;
621 649         1273 $self->[_rformat_skipping_list_] = [];
622 649         1284 $self->[_in_attribute_list_] = 0;
623 649         1072 $self->[_in_quote_] = 0;
624 649         1162 $self->[_quote_target_] = EMPTY_STRING;
625 649         1541 $self->[_line_start_quote_] = -1;
626 649         1147 $self->[_starting_level_] = $args{starting_level};
627 649         1379 $self->[_know_starting_level_] = defined( $args{starting_level} );
628 649         1365 $self->[_last_line_number_] = $args{starting_line_number} - 1;
629 649         1423 $self->[_saw_perl_dash_P_] = 0;
630 649         1299 $self->[_saw_perl_dash_w_] = 0;
631 649         1006 $self->[_saw_use_strict_] = 0;
632 649         1039 $self->[_saw_brace_error_] = 0;
633 649         1039 $self->[_hit_bug_] = 0;
634 649         1423 $self->[_look_for_autoloader_] = $rOpts_look_for_autoloader;
635 649         1121 $self->[_look_for_selfloader_] = $rOpts_look_for_selfloader;
636 649         1056 $self->[_saw_autoloader_] = 0;
637 649         1084 $self->[_saw_selfloader_] = 0;
638 649         990 $self->[_saw_hash_bang_] = 0;
639 649         981 $self->[_saw_end_] = 0;
640 649         1376 $self->[_saw_data_] = 0;
641 649         1030 $self->[_saw_negative_indentation_] = 0;
642 649         987 $self->[_started_tokenizing_] = 0;
643 649         1061 $self->[_debugger_object_] = $args{debugger_object};
644 649         1048 $self->[_diagnostics_object_] = $args{diagnostics_object};
645 649         1000 $self->[_logger_object_] = $logger_object;
646 649         1232 $self->[_unexpected_error_count_] = 0;
647 649         1003 $self->[_started_looking_for_here_target_at_] = 0;
648 649         969 $self->[_nearly_matched_here_target_at_] = undef;
649 649         1027 $self->[_line_of_text_] = EMPTY_STRING;
650 649         1050 $self->[_rlower_case_labels_at_] = undef;
651 649         1021 $self->[_maximum_level_] = 0;
652 649         940 $self->[_true_brace_error_count_] = 0;
653 649         1430 $self->[_rnon_indenting_brace_stack_] = [];
654 649         1072 $self->[_show_indentation_table_] = 0;
655 649         1152 $self->[_rbareword_info_] = {};
656              
657 649         4821 $self->[_rclosing_brace_indentation_hash_] = {
658             valid => undef,
659             rhistory_line_number => [0],
660             rhistory_level_diff => [0],
661             rhistory_anchor_point => [1],
662             };
663              
664 649         1318 $self->[_rOpts_] = $rOpts;
665 649   100     3028 $self->[_save_logfile_] =
666             defined($logger_object) && $logger_object->get_save_logfile();
667              
668 649         1421 bless $self, $class;
669              
670 649         3311 $self->prepare_for_a_new_file($source_object);
671 649         2789 $self->find_starting_indentation_level();
672              
673             # This is not a full class yet, so die if an attempt is made to
674             # create more than one object.
675              
676 649 50       1992 if ( _increment_count() > 1 ) {
677 0         0 confess
678             "Attempt to create more than 1 object in $class, which is not a true class yet\n";
679             }
680              
681 649         4773 return $self;
682              
683             } ## end sub new
684              
685             # Called externally
686             sub get_unexpected_error_count {
687 4     4 0 10 my ($self) = @_;
688 4         16 return $self->[_unexpected_error_count_];
689             }
690              
691             # Called externally
692             sub is_keyword {
693 4     4 0 8 my ($str) = @_;
694 4         16 return $is_keyword{$str};
695             }
696              
697             #----------------------------------------------------------------
698             # Line input routines, previously handled by the LineBuffer class
699             #----------------------------------------------------------------
700             sub make_source_array {
701              
702 649     649 0 1323 my ( $self, $line_source_object ) = @_;
703              
704             # Convert the source into an array of lines
705             # Given:
706             # $line_source_object = the input source stream
707             # Task:
708             # Convert the source to an array ref and store in $self
709              
710 649         1006 my $rinput_lines = [];
711              
712 649         1224 my $rsource = ref($line_source_object);
713 649         1074 my $source_string;
714              
715 649 50       2912 if ( !$rsource ) {
    50          
    50          
716              
717             # shouldn't happen: this should have been checked in sub new
718 0         0 Fault(<<EOM);
719             sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference;
720             'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method
721             EOM
722             }
723              
724             # handle an ARRAY ref
725             elsif ( $rsource eq 'ARRAY' ) {
726 0         0 $rinput_lines = $line_source_object;
727 0         0 $source_string = join( EMPTY_STRING, @{$line_source_object} );
  0         0  
728             }
729              
730             # handle a SCALAR ref
731             elsif ( $rsource eq 'SCALAR' ) {
732 649         947 $source_string = ${$line_source_object};
  649         1350  
733 649         4236 my @lines = split /^/, $source_string;
734 649         1668 $rinput_lines = \@lines;
735             }
736              
737             # handle an object - must have a get_line method
738             else {
739              
740             # This will die if user's object does have a 'get_line' method
741 0         0 my $line;
742 0         0 while ( defined( $line = $line_source_object->get_line() ) ) {
743 0         0 push( @{$rinput_lines}, $line );
  0         0  
744             }
745 0         0 $source_string = join( EMPTY_STRING, @{$rinput_lines} );
  0         0  
746             }
747              
748             # Get trimmed lines. It is much faster to strip leading whitespace from
749             # the whole input file at once than line-by-line.
750              
751             # Add a terminal newline if needed to keep line count unchanged:
752             # - avoids problem of losing a last line which is just \r and no \n (c283)
753             # - but check input line count to avoid adding line to an empty file (c286)
754 649 100 100     1000 if ( @{$rinput_lines} && $source_string !~ /\n$/ ) {
  649         5191  
755 1         2 $source_string .= "\n";
756             }
757              
758             # Remove leading whitespace except newlines
759 649         7505 $source_string =~ s/^ [^\S\n]+ //gxm;
760              
761             # Then break the string into lines
762 649         3877 my @trimmed_lines = split /^/, $source_string;
763              
764             # Safety check - a change in number of lines would be a disaster
765 649 50       1027 if ( @trimmed_lines != @{$rinput_lines} ) {
  649         1764  
766              
767             # Shouldn't happen - die in DEVEL_MODE and fix
768 0         0 my $ntr = @trimmed_lines;
769 0         0 my $utr = @{$rinput_lines};
  0         0  
770 0         0 DEVEL_MODE
771             && Fault("trimmed / untrimmed line counts differ: $ntr / $utr\n");
772              
773             # Otherwise we can safely continue with undefined trimmed lines. They
774             # will be detected and fixed later.
775 0         0 @trimmed_lines = ();
776             }
777              
778 649         1408 $self->[_rinput_lines_] = $rinput_lines;
779 649         1350 $self->[_rtrimmed_input_lines_] = \@trimmed_lines;
780 649         1151 $self->[_input_line_index_next_] = 0;
781 649         1358 return;
782             } ## end sub make_source_array
783              
784             sub peek_ahead {
785 1377     1377 0 2425 my ( $self, $buffer_index ) = @_;
786              
787             # look $buffer_index lines ahead of the current location in the input
788             # stream without disturbing the input
789 1377         1761 my $line;
790 1377         1969 my $rinput_lines = $self->[_rinput_lines_];
791 1377         2412 my $line_index = $buffer_index + $self->[_input_line_index_next_];
792 1377 100       1739 if ( $line_index < @{$rinput_lines} ) {
  1377         3076  
793 1365         2151 $line = $rinput_lines->[$line_index];
794             }
795 1377         3837 return $line;
796             } ## end sub peek_ahead
797              
798             #-----------------------------------------
799             # interface to Perl::Tidy::Logger routines
800             #-----------------------------------------
801             sub warning {
802              
803 0     0 0 0 my ( $self, $msg ) = @_;
804              
805 0         0 my $logger_object = $self->[_logger_object_];
806 0         0 $self->[_warning_count_]++;
807 0 0       0 if ($logger_object) {
808 0         0 my $msg_line_number = $self->[_last_line_number_];
809 0         0 $logger_object->warning( $msg, $msg_line_number );
810             }
811 0         0 return;
812             } ## end sub warning
813              
814             sub warning_do_not_format {
815 0     0 0 0 my ( $self, $msg ) = @_;
816              
817             # Issue a warning message and set a flag to skip formatting this file.
818 0         0 $self->warning($msg);
819 0         0 $self->[_do_not_format_] = 1;
820 0         0 return;
821             } ## end sub warning_do_not_format
822              
823             sub complain {
824              
825 35     35 0 80 my ( $self, $msg ) = @_;
826              
827 35         68 my $logger_object = $self->[_logger_object_];
828 35 50       96 if ($logger_object) {
829 35         59 my $input_line_number = $self->[_last_line_number_];
830 35         170 $logger_object->complain( $msg, $input_line_number );
831             }
832 35         54 return;
833             } ## end sub complain
834              
835             sub write_logfile_entry {
836              
837 2162     2162 0 4317 my ( $self, $msg ) = @_;
838              
839 2162         3374 my $logger_object = $self->[_logger_object_];
840 2162 100       4197 if ($logger_object) {
841 2156         5744 $logger_object->write_logfile_entry($msg);
842             }
843 2162         3530 return;
844             } ## end sub write_logfile_entry
845              
846             sub interrupt_logfile {
847              
848 0     0 0 0 my $self = shift;
849              
850 0         0 my $logger_object = $self->[_logger_object_];
851 0 0       0 if ($logger_object) {
852 0         0 $logger_object->interrupt_logfile();
853             }
854 0         0 return;
855             } ## end sub interrupt_logfile
856              
857             sub resume_logfile {
858              
859 0     0 0 0 my $self = shift;
860              
861 0         0 my $logger_object = $self->[_logger_object_];
862 0 0       0 if ($logger_object) {
863 0         0 $logger_object->resume_logfile();
864             }
865 0         0 return;
866             } ## end sub resume_logfile
867              
868             sub brace_warning {
869 0     0 0 0 my ( $self, $msg ) = @_;
870 0         0 $self->[_saw_brace_error_]++;
871              
872 0         0 my $logger_object = $self->[_logger_object_];
873 0 0       0 if ($logger_object) {
874 0         0 my $msg_line_number = $self->[_last_line_number_];
875 0         0 $logger_object->brace_warning( $msg, $msg_line_number );
876             }
877 0         0 return;
878             } ## end sub brace_warning
879              
880             sub increment_brace_error {
881              
882             # This is same as sub brace_warning but without a message
883 0     0 0 0 my $self = shift;
884 0         0 $self->[_saw_brace_error_]++;
885              
886 0         0 my $logger_object = $self->[_logger_object_];
887 0 0       0 if ($logger_object) {
888 0         0 $logger_object->increment_brace_error();
889             }
890 0         0 return;
891             } ## end sub increment_brace_error
892              
893             sub get_saw_brace_error {
894 0     0 0 0 my $self = shift;
895 0         0 return $self->[_saw_brace_error_];
896             }
897              
898             sub report_definite_bug {
899 0     0 0 0 my $self = shift;
900 0         0 $self->[_hit_bug_] = 1;
901 0         0 my $logger_object = $self->[_logger_object_];
902 0 0       0 if ($logger_object) {
903 0         0 $logger_object->report_definite_bug();
904             }
905 0         0 return;
906             } ## end sub report_definite_bug
907              
908             #-------------------------------------
909             # Interface to Perl::Tidy::Diagnostics
910             #-------------------------------------
911             sub write_diagnostics {
912 0     0 0 0 my ( $self, $msg ) = @_;
913 0         0 my $input_line_number = $self->[_last_line_number_];
914 0         0 my $diagnostics_object = $self->[_diagnostics_object_];
915 0 0       0 if ($diagnostics_object) {
916 0         0 $diagnostics_object->write_diagnostics( $msg, $input_line_number );
917             }
918 0         0 return;
919             } ## end sub write_diagnostics
920              
921             sub report_tokenization_errors {
922              
923 649     649 0 1461 my ($self) = @_;
924              
925             # Report any tokenization errors and return a flag '$severe_error'.
926             # Set $severe_error = 1 if the tokenization errors are so severe that
927             # the formatter should not attempt to format the file. Instead, it will
928             # just output the file verbatim.
929              
930             # set severe error flag if tokenizer has encountered file reading problems
931             # (i.e. unexpected binary characters)
932             # or code which may not be formatted correctly (such as 'my sub q')
933             # The difference between _in_error_ and _do_not_format_ is that
934             # _in_error_ stops the tokenizer immediately whereas
935             # _do_not_format_ lets the tokenizer finish so that all errors are seen
936             # Both block formatting and cause the input stream to be output verbatim.
937 649   33     3369 my $severe_error = $self->[_in_error_] || $self->[_do_not_format_];
938              
939             # And do not format if it looks like an html file (c209)
940 649   33     3442 $severe_error ||= $self->[_html_tag_count_] && $self->[_warning_count_];
      33        
941              
942             # Inform the logger object on length of input stream
943 649         1295 my $logger_object = $self->[_logger_object_];
944 649 100       1760 if ($logger_object) {
945 647         1186 my $last_line_number = $self->[_last_line_number_];
946 647         3205 $logger_object->set_last_input_line_number($last_line_number);
947             }
948              
949 649         1193 my $maxle = $rOpts_maximum_level_errors;
950 649         1001 my $maxue = $rOpts_maximum_unexpected_errors;
951 649 50       1523 $maxle = 1 unless ( defined($maxle) );
952 649 50       1494 $maxue = 0 unless ( defined($maxue) );
953              
954 649         2007 my $level = get_indentation_level();
955 649 50       1979 if ( $level != $self->[_starting_level_] ) {
956 0         0 $self->warning("final indentation level: $level\n");
957              
958 0         0 $self->[_show_indentation_table_] = 1;
959              
960 0         0 my $level_diff = $self->[_starting_level_] - $level;
961 0 0       0 if ( $level_diff < 0 ) { $level_diff = -$level_diff }
  0         0  
962              
963             # Set severe error flag if the level error is greater than 1.
964             # The formatter can function for any level error but it is probably
965             # best not to attempt formatting for a high level error.
966 0 0 0     0 if ( $maxle >= 0 && $level_diff > $maxle ) {
967 0         0 $severe_error = 1;
968 0         0 $self->warning(<<EOM);
969             Formatting will be skipped because level error '$level_diff' exceeds -maxle=$maxle; use -maxle=-1 to force formatting
970             EOM
971             }
972             }
973              
974 649         2992 $self->check_final_nesting_depths();
975              
976 649 50       1712 if ( $self->[_show_indentation_table_] ) {
977 0         0 $self->show_indentation_table();
978             }
979              
980             # Likewise, large numbers of brace errors usually indicate non-perl
981             # scripts, so set the severe error flag at a low number. This is similar
982             # to the level check, but different because braces may balance but be
983             # incorrectly interlaced.
984 649 50       1901 if ( $self->[_true_brace_error_count_] > 2 ) {
985 0         0 $severe_error = 1;
986             }
987              
988 649 50 66     1881 if ( $rOpts_look_for_hash_bang
989             && !$self->[_saw_hash_bang_] )
990             {
991 0         0 $self->warning(
992             "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
993             }
994              
995 649 50       1710 if ( $self->[_in_format_] ) {
996 0         0 $self->warning("hit EOF while in format description\n");
997             }
998              
999 649 50       1761 if ( $self->[_in_code_skipping_] ) {
1000 0         0 $self->write_logfile_entry(
1001             "hit EOF while in lines skipped with --code-skipping\n");
1002             }
1003              
1004 649 50       1718 if ( $self->[_in_pod_] ) {
1005              
1006             # Just write log entry if this is after __END__ or __DATA__
1007             # because this happens to often, and it is not likely to be
1008             # a parsing error.
1009 0 0 0     0 if ( $self->[_saw_data_] || $self->[_saw_end_] ) {
1010 0         0 $self->write_logfile_entry(
1011             "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
1012             );
1013             }
1014              
1015             else {
1016 0         0 $self->complain(
1017             "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
1018             );
1019             }
1020              
1021             }
1022              
1023 649 50       1759 if ( $self->[_in_here_doc_] ) {
1024 0         0 $severe_error = 1;
1025 0         0 my $here_doc_target = $self->[_here_doc_target_];
1026 0         0 my $started_looking_for_here_target_at =
1027             $self->[_started_looking_for_here_target_at_];
1028 0 0       0 if ($here_doc_target) {
1029 0         0 $self->warning(
1030             "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
1031             );
1032             }
1033             else {
1034 0         0 $self->warning(<<EOM);
1035             Hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string.
1036             (Perl will match to the end of file but this may not be intended).
1037             EOM
1038             }
1039 0         0 my $nearly_matched_here_target_at =
1040             $self->[_nearly_matched_here_target_at_];
1041 0 0       0 if ($nearly_matched_here_target_at) {
1042 0         0 $self->warning(
1043             "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
1044             );
1045             }
1046             }
1047              
1048             # Something is seriously wrong if we ended inside a quote
1049 649 50       1613 if ( $self->[_in_quote_] ) {
1050 0         0 $severe_error = 1;
1051 0         0 my $line_start_quote = $self->[_line_start_quote_];
1052 0         0 my $quote_target = $self->[_quote_target_];
1053 0 0       0 my $what =
1054             ( $self->[_in_attribute_list_] )
1055             ? "attribute list"
1056             : "quote/pattern";
1057 0         0 $self->warning(
1058             "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
1059             );
1060             }
1061              
1062 649 50       1591 if ( $self->[_hit_bug_] ) {
1063 0         0 $severe_error = 1;
1064             }
1065              
1066             # Multiple "unexpected" type tokenization errors usually indicate parsing
1067             # non-perl scripts, or that something is seriously wrong, so we should
1068             # avoid formatting them. This can happen for example if we run perltidy on
1069             # a shell script or an html file. But unfortunately this check can
1070             # interfere with some extended syntaxes, such as RPerl, so it has to be off
1071             # by default.
1072 649         1161 my $ue_count = $self->[_unexpected_error_count_];
1073 649 50 33     1969 if ( $maxue > 0 && $ue_count > $maxue ) {
1074 0         0 $self->warning(<<EOM);
1075             Formatting will be skipped since unexpected token count = $ue_count > -maxue=$maxue; use -maxue=0 to force formatting
1076             EOM
1077 0         0 $severe_error = 1;
1078             }
1079              
1080 649 100       1722 if ( !$self->[_saw_perl_dash_w_] ) {
1081 632         2140 $self->write_logfile_entry("Suggest including 'use warnings;'\n");
1082             }
1083              
1084 649 50       1716 if ( $self->[_saw_perl_dash_P_] ) {
1085 0         0 $self->write_logfile_entry(
1086             "Use of -P parameter for defines is discouraged\n");
1087             }
1088              
1089 649 100       1649 if ( !$self->[_saw_use_strict_] ) {
1090 635         1495 $self->write_logfile_entry("Suggest including 'use strict;'\n");
1091             }
1092              
1093             # it is suggested that labels have at least one upper case character
1094             # for legibility and to avoid code breakage as new keywords are introduced
1095 649 100       1915 if ( $self->[_rlower_case_labels_at_] ) {
1096 12         19 my @lower_case_labels_at = @{ $self->[_rlower_case_labels_at_] };
  12         35  
1097 12         36 $self->write_logfile_entry(
1098             "Suggest using upper case characters in label(s)\n");
1099 12         25 local $LIST_SEPARATOR = ')(';
1100 12         57 $self->write_logfile_entry(
1101             " defined at line(s): (@lower_case_labels_at)\n");
1102             }
1103              
1104             # Get the text of any leading format skipping tag
1105 649         1065 my $early_FS_end_marker;
1106 649         1170 my $rformat_skipping_list = $self->[_rformat_skipping_list_];
1107 649 100 100     1010 if ( @{$rformat_skipping_list} && $rformat_skipping_list->[0]->[0] == -1 ) {
  649         2103  
1108 3         9 $early_FS_end_marker = $rformat_skipping_list->[0]->[2];
1109             }
1110              
1111             return {
1112 649         3348 severe_error => $severe_error,
1113             early_FS_end_marker => $early_FS_end_marker,
1114             };
1115              
1116             } ## end sub report_tokenization_errors
1117              
1118             sub show_indentation_table {
1119 0     0 0 0 my ($self) = @_;
1120              
1121             # Output indentation table made at closing braces. This can be helpful for
1122             # the case of a missing brace in a previously formatted file.
1123              
1124             # skip if problem reading file
1125 0 0       0 return if ( $self->[_in_error_] );
1126              
1127             # skip if -wc is used (rare); it is too complex to use
1128 0 0       0 return if ($rOpts_whitespace_cycle);
1129              
1130             # skip if non-indenting-brace-prefix (very rare, but could be fixed)
1131 0 0       0 return if ($rOpts_non_indenting_brace_prefix);
1132              
1133             # skip if starting level is not zero (probably in editor)
1134 0 0       0 return if ($rOpts_starting_indentation_level);
1135              
1136             # skip if indentation analysis is not valid
1137 0         0 my $rhash = $self->[_rclosing_brace_indentation_hash_];
1138 0 0       0 return if ( !$rhash->{valid} );
1139              
1140 0         0 my $rhistory_line_number = $rhash->{rhistory_line_number};
1141 0         0 my $rhistory_level_diff = $rhash->{rhistory_level_diff};
1142 0         0 my $rhistory_anchor_point = $rhash->{rhistory_anchor_point};
1143              
1144             # Remove the first artificial point from the table
1145 0         0 shift @{$rhistory_line_number};
  0         0  
1146 0         0 shift @{$rhistory_level_diff};
  0         0  
1147 0         0 shift @{$rhistory_anchor_point};
  0         0  
1148              
1149             # Remove dubious points at an anchor point = 2 and beyond
1150             # These can occur when non-indenting braces are used
1151 0         0 my $num_his = @{$rhistory_level_diff};
  0         0  
1152 0         0 foreach my $i ( 0 .. $num_his - 1 ) {
1153 0 0       0 if ( $rhistory_anchor_point->[$i] == 2 ) {
1154 0         0 $num_his = $i;
1155 0         0 last;
1156             }
1157             }
1158 0 0       0 return if ( $num_his <= 1 );
1159              
1160             # Ignore an ending non-anchor point
1161 0 0       0 if ( !$rhistory_anchor_point->[-1] ) {
1162 0         0 $num_his -= 1;
1163             }
1164              
1165             # Ignore an ending point which is the same as the previous point
1166 0 0       0 if ( $num_his > 1 ) {
1167 0 0       0 if ( $rhistory_level_diff->[ $num_his - 1 ] ==
1168             $rhistory_level_diff->[ $num_his - 2 ] )
1169             {
1170 0         0 $num_his -= 1;
1171             }
1172             }
1173              
1174             # Skip if the table does not have at least 2 points to pinpoint an error
1175 0 0       0 return if ( $num_his <= 1 );
1176              
1177             # Skip if first point shows a level error - the analysis may not be valid
1178 0 0       0 return if ( $rhistory_level_diff->[0] );
1179              
1180             # Remove table points which return from negative to zero; they follow
1181             # an error and may not be correct. c448.
1182 0         0 my $min_lev = $rhistory_level_diff->[0];
1183 0         0 foreach my $ii ( 1 .. $num_his - 1 ) {
1184 0         0 my $lev = $rhistory_level_diff->[$ii];
1185 0 0       0 if ( $lev < $min_lev ) { $min_lev = $lev; next }
  0         0  
  0         0  
1186 0 0 0     0 if ( $min_lev < 0 && $lev >= 0 ) {
1187 0         0 $num_his = $ii;
1188 0         0 last;
1189             }
1190             }
1191              
1192             # Since the table could be arbitrarily large, we will limit the table to N
1193             # lines. If there are more lines than that, we will show N-3 lines, then
1194             # ..., then the last 2 lines. Allow about 3 lines per error, so a table
1195             # limit of 10 can localize up to about 3 errors in a file.
1196 0         0 my $nlines_max = 10;
1197 0         0 my @pre_indexes = ( 0 .. $num_his - 1 );
1198 0         0 my @post_indexes = ();
1199 0 0       0 if ( @pre_indexes > $nlines_max ) {
1200 0 0       0 if ( $nlines_max >= 5 ) {
1201 0         0 @pre_indexes = ( 0 .. $nlines_max - 4 );
1202 0         0 @post_indexes = ( $num_his - 2, $num_his - 1 );
1203             }
1204             else {
1205 0         0 @pre_indexes = ( 0 .. $nlines_max - 1 );
1206             }
1207             }
1208              
1209 0         0 my @output_lines;
1210 0         0 push @output_lines, <<EOM;
1211             Table of initial nesting level differences at closing braces.
1212             This might help localize brace errors IF perltidy previously formatted the file.
1213             line: error=[new brace level]-[old indentation level]
1214             EOM
1215 0         0 foreach my $i (@pre_indexes) {
1216 0         0 my $lno = $rhistory_line_number->[$i];
1217 0         0 my $diff = $rhistory_level_diff->[$i];
1218 0         0 push @output_lines, <<EOM;
1219             $lno: $diff
1220             EOM
1221             }
1222 0 0       0 if (@post_indexes) {
1223 0         0 push @output_lines, "...\n";
1224 0         0 foreach my $i (@post_indexes) {
1225 0         0 my $lno = $rhistory_line_number->[$i];
1226 0         0 my $diff = $rhistory_level_diff->[$i];
1227 0         0 push @output_lines, <<EOM;
1228             $lno: $diff
1229             EOM
1230             }
1231             }
1232              
1233             # Try to give a hint
1234 0         0 my $level_diff_1 = $rhistory_level_diff->[1];
1235 0         0 my $ln_0 = $rhistory_line_number->[0];
1236 0         0 my $ln_1 = $rhistory_line_number->[1];
1237 0 0       0 if ( $level_diff_1 < 0 ) {
    0          
1238 0         0 push @output_lines,
1239             "There may be an extra '}' or missing '{' between lines $ln_0 and $ln_1\n";
1240             }
1241             elsif ( $level_diff_1 > 0 ) {
1242 0         0 push @output_lines,
1243             "There may be a missing '}' or extra '{' between lines $ln_0 and $ln_1\n";
1244             }
1245             else {
1246             ## two leading zeros in the table - probably can't happen - no hint
1247             }
1248              
1249 0         0 push @output_lines, "\n";
1250 0         0 my $output_str = join EMPTY_STRING, @output_lines;
1251              
1252 0         0 $self->interrupt_logfile();
1253 0         0 $self->warning($output_str);
1254 0         0 $self->resume_logfile();
1255              
1256 0         0 return;
1257             } ## end sub show_indentation_table
1258              
1259             sub report_v_string {
1260              
1261             # warn if this version can't handle v-strings
1262 2     2 0 7 my ( $self, $tok ) = @_;
1263 2 50       6 if ( $] < 5.006 ) {
1264 0         0 $self->warning(
1265             "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
1266             );
1267             }
1268 2         6 return;
1269             } ## end sub report_v_string
1270              
1271             sub is_valid_token_type {
1272 447     447 0 530 my ($type) = @_;
1273 447         1019 return $is_valid_token_type{$type};
1274             }
1275              
1276             sub log_numbered_msg {
1277 208     208 0 446 my ( $self, $msg ) = @_;
1278              
1279             # write input line number + message to logfile
1280 208         318 my $input_line_number = $self->[_last_line_number_];
1281 208         785 $self->write_logfile_entry("Line $input_line_number: $msg");
1282 208         438 return;
1283             } ## end sub log_numbered_msg
1284              
1285             sub get_line {
1286              
1287 9619     9619 0 13520 my $self = shift;
1288              
1289             # Read the next input line and tokenize it
1290             # Returns:
1291             # $line_of_tokens = ref to hash of info for the tokenized line
1292              
1293             # USES GLOBAL VARIABLES:
1294             # $brace_depth, $square_bracket_depth, $paren_depth
1295              
1296             # get the next line from the input array
1297 9619         13025 my $input_line;
1298             my $trimmed_input_line;
1299 9619         13495 my $line_index = $self->[_input_line_index_next_];
1300 9619         12225 my $rinput_lines = $self->[_rinput_lines_];
1301 9619 100       11201 if ( $line_index < @{$rinput_lines} ) {
  9619         16073  
1302 8970         16158 $trimmed_input_line = $self->[_rtrimmed_input_lines_]->[$line_index];
1303 8970         15306 $input_line = $rinput_lines->[ $line_index++ ];
1304 8970         12094 $self->[_input_line_index_next_] = $line_index;
1305             }
1306              
1307             # End of file .. check if file ends in a binary operator (c565)
1308             else {
1309 649 0 33     3566 if (
      33        
1310             $is_binary_or_unary_operator_type{$last_nonblank_type}
1311             || ( $last_nonblank_type eq 'k'
1312             && $is_binary_or_unary_keyword{$last_nonblank_token} )
1313             )
1314             {
1315 0         0 $self->warning(
1316             "Unexpected EOF at operator '$last_nonblank_token'\n");
1317              
1318             # avoid repeating this message
1319 0         0 $last_nonblank_token = ';';
1320 0         0 $last_nonblank_type = ';';
1321             }
1322             }
1323              
1324 9619         14196 $self->[_line_of_text_] = $input_line;
1325              
1326 9619 100       17462 return if ( !defined($input_line) );
1327              
1328 8970         12230 my $input_line_number = ++$self->[_last_line_number_];
1329              
1330             # Find and remove what characters terminate this line, including any
1331             # control r
1332 8970         11761 my $input_line_separator = EMPTY_STRING;
1333 8970 100       19802 if ( chomp $input_line ) {
1334 8969         18124 $input_line_separator = $INPUT_RECORD_SEPARATOR;
1335             }
1336              
1337             # The first test here very significantly speeds things up, but be sure to
1338             # keep the regex and hash %other_line_endings the same.
1339 8970 50       22354 if ( $other_line_endings{ substr( $input_line, -1 ) } ) {
1340 0 0       0 if ( $input_line =~ s/([\r\035\032])+$// ) {
1341 0         0 $input_line_separator = $1 . $input_line_separator;
1342              
1343             # This could make the trimmed input line incorrect, so the
1344             # safe thing to do is to make it undef to force it to be
1345             # recomputed later.
1346 0         0 $trimmed_input_line = undef;
1347             }
1348             }
1349              
1350             # For backwards compatibility we keep the line text terminated with
1351             # a newline character
1352 8970         12443 $input_line .= "\n";
1353 8970         12011 $self->[_line_of_text_] = $input_line;
1354              
1355             # create a data structure describing this line which will be
1356             # returned to the caller.
1357              
1358             # _line_type codes are:
1359             # SYSTEM - system-specific code before hash-bang line
1360             # CODE - line of perl code (including comments)
1361             # POD_START - line starting pod, such as '=head'
1362             # POD - pod documentation text
1363             # POD_END - last line of pod section, '=cut'
1364             # HERE - text of here-document
1365             # HERE_END - last line of here-doc (target word)
1366             # FORMAT - format section
1367             # FORMAT_END - last line of format section, '.'
1368             # SKIP - code skipping section
1369             # SKIP_END - last line of code skipping section, '#>>V'
1370             # DATA_START - __DATA__ line
1371             # DATA - unidentified text following __DATA__
1372             # END_START - __END__ line
1373             # END - unidentified text following __END__
1374             # ERROR - we are in big trouble, probably not a perl script
1375              
1376             # Other variables:
1377             # _curly_brace_depth - depth of curly braces at start of line
1378             # _square_bracket_depth - depth of square brackets at start of line
1379             # _paren_depth - depth of parens at start of line
1380             # _starting_in_quote - this line continues a multi-line quote
1381             # (so don't trim leading blanks!)
1382             # _ending_in_quote - this line ends in a multi-line quote
1383             # (so don't trim trailing blanks!)
1384 8970         43010 my $line_of_tokens = {
1385             _line_type => 'EOF',
1386             _line_text => $input_line,
1387             _line_number => $input_line_number,
1388             _guessed_indentation_level => 0,
1389             _curly_brace_depth => $brace_depth,
1390             _square_bracket_depth => $square_bracket_depth,
1391             _paren_depth => $paren_depth,
1392             ## Skip these needless initializations for efficiency:
1393             ## _rtoken_type => undef,
1394             ## _rtokens => undef,
1395             ## _rlevels => undef,
1396             ## _rblock_type => undef,
1397             ## _rtype_sequence => undef,
1398             ## _starting_in_quote => 0,
1399             ## _ending_in_quote => 0,
1400             };
1401              
1402             # must print line unchanged if we are in a here document
1403 8970 100       38527 if ( $self->[_in_here_doc_] ) {
    100          
    100          
    100          
    50          
    100          
    100          
1404              
1405 32         49 $line_of_tokens->{_line_type} = 'HERE';
1406 32         46 my $here_doc_target = $self->[_here_doc_target_];
1407 32         54 my $here_quote_character = $self->[_here_quote_character_];
1408 32         46 my $candidate_target = $input_line;
1409 32         42 chomp $candidate_target;
1410              
1411             # Handle <<~ targets, which are indicated here by a leading space on
1412             # the here quote character
1413 32 100       84 if ( $here_quote_character =~ /^\s/ ) {
1414 4         12 $candidate_target =~ s/^\s+//;
1415             }
1416 32 100       67 if ( $candidate_target eq $here_doc_target ) {
1417 13         25 $self->[_nearly_matched_here_target_at_] = undef;
1418 13         22 $line_of_tokens->{_line_type} = 'HERE_END';
1419 13         47 $self->log_numbered_msg("Exiting HERE document $here_doc_target\n");
1420              
1421 13         24 my $rhere_target_list = $self->[_rhere_target_list_];
1422 13 100       21 if ( @{$rhere_target_list} ) { # there can be multiple here targets
  13         38  
1423             ( $here_doc_target, $here_quote_character ) =
1424 2         3 @{ shift @{$rhere_target_list} };
  2         3  
  2         6  
1425 2         4 $self->[_here_doc_target_] = $here_doc_target;
1426 2         2 $self->[_here_quote_character_] = $here_quote_character;
1427 2         6 $self->log_numbered_msg(
1428             "Entering HERE document $here_doc_target\n");
1429 2         4 $self->[_nearly_matched_here_target_at_] = undef;
1430 2         4 $self->[_started_looking_for_here_target_at_] =
1431             $input_line_number;
1432             }
1433             else {
1434 11         34 $self->[_in_here_doc_] = 0;
1435 11         20 $self->[_here_doc_target_] = EMPTY_STRING;
1436 11         23 $self->[_here_quote_character_] = EMPTY_STRING;
1437             }
1438             }
1439              
1440             # check for error of extra whitespace
1441             # note for PERL6: leading whitespace is allowed
1442             else {
1443 19         144 $candidate_target =~ s/^ \s+ | \s+ $//gx; # trim both ends
1444 19 50       52 if ( $candidate_target eq $here_doc_target ) {
1445 0         0 $self->[_nearly_matched_here_target_at_] = $input_line_number;
1446             }
1447             }
1448 32         96 return $line_of_tokens;
1449             }
1450              
1451             # Print line unchanged if we are in a format section
1452             elsif ( $self->[_in_format_] ) {
1453              
1454 3 100       9 if ( $input_line =~ /^\.[\s#]*$/ ) {
1455              
1456             # Decrement format depth count at a '.' after a 'format'
1457 1         2 $self->[_in_format_]--;
1458              
1459             # This is the end when count reaches 0
1460 1 50       3 if ( !$self->[_in_format_] ) {
1461 1         4 $self->log_numbered_msg("Exiting format section\n");
1462 1         2 $line_of_tokens->{_line_type} = 'FORMAT_END';
1463              
1464             # Make the tokenizer mark an opening brace which follows
1465             # as a code block. Fixes issue c202/t032.
1466 1         2 $last_nonblank_token = ';';
1467 1         1 $last_nonblank_type = ';';
1468             }
1469             }
1470             else {
1471 2         4 $line_of_tokens->{_line_type} = 'FORMAT';
1472 2 50       5 if ( $input_line =~ /^\s*format\s+\w+/ ) {
1473              
1474             # Increment format depth count at a 'format' within a 'format'
1475             # This is a simple way to handle nested formats (issue c019).
1476 0         0 $self->[_in_format_]++;
1477             }
1478             }
1479 3         7 return $line_of_tokens;
1480             }
1481              
1482             # must print line unchanged if we are in pod documentation
1483             elsif ( $self->[_in_pod_] ) {
1484              
1485 51         84 $line_of_tokens->{_line_type} = 'POD';
1486 51 100       141 if ( $input_line =~ /^=cut/ ) {
1487 22         48 $line_of_tokens->{_line_type} = 'POD_END';
1488 22         60 $self->log_numbered_msg("Exiting POD section\n");
1489 22         35 $self->[_in_pod_] = 0;
1490             }
1491 51 50 33     178 if ( $input_line =~ /^\#\!.*perl\b/ && !$self->[_in_end_] ) {
1492 0         0 $self->warning(
1493             "Hash-bang in pod can cause older versions of perl to fail! \n"
1494             );
1495             }
1496              
1497 51         177 return $line_of_tokens;
1498             }
1499              
1500             # print line unchanged if in skipped section
1501             elsif ( $self->[_in_code_skipping_] ) {
1502              
1503 8         12 $line_of_tokens->{_line_type} = 'SKIP';
1504 8 100       118 if ( $input_line =~ /$code_skipping_pattern_end/ ) {
    50          
1505 2         6 $line_of_tokens->{_line_type} = 'SKIP_END';
1506 2         5 $self->log_numbered_msg("Exiting code-skipping section\n");
1507 2         3 $self->[_in_code_skipping_] = 0;
1508             }
1509             elsif ( $input_line =~ /$code_skipping_pattern_begin/ ) {
1510              
1511             # warn of duplicate starting comment lines, git #118
1512 0         0 my $lno = $self->[_in_code_skipping_];
1513 0         0 $self->warning(
1514             "Already in code-skipping section which started at line $lno\n"
1515             );
1516             }
1517             else {
1518             # not a code-skipping control line
1519             }
1520 8         20 return $line_of_tokens;
1521             }
1522              
1523             # must print line unchanged if we have seen a severe error (i.e., we
1524             # are seeing illegal tokens and cannot continue. Syntax errors do
1525             # not pass this route). Calling routine can decide what to do, but
1526             # the default can be to just pass all lines as if they were after __END__
1527             elsif ( $self->[_in_error_] ) {
1528 0         0 $line_of_tokens->{_line_type} = 'ERROR';
1529 0         0 return $line_of_tokens;
1530             }
1531              
1532             # print line unchanged if we are __DATA__ section
1533             elsif ( $self->[_in_data_] ) {
1534              
1535             # ...but look for POD
1536             # Note that the _in_data and _in_end flags remain set
1537             # so that we return to that state after seeing the
1538             # end of a pod section
1539 1 50 33     11 if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
1540 0         0 $line_of_tokens->{_line_type} = 'POD_START';
1541 0         0 $self->log_numbered_msg("Entering POD section\n");
1542 0         0 $self->[_in_pod_] = 1;
1543 0         0 return $line_of_tokens;
1544             }
1545             else {
1546 1         5 $line_of_tokens->{_line_type} = 'DATA';
1547 1         6 return $line_of_tokens;
1548             }
1549             }
1550              
1551             # print line unchanged if we are in __END__ section
1552             elsif ( $self->[_in_end_] ) {
1553              
1554             # ...but look for POD
1555             # Note that the _in_data and _in_end flags remain set
1556             # so that we return to that state after seeing the
1557             # end of a pod section
1558 56 100 66     197 if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
1559 7         17 $line_of_tokens->{_line_type} = 'POD_START';
1560 7         25 $self->log_numbered_msg("Entering POD section\n");
1561 7         11 $self->[_in_pod_] = 1;
1562 7         23 return $line_of_tokens;
1563             }
1564             else {
1565 49         69 $line_of_tokens->{_line_type} = 'END';
1566 49         119 return $line_of_tokens;
1567             }
1568             }
1569             else {
1570             # not a special control line
1571             }
1572              
1573             # check for a hash-bang line if we haven't seen one
1574 8819 100 100     31965 if ( !$self->[_saw_hash_bang_]
      66        
1575             && substr( $input_line, 0, 2 ) eq '#!'
1576             && $input_line =~ /^\#\!.*perl\b/ )
1577             {
1578 16         36 $self->[_saw_hash_bang_] = $input_line_number;
1579              
1580             # check for -w and -P flags
1581 16 50       78 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
1582 0         0 $self->[_saw_perl_dash_P_] = 1;
1583             }
1584              
1585 16 100       82 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
1586 9         18 $self->[_saw_perl_dash_w_] = 1;
1587             }
1588              
1589 16 100 33     114 if (
      66        
      100        
      66        
1590             $input_line_number > 1
1591              
1592             # leave any hash bang in a BEGIN block alone
1593             # i.e. see 'debugger-duck_type.t'
1594             && !(
1595             $last_nonblank_block_type
1596             && $last_nonblank_block_type eq 'BEGIN'
1597             )
1598             && !$rOpts_look_for_hash_bang
1599              
1600             # Try to avoid giving a false alarm at a simple comment.
1601             # These look like valid hash-bang lines:
1602              
1603             #!/usr/bin/perl -w
1604             #! /usr/bin/perl -w
1605             #!c:\perl\bin\perl.exe
1606              
1607             # These are comments:
1608             #! I love perl
1609             #! sunos does not yet provide a /usr/bin/perl
1610              
1611             # Comments typically have multiple spaces, which suggests
1612             # the filter
1613             && $input_line =~ /^\#\!(\s+)?(\S+)?perl/
1614             )
1615             {
1616              
1617             # this is helpful for VMS systems; we may have accidentally
1618             # tokenized some DCL commands
1619 1 50       4 if ( $self->[_started_tokenizing_] ) {
1620 0         0 $self->warning(
1621             "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
1622             );
1623             }
1624             else {
1625 1         4 $self->complain("Useless hash-bang after line 1\n");
1626             }
1627             }
1628              
1629             # Report the leading hash-bang as a system line
1630             # This will prevent -dac from deleting it
1631             else {
1632 15         36 $line_of_tokens->{_line_type} = 'SYSTEM';
1633 15         74 return $line_of_tokens;
1634             }
1635             }
1636              
1637             # wait for a hash-bang before parsing if the user invoked us with -x
1638 8804 100 100     17387 if ( $rOpts_look_for_hash_bang
1639             && !$self->[_saw_hash_bang_] )
1640             {
1641 5         8 $line_of_tokens->{_line_type} = 'SYSTEM';
1642 5         12 return $line_of_tokens;
1643             }
1644              
1645             # a first line of the form ': #' will be marked as SYSTEM
1646             # since lines of this form may be used by tcsh
1647 8799 50 66     18695 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
1648 0         0 $line_of_tokens->{_line_type} = 'SYSTEM';
1649 0         0 return $line_of_tokens;
1650             }
1651              
1652             # now we know that it is ok to tokenize the line...
1653             # the line tokenizer will modify any of these private variables:
1654             # _rhere_target_list_
1655             # _in_data_
1656             # _in_end_
1657             # _in_format_
1658             # _in_error_
1659             # _in_code_skipping_
1660             # _in_format_skipping_
1661             # _in_pod_
1662             # _in_quote_
1663              
1664 8799         23188 $self->tokenize_this_line( $line_of_tokens, $trimmed_input_line );
1665              
1666             # Now finish defining the return structure and return it
1667 8799         16932 $line_of_tokens->{_ending_in_quote} = $self->[_in_quote_];
1668              
1669             # handle severe error (binary data in script)
1670 8799 50       15894 if ( $self->[_in_error_] ) {
1671 0         0 $self->[_in_quote_] = 0; # to avoid any more messages
1672 0         0 $self->warning("Giving up after error\n");
1673 0         0 $line_of_tokens->{_line_type} = 'ERROR';
1674 0         0 reset_indentation_level(0); # avoid error messages
1675 0         0 return $line_of_tokens;
1676             }
1677              
1678             # handle start of pod documentation
1679 8799 100       14516 if ( $self->[_in_pod_] ) {
1680              
1681             # This gets tricky..above a __DATA__ or __END__ section, perl
1682             # accepts '=cut' as the start of pod section. But afterwards,
1683             # only pod utilities see it and they may ignore an =cut without
1684             # leading =head. In any case, this isn't good.
1685 15 50       62 if ( $input_line =~ /^=cut\b/ ) {
1686 0 0 0     0 if ( $self->[_saw_data_] || $self->[_saw_end_] ) {
1687 0         0 $self->complain("=cut while not in pod ignored\n");
1688 0         0 $self->[_in_pod_] = 0;
1689 0         0 $line_of_tokens->{_line_type} = 'POD_END';
1690             }
1691             else {
1692 0         0 $line_of_tokens->{_line_type} = 'POD_START';
1693 0         0 if ( !DEVEL_MODE ) {
1694 0         0 $self->warning(
1695             "=cut starts a pod section .. this can fool pod utilities.\n"
1696             );
1697             }
1698 0         0 $self->log_numbered_msg("Entering POD section\n");
1699             }
1700             }
1701              
1702             else {
1703 15         37 $line_of_tokens->{_line_type} = 'POD_START';
1704 15         53 $self->log_numbered_msg("Entering POD section\n");
1705             }
1706              
1707 15         52 return $line_of_tokens;
1708             }
1709              
1710             # handle start of skipped section
1711 8784 100       14991 if ( $self->[_in_code_skipping_] ) {
1712              
1713 2         5 $line_of_tokens->{_line_type} = 'SKIP';
1714 2         9 $self->log_numbered_msg("Entering code-skipping section\n");
1715 2         6 return $line_of_tokens;
1716             }
1717              
1718             # see if this line contains here doc targets
1719 8782         11733 my $rhere_target_list = $self->[_rhere_target_list_];
1720 8782 100       9514 if ( @{$rhere_target_list} ) {
  8782         15910  
1721              
1722             my ( $here_doc_target, $here_quote_character ) =
1723 11         20 @{ shift @{$rhere_target_list} };
  11         20  
  11         33  
1724 11         29 $self->[_in_here_doc_] = 1;
1725 11         24 $self->[_here_doc_target_] = $here_doc_target;
1726 11         22 $self->[_here_quote_character_] = $here_quote_character;
1727 11         56 $self->log_numbered_msg("Entering HERE document $here_doc_target\n");
1728 11         19 $self->[_started_looking_for_here_target_at_] = $input_line_number;
1729             }
1730              
1731             # NOTE: __END__ and __DATA__ statements are written unformatted
1732             # because they can theoretically contain additional characters
1733             # which are not tokenized (and cannot be read with <DATA> either!).
1734 8782 100       18445 if ( $self->[_in_data_] ) {
    100          
1735 1         3 $line_of_tokens->{_line_type} = 'DATA_START';
1736 1         7 $self->log_numbered_msg("Starting __DATA__ section\n");
1737 1         2 $self->[_saw_data_] = 1;
1738              
1739             # keep parsing after __DATA__ if use SelfLoader was seen
1740 1 50       5 if ( $self->[_saw_selfloader_] ) {
1741 0         0 $self->[_in_data_] = 0;
1742 0         0 $self->log_numbered_msg(
1743             "SelfLoader seen, continuing; -nlsl deactivates\n");
1744             }
1745              
1746 1         7 return $line_of_tokens;
1747             }
1748              
1749             elsif ( $self->[_in_end_] ) {
1750 7         19 $line_of_tokens->{_line_type} = 'END_START';
1751 7         23 $self->log_numbered_msg("Starting __END__ section\n");
1752 7         13 $self->[_saw_end_] = 1;
1753              
1754             # keep parsing after __END__ if use AutoLoader was seen
1755 7 50       31 if ( $self->[_saw_autoloader_] ) {
1756 0         0 $self->[_in_end_] = 0;
1757 0         0 $self->log_numbered_msg(
1758             "AutoLoader seen, continuing; -nlal deactivates\n");
1759             }
1760 7         44 return $line_of_tokens;
1761             }
1762             else {
1763             # not in __END__ or __DATA__
1764             }
1765              
1766             # now, finally, we know that this line is type 'CODE'
1767 8774         13732 $line_of_tokens->{_line_type} = 'CODE';
1768              
1769             # remember if we have seen any real code
1770 8774 100 100     21974 if ( !$self->[_started_tokenizing_]
      100        
1771             && $input_line !~ /^\s*$/
1772             && $input_line !~ /^\s*#/ )
1773             {
1774 645         1280 $self->[_started_tokenizing_] = 1;
1775             }
1776              
1777 8774 100       14977 if ( $self->[_debugger_object_] ) {
1778 7         21 $self->[_debugger_object_]->write_debug_entry($line_of_tokens);
1779             }
1780              
1781             # Note: if keyword 'format' occurs in this line code, it is still CODE
1782             # (keyword 'format' need not start a line)
1783 8774 100       14750 if ( $self->[_in_format_] ) {
1784 1         4 $self->log_numbered_msg("Entering format section\n");
1785             }
1786              
1787 8774 100 100     26309 if ( $self->[_in_quote_]
    100 100        
1788             and ( $self->[_line_start_quote_] < 0 ) )
1789             {
1790 63 100       399 if ( ( my $quote_target = $self->[_quote_target_] ) !~ /^\s*$/ ) {
1791 62         112 $self->[_line_start_quote_] = $input_line_number;
1792 62         274 $self->log_numbered_msg(
1793             "Start multi-line quote or pattern ending in $quote_target\n");
1794             }
1795             }
1796             elsif ( ( $self->[_line_start_quote_] >= 0 )
1797             && !$self->[_in_quote_] )
1798             {
1799 62         132 $self->[_line_start_quote_] = -1;
1800 62         183 $self->log_numbered_msg("End of multi-line quote or pattern\n");
1801             }
1802             else {
1803             # not at the edge of a quote
1804             }
1805              
1806             # we are returning a line of CODE
1807 8774         31151 return $line_of_tokens;
1808             } ## end sub get_line
1809              
1810             sub find_starting_indentation_level {
1811              
1812             # We need to find the indentation level of the first line of the
1813             # script being formatted. Often it will be zero for an entire file,
1814             # but if we are formatting a local block of code (within an editor for
1815             # example) it may not be zero. The user may specify this with the
1816             # -sil=n parameter but normally doesn't so we have to guess.
1817             #
1818 649     649 0 1347 my ($self) = @_;
1819 649         1213 my $starting_level = 0;
1820              
1821             # use value if given as parameter
1822 649 100       2235 if ( $self->[_know_starting_level_] ) {
    100          
1823 1         2 $starting_level = $self->[_starting_level_];
1824             }
1825              
1826             # if we know there is a hash_bang line, the level must be zero
1827             elsif ($rOpts_look_for_hash_bang) {
1828 1         2 $self->[_know_starting_level_] = 1;
1829             }
1830              
1831             # otherwise figure it out from the input file
1832             else {
1833 647         992 my $line;
1834 647         980 my $i = 0;
1835              
1836             # keep looking at lines until we find a hash bang or piece of code
1837             # ( or, for now, an =pod line)
1838 647         1075 my $msg = EMPTY_STRING;
1839 647         1171 my $in_code_skipping;
1840             my $line_for_guess;
1841 647         2607 while ( defined( $line = $self->peek_ahead( $i++ ) ) ) {
1842              
1843             # if first line is #! then assume starting level is zero
1844 973 100 100     4286 if ( $i == 1 && $line =~ /^\#\!/ ) {
1845 14         32 $starting_level = 0;
1846 14         31 last;
1847             }
1848              
1849             # ignore lines fenced off with code-skipping comments
1850 959 100       3398 if ( $line =~ /^\s*#/ ) {
1851              
1852             # use first comment for indentation guess in case of no code
1853 310 100       901 if ( !defined($line_for_guess) ) { $line_for_guess = $line }
  254         507  
1854              
1855 310 50       774 if ( !$in_code_skipping ) {
1856 310 50 33     3600 if ( $rOpts_code_skipping
1857             && $line =~ /$code_skipping_pattern_begin/ )
1858             {
1859 0         0 $in_code_skipping = 1;
1860 0         0 next;
1861             }
1862             }
1863             else {
1864 0 0       0 if ( $line =~ /$code_skipping_pattern_end/ ) {
1865 0         0 $in_code_skipping = 0;
1866             }
1867 0         0 next;
1868             }
1869              
1870             # Note that we could also ignore format-skipping lines here
1871             # but it isn't clear if that would be best.
1872             # See c326 for example code.
1873              
1874 310         808 next;
1875             }
1876 649 50       1437 next if ($in_code_skipping);
1877              
1878 649 100       2587 next if ( $line =~ /^\s*$/ ); # skip past blank lines
1879              
1880             # use first line of code for indentation guess
1881 631         1031 $line_for_guess = $line;
1882 631         1072 last;
1883             } ## end while ( defined( $line = ...))
1884              
1885 647 100       1455 if ( defined($line_for_guess) ) {
1886 631         2060 $starting_level =
1887             $self->guess_old_indentation_level($line_for_guess);
1888             }
1889 647         1510 $msg = "Line $i implies starting-indentation-level = $starting_level\n";
1890 647         2739 $self->write_logfile_entry("$msg");
1891             }
1892 649         1482 $self->[_starting_level_] = $starting_level;
1893 649         2577 reset_indentation_level($starting_level);
1894 649         952 return;
1895             } ## end sub find_starting_indentation_level
1896              
1897             sub guess_old_indentation_level {
1898 634     634 0 1312 my ( $self, $line ) = @_;
1899              
1900             # Guess the indentation level of an input line.
1901             #
1902             # For the first line of code this result will define the starting
1903             # indentation level. It will mainly be non-zero when perltidy is applied
1904             # within an editor to a local block of code.
1905             #
1906             # This is an impossible task in general because we can't know what tabs
1907             # meant for the old script and how many spaces were used for one
1908             # indentation level in the given input script. For example it may have
1909             # been previously formatted with -i=7 -et=3. But we can at least try to
1910             # make sure that perltidy guesses correctly if it is applied repeatedly to
1911             # a block of code within an editor, so that the block stays at the same
1912             # level when perltidy is applied repeatedly.
1913             #
1914             # USES GLOBAL VARIABLES: (none)
1915 634         1044 my $level = 0;
1916              
1917             # find leading tabs, spaces, and any statement label
1918 634         985 my $spaces = 0;
1919 634 50       4065 if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
1920              
1921             # If there are leading tabs, we use the tab scheme for this run, if
1922             # any, so that the code will remain stable when editing.
1923 634 100       2221 if ($1) { $spaces += length($1) * $tabsize }
  2         10  
1924              
1925 634 100       1944 if ($2) { $spaces += length($2) }
  90         236  
1926              
1927             # correct for outdented labels
1928 634 50 66     2523 if ( $3
      66        
1929             && $rOpts_outdent_labels
1930             && $rOpts_continuation_indentation > 0 )
1931             {
1932 1         3 $spaces += $rOpts_continuation_indentation;
1933             }
1934             }
1935              
1936 634         1893 $level = int( $spaces / $rOpts_indent_columns );
1937 634         1316 return ($level);
1938             } ## end sub guess_old_indentation_level
1939              
1940             sub dump_functions {
1941              
1942             # This is an unused debug routine, save for future use
1943              
1944 0     0 0 0 my $fh = *STDOUT;
1945 0         0 foreach my $pkg ( keys %{$ris_user_function} ) {
  0         0  
1946 0         0 $fh->print("\nnon-constant subs in package $pkg\n");
1947              
1948 0         0 foreach my $sub ( keys %{ $ris_user_function->{$pkg} } ) {
  0         0  
1949 0         0 my $msg = EMPTY_STRING;
1950 0 0       0 if ( $ris_block_list_function->{$pkg}->{$sub} ) {
1951 0         0 $msg = 'block_list';
1952             }
1953              
1954 0 0       0 if ( $ris_block_function->{$pkg}->{$sub} ) {
1955 0         0 $msg = 'block';
1956             }
1957 0         0 $fh->print("$sub $msg\n");
1958             }
1959             }
1960              
1961 0         0 foreach my $pkg ( keys %{$ris_constant} ) {
  0         0  
1962 0         0 $fh->print("\nconstants and constant subs in package $pkg\n");
1963              
1964 0         0 foreach my $sub ( keys %{ $ris_constant->{$pkg} } ) {
  0         0  
1965 0         0 $fh->print("$sub\n");
1966             }
1967             }
1968 0         0 return;
1969             } ## end sub dump_functions
1970              
1971             sub prepare_for_a_new_file {
1972              
1973 649     649 0 1492 my ( $self, $source_object ) = @_;
1974              
1975             # copy the source object lines to an array of lines
1976 649         2765 $self->make_source_array($source_object);
1977              
1978             # previous tokens needed to determine what to expect next
1979 649         1267 $last_nonblank_token = ';'; # the only possible starting state which
1980 649         1142 $last_nonblank_type = ';'; # will make a leading brace a code block
1981 649         1140 $last_nonblank_block_type = EMPTY_STRING;
1982              
1983             # scalars for remembering statement types across multiple lines
1984 649         1035 $statement_type = EMPTY_STRING; # '' or 'use' or 'sub..' or 'case..'
1985              
1986             # scalars for remembering where we are in the file
1987 649         1129 $current_package = "main";
1988 649         1045 $context = UNKNOWN_CONTEXT;
1989              
1990             # hashes used to remember function information
1991 649         1126 $ris_constant = {}; # user-defined constants
1992 649         1535 $ris_user_function = {}; # user-defined functions
1993 649         1443 $ruser_function_prototype = {}; # their prototypes
1994 649         1208 $ris_block_function = {};
1995 649         1229 $ris_block_list_function = {};
1996 649         1307 $rsaw_function_definition = {};
1997 649         1200 $rsaw_use_module = {};
1998              
1999             # variables used to track depths of various containers
2000             # and report nesting errors
2001 649         1264 $paren_depth = 0;
2002 649         965 $brace_depth = 0;
2003 649         928 $square_bracket_depth = 0;
2004 649         2643 $rcurrent_depth = [ (0) x scalar(@closing_brace_names) ];
2005 649         1043 $total_depth = 0;
2006 649         970 $rtotal_depth = [];
2007 649         1952 $rcurrent_sequence_number = [];
2008 649         1815 $ris_lexical_sub = {};
2009 649         1055 $next_sequence_number = SEQ_ROOT + 1;
2010              
2011 649         992 $rparen_type = [];
2012 649         1452 $rparen_semicolon_count = [];
2013 649         1281 $rparen_vars = [];
2014 649         2097 $rbrace_type = [];
2015 649         1649 $rbrace_structural_type = [];
2016 649         1379 $rbrace_context = [];
2017 649         1344 $rbrace_package = [];
2018 649         1347 $rsquare_bracket_type = [];
2019 649         1395 $rsquare_bracket_structural_type = [];
2020 649         1326 $rdepth_array = [];
2021 649         3491 $rnested_ternary_flag = [];
2022 649         1138 $rnested_statement_type = [];
2023 649         3745 $rstarting_line_of_current_depth = [];
2024              
2025 649         3679 $rparen_type->[$paren_depth] = EMPTY_STRING;
2026 649         1418 $rparen_semicolon_count->[$paren_depth] = 0;
2027 649         1352 $rparen_vars->[$paren_depth] = [];
2028 649         1371 $rbrace_type->[$brace_depth] = ';'; # identify opening brace as code block
2029 649         1274 $rbrace_structural_type->[$brace_depth] = EMPTY_STRING;
2030 649         1268 $rbrace_context->[$brace_depth] = UNKNOWN_CONTEXT;
2031 649         1369 $rbrace_package->[$paren_depth] = $current_package;
2032 649         1328 $rsquare_bracket_type->[$square_bracket_depth] = EMPTY_STRING;
2033 649         1194 $rsquare_bracket_structural_type->[$square_bracket_depth] = EMPTY_STRING;
2034              
2035 649         2252 initialize_tokenizer_state();
2036 649         1008 return;
2037             } ## end sub prepare_for_a_new_file
2038              
2039             { ## closure for sub tokenize_this_line
2040              
2041 44     44   385 use constant BRACE => 0;
  44         77  
  44         3055  
2042 44     44   256 use constant SQUARE_BRACKET => 1;
  44         82  
  44         1939  
2043 44     44   216 use constant PAREN => 2;
  44         95  
  44         1569  
2044 44     44   195 use constant QUESTION_COLON => 3;
  44         78  
  44         70916  
2045              
2046             # TV1: scalars for processing one LINE.
2047             # Re-initialized on each entry to sub tokenize_this_line.
2048             my (
2049             $block_type, $container_type, $expecting,
2050             $i, $i_tok, $input_line,
2051             $input_line_number, $last_nonblank_i, $max_token_index,
2052             $next_tok, $next_type, $peeked_ahead,
2053             $prototype, $rhere_target_list, $rtoken_map,
2054             $rtoken_type, $rtokens, $tok,
2055             $type, $type_sequence, $indent_flag,
2056             );
2057              
2058             # TV2: refs to ARRAYS for processing one LINE
2059             # Re-initialized on each call.
2060             my $routput_token_list = []; # stack of output token indexes
2061             my $routput_token_type = []; # token types
2062             my $routput_block_type = []; # types of code block
2063             my $routput_type_sequence = []; # nesting sequential number
2064             my $routput_indent_flag = []; #
2065              
2066             # TV3: SCALARS for quote variables. These are initialized with a
2067             # subroutine call and continually updated as lines are processed.
2068             my (
2069             $in_quote, $quote_type,
2070             $quote_character, $quote_pos,
2071             $quote_depth, $quoted_string_1,
2072             $quoted_string_2, $allowed_quote_modifiers,
2073             $quote_starting_tok, $quote_here_target_2,
2074             );
2075              
2076             # TV4: SCALARS for multi-line identifiers and
2077             # statements. These are initialized with a subroutine call
2078             # and continually updated as lines are processed.
2079             my ( $id_scan_state, $identifier, $want_paren );
2080              
2081             # TV5: SCALARS for tracking indentation level.
2082             # Initialized once and continually updated as lines are
2083             # processed.
2084             my (
2085             $nesting_token_string, $nesting_block_string,
2086             $nesting_block_flag, $level_in_tokenizer,
2087             );
2088              
2089             # TV6: SCALARS for remembering several previous
2090             # tokens. Initialized once and continually updated as
2091             # lines are processed.
2092             my (
2093             $last_nonblank_container_type, $last_nonblank_type_sequence,
2094             $last_last_nonblank_token, $last_last_nonblank_type,
2095             $last_nonblank_prototype,
2096             );
2097              
2098             # ----------------------------------------------------------------
2099             # beginning of tokenizer variable access and manipulation routines
2100             # ----------------------------------------------------------------
2101              
2102             sub initialize_tokenizer_state {
2103              
2104             # GV1: initialized once
2105             # TV1: initialized on each call
2106             # TV2: initialized on each call
2107             # TV3:
2108 649     649 0 1066 $in_quote = 0;
2109 649         1192 $quote_type = 'Q';
2110 649         1024 $quote_character = EMPTY_STRING;
2111 649         964 $quote_pos = 0;
2112 649         980 $quote_depth = 0;
2113 649         982 $quoted_string_1 = EMPTY_STRING;
2114 649         960 $quoted_string_2 = EMPTY_STRING;
2115 649         986 $allowed_quote_modifiers = EMPTY_STRING;
2116 649         1033 $quote_starting_tok = EMPTY_STRING;
2117 649         1048 $quote_here_target_2 = undef;
2118              
2119             # TV4:
2120 649         969 $id_scan_state = EMPTY_STRING;
2121 649         1080 $identifier = EMPTY_STRING;
2122 649         976 $want_paren = EMPTY_STRING;
2123              
2124             # TV5:
2125 649         991 $nesting_token_string = EMPTY_STRING;
2126 649         1012 $nesting_block_string = '1'; # initially in a block
2127 649         1010 $nesting_block_flag = 1;
2128 649         997 $level_in_tokenizer = 0;
2129              
2130             # TV6:
2131 649         1021 $last_nonblank_container_type = EMPTY_STRING;
2132 649         963 $last_nonblank_type_sequence = EMPTY_STRING;
2133 649         983 $last_last_nonblank_token = ';';
2134 649         981 $last_last_nonblank_type = ';';
2135 649         1011 $last_nonblank_prototype = EMPTY_STRING;
2136 649         946 return;
2137             } ## end sub initialize_tokenizer_state
2138              
2139             sub save_tokenizer_state {
2140              
2141             # Global variables:
2142 0     0 0 0 my $rGV1 = [
2143             $brace_depth,
2144             $context,
2145             $current_package,
2146             $last_nonblank_block_type,
2147             $last_nonblank_token,
2148             $last_nonblank_type,
2149             $next_sequence_number,
2150             $paren_depth,
2151             $rbrace_context,
2152             $rbrace_package,
2153             $rbrace_structural_type,
2154             $rbrace_type,
2155             $rcurrent_depth,
2156             $rcurrent_sequence_number,
2157             $ris_lexical_sub,
2158             $rdepth_array,
2159             $ris_block_function,
2160             $ris_block_list_function,
2161             $ris_constant,
2162             $ris_user_function,
2163             $rnested_statement_type,
2164             $rnested_ternary_flag,
2165             $rparen_semicolon_count,
2166             $rparen_vars,
2167             $rparen_type,
2168             $rsaw_function_definition,
2169             $rsaw_use_module,
2170             $rsquare_bracket_structural_type,
2171             $rsquare_bracket_type,
2172             $rstarting_line_of_current_depth,
2173             $rtotal_depth,
2174             $ruser_function_prototype,
2175             $square_bracket_depth,
2176             $statement_type,
2177             $total_depth,
2178              
2179             ];
2180              
2181             # Tokenizer closure variables:
2182 0         0 my $rTV1 = [
2183             $block_type, $container_type, $expecting,
2184             $i, $i_tok, $input_line,
2185             $input_line_number, $last_nonblank_i, $max_token_index,
2186             $next_tok, $next_type, $peeked_ahead,
2187             $prototype, $rhere_target_list, $rtoken_map,
2188             $rtoken_type, $rtokens, $tok,
2189             $type, $type_sequence, $indent_flag,
2190             ];
2191              
2192 0         0 my $rTV2 = [
2193             $routput_token_list, $routput_token_type,
2194             $routput_block_type, $routput_type_sequence,
2195             $routput_indent_flag,
2196             ];
2197              
2198 0         0 my $rTV3 = [
2199             $in_quote, $quote_type,
2200             $quote_character, $quote_pos,
2201             $quote_depth, $quoted_string_1,
2202             $quoted_string_2, $allowed_quote_modifiers,
2203             $quote_starting_tok, $quote_here_target_2,
2204             ];
2205              
2206 0         0 my $rTV4 = [ $id_scan_state, $identifier, $want_paren ];
2207              
2208 0         0 my $rTV5 = [
2209             $nesting_token_string, $nesting_block_string,
2210             $nesting_block_flag, $level_in_tokenizer,
2211             ];
2212              
2213 0         0 my $rTV6 = [
2214             $last_nonblank_container_type, $last_nonblank_type_sequence,
2215             $last_last_nonblank_token, $last_last_nonblank_type,
2216             $last_nonblank_prototype,
2217             ];
2218 0         0 return [ $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
2219             } ## end sub save_tokenizer_state
2220              
2221             sub restore_tokenizer_state {
2222 0     0 0 0 my ($rstate) = @_;
2223 0         0 my ( $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
  0         0  
2224              
2225             (
2226             $brace_depth,
2227             $context,
2228             $current_package,
2229             $last_nonblank_block_type,
2230             $last_nonblank_token,
2231             $last_nonblank_type,
2232             $next_sequence_number,
2233             $paren_depth,
2234             $rbrace_context,
2235             $rbrace_package,
2236             $rbrace_structural_type,
2237             $rbrace_type,
2238             $rcurrent_depth,
2239             $rcurrent_sequence_number,
2240             $ris_lexical_sub,
2241             $rdepth_array,
2242             $ris_block_function,
2243             $ris_block_list_function,
2244             $ris_constant,
2245             $ris_user_function,
2246             $rnested_statement_type,
2247             $rnested_ternary_flag,
2248             $rparen_semicolon_count,
2249             $rparen_vars,
2250             $rparen_type,
2251             $rsaw_function_definition,
2252             $rsaw_use_module,
2253             $rsquare_bracket_structural_type,
2254             $rsquare_bracket_type,
2255             $rstarting_line_of_current_depth,
2256             $rtotal_depth,
2257             $ruser_function_prototype,
2258             $square_bracket_depth,
2259             $statement_type,
2260             $total_depth,
2261              
2262 0         0 ) = @{$rGV1};
  0         0  
2263              
2264             (
2265             $block_type, $container_type, $expecting,
2266             $i, $i_tok, $input_line,
2267             $input_line_number, $last_nonblank_i, $max_token_index,
2268             $next_tok, $next_type, $peeked_ahead,
2269             $prototype, $rhere_target_list, $rtoken_map,
2270             $rtoken_type, $rtokens, $tok,
2271             $type, $type_sequence, $indent_flag,
2272 0         0 ) = @{$rTV1};
  0         0  
2273              
2274             (
2275             $routput_token_list, $routput_token_type,
2276             $routput_block_type, $routput_type_sequence,
2277             $routput_indent_flag,
2278 0         0 ) = @{$rTV2};
  0         0  
2279              
2280             (
2281             $in_quote, $quote_type,
2282             $quote_character, $quote_pos,
2283             $quote_depth, $quoted_string_1,
2284             $quoted_string_2, $allowed_quote_modifiers,
2285             $quote_starting_tok, $quote_here_target_2,
2286 0         0 ) = @{$rTV3};
  0         0  
2287              
2288 0         0 ( $id_scan_state, $identifier, $want_paren ) = @{$rTV4};
  0         0  
2289              
2290             (
2291             $nesting_token_string, $nesting_block_string,
2292             $nesting_block_flag, $level_in_tokenizer,
2293 0         0 ) = @{$rTV5};
  0         0  
2294              
2295             (
2296             $last_nonblank_container_type, $last_nonblank_type_sequence,
2297             $last_last_nonblank_token, $last_last_nonblank_type,
2298             $last_nonblank_prototype,
2299 0         0 ) = @{$rTV6};
  0         0  
2300 0         0 return;
2301             } ## end sub restore_tokenizer_state
2302              
2303             sub split_pretoken {
2304              
2305 8     8 0 17 my ( $self, $numc ) = @_;
2306              
2307             # This provides a way to work around the limitations of the
2308             # pre-tokenization scheme upon which perltidy is based. It is rarely
2309             # needed.
2310              
2311             # Split the leading $numc characters from the current token (at
2312             # index=$i) which is pre-type 'w' and insert the remainder back into
2313             # the pretoken stream with appropriate settings. Since we are
2314             # splitting a pre-type 'w', there are three cases, depending on if the
2315             # remainder starts with a digit:
2316             # Case 1: remainder is type 'd', all digits
2317             # Case 2: remainder is type 'd' and type 'w': digits & other characters
2318             # Case 3: remainder is type 'w'
2319              
2320             # Examples, for $numc=1:
2321             # $tok => $tok_0 $tok_1 $tok_2
2322             # 'x10' => 'x' '10' # case 1
2323             # 'x10if' => 'x' '10' 'if' # case 2
2324             # '0ne => 'O' 'ne' # case 3
2325              
2326             # where:
2327             # $tok_1 is a possible string of digits (pre-type 'd')
2328             # $tok_2 is a possible word (pre-type 'w')
2329              
2330             # return 1 if successful
2331             # return undef if error (shouldn't happen)
2332              
2333             # Calling routine should update '$type' and '$tok' if successful.
2334              
2335 8         15 my $pretoken = $rtokens->[$i];
2336 8 50 33     79 if ( $pretoken
      33        
2337             && length($pretoken) > $numc
2338             && substr( $pretoken, $numc ) =~ /^(\d*)(.*)$/ )
2339             {
2340              
2341             # Split $tok into up to 3 tokens:
2342 8         18 my $tok_0 = substr( $pretoken, 0, $numc );
2343 8 50       31 my $tok_1 = defined($1) ? $1 : EMPTY_STRING;
2344 8 50       22 my $tok_2 = defined($2) ? $2 : EMPTY_STRING;
2345              
2346 8         16 my $len_0 = length($tok_0);
2347 8         11 my $len_1 = length($tok_1);
2348 8         13 my $len_2 = length($tok_2);
2349              
2350             ##my $pre_type_0 = 'w';
2351 8         13 my $pre_type_1 = 'd';
2352 8         12 my $pre_type_2 = 'w';
2353              
2354 8         15 my $pos_0 = $rtoken_map->[$i];
2355 8         11 my $pos_1 = $pos_0 + $len_0;
2356 8         12 my $pos_2 = $pos_1 + $len_1;
2357              
2358 8         13 my $isplice = $i + 1;
2359              
2360             # Splice in any digits
2361 8 100       17 if ($len_1) {
2362 5         8 splice @{$rtoken_map}, $isplice, 0, $pos_1;
  5         16  
2363 5         10 splice @{$rtokens}, $isplice, 0, $tok_1;
  5         13  
2364 5         9 splice @{$rtoken_type}, $isplice, 0, $pre_type_1;
  5         10  
2365 5         8 $max_token_index++;
2366 5         8 $isplice++;
2367             }
2368              
2369             # Splice in any trailing word
2370 8 100       22 if ($len_2) {
2371 4         5 splice @{$rtoken_map}, $isplice, 0, $pos_2;
  4         9  
2372 4         7 splice @{$rtokens}, $isplice, 0, $tok_2;
  4         7  
2373 4         5 splice @{$rtoken_type}, $isplice, 0, $pre_type_2;
  4         6  
2374 4         6 $max_token_index++;
2375             }
2376              
2377 8         16 $rtokens->[$i] = $tok_0;
2378 8         27 return 1;
2379             }
2380              
2381             # Shouldn't get here - bad call parameters
2382 0         0 if (DEVEL_MODE) {
2383             Fault(<<EOM);
2384             While working near line number $input_line_number, bad arg '$tok' passed to sub split_pretoken()
2385             EOM
2386             }
2387 0         0 return;
2388             } ## end sub split_pretoken
2389              
2390             sub get_indentation_level {
2391 649     649 0 1429 return $level_in_tokenizer;
2392             }
2393              
2394             sub reset_indentation_level {
2395 649     649 0 1204 $level_in_tokenizer = shift;
2396 649         1014 return;
2397             }
2398              
2399             sub peeked_ahead {
2400 280     280 0 477 ( ( my $flag ) ) = @_;
2401              
2402             # get or set the closure flag '$peeked_ahead':
2403             # - set $peeked_ahead to $flag if given, then
2404             # - return current value
2405 280 100       556 $peeked_ahead = defined($flag) ? $flag : $peeked_ahead;
2406 280         886 return $peeked_ahead;
2407             } ## end sub peeked_ahead
2408              
2409             # ------------------------------------------------------------
2410             # end of tokenizer variable access and manipulation routines
2411             # ------------------------------------------------------------
2412              
2413             #------------------------------
2414             # beginning of tokenizer hashes
2415             #------------------------------
2416              
2417             my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
2418              
2419             my @q;
2420              
2421             # 'L' is token for opening { at hash key
2422             my %is_opening_type;
2423             @q = qw< L { ( [ >;
2424             $is_opening_type{$_} = 1 for @q;
2425              
2426             my %is_opening_or_ternary_type;
2427             push @q, '?';
2428             $is_opening_or_ternary_type{$_} = 1 for @q;
2429              
2430             # 'R' is token for closing } at hash key
2431             my %is_closing_type;
2432             @q = qw< R } ) ] >;
2433             $is_closing_type{$_} = 1 for @q;
2434              
2435             my %is_closing_or_ternary_type;
2436             push @q, ':';
2437             $is_closing_or_ternary_type{$_} = 1 for @q;
2438              
2439             my %is_redo_last_next_goto;
2440             @q = qw( redo last next goto );
2441             $is_redo_last_next_goto{$_} = 1 for @q;
2442              
2443             my %is_use_require;
2444             @q = qw( use require );
2445             $is_use_require{$_} = 1 for @q;
2446              
2447             # This hash holds the array index in $self for these keywords:
2448             # Fix for issue c035: removed 'format' from this hash
2449             my %is_END_DATA = (
2450             '__END__' => _in_end_,
2451             '__DATA__' => _in_data_,
2452             );
2453              
2454             # table showing how many quoted things to look for after quote operator..
2455             # s, y, tr have 2 (pattern and replacement)
2456             # others have 1 (pattern only)
2457             my %quote_items = (
2458             's' => 2,
2459             'y' => 2,
2460             'tr' => 2,
2461             'm' => 1,
2462             'qr' => 1,
2463             'q' => 1,
2464             'qq' => 1,
2465             'qw' => 1,
2466             'qx' => 1,
2467             );
2468              
2469             my %is_for_foreach;
2470             @q = qw( for foreach );
2471             $is_for_foreach{$_} = 1 for @q;
2472              
2473             # These keywords may introduce blocks after parenthesized expressions,
2474             # in the form:
2475             # keyword ( .... ) { BLOCK }
2476             # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
2477             my %is_blocktype_with_paren;
2478             @q =
2479             qw(if elsif unless while until for foreach switch case given when catch);
2480             $is_blocktype_with_paren{$_} = 1 for @q;
2481              
2482             my %is_case_default;
2483             @q = qw( case default );
2484             $is_case_default{$_} = 1 for @q;
2485              
2486             #------------------------
2487             # end of tokenizer hashes
2488             #------------------------
2489              
2490             # ------------------------------------------------------------
2491             # beginning of various scanner interface routines
2492             # ------------------------------------------------------------
2493             sub scan_replacement_text {
2494              
2495             # check for here-docs in replacement text invoked by
2496             # a substitution operator with executable modifier 'e'.
2497             #
2498             # given:
2499             # $replacement_text
2500             # return:
2501             # $rht = reference to any here-doc targets
2502 0     0 0 0 my ( $self, $replacement_text ) = @_;
2503              
2504             # quick check
2505 0 0       0 return if ( $replacement_text !~ /<</ );
2506              
2507 0         0 $self->write_logfile_entry(
2508             "scanning replacement text for here-doc targets\n");
2509              
2510             # save the logger object for error messages
2511 0         0 my $logger_object = $self->[_logger_object_];
2512              
2513             # save all lexical variables
2514 0         0 my $rstate = save_tokenizer_state();
2515 0         0 _decrement_count(); # avoid error check for multiple tokenizers
2516              
2517             # make a new tokenizer
2518 0         0 my $tokenizer = Perl::Tidy::Tokenizer->new(
2519             source_object => \$replacement_text,
2520             logger_object => $logger_object,
2521             starting_line_number => $input_line_number,
2522             );
2523              
2524             # scan the replacement text
2525 0         0 while ( $tokenizer->get_line() ) { }
2526              
2527             # remove any here doc targets
2528 0         0 my $rht = undef;
2529 0 0       0 if ( $tokenizer->[_in_here_doc_] ) {
2530 0         0 $rht = [];
2531 0         0 push @{$rht},
  0         0  
2532             [
2533             $tokenizer->[_here_doc_target_],
2534             $tokenizer->[_here_quote_character_],
2535             ];
2536 0 0       0 if ( $tokenizer->[_rhere_target_list_] ) {
2537 0         0 push @{$rht}, @{ $tokenizer->[_rhere_target_list_] };
  0         0  
  0         0  
2538 0         0 $tokenizer->[_rhere_target_list_] = undef;
2539             }
2540 0         0 $tokenizer->[_in_here_doc_] = undef;
2541             }
2542              
2543             # now its safe to report errors
2544 0         0 my $rtokenization_info_uu = $tokenizer->report_tokenization_errors();
2545              
2546             # TODO: Could propagate a severe error up
2547              
2548             # restore all tokenizer lexical variables
2549 0         0 restore_tokenizer_state($rstate);
2550              
2551             # return the here doc targets
2552 0         0 return $rht;
2553             } ## end sub scan_replacement_text
2554              
2555             sub scan_bare_identifier {
2556 1862     1862 0 2568 my $self = shift;
2557              
2558             # Scan a token starting with an alphanumeric variable or package
2559             # separator, :: or '.
2560              
2561 1862         5653 ( $i, $tok, $type, $prototype ) = $self->scan_bare_identifier_do(
2562              
2563             $input_line,
2564             $i,
2565             $tok,
2566             $type,
2567             $prototype,
2568             $rtoken_map,
2569             $max_token_index,
2570             );
2571 1862         3623 return;
2572             } ## end sub scan_bare_identifier
2573              
2574             sub scan_identifier {
2575              
2576             # Scan for an identifier following a sigil or -> or other
2577             # identifier prefix, such as '::'
2578              
2579 551     551 0 782 my $self = shift;
2580              
2581             (
2582              
2583 551         2096 $i,
2584             $tok,
2585             $type,
2586             $id_scan_state,
2587             $identifier,
2588             my $split_pretoken_flag,
2589              
2590             ) = $self->scan_complex_identifier(
2591              
2592             $i,
2593             $id_scan_state,
2594             $identifier,
2595             $rtokens,
2596             $max_token_index,
2597             $expecting,
2598             $rparen_type->[$paren_depth],
2599             );
2600              
2601             # Check for signal to fix a special variable adjacent to a keyword,
2602             # such as '$^One$0'.
2603 551 100       1330 if ($split_pretoken_flag) {
2604              
2605             # Try to fix it by splitting the pretoken
2606 3 50 33     17 if ( $i > 0
      33        
2607             && $rtokens->[ $i - 1 ] eq '^'
2608             && $self->split_pretoken(1) )
2609             {
2610 3         4 $identifier = substr( $identifier, 0, 3 );
2611 3         4 $tok = $identifier;
2612             }
2613             else {
2614              
2615             # This shouldn't happen ...
2616 0         0 my $var = substr( $tok, 0, 3 );
2617 0         0 my $excess = substr( $tok, 3 );
2618 0         0 $self->interrupt_logfile();
2619 0         0 $self->warning(<<EOM);
2620             $input_line_number: Trouble parsing at characters '$excess' after special variable '$var'.
2621             A space may be needed after '$var'.
2622             EOM
2623 0         0 $self->resume_logfile();
2624             }
2625             }
2626 551         846 return;
2627             } ## end sub scan_identifier
2628              
2629 44     44   310 use constant VERIFY_FASTSCAN => 0;
  44         102  
  44         4439  
2630             my %fast_scan_context;
2631              
2632             BEGIN {
2633 44     44   55109 %fast_scan_context = (
2634             '$' => SCALAR_CONTEXT,
2635             '*' => SCALAR_CONTEXT,
2636             '@' => LIST_CONTEXT,
2637             '%' => LIST_CONTEXT,
2638             '&' => UNKNOWN_CONTEXT,
2639             );
2640             } ## end BEGIN
2641              
2642             sub scan_simple_identifier {
2643              
2644 5685     5685 0 7019 my $self = shift;
2645              
2646             # This is a wrapper for sub scan_identifier. It does a fast preliminary
2647             # scan for certain common identifiers:
2648             # '$var', '@var', %var, *var, &var, '@{...}', '%{...}'
2649             # If it does not find one of these, or this is a restart, it calls the
2650             # original scanner directly.
2651              
2652             # This gives the same results as the full scanner in about 1/4 the
2653             # total runtime for a typical input stream.
2654              
2655             # Notation:
2656             # $var * 2
2657             # ^^ ^
2658             # || |
2659             # || ---- $i_next [= next nonblank pretoken ]
2660             # |----$i_plus_1 [= a bareword ]
2661             # ---$i_begin [= a sigil]
2662              
2663 5685         6790 my $i_begin = $i;
2664 5685         6939 my $tok_begin = $tok;
2665 5685         7237 my $i_plus_1 = $i + 1;
2666 5685         6494 my $fast_scan_type;
2667              
2668             #-------------------------------------------------------
2669             # Do full scan for anything following a pointer, such as
2670             # $cref->&*; # a postderef
2671             #-------------------------------------------------------
2672 5685 100 66     25465 if ( $last_nonblank_token eq '->' ) {
    100 66        
    50 33        
      0        
      33        
2673              
2674             }
2675              
2676             #------------------------------
2677             # quick scan with leading sigil
2678             #------------------------------
2679             elsif ( !$id_scan_state
2680             && $i_plus_1 <= $max_token_index
2681             && $fast_scan_context{$tok} )
2682             {
2683 5571         7818 $context = $fast_scan_context{$tok};
2684              
2685             # look for $var, @var, ...
2686 5571 100 100     10583 if ( $rtoken_type->[$i_plus_1] eq 'w' ) {
    100 66        
2687 5214         6880 my $pretype_next = EMPTY_STRING;
2688 5214 100       8842 if ( $i_plus_1 < $max_token_index ) {
2689 5092         6413 my $i_next = $i_plus_1 + 1;
2690 5092 100 100     12986 if ( $rtoken_type->[$i_next] eq 'b'
2691             && $i_next < $max_token_index )
2692             {
2693 2045         2735 $i_next += 1;
2694             }
2695 5092         7232 $pretype_next = $rtoken_type->[$i_next];
2696             }
2697 5214 100 100     14391 if ( $pretype_next ne ':' && $pretype_next ne "'" ) {
2698              
2699             # Found type 'i' like '$var', '@var', or '%var'
2700 5098         8402 $identifier = $tok . $rtokens->[$i_plus_1];
2701 5098         5908 $tok = $identifier;
2702 5098         6028 $type = 'i';
2703 5098         5800 $i = $i_plus_1;
2704 5098         6821 $fast_scan_type = $type;
2705             }
2706             }
2707              
2708             # Look for @{ or %{ .
2709             # But we must let the full scanner handle things ${ because it may
2710             # keep going to get a complete identifier like '${#}' .
2711             elsif (
2712             $rtoken_type->[$i_plus_1] eq '{'
2713             && ( $tok_begin eq '@'
2714             || $tok_begin eq '%' )
2715             )
2716             {
2717              
2718 43         79 $identifier = $tok;
2719 43         76 $type = 't';
2720 43         97 $fast_scan_type = $type;
2721             }
2722             else {
2723             ## out of tricks
2724             }
2725             }
2726              
2727             #---------------------------
2728             # Quick scan with leading ->
2729             # Look for ->[ and ->{
2730             #---------------------------
2731             elsif (
2732             $tok eq '->'
2733             && $i < $max_token_index
2734             && ( $rtokens->[$i_plus_1] eq '{'
2735             || $rtokens->[$i_plus_1] eq '[' )
2736             )
2737             {
2738 0         0 $type = $tok;
2739 0         0 $fast_scan_type = $type;
2740 0         0 $identifier = $tok;
2741 0         0 $context = UNKNOWN_CONTEXT;
2742             }
2743             else {
2744             ## out of tricks
2745             }
2746              
2747             #--------------------------------------
2748             # Verify correctness during development
2749             #--------------------------------------
2750 5685         6365 if ( VERIFY_FASTSCAN && $fast_scan_type ) {
2751              
2752             # We will call the full method
2753             my $identifier_simple = $identifier;
2754             my $tok_simple = $tok;
2755             my $i_simple = $i;
2756             my $context_simple = $context;
2757              
2758             $tok = $tok_begin;
2759             $i = $i_begin;
2760             $self->scan_identifier();
2761              
2762             if ( $tok ne $tok_simple
2763             || $type ne $fast_scan_type
2764             || $i != $i_simple
2765             || $identifier ne $identifier_simple
2766             || $id_scan_state
2767             || $context ne $context_simple )
2768             {
2769             print {*STDERR} <<EOM;
2770             scan_simple_identifier differs from scan_identifier:
2771             simple: i=$i_simple, tok=$tok_simple, type=$fast_scan_type, ident=$identifier_simple, context='$context_simple
2772             full: i=$i, tok=$tok, type=$type, ident=$identifier, context='$context state=$id_scan_state
2773             EOM
2774             }
2775             }
2776              
2777             #-------------------------------------------------
2778             # call full scanner if fast method did not succeed
2779             #-------------------------------------------------
2780 5685 100       9389 if ( !$fast_scan_type ) {
2781 544         1416 $self->scan_identifier();
2782             }
2783 5685         8424 return;
2784             } ## end sub scan_simple_identifier
2785              
2786             sub method_ok_here {
2787              
2788 14     14 0 32 my ( $self, $next_nonblank_token ) = @_;
2789              
2790             # Return:
2791             # false if this is definitely an invalid method declaration
2792             # true otherwise (even if not sure)
2793              
2794             # We are trying to avoid problems with old uses of 'method'
2795             # when --use-feature=class is set (rt145706).
2796             # For example, this should cause a return of 'false':
2797              
2798             # method paint => sub {
2799             # return;
2800             # };
2801              
2802             # Assume non-method if an error would occur
2803 14 50       35 return if ( $expecting == OPERATOR );
2804              
2805             # Currently marking a line-ending 'method' as a bareword (fix c532)
2806 14 50       32 return if ( $i_tok >= $max_token_index );
2807              
2808             # If a '$' follows 'method'...
2809             # Check for possible Object::Pad lexical method like
2810             # 'method $var {'
2811             # TODO: maybe merge this with the code below by increasing pos by 1
2812 14 100 66     50 if ( $next_nonblank_token eq '$' && new_statement_ok() ) {
2813 2         7 return 1;
2814             }
2815              
2816             # Otherwise, not a method if non-word follows ..
2817 12 100       46 if ( $next_nonblank_token !~ /^[\w\:]/ ) { return }
  4         14  
2818              
2819             # from do_scan_sub:
2820 8         16 my $i_beg = $i + 1;
2821 8         16 my $pos_beg = $rtoken_map->[$i_beg];
2822 8         25 pos($input_line) = $pos_beg;
2823              
2824             # TEST 1: look a valid sub NAME
2825 8 50       44 if (
2826             $input_line =~ m{\G\s*
2827             ((?:\w*(?:'|::))*) # package - something that ends in :: or '
2828             (\w+) # NAME - required
2829             }gcx
2830             )
2831             {
2832             # For possible future use..
2833             ##my $subname = $2;
2834             ##my $package = $1 ? $1 : EMPTY_STRING;
2835             }
2836             else {
2837 0         0 return;
2838             }
2839              
2840             # TEST 2: look for invalid characters after name, such as here:
2841             # method paint => sub {
2842             # ...
2843             # }
2844 8         15 my $next_char = EMPTY_STRING;
2845 8 100       29 if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 }
  7         17  
2846 8 100 66     36 if ( !$next_char || $next_char eq '#' ) {
2847 1         6 ( $next_char, my $i_next_uu ) =
2848             $self->find_next_nonblank_token( $max_token_index,
2849             $rtokens, $max_token_index );
2850             }
2851              
2852 8 50       18 if ( !$next_char ) {
2853              
2854             # out of characters - give up
2855 0         0 return;
2856             }
2857              
2858             # Possibly valid next token types:
2859             # '(' could start prototype or signature
2860             # ':' could start ATTRIBUTE
2861             # '{' cold start BLOCK
2862             # ';' or '}' could end a statement
2863 8 100       26 if ( $next_char !~ /^[\(\:\{\;\}]/ ) {
2864              
2865             # This does not match use feature 'class' syntax
2866 3         10 return;
2867             }
2868              
2869             # We will stop here and assume that this is valid syntax for
2870             # use feature 'class'.
2871 5         19 return 1;
2872             } ## end sub method_ok_here
2873              
2874             sub class_ok_here {
2875              
2876 12     12 0 19 my $self = shift;
2877              
2878             # Return:
2879             # false if this is definitely an invalid class declaration
2880             # true otherwise (even if not sure)
2881              
2882             # We are trying to avoid problems with old uses of 'class'
2883             # when --use-feature=class is set (rt145706). We look ahead
2884             # see if this use of 'class' is obviously inconsistent with
2885             # the syntax of use feature 'class'. This allows the default
2886             # setting --use-feature=class to work for old syntax too.
2887              
2888             # Valid class declarations look like
2889             # class NAME ?ATTRS ?VERSION ?BLOCK
2890             # where ATTRS VERSION and BLOCK are optional
2891              
2892             # For example, this should produce a return of 'false':
2893             #
2894             # class ExtendsBasicAttributes is BasicAttributes{
2895              
2896             # TEST 1: class stmt can only go where a new statement can start
2897 12 50       37 if ( !new_statement_ok() ) { return }
  0         0  
2898              
2899 12         21 my $i_beg = $i + 1;
2900 12         25 my $pos_beg = $rtoken_map->[$i_beg];
2901 12         32 pos($input_line) = $pos_beg;
2902              
2903             # TEST 2: look for a valid NAME
2904 12 50       70 if (
2905             $input_line =~ m{\G\s*
2906             ((?:\w*(?:'|::))*) # package - something that ends in :: or '
2907             (\w+) # NAME - required
2908             }gcx
2909             )
2910             {
2911             # For possible future use..
2912             ##my $subname = $2;
2913             ##my $package = $1 ? $1 : EMPTY_STRING;
2914             }
2915             else {
2916 0         0 return;
2917             }
2918              
2919             # TEST 3: look for valid characters after NAME
2920 12         22 my $next_char = EMPTY_STRING;
2921 12 100       44 if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 }
  11         22  
2922 12 100 66     57 if ( !$next_char || $next_char eq '#' ) {
2923 1         4 ( $next_char, my $i_next_uu ) =
2924             $self->find_next_nonblank_token( $max_token_index,
2925             $rtokens, $max_token_index );
2926             }
2927 12 50       27 if ( !$next_char ) {
2928              
2929             # out of characters - give up
2930 0         0 return;
2931             }
2932              
2933             # Must see one of: ATTRIBUTE, VERSION, BLOCK, or end stmt
2934              
2935             # Possibly valid next token types:
2936             # ':' could start ATTRIBUTE
2937             # '\d' could start VERSION
2938             # '{' cold start BLOCK
2939             # ';' could end a statement
2940             # '}' could end statement but would be strange
2941              
2942 12 100       44 if ( $next_char !~ /^[\:\d\{\;\}]/ ) {
2943              
2944             # This does not match use feature 'class' syntax
2945 2         7 return;
2946             }
2947              
2948             # We will stop here and assume that this is valid syntax for
2949             # use feature 'class'.
2950 10         28 return 1;
2951             } ## end sub class_ok_here
2952              
2953             sub scan_id {
2954 405     405 0 623 my $self = shift;
2955              
2956             # Scan for a sub or package name
2957              
2958 405         1634 ( $i, $tok, $type, $id_scan_state ) = $self->scan_id_do(
2959              
2960             $input_line,
2961             $i, $tok,
2962             $rtokens,
2963             $rtoken_map,
2964             $id_scan_state,
2965             $max_token_index,
2966             );
2967 405         910 return;
2968             } ## end sub scan_id
2969              
2970             sub scan_number {
2971 683     683 0 957 my $self = shift;
2972 683         861 my $number;
2973 683         1627 ( $i, $type, $number ) =
2974             $self->scan_number_do( $input_line, $i, $rtoken_map, $type,
2975             $max_token_index );
2976 683         1393 return $number;
2977             } ## end sub scan_number
2978              
2979 44     44   364 use constant VERIFY_FASTNUM => 0;
  44         77  
  44         26216  
2980              
2981             sub scan_number_fast {
2982              
2983 2900     2900 0 3538 my $self = shift;
2984              
2985             # This is a wrapper for sub scan_number. It does a fast preliminary
2986             # scan for a simple integer. It calls the original scan_number if it
2987             # does not find one.
2988              
2989 2900         3555 my $i_begin = $i;
2990 2900         3623 my $tok_begin = $tok;
2991 2900         3257 my $number;
2992              
2993             #---------------------------------
2994             # Quick check for (signed) integer
2995             #---------------------------------
2996              
2997             # This will be the string of digits:
2998 2900         3405 my $i_d = $i;
2999 2900         3459 my $tok_d = $tok;
3000 2900         4357 my $typ_d = $rtoken_type->[$i_d];
3001              
3002             # check for signed integer
3003 2900         3850 my $sign = EMPTY_STRING;
3004 2900 50 66     7288 if ( $typ_d ne 'd'
      66        
      33        
3005             && ( $typ_d eq '+' || $typ_d eq '-' )
3006             && $i_d < $max_token_index )
3007             {
3008 392         555 $sign = $tok_d;
3009 392         499 $i_d++;
3010 392         551 $tok_d = $rtokens->[$i_d];
3011 392         576 $typ_d = $rtoken_type->[$i_d];
3012             }
3013              
3014             # Handle integers
3015 2900 100 100     17047 if (
      100        
3016             $typ_d eq 'd'
3017             && (
3018             $i_d == $max_token_index
3019             || ( $i_d < $max_token_index
3020             && $rtoken_type->[ $i_d + 1 ] ne '.'
3021             && $rtoken_type->[ $i_d + 1 ] ne 'w' )
3022             )
3023             )
3024             {
3025             # Let the full scanner handle multi-digit integers beginning with
3026             # '0' because there could be error messages. For example, '009' is
3027             # not a valid number.
3028              
3029 2284 100 100     7411 if ( $tok_d eq '0' || substr( $tok_d, 0, 1 ) ne '0' ) {
3030 2227         3502 $number = $sign . $tok_d;
3031 2227         2826 $type = 'n';
3032 2227         2953 $i = $i_d;
3033             }
3034             }
3035              
3036             #--------------------------------------
3037             # Verify correctness during development
3038             #--------------------------------------
3039 2900         3175 if ( VERIFY_FASTNUM && defined($number) ) {
3040              
3041             # We will call the full method
3042             my $type_simple = $type;
3043             my $i_simple = $i;
3044             my $number_simple = $number;
3045              
3046             $tok = $tok_begin;
3047             $i = $i_begin;
3048             $number = $self->scan_number();
3049              
3050             if ( $type ne $type_simple
3051             || ( $i != $i_simple && $i <= $max_token_index )
3052             || $number ne $number_simple )
3053             {
3054             print {*STDERR} <<EOM;
3055             scan_number_fast differs from scan_number:
3056             simple: i=$i_simple, type=$type_simple, number=$number_simple
3057             full: i=$i, type=$type, number=$number
3058             EOM
3059             }
3060             }
3061              
3062             #----------------------------------------
3063             # call full scanner if may not be integer
3064             #----------------------------------------
3065 2900 100       5086 if ( !defined($number) ) {
3066 673         1530 $number = $self->scan_number();
3067             }
3068 2900         6146 return $number;
3069             } ## end sub scan_number_fast
3070              
3071             sub error_if_expecting_TERM {
3072 285     285 0 542 my ($self) = @_;
3073              
3074             # Issue a warning if a binary operator is missing a term to operate on
3075              
3076             # This should only be called if a term is expected here
3077 285 50       685 if ( $expecting != TERM ) { return }
  0         0  
3078              
3079             # Be sure a TERM is definitely required ..
3080 285 50 66     2290 if (
      33        
      66        
      33        
3081              
3082             # .. following a binary operator token type, like '='
3083             $is_binary_or_unary_operator_type{$last_nonblank_type}
3084              
3085             # .. or following a binary keyword operator, like 'and'
3086             || ( $last_nonblank_type eq 'k'
3087             && $is_binary_or_unary_keyword{$last_nonblank_token} )
3088              
3089             # .. or for a binary operator following something like a ';'
3090             || ( $is_not_a_TERM_producer_type{$last_nonblank_type}
3091             && $is_binary_operator_type{$tok} )
3092             )
3093             {
3094              
3095             # We must exclude error checking in sub signatures which have some
3096             # unusual syntax. For example the following syntax is okay within
3097             # a signature:
3098             # sub mysub ($=,) {...}
3099             # $ = undef
3100 0         0 my $ct = $rparen_type->[$paren_depth];
3101 0 0 0     0 if ( $ct && $ct =~ /^sub\b/ ) { return }
  0         0  
3102              
3103             $self->report_unexpected(
3104             {
3105 0         0 found => $tok,
3106             expecting => "term",
3107             i_tok => $i_tok,
3108             last_nonblank_i => $last_nonblank_i,
3109             rpretoken_map => $rtoken_map,
3110             rpretoken_type => $rtoken_type,
3111             input_line => $input_line,
3112             }
3113             );
3114 0         0 return 1;
3115             }
3116 285         736 return;
3117             } ## end sub error_if_expecting_TERM
3118              
3119             # a sub to warn if token found where operator expected
3120             sub error_if_expecting_OPERATOR {
3121              
3122 809     809 0 1368 my ( $self, ($thing) ) = @_;
3123              
3124             # Issue warning on error if expecting operator
3125             # Given:
3126             # $thing = the unexpected token or issue
3127             # = undef to use current pre-token
3128              
3129 809 50       1588 if ( $expecting == OPERATOR ) {
3130 0 0       0 if ( !defined($thing) ) { $thing = $tok }
  0         0  
3131             $self->report_unexpected(
3132             {
3133 0         0 found => $thing,
3134             expecting => "operator",
3135             i_tok => $i_tok,
3136             last_nonblank_i => $last_nonblank_i,
3137             rpretoken_map => $rtoken_map,
3138             rpretoken_type => $rtoken_type,
3139             input_line => $input_line,
3140             }
3141             );
3142 0 0       0 if ( $i_tok == 0 ) {
3143 0         0 $self->interrupt_logfile();
3144 0         0 $self->warning("Missing ';' or ',' above?\n");
3145 0         0 $self->resume_logfile();
3146             }
3147 0         0 return 1;
3148             }
3149 809         1272 return;
3150             } ## end sub error_if_expecting_OPERATOR
3151              
3152             # ------------------------------------------------------------
3153             # end scanner interfaces
3154             # ------------------------------------------------------------
3155              
3156             #------------------
3157             # Tokenization subs
3158             #------------------
3159             # An identifier in possible indirect object location followed by any of
3160             # these tokens: -> , ; } (plus others) is not an indirect object. Fix c257.
3161             my %Z_test_hash;
3162              
3163             BEGIN {
3164 44     44   313 my @qZ = qw#
3165             -> ; } ) ]
3166             => =~ = == !~ || >= != *= .. && |= .= -= += <= %=
3167             ^= &&= ||= //= <=>
3168             #;
3169 44         115 push @qZ, COMMA;
3170 44         252358 $Z_test_hash{$_} = 1 for @qZ;
3171             }
3172              
3173             sub do_DOLLAR_SIGN {
3174              
3175 4833     4833 0 6389 my $self = shift;
3176              
3177             # '$'
3178             # start looking for a scalar
3179 4833 50       8469 $self->error_if_expecting_OPERATOR("Scalar")
3180             if ( $expecting == OPERATOR );
3181 4833         13402 $self->scan_simple_identifier();
3182              
3183 4833 100       8641 if ( $identifier eq '$^W' ) {
3184 1         3 $self->[_saw_perl_dash_w_] = 1;
3185             }
3186              
3187             # Check for identifier in indirect object slot
3188             # (vorboard.pl, sort.t). Something like:
3189             # /^(print|printf|sort|exec|system)$/
3190 4833 100 66     30514 if (
      100        
      100        
      66        
      66        
3191             $is_indirect_object_taker{$last_nonblank_token}
3192             && $last_nonblank_type eq 'k'
3193             || ( ( $last_nonblank_token eq '(' )
3194             && $is_indirect_object_taker{ $rparen_type->[$paren_depth] } )
3195             || ( $last_nonblank_type eq 'w'
3196             || $last_nonblank_type eq 'U' ) # possible object
3197             )
3198             {
3199              
3200             # An identifier followed by '->' is not indirect object;
3201             # fixes b1175, b1176. Fix c257: Likewise for other tokens like
3202             # comma, semicolon, closing brace, and single space.
3203 104         384 my ( $next_nonblank_token, $i_next_uu ) =
3204             $self->find_next_noncomment_token( $i, $rtokens,
3205             $max_token_index );
3206 104 100       343 $type = 'Z' if ( !$Z_test_hash{$next_nonblank_token} );
3207             }
3208 4833         6561 return;
3209             } ## end sub do_DOLLAR_SIGN
3210              
3211             sub do_LEFT_PARENTHESIS {
3212              
3213 2423     2423 0 3332 my $self = shift;
3214              
3215             # '('
3216 2423         3116 ++$paren_depth;
3217              
3218             # variable to enable check for brace after closing paren (c230)
3219 2423         3473 my $want_brace = EMPTY_STRING;
3220              
3221 2423 100 66     7900 if ($want_paren) {
    100          
3222 289         462 $container_type = $want_paren;
3223 289         449 $want_brace = $want_paren;
3224 289         458 $want_paren = EMPTY_STRING;
3225             }
3226             elsif ( substr( $statement_type, 0, 3 ) eq 'sub'
3227             && $statement_type =~ /^sub\b/ )
3228             {
3229 20         38 $container_type = $statement_type;
3230             }
3231             else {
3232 2114         2871 $container_type = $last_nonblank_token;
3233              
3234             # We can check for a syntax error here of unexpected '(',
3235             # but this is going to get messy...
3236 2114 100 100     7134 if (
3237             $expecting == OPERATOR
3238              
3239             # Be sure this is not a method call of the form
3240             # &method(...), $method->(..), &{method}(...),
3241             # $ref[2](list) is ok & short for $ref[2]->(list)
3242             # NOTE: at present, braces in something like &{ xxx }
3243             # are not marked as a block, we might have a method call.
3244             # Added ')' to fix case c017, something like ()()()
3245             && $last_nonblank_token !~ /^(?:[\]\}\)\&]|\-\>)/
3246             )
3247             {
3248              
3249             # ref: camel 3 p 703.
3250 3 50       14 if ( $last_last_nonblank_token eq 'do' ) {
3251 0         0 $self->complain(
3252             "do SUBROUTINE is deprecated; consider & or -> notation\n"
3253             );
3254             }
3255             else {
3256              
3257             # if this is an empty list, (), then it is not an
3258             # error; for example, we might have a constant pi and
3259             # invoke it with pi() or just pi;
3260 3         15 my ( $next_nonblank_token, $i_next_uu ) =
3261             $self->find_next_nonblank_token( $i, $rtokens,
3262             $max_token_index );
3263              
3264             # Patch for c029: give up error check if
3265             # a side comment follows
3266 3 50 33     20 if ( $next_nonblank_token ne ')'
3267             && $next_nonblank_token ne '#' )
3268             {
3269 0         0 my $hint;
3270              
3271 0         0 $self->error_if_expecting_OPERATOR('(');
3272              
3273 0 0       0 if ( $last_nonblank_type eq 'C' ) {
    0          
3274 0         0 $hint =
3275             "$last_nonblank_token has a void prototype\n";
3276             }
3277             elsif ( $last_nonblank_type eq 'i' ) {
3278 0 0 0     0 if ( $i_tok > 0
3279             && $last_nonblank_token =~ /^\$/ )
3280             {
3281 0         0 $hint =
3282             "Do you mean '$last_nonblank_token->(' ?\n";
3283             }
3284             }
3285             else {
3286             ## no hint
3287             }
3288 0 0       0 if ($hint) {
3289 0         0 $self->interrupt_logfile();
3290 0         0 $self->warning($hint);
3291 0         0 $self->resume_logfile();
3292             }
3293             } ## end if ( $next_nonblank_token...
3294             } ## end else [ if ( $last_last_nonblank_token...
3295             } ## end if ( $expecting == OPERATOR...
3296             }
3297              
3298 2423         7611 ( $type_sequence, $indent_flag ) =
3299             $self->increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
3300              
3301             # propagate types down through nested parens
3302             # for example: the second paren in 'if ((' would be structural
3303             # since the first is.
3304              
3305 2423 100       4524 if ( $last_nonblank_token eq '(' ) {
3306 61         111 $type = $last_nonblank_type;
3307             }
3308              
3309             # We exclude parens as structural after a ',' because it
3310             # causes subtle problems with continuation indentation for
3311             # something like this, where the first 'or' will not get
3312             # indented.
3313             #
3314             # assert(
3315             # __LINE__,
3316             # ( not defined $check )
3317             # or ref $check
3318             # or $check eq "new"
3319             # or $check eq "old",
3320             # );
3321             #
3322             # Likewise, we exclude parens where a statement can start
3323             # because of problems with continuation indentation, like
3324             # these:
3325             #
3326             # ($firstline =~ /^#\!.*perl/)
3327             # and (print $File::Find::name, "\n")
3328             # and (return 1);
3329             #
3330             # (ref($usage_fref) =~ /CODE/)
3331             # ? &$usage_fref
3332             # : (&blast_usage, &blast_params, &blast_general_params);
3333              
3334             else {
3335 2362         3226 $type = '{';
3336             }
3337              
3338 2423 50       4557 if ( $last_nonblank_type eq ')' ) {
3339 0         0 $self->warning(
3340             "Syntax error? found token '$last_nonblank_type' then '('\n");
3341             }
3342              
3343             # git #105: Copy container type and want-brace flag at ') (';
3344             # propagate the container type onward so that any subsequent brace gets
3345             # correctly marked. I have implemented this as a general rule, which
3346             # should be safe, but if necessary it could be restricted to certain
3347             # container statement types such as 'for'.
3348 2423 100       4780 if ( $last_nonblank_token eq ')' ) {
3349 1         2 my $rvars = $rparen_vars->[$paren_depth];
3350 1 50       3 if ( defined($rvars) ) {
3351 1         2 $container_type = $rparen_type->[$paren_depth];
3352 1         2 ( my $type_lp_uu, $want_brace ) = @{$rvars};
  1         3  
3353             }
3354             }
3355              
3356 2423         4125 $rparen_type->[$paren_depth] = $container_type;
3357 2423         5795 $rparen_vars->[$paren_depth] = [ $type, $want_brace ];
3358 2423         4030 $rparen_semicolon_count->[$paren_depth] = 0;
3359              
3360 2423         3495 return;
3361              
3362             } ## end sub do_LEFT_PARENTHESIS
3363              
3364             sub do_RIGHT_PARENTHESIS {
3365              
3366 2423     2423 0 3216 my $self = shift;
3367              
3368             # ')'
3369 2423         6700 ( $type_sequence, $indent_flag ) =
3370             $self->decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
3371              
3372 2423         4147 my $rvars = $rparen_vars->[$paren_depth];
3373 2423 50       4755 if ( defined($rvars) ) {
3374 2423         2911 my ( $type_lp, $want_brace_uu ) = @{$rvars};
  2423         4642  
3375 2423 50 33     8328 if ( $type_lp && $type_lp eq '{' ) {
3376 2423         3702 $type = '}';
3377             }
3378             }
3379              
3380 2423         3698 $container_type = $rparen_type->[$paren_depth];
3381              
3382             # restore statement type as 'sub' at closing paren of a signature
3383             # so that a subsequent ':' is identified as an attribute
3384 2423 100 100     6635 if ( substr( $container_type, 0, 3 ) eq 'sub'
3385             && $container_type =~ /^sub\b/ )
3386             {
3387 30         60 $statement_type = $container_type;
3388             }
3389              
3390 2423 100       5947 if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) {
3391 79         141 my $num_sc = $rparen_semicolon_count->[$paren_depth];
3392 79 50 66     345 if ( $num_sc > 0 && $num_sc != 2 ) {
3393 0         0 $self->warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
3394             }
3395             }
3396              
3397 2423 50       4373 if ( $paren_depth > 0 ) { $paren_depth-- }
  2423         3007  
3398 2423         3680 return;
3399             } ## end sub do_RIGHT_PARENTHESIS
3400              
3401             sub do_COMMA {
3402              
3403 3688     3688 0 4602 my $self = shift;
3404              
3405             # ','
3406 3688 100 33     9756 if ( $last_nonblank_type eq COMMA ) {
    50          
3407 10         22 $self->complain("Repeated ','s \n");
3408             }
3409              
3410             # Note that we have to check both token and type here because a
3411             # comma following a qw list can have last token='(' but type = 'q'
3412             elsif ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' ) {
3413 0         0 $self->warning("Unexpected leading ',' after a '('\n");
3414             }
3415             else {
3416             ## Error check added in update c565, moved to end of $code loop.
3417             }
3418              
3419             # patch for operator_expected: note if we are in the list (use.t)
3420 3688 100       6242 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
  6         11  
3421 3688         4531 return;
3422              
3423             } ## end sub do_COMMA
3424              
3425             sub do_SEMICOLON {
3426              
3427 2890     2890 0 3869 my $self = shift;
3428              
3429             # ';'
3430 2890         3719 $context = UNKNOWN_CONTEXT;
3431 2890         3759 $statement_type = EMPTY_STRING;
3432 2890         3744 $want_paren = EMPTY_STRING;
3433              
3434 2890 100       7004 if ( $is_for_foreach{ $rparen_type->[$paren_depth] } )
3435             { # mark ; in for loop
3436              
3437             # Be careful: we do not want a semicolon such as the
3438             # following to be included:
3439             #
3440             # for (sort {strcoll($a,$b);} keys %investments) {
3441              
3442 35 100 66     197 if ( $brace_depth == $rdepth_array->[PAREN]->[BRACE]->[$paren_depth]
3443             && $square_bracket_depth ==
3444             $rdepth_array->[PAREN]->[SQUARE_BRACKET]->[$paren_depth] )
3445             {
3446              
3447 34         54 $type = 'f';
3448 34         52 $rparen_semicolon_count->[$paren_depth]++;
3449             }
3450             }
3451             else {
3452             ## Error check added in update c565, moved to end of $code loop.
3453             }
3454 2890         3868 return;
3455             } ## end sub do_SEMICOLON
3456              
3457             sub do_QUOTATION_MARK {
3458              
3459 1253     1253 0 1727 my $self = shift;
3460              
3461             # '"'
3462 1253 50       2474 $self->error_if_expecting_OPERATOR("String")
3463             if ( $expecting == OPERATOR );
3464 1253         1711 $in_quote = 1;
3465 1253         1639 $type = 'Q';
3466 1253         1601 $allowed_quote_modifiers = EMPTY_STRING;
3467 1253         1556 $quote_starting_tok = $tok;
3468 1253         1644 $quote_here_target_2 = undef;
3469 1253         1666 return;
3470             } ## end sub do_QUOTATION_MARK
3471              
3472             sub do_APOSTROPHE {
3473              
3474 1335     1335 0 1817 my $self = shift;
3475              
3476             # "'"
3477 1335 50       2555 $self->error_if_expecting_OPERATOR("String")
3478             if ( $expecting == OPERATOR );
3479 1335         1698 $in_quote = 1;
3480 1335         1725 $type = 'Q';
3481 1335         1636 $allowed_quote_modifiers = EMPTY_STRING;
3482 1335         1612 $quote_starting_tok = $tok;
3483 1335         1745 $quote_here_target_2 = undef;
3484 1335         1778 return;
3485             } ## end sub do_APOSTROPHE
3486              
3487             sub do_BACKTICK {
3488              
3489 0     0 0 0 my $self = shift;
3490              
3491             # '`'
3492 0 0       0 $self->error_if_expecting_OPERATOR("String")
3493             if ( $expecting == OPERATOR );
3494 0         0 $in_quote = 1;
3495 0         0 $type = 'Q';
3496 0         0 $allowed_quote_modifiers = EMPTY_STRING;
3497 0         0 $quote_starting_tok = $tok;
3498 0         0 $quote_here_target_2 = undef;
3499 0         0 return;
3500             } ## end sub do_BACKTICK
3501              
3502             sub do_SLASH {
3503              
3504 225     225 0 351 my $self = shift;
3505              
3506             # '/'
3507 225         380 my $is_pattern;
3508              
3509             # a pattern cannot follow certain keywords which take optional
3510             # arguments, like 'shift' and 'pop'. See also '?'.
3511 225 50 66     1033 if (
    50          
3512             $last_nonblank_type eq 'k'
3513             && $is_keyword_rejecting_slash_as_pattern_delimiter{
3514             $last_nonblank_token}
3515             )
3516             {
3517 0         0 $is_pattern = 0;
3518             }
3519             elsif ( $expecting == UNKNOWN ) { # indeterminate, must guess..
3520             (
3521 0         0 $is_pattern,
3522             my $msg,
3523              
3524             ) = $self->guess_if_pattern_or_division(
3525              
3526             $i,
3527             $rtokens,
3528             $rtoken_type,
3529             $rtoken_map,
3530             $max_token_index,
3531             );
3532              
3533 0 0       0 if ($msg) {
3534 0         0 if ( 0 && DEBUG_GUESS_MODE ) {
3535             $self->warning("DEBUG_GUESS_MODE message:\n$msg\n");
3536             }
3537 0         0 $self->write_diagnostics("DIVIDE:$msg\n");
3538 0         0 $self->write_logfile_entry($msg);
3539             }
3540             }
3541 225         460 else { $is_pattern = ( $expecting == TERM ) }
3542              
3543 225 100       508 if ($is_pattern) {
3544 88         134 $in_quote = 1;
3545 88         137 $type = 'Q';
3546 88         233 $allowed_quote_modifiers = $quote_modifiers{'m'};
3547 88         129 $quote_starting_tok = 'm';
3548 88         149 $quote_here_target_2 = undef;
3549             }
3550             else { # not a pattern; check for a /= token
3551              
3552 137 100       436 if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /=
3553 4         7 $i++;
3554 4         9 $tok = '/=';
3555 4         8 $type = $tok;
3556             }
3557              
3558             #DEBUG - collecting info on what tokens follow a divide
3559             # for development of guessing algorithm
3560             ## if (
3561             ## $self->is_possible_numerator( $i, $rtokens,
3562             ## $max_token_index ) < 0
3563             ## )
3564             ## {
3565             ## $self->write_diagnostics("DIVIDE? $input_line\n");
3566             ## }
3567             }
3568 225         381 return;
3569             } ## end sub do_SLASH
3570              
3571             sub do_LEFT_CURLY_BRACKET {
3572              
3573 2016     2016 0 2856 my $self = shift;
3574              
3575             # '{'
3576             # if we just saw a ')', we will label this block with
3577             # its type. We need to do this to allow sub
3578             # code_block_type to determine if this brace starts a
3579             # code block or anonymous hash. (The type of a paren
3580             # pair is the preceding token, such as 'if', 'else',
3581             # etc).
3582 2016         2890 $container_type = EMPTY_STRING;
3583              
3584             # ATTRS: for a '{' following an attribute list, reset
3585             # things to look like we just saw a sub name
3586             # Added 'package' (can be 'class') for --use-feature=class (rt145706)
3587 2016 100 100     15044 if ( substr( $statement_type, 0, 3 ) eq 'sub' ) {
    100 66        
    50 33        
    100          
    50          
3588 36         70 $last_nonblank_token = $statement_type;
3589 36         50 $last_nonblank_type = 'S'; # c250 change
3590 36         64 $statement_type = EMPTY_STRING;
3591             }
3592             elsif ( substr( $statement_type, 0, 7 ) eq 'package' ) {
3593 10         16 $last_nonblank_token = $statement_type;
3594 10         13 $last_nonblank_type = 'P'; # c250 change
3595 10         14 $statement_type = EMPTY_STRING;
3596             }
3597              
3598             # patch for SWITCH/CASE: hide these keywords from an immediately
3599             # following opening brace
3600             elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
3601             && $statement_type eq $last_nonblank_token )
3602             {
3603 0         0 $last_nonblank_token = ";";
3604             }
3605              
3606             elsif ( $last_nonblank_token eq ')' ) {
3607 295         629 $last_nonblank_token = $rparen_type->[ $paren_depth + 1 ];
3608              
3609             # defensive move in case of a nesting error (pbug.t)
3610             # in which this ')' had no previous '('
3611             # this nesting error will have been caught
3612 295 50       732 if ( !defined($last_nonblank_token) ) {
3613 0         0 $last_nonblank_token = 'if';
3614             }
3615              
3616             # Syntax check at '){'
3617 295 100       818 if ( $is_blocktype_with_paren{$last_nonblank_token} ) {
3618              
3619 279         538 my $rvars = $rparen_vars->[ $paren_depth + 1 ];
3620 279 50       689 if ( defined($rvars) ) {
3621 279         429 my ( $type_lp_uu, $want_brace ) = @{$rvars};
  279         596  
3622              
3623             # OLD: Now verify that this is not a trailing form
3624             # FIX for git #124: we have to skip this check because
3625             # the 'gather' keyword of List::Gather can operate on
3626             # a full statement, so it isn't possible to be sure
3627             # this is a trailing form.
3628 279         571 if ( 0 && !$want_brace ) {
3629             $self->warning(
3630             "syntax error at ') {', unexpected '{' after closing ')' of a trailing '$last_nonblank_token'\n"
3631             );
3632             }
3633             }
3634             }
3635             else {
3636 16 50       46 if ($rOpts_extended_syntax) {
3637              
3638             # we append a trailing () to mark this as an unknown
3639             # block type. This allows perltidy to format some
3640             # common extensions of perl syntax.
3641             # This is used by sub code_block_type
3642 16         63 $last_nonblank_token .= '()';
3643             }
3644             else {
3645 0         0 my $list =
3646             join( SPACE, sort keys %is_blocktype_with_paren );
3647 0         0 $self->warning(
3648             "syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
3649             );
3650             }
3651             }
3652             }
3653              
3654             # patch for paren-less for/foreach glitch, part 2.
3655             # see note below under 'qw'
3656             elsif ($last_nonblank_token eq 'qw'
3657             && $is_for_foreach{$want_paren} )
3658             {
3659 0         0 $last_nonblank_token = $want_paren;
3660 0 0       0 if ( $last_last_nonblank_token eq $want_paren ) {
3661 0         0 $self->warning(
3662             "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
3663             );
3664              
3665             }
3666 0         0 $want_paren = EMPTY_STRING;
3667             }
3668             else {
3669             # not special
3670             }
3671              
3672             # now identify which of the three possible types of
3673             # curly braces we have: hash index container, anonymous
3674             # hash reference, or code block.
3675              
3676             # Patch for Object::Pad "field $var BLOCK"
3677 2016 100 66     8111 if ( $statement_type eq 'field'
    100 66        
      33        
3678             && $last_last_nonblank_token eq 'field'
3679             && $last_nonblank_type eq 'i'
3680             && $last_last_nonblank_type eq 'k' )
3681             {
3682 8         9 $type = '{';
3683 8         10 $block_type = $statement_type;
3684             }
3685              
3686             # non-structural (hash index) curly brace pair
3687             # get marked 'L' and 'R'
3688             elsif ( is_non_structural_brace() ) {
3689 564         875 $type = 'L';
3690              
3691             # patch for SWITCH/CASE:
3692             # allow paren-less identifier after 'when'
3693             # if the brace is preceded by a space
3694 564 0 33     1506 if ( $statement_type eq 'when'
      33        
      0        
      0        
3695             && $last_nonblank_type eq 'i'
3696             && $last_last_nonblank_type eq 'k'
3697             && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
3698             {
3699 0         0 $type = '{';
3700 0         0 $block_type = $statement_type;
3701             }
3702             }
3703              
3704             # code and anonymous hash have the same type, '{', but are
3705             # distinguished by 'block_type',
3706             # which will be blank for an anonymous hash
3707             else {
3708 1444         4176 $block_type =
3709             $self->code_block_type( $i_tok, $rtokens, $rtoken_type,
3710             $max_token_index );
3711              
3712             # Is a new lexical sub looking for its block sequence number?
3713             # This is indicated with a special '911' signal.
3714 1444 0 66     5687 if ( $block_type
      33        
      33        
3715             && $ris_lexical_sub->{911}
3716             && $last_nonblank_type eq 'S'
3717             && substr( $block_type, 0, 3 ) eq 'sub' )
3718             {
3719 0         0 my ( $subname, $package ) = @{ $ris_lexical_sub->{911} };
  0         0  
3720 0 0 0     0 if ( $block_type =~ /^sub $subname/
3721             && $is_my_our_state{$last_last_nonblank_token} )
3722             {
3723 0         0 $ris_lexical_sub->{$subname}->{$package} =
3724             $next_sequence_number;
3725             }
3726              
3727             # Turn the signal off, even if we did not find the block being
3728             # sought - it may not exist if the sub statement was a simple
3729             # declaration without a block definition.
3730 0         0 $ris_lexical_sub->{911} = undef;
3731             }
3732              
3733             # patch to promote bareword type to function taking block
3734 1444 100 100     5065 if ( $block_type
      66        
3735             && $last_nonblank_type eq 'w'
3736             && $last_nonblank_i >= 0 )
3737             {
3738 36 50       105 if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
3739             $routput_token_type->[$last_nonblank_i] =
3740 36 100       131 $is_grep_alias{$block_type} ? 'k' : 'G';
3741             }
3742             }
3743              
3744             # patch for SWITCH/CASE: if we find a stray opening block brace
3745             # where we might accept a 'case' or 'when' block, then take it
3746 1444 100 100     4736 if ( $statement_type eq 'case'
3747             || $statement_type eq 'when' )
3748             {
3749 48 100 66     201 if ( !$block_type || $block_type eq '}' ) {
3750 4         6 $block_type = $statement_type;
3751             }
3752             }
3753             }
3754              
3755 2016         3852 $rbrace_type->[ ++$brace_depth ] = $block_type;
3756              
3757             # Patch for CLASS BLOCK definitions: do not update the package for the
3758             # current depth if this is a BLOCK type definition.
3759             # TODO: should make 'class' separate from 'package' and only do
3760             # this for 'class'
3761 2016 100       5519 $rbrace_package->[$brace_depth] = $current_package
3762             if ( substr( $block_type, 0, 8 ) ne 'package ' );
3763              
3764 2016         3342 $rbrace_structural_type->[$brace_depth] = $type;
3765 2016         3251 $rbrace_context->[$brace_depth] = $context;
3766 2016         5086 ( $type_sequence, $indent_flag ) =
3767             $self->increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
3768              
3769 2016         3212 return;
3770             } ## end sub do_LEFT_CURLY_BRACKET
3771              
3772             sub do_RIGHT_CURLY_BRACKET {
3773              
3774 2016     2016 0 2900 my $self = shift;
3775              
3776             # '}'
3777 2016         3388 $block_type = $rbrace_type->[$brace_depth];
3778 2016 100       4072 if ($block_type) { $statement_type = EMPTY_STRING }
  1115         1635  
3779 2016 100       3903 if ( defined( $rbrace_package->[$brace_depth] ) ) {
3780 2006         3290 $current_package = $rbrace_package->[$brace_depth];
3781             }
3782              
3783             # can happen on brace error (caught elsewhere)
3784             else {
3785             }
3786 2016         5579 ( $type_sequence, $indent_flag ) =
3787             $self->decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
3788              
3789 2016 100       4764 if ( $rbrace_structural_type->[$brace_depth] eq 'L' ) {
3790 564         831 $type = 'R';
3791             }
3792              
3793             # propagate type information for 'do' and 'eval' blocks, and also
3794             # for smartmatch operator. This is necessary to enable us to know
3795             # if an operator or term is expected next.
3796 2016 100       4724 if ( $is_block_operator{$block_type} ) {
3797 85         138 $tok = $block_type;
3798             }
3799              
3800             # pop non-indenting brace stack if sequence number matches
3801 2016 100 100     2451 if ( @{ $self->[_rnon_indenting_brace_stack_] }
  2016         5180  
3802             && $self->[_rnon_indenting_brace_stack_]->[-1] eq $type_sequence )
3803             {
3804 6         14 pop @{ $self->[_rnon_indenting_brace_stack_] };
  6         14  
3805             }
3806              
3807 2016         3220 $context = $rbrace_context->[$brace_depth];
3808 2016 50       3885 if ( $brace_depth > 0 ) { $brace_depth--; }
  2016         2693  
3809 2016         2816 return;
3810             } ## end sub do_RIGHT_CURLY_BRACKET
3811              
3812             sub do_AMPERSAND {
3813              
3814 126     126 0 190 my $self = shift;
3815              
3816             # '&' = maybe sub call? start looking
3817             # We have to check for sub call unless we are sure we
3818             # are expecting an operator. This example from s2p
3819             # got mistaken as a q operator in an early version:
3820             # print BODY &q(<<'EOT');
3821 126 100       306 if ( $expecting != OPERATOR ) {
3822              
3823             # But only look for a sub call if we are expecting a term or
3824             # if there is no existing space after the &.
3825             # For example we probably don't want & as sub call here:
3826             # Fcntl::S_IRUSR & $mode;
3827 106 100 66     450 if ( $expecting == TERM || $next_type ne 'b' ) {
3828 103         271 $self->scan_simple_identifier();
3829             }
3830             }
3831             else {
3832             }
3833 126         195 return;
3834             } ## end sub do_AMPERSAND
3835              
3836             sub do_LESS_THAN_SIGN {
3837              
3838 33     33 0 69 my $self = shift;
3839              
3840             # '<' - angle operator or less than?
3841 33 100       109 if ( $expecting != OPERATOR ) {
3842 8         39 ( $i, $type ) = $self->find_angle_operator_termination(
3843              
3844             $input_line,
3845             $i,
3846             $rtoken_map,
3847             $expecting,
3848             $max_token_index,
3849             );
3850 8         15 if ( DEBUG_GUESS_MODE && $expecting == UNKNOWN ) {
3851             my $msg = "guessing that '<' is ";
3852             $msg .=
3853             $type eq 'Q' ? "an angle operator" : "a less than symbol";
3854             if ( $type eq 'Q' ) {
3855             $self->warning("DEBUG_GUESS_MODE message:\n$msg\n");
3856             }
3857             }
3858             }
3859             else {
3860             }
3861 33         64 return;
3862             } ## end sub do_LESS_THAN_SIGN
3863              
3864             sub do_QUESTION_MARK {
3865              
3866 193     193 0 340 my $self = shift;
3867              
3868             # '?' = conditional or starting pattern?
3869 193         335 my $is_pattern;
3870              
3871             # Patch for rt #126965
3872             # a pattern cannot follow certain keywords which take optional
3873             # arguments, like 'shift' and 'pop'. See also '/'.
3874 193 100 66     1474 if (
    100          
    100          
3875             $last_nonblank_type eq 'k'
3876             && $is_keyword_rejecting_question_as_pattern_delimiter{
3877             $last_nonblank_token}
3878             )
3879             {
3880 1         2 $is_pattern = 0;
3881             }
3882              
3883             # patch for RT#131288, user constant function without prototype
3884             # last type is 'U' followed by ?.
3885             elsif ( $last_nonblank_type =~ /^[FUY]$/ ) {
3886 1         2 $is_pattern = 0;
3887             }
3888             elsif ( $expecting == UNKNOWN ) {
3889              
3890             # In older versions of Perl, a bare ? can be a pattern
3891             # delimiter. In perl version 5.22 this was
3892             # dropped, but we have to support it in order to format
3893             # older programs. See:
3894             ## https://perl.developpez.com/documentations/en/5.22.0/perl5211delta.html
3895             # For example, the following line worked
3896             # at one time:
3897             # ?(.*)? && (print $1,"\n");
3898             # In current versions it would have to be written with slashes:
3899             # /(.*)/ && (print $1,"\n");
3900             (
3901 12         60 $is_pattern,
3902             my $msg,
3903              
3904             ) = $self->guess_if_pattern_or_conditional(
3905              
3906             $i,
3907             $rtokens,
3908             $rtoken_type,
3909             $rtoken_map,
3910             $max_token_index,
3911             );
3912              
3913 12 50       39 if ($msg) {
3914 12         54 $self->write_logfile_entry($msg);
3915 12         24 if ( DEBUG_GUESS_MODE && $is_pattern ) {
3916             $self->warning("DEBUG_GUESS_MODE message:\n$msg\n");
3917             }
3918             }
3919             }
3920 179         430 else { $is_pattern = ( $expecting == TERM ) }
3921              
3922 193 50       488 if ($is_pattern) {
3923 0         0 $in_quote = 1;
3924 0         0 $type = 'Q';
3925 0         0 $allowed_quote_modifiers = $quote_modifiers{'m'};
3926 0         0 $quote_starting_tok = 'm';
3927 0         0 $quote_here_target_2 = undef;
3928             }
3929             else {
3930 193         620 ( $type_sequence, $indent_flag ) =
3931             $self->increase_nesting_depth( QUESTION_COLON,
3932             $rtoken_map->[$i_tok] );
3933             }
3934 193         355 return;
3935             } ## end sub do_QUESTION_MARK
3936              
3937             sub do_STAR {
3938              
3939 254     254 0 395 my $self = shift;
3940              
3941             # '*' = typeglob, or multiply?
3942              
3943             # Guess based on next token. See also c036, and versions before 2026-
3944 254 100 100     877 if ( $expecting == UNKNOWN && $next_type ne 'b' ) {
3945              
3946             # Check for a normal glob, like *OUT:
3947 6 50       32 if ( $next_tok =~ /^[_A-Za-z]/ ) {
3948 0         0 $expecting = TERM;
3949             }
3950             else {
3951             ## Could check for glob of a simple punctuation variable here
3952             ## by looking ahead one more character
3953             }
3954             }
3955              
3956 254 100       688 if ( $expecting == TERM ) {
3957 21         72 $self->scan_simple_identifier();
3958             }
3959             else {
3960              
3961 233 100       844 if ( $rtokens->[ $i + 1 ] eq '=' ) {
    100          
3962 2         3 $tok = '*=';
3963 2         4 $type = $tok;
3964 2         4 $i++;
3965             }
3966             elsif ( $rtokens->[ $i + 1 ] eq '*' ) {
3967 42         72 $tok = '**';
3968 42         71 $type = $tok;
3969 42         67 $i++;
3970 42 100       144 if ( $rtokens->[ $i + 1 ] eq '=' ) {
3971 2         3 $tok = '**=';
3972 2         3 $type = $tok;
3973 2         4 $i++;
3974             }
3975             }
3976             else {
3977             ## not multiple characters
3978             }
3979             }
3980 254         421 return;
3981             } ## end sub do_STAR
3982              
3983             sub do_DOT {
3984              
3985 168     168 0 252 my $self = shift;
3986              
3987             # '.' = what kind of . ?
3988 168 100       386 if ( $expecting != OPERATOR ) {
3989 10         37 $self->scan_number();
3990             }
3991 168         1463 return;
3992             } ## end sub do_DOT
3993              
3994             sub do_COLON {
3995              
3996 285     285 0 516 my $self = shift;
3997              
3998             # ':' = label, ternary, attribute, ?
3999              
4000             # if this is the first nonblank character, call it a label
4001             # since perl seems to just swallow it
4002 285 50 66     3403 if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
    100 66        
    100 66        
    100 66        
    100          
    100          
4003 0         0 $type = 'J';
4004             }
4005              
4006             # ATTRS: check for a ':' which introduces an attribute list
4007             # either after a 'sub' keyword or within a paren list
4008             # Added 'package' (can be 'class') for --use-feature=class (rt145706)
4009             elsif ( $statement_type =~ /^(sub|package)\b/ ) {
4010 22         41 $type = 'A';
4011 22         36 $self->[_in_attribute_list_] = 1;
4012             }
4013              
4014             # Within a signature, unless we are in a ternary. For example,
4015             # from 't/filter_example.t':
4016             # method foo4 ( $class: $bar ) { $class->bar($bar) }
4017             elsif ( $rparen_type->[$paren_depth] =~ /^sub\b/
4018             && !is_balanced_closing_container(QUESTION_COLON) )
4019             {
4020 1         2 $type = 'A';
4021 1         1 $self->[_in_attribute_list_] = 1;
4022             }
4023              
4024             # check for scalar attribute, such as
4025             # my $foo : shared = 1;
4026             elsif ($is_my_our_state{$statement_type}
4027             && $rcurrent_depth->[QUESTION_COLON] == 0 )
4028             {
4029 17         31 $type = 'A';
4030 17         27 $self->[_in_attribute_list_] = 1;
4031             }
4032              
4033             # Look for Switch::Plain syntax if an error would otherwise occur
4034             # here. Note that we do not need to check if the extended syntax
4035             # flag is set because otherwise an error would occur, and we would
4036             # then have to output a message telling the user to set the
4037             # extended syntax flag to avoid the error.
4038             # case 1: {
4039             # default: {
4040             # default:
4041             # Note that the line 'default:' will be parsed as a label elsewhere.
4042             elsif ( $is_case_default{$statement_type}
4043             && !is_balanced_closing_container(QUESTION_COLON) )
4044             {
4045             # mark it as a perltidy label type
4046 46         80 $type = 'J';
4047             }
4048              
4049             # mark colon as attribute if an error would occur otherwise; git #162
4050             elsif ( !$rcurrent_depth->[QUESTION_COLON] ) {
4051 6         9 $type = 'A';
4052 6         9 $self->[_in_attribute_list_] = 1;
4053             }
4054              
4055             # otherwise, it should be part of a ?/: operator
4056             else {
4057 193         657 ( $type_sequence, $indent_flag ) =
4058             $self->decrease_nesting_depth( QUESTION_COLON,
4059             $rtoken_map->[$i_tok] );
4060 193 50       561 if ( $last_nonblank_token eq '?' ) {
4061 0         0 $self->warning("Syntax error near ? :\n");
4062             }
4063             }
4064 285         486 return;
4065             } ## end sub do_COLON
4066              
4067             sub do_PLUS_SIGN {
4068              
4069 240     240 0 399 my $self = shift;
4070              
4071             # '+' = what kind of plus?
4072 240 100       988 if ( $expecting == TERM ) {
    100          
4073 14         53 my $number = $self->scan_number_fast();
4074              
4075             # unary plus is safest assumption if not a number
4076 14 50       41 if ( !defined($number) ) { $type = 'p'; }
  14         25  
4077             }
4078             elsif ( $expecting == OPERATOR ) {
4079             }
4080             else {
4081 2 50 33     7 if ( $next_type eq 'w' || $next_type eq '{' ) { $type = 'p' }
  2         3  
4082             }
4083 240         386 return;
4084             } ## end sub do_PLUS_SIGN
4085              
4086             sub do_AT_SIGN {
4087              
4088 524     524 0 892 my $self = shift;
4089              
4090             # '@' = sigil for array?
4091 524 50       1362 $self->error_if_expecting_OPERATOR("Array")
4092             if ( $expecting == OPERATOR );
4093 524         1771 $self->scan_simple_identifier();
4094 524         747 return;
4095             } ## end sub do_AT_SIGN
4096              
4097             sub do_PERCENT_SIGN {
4098              
4099 214     214 0 352 my $self = shift;
4100              
4101             # '%' = hash or modulo?
4102             # first guess is hash if no following blank or paren
4103 214 50       588 if ( $expecting == UNKNOWN ) {
4104 0 0 0     0 if ( $next_type ne 'b' && $next_type ne '(' ) {
4105 0         0 $expecting = TERM;
4106             }
4107             }
4108 214 100       537 if ( $expecting == TERM ) {
4109 204         662 $self->scan_simple_identifier();
4110             }
4111 214         341 return;
4112             } ## end sub do_PERCENT_SIGN
4113              
4114             sub do_LEFT_SQUARE_BRACKET {
4115              
4116 814     814 0 1111 my $self = shift;
4117              
4118             # '['
4119 814         1539 $rsquare_bracket_type->[ ++$square_bracket_depth ] =
4120             $last_nonblank_token;
4121 814         2200 ( $type_sequence, $indent_flag ) =
4122             $self->increase_nesting_depth( SQUARE_BRACKET,
4123             $rtoken_map->[$i_tok] );
4124              
4125             # It may seem odd, but structural square brackets have
4126             # type '{' and '}'. This simplifies the indentation logic.
4127 814 100       1868 if ( !is_non_structural_brace() ) {
4128 374         597 $type = '{';
4129             }
4130 814         1475 $rsquare_bracket_structural_type->[$square_bracket_depth] = $type;
4131 814         1188 return;
4132             } ## end sub do_LEFT_SQUARE_BRACKET
4133              
4134             sub do_RIGHT_SQUARE_BRACKET {
4135              
4136 814     814 0 1147 my $self = shift;
4137              
4138             # ']'
4139 814         2251 ( $type_sequence, $indent_flag ) =
4140             $self->decrease_nesting_depth( SQUARE_BRACKET,
4141             $rtoken_map->[$i_tok] );
4142              
4143 814 100       1961 if ( $rsquare_bracket_structural_type->[$square_bracket_depth] eq '{' )
4144             {
4145 374         552 $type = '}';
4146             }
4147              
4148             # propagate type information for smartmatch operator. This is
4149             # necessary to enable us to know if an operator or term is expected
4150             # next.
4151 814 100       1828 if ( $rsquare_bracket_type->[$square_bracket_depth] eq '~~' ) {
4152 20         32 $tok = $rsquare_bracket_type->[$square_bracket_depth];
4153             }
4154              
4155 814 50       1695 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
  814         1099  
4156 814         1191 return;
4157             } ## end sub do_RIGHT_SQUARE_BRACKET
4158              
4159             sub do_MINUS_SIGN {
4160              
4161 491     491 0 730 my $self = shift;
4162              
4163             # '-' = what kind of minus?
4164 491 100 100     2744 if ( ( $expecting != OPERATOR )
    100          
    100          
4165             && $is_file_test_operator{$next_tok} )
4166             {
4167 12         61 my ( $next_nonblank_token, $i_next_uu ) =
4168             $self->find_next_nonblank_token( $i + 1, $rtokens,
4169             $max_token_index );
4170              
4171             # check for a quoted word like "-w=>xx";
4172             # it is sufficient to just check for a following '='
4173 12 50       39 if ( $next_nonblank_token eq '=' ) {
4174 0         0 $type = 'm';
4175             }
4176             else {
4177 12         20 $i++;
4178 12         25 $tok .= $next_tok;
4179 12         24 $type = 'F';
4180             }
4181             }
4182             elsif ( $expecting == TERM ) {
4183 378         1020 my $number = $self->scan_number_fast();
4184              
4185             # maybe part of bareword token? unary is safest
4186 378 100       799 if ( !defined($number) ) { $type = 'm'; }
  288         402  
4187              
4188             }
4189             elsif ( $expecting == OPERATOR ) {
4190             }
4191             else {
4192 4 50       13 if ( $next_type eq 'w' ) {
4193 4         8 $type = 'm';
4194             }
4195             }
4196 491         732 return;
4197             } ## end sub do_MINUS_SIGN
4198              
4199             sub do_CARAT_SIGN {
4200              
4201 12     12 0 20 my $self = shift;
4202              
4203             # '^'
4204             # check for special variables like ${^WARNING_BITS}
4205 12 100       27 if ( $expecting == TERM ) {
4206              
4207 5 50 33     45 if ( $last_nonblank_token eq '{'
      33        
4208             && ( $next_tok !~ /^\d/ )
4209             && ( $next_tok =~ /^\w/ ) )
4210             {
4211              
4212 5 100       14 if ( $next_tok eq 'W' ) {
4213 1         2 $self->[_saw_perl_dash_w_] = 1;
4214             }
4215 5         9 $tok = $tok . $next_tok;
4216 5         10 $i = $i + 1;
4217 5         8 $type = 'w';
4218              
4219             # Optional coding to try to catch syntax errors. This can
4220             # be removed if it ever causes incorrect warning messages.
4221             # The '{^' should be preceded by either by a type or '$#'
4222             # Examples:
4223             # $#{^CAPTURE} ok
4224             # *${^LAST_FH}{NAME} ok
4225             # @{^HOWDY} ok
4226             # $hash{^HOWDY} error
4227              
4228             # Note that a type sigil '$' may be tokenized as 'Z'
4229             # after something like 'print', so allow type 'Z'
4230 5 0 33     15 if ( $last_last_nonblank_type ne 't'
      33        
4231             && $last_last_nonblank_type ne 'Z'
4232             && $last_last_nonblank_token ne '$#' )
4233             {
4234 0         0 $self->warning("Possible syntax error near '{^'\n");
4235             }
4236             }
4237             }
4238 12         20 return;
4239             } ## end sub do_CARAT_SIGN
4240              
4241             sub do_DOUBLE_COLON {
4242              
4243 9     9 0 12 my $self = shift;
4244              
4245             # '::' = probably a sub call
4246 9         20 $self->scan_bare_identifier();
4247 9         13 return;
4248             } ## end sub do_DOUBLE_COLON
4249              
4250             sub do_LEFT_SHIFT {
4251              
4252 7     7 0 18 my $self = shift;
4253              
4254             # '<<' = maybe a here-doc?
4255 7 50       117 if ( $expecting != OPERATOR ) {
4256             my (
4257 7         41 $found_target,
4258             $here_doc_target,
4259             $here_quote_character,
4260             $i_return,
4261             $saw_error,
4262              
4263             ) = $self->find_here_doc(
4264              
4265             $expecting,
4266             $i,
4267             $rtokens,
4268             $rtoken_type,
4269             $rtoken_map,
4270             $max_token_index,
4271             );
4272 7         16 $i = $i_return;
4273              
4274 7 50       20 if ($found_target) {
    0          
4275 7         14 push @{$rhere_target_list},
  7         21  
4276             [ $here_doc_target, $here_quote_character ];
4277 7         15 $type = 'h';
4278 7 50       71 if ( length($here_doc_target) > 80 ) {
    50          
    100          
4279 0         0 my $truncated = substr( $here_doc_target, 0, 80 );
4280 0         0 $self->complain("Long here-target: '$truncated' ...\n");
4281             }
4282             elsif ( !$here_doc_target ) {
4283 0 0       0 $self->warning(
4284             'Use of bare << to mean <<"" is deprecated' . "\n" )
4285             if ( !$here_quote_character );
4286             }
4287             elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
4288 2         9 $self->complain(
4289             "Unconventional here-target: '$here_doc_target'\n");
4290             }
4291             else {
4292             # nothing to complain about
4293             }
4294             }
4295             elsif ( $expecting == TERM ) {
4296 0 0       0 if ( !$saw_error ) {
4297              
4298             # shouldn't happen..arriving here implies an error in
4299             # the logic in sub 'find_here_doc'
4300 0         0 if (DEVEL_MODE) {
4301             Fault(<<EOM);
4302             Program bug; didn't find here doc target
4303             EOM
4304             }
4305             $self->warning(
4306 0         0 "Possible program error: didn't find here doc target\n"
4307             );
4308 0         0 $self->report_definite_bug();
4309             }
4310             }
4311              
4312             # target not found, expecting == UNKNOWN
4313             else {
4314             # assume it is a shift
4315             }
4316             }
4317             else {
4318             }
4319 7         12 return;
4320             } ## end sub do_LEFT_SHIFT
4321              
4322             sub do_NEW_HERE_DOC {
4323              
4324             # '<<~' = a here-doc, new type added in v26
4325              
4326 2     2 0 5 my $self = shift;
4327              
4328             return
4329 2 50       6 if ( $i >= $max_token_index ); # here-doc not possible if end of line
4330 2 50       7 if ( $expecting != OPERATOR ) {
4331             my (
4332 2         12 $found_target,
4333             $here_doc_target,
4334             $here_quote_character,
4335             $i_return,
4336             $saw_error,
4337              
4338             ) = $self->find_here_doc(
4339              
4340             $expecting,
4341             $i,
4342             $rtokens,
4343             $rtoken_type,
4344             $rtoken_map,
4345             $max_token_index,
4346             );
4347 2         4 $i = $i_return;
4348              
4349 2 50       6 if ($found_target) {
    0          
4350              
4351 2 50       14 if ( length($here_doc_target) > 80 ) {
    50          
4352 0         0 my $truncated = substr( $here_doc_target, 0, 80 );
4353 0         0 $self->complain("Long here-target: '$truncated' ...\n");
4354             }
4355             elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
4356 0         0 $self->complain(
4357             "Unconventional here-target: '$here_doc_target'\n");
4358             }
4359             else {
4360             # nothing to complain about
4361             }
4362              
4363             # Note that we put a leading space on the here quote
4364             # character indicate that it may be preceded by spaces
4365 2         5 $here_quote_character = SPACE . $here_quote_character;
4366 2         4 push @{$rhere_target_list},
  2         7  
4367             [ $here_doc_target, $here_quote_character ];
4368 2         4 $type = 'h';
4369             }
4370              
4371             # target not found ..
4372             elsif ( $expecting == TERM ) {
4373 0 0       0 if ( !$saw_error ) {
4374              
4375             # shouldn't happen..arriving here implies an error in
4376             # the logic in sub 'find_here_doc'
4377 0         0 if (DEVEL_MODE) {
4378             Fault(<<EOM);
4379             Program bug; didn't find here doc target
4380             EOM
4381             }
4382             $self->warning(
4383 0         0 "Possible program error: didn't find here doc target\n"
4384             );
4385 0         0 $self->report_definite_bug();
4386             }
4387             }
4388              
4389             # Target not found, expecting==UNKNOWN
4390             else {
4391 0         0 $self->warning("didn't find here doc target after '<<~'\n");
4392             }
4393             }
4394             else {
4395 0         0 $self->error_if_expecting_OPERATOR();
4396             }
4397 2         22 return;
4398             } ## end sub do_NEW_HERE_DOC
4399              
4400             sub do_POINTER {
4401              
4402             # '->'
4403 1173     1173 0 1577 return;
4404             }
4405              
4406             sub do_PLUS_PLUS {
4407              
4408 50     50 0 102 my $self = shift;
4409              
4410             # '++'
4411             # type = 'pp' for pre-increment, '++' for post-increment
4412 50 100       170 if ( $expecting == OPERATOR ) { $type = '++' }
  41 100       83  
4413 7         20 elsif ( $expecting == TERM ) { $type = 'pp' }
4414              
4415             # handle ( $expecting == UNKNOWN )
4416             else {
4417              
4418             # look ahead ..
4419 2         5 my ( $next_nonblank_token, $i_next ) =
4420             $self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
4421              
4422             # Fix for c042: look past a side comment
4423 2 50       7 if ( $next_nonblank_token eq '#' ) {
4424 0         0 ( $next_nonblank_token, $i_next ) =
4425             $self->find_next_nonblank_token( $max_token_index,
4426             $rtokens, $max_token_index );
4427             }
4428              
4429 2 50       5 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
  0         0  
4430             }
4431 50         116 return;
4432             } ## end sub do_PLUS_PLUS
4433              
4434             sub do_FAT_COMMA {
4435              
4436 1103     1103 0 1521 my $self = shift;
4437              
4438             # '=>'
4439 1103 50       2141 if ( $last_nonblank_type eq $tok ) {
4440 0         0 $self->complain("Repeated '=>'s \n");
4441             }
4442              
4443             # patch for operator_expected: note if we are in the list (use.t)
4444             # TODO: make version numbers a new token type
4445 1103 100       2135 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
  18         32  
4446 1103         1488 return;
4447             } ## end sub do_FAT_COMMA
4448              
4449             sub do_MINUS_MINUS {
4450              
4451 2     2 0 4 my $self = shift;
4452              
4453             # '--'
4454             # type = 'mm' for pre-decrement, '--' for post-decrement
4455              
4456 2 50       10 if ( $expecting == OPERATOR ) { $type = '--' }
  0 50       0  
4457 2         6 elsif ( $expecting == TERM ) { $type = 'mm' }
4458              
4459             # handle ( $expecting == UNKNOWN )
4460             else {
4461              
4462             # look ahead ..
4463 0         0 my ( $next_nonblank_token, $i_next ) =
4464             $self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
4465              
4466             # Fix for c042: look past a side comment
4467 0 0       0 if ( $next_nonblank_token eq '#' ) {
4468 0         0 ( $next_nonblank_token, $i_next ) =
4469             $self->find_next_nonblank_token( $max_token_index,
4470             $rtokens, $max_token_index );
4471             }
4472              
4473 0 0       0 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
  0         0  
4474             }
4475              
4476 2         4 return;
4477             } ## end sub do_MINUS_MINUS
4478              
4479             sub do_DIGITS {
4480              
4481 2508     2508 0 3258 my $self = shift;
4482              
4483             # 'd' = string of digits
4484 2508 50       4390 $self->error_if_expecting_OPERATOR("Number")
4485             if ( $expecting == OPERATOR );
4486              
4487 2508         5564 my $number = $self->scan_number_fast();
4488 2508 50       4868 if ( !defined($number) ) {
4489              
4490             # shouldn't happen - we should always get a number
4491 0         0 if (DEVEL_MODE) {
4492             Fault(<<EOM);
4493             non-number beginning with digit--program bug
4494             EOM
4495             }
4496             $self->warning(
4497 0         0 "Unexpected error condition: non-number beginning with digit\n"
4498             );
4499 0         0 $self->report_definite_bug();
4500             }
4501 2508         3327 return;
4502             } ## end sub do_DIGITS
4503              
4504             sub do_ATTRIBUTE_LIST {
4505              
4506 45     45 0 105 my ( $self, $next_nonblank_token ) = @_;
4507              
4508             # Called at a bareword encountered while in an attribute list
4509             # returns 'is_attribute':
4510             # true if attribute found
4511             # false if an attribute (continue parsing bareword)
4512              
4513             # treat bare word followed by open paren like qw(
4514 45 100       112 if ( $next_nonblank_token eq '(' ) {
4515              
4516             # For something like:
4517             # : prototype($$)
4518             # we should let do_scan_sub see it so that it can see
4519             # the prototype. All other attributes get parsed as a
4520             # quoted string.
4521 20 100       328 if ( $tok eq 'prototype' ) {
4522 2         4 $id_scan_state = 'prototype';
4523              
4524             # start just after the word 'prototype'
4525 2         4 my $i_beg = $i + 1;
4526 2         18 ( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub(
4527             {
4528             input_line => $input_line,
4529             i => $i,
4530             i_beg => $i_beg,
4531             tok => $tok,
4532             type => $type,
4533             rtokens => $rtokens,
4534             rtoken_map => $rtoken_map,
4535             id_scan_state => $id_scan_state,
4536             max_token_index => $max_token_index,
4537             }
4538             );
4539              
4540             # If successful, mark as type 'q' to be consistent
4541             # with other attributes. Type 'w' would also work.
4542 2 50       12 if ( $i > $i_beg ) {
4543 2         4 $type = 'q';
4544 2         5 return 1;
4545             }
4546              
4547             # If not successful, continue and parse as a quote.
4548             }
4549              
4550             # All other attribute lists must be parsed as quotes
4551             # (see 'signatures.t' for good examples)
4552 18         39 $in_quote = $quote_items{'q'};
4553 18         34 $allowed_quote_modifiers = $quote_modifiers{'q'};
4554 18         26 $quote_starting_tok = 'q';
4555 18         29 $type = 'q';
4556 18         27 $quote_type = 'q';
4557 18         27 $quote_here_target_2 = undef;
4558 18         32 return 1;
4559             }
4560              
4561             # handle bareword not followed by open paren
4562             else {
4563 25         39 $type = 'w';
4564 25         50 return 1;
4565             }
4566              
4567             # attribute not found
4568 0         0 return;
4569             } ## end sub do_ATTRIBUTE_LIST
4570              
4571             sub do_X_OPERATOR {
4572              
4573 17     17 0 37 my $self = shift;
4574              
4575             # We are at a pretoken starting with 'x' where an operator is expected
4576              
4577 17 100       70 if ( $tok eq 'x' ) {
4578 15 50       55 if ( $rtokens->[ $i + 1 ] eq '=' ) { # x=
4579 0         0 $tok = 'x=';
4580 0         0 $type = $tok;
4581 0         0 $i++;
4582             }
4583             else {
4584 15         31 $type = 'x';
4585             }
4586             }
4587             else {
4588              
4589             # Split a pretoken like 'x10' into 'x' and '10'.
4590             # Note: In previous versions of perltidy it was marked
4591             # as a number, $type = 'n', and fixed downstream by the
4592             # Formatter.
4593 2         3 $type = 'n';
4594 2 50       6 if ( $self->split_pretoken(1) ) {
4595 2         3 $type = 'x';
4596 2         3 $tok = 'x';
4597             }
4598             }
4599 17         39 return;
4600             } ## end sub do_X_OPERATOR
4601              
4602             sub do_USE_CONSTANT {
4603              
4604 16     16 0 29 my $self = shift;
4605              
4606             # We just saw 'use constant' and must look ahead
4607              
4608 16         59 $self->scan_bare_identifier();
4609 16         63 my ( $next_nonblank_tok2, $i_next2_uu ) =
4610             $self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
4611              
4612 16 50       47 if ($next_nonblank_tok2) {
4613              
4614 16 100       53 if ( $is_keyword{$next_nonblank_tok2} ) {
4615              
4616             # Assume qw is used as a quote and okay, as in:
4617             # use constant qw{ DEBUG 0 };
4618             # Not worth trying to parse for just a warning
4619              
4620             # NOTE: This warning is deactivated because recent
4621             # versions of perl do not complain here, but
4622             # the coding is retained for reference.
4623 1         4 if ( 0 && $next_nonblank_tok2 ne 'qw' ) {
4624             $self->warning(
4625             "Attempting to define constant '$next_nonblank_tok2' which is a perl keyword\n"
4626             );
4627             }
4628             }
4629              
4630             else {
4631 15         42 $ris_constant->{$current_package}->{$next_nonblank_tok2} = 1;
4632             }
4633             }
4634 16         30 return;
4635             } ## end sub do_USE_CONSTANT
4636              
4637             sub do_KEYWORD {
4638              
4639 3060     3060 0 4080 my $self = shift;
4640              
4641             # found a keyword - set any associated flags
4642 3060         4318 $type = 'k';
4643              
4644             # Since for and foreach may not be followed immediately
4645             # by an opening paren, we have to remember which keyword
4646             # is associated with the next '('
4647             # Previously, before update c230 : if ( $is_for_foreach{$tok} ) {
4648             ##(if elsif unless while until for foreach switch case given when catch)
4649 3060 100       6569 if ( $is_blocktype_with_paren{$tok} ) {
4650 495 100       1385 if ( new_statement_ok() ) {
4651 356         664 $want_paren = $tok;
4652             }
4653             }
4654              
4655             # Catch some unexpected keyword errors; c517.
4656             # Note that we only check keywords for OPERATOR expected, not TERM.
4657             # This is because a large number of keywords which normally expect
4658             # a TERM will also take an OPERATOR.
4659 3060 50 66     6947 if ( $expecting == OPERATOR && $is_TERM_keyword{$tok} ) {
4660 0         0 $self->error_if_expecting_OPERATOR();
4661             }
4662              
4663             # recognize 'use' statements, which are special
4664 3060 100 100     17768 if ( $is_use_require{$tok} ) {
    100 100        
    100          
    100          
    100          
    100          
4665 178         298 $statement_type = $tok;
4666             }
4667              
4668             # remember my and our to check for trailing ": shared"
4669             elsif ( $is_my_our_state{$tok} ) {
4670 747         1267 $statement_type = $tok;
4671             }
4672              
4673             # Check for unexpected 'elsif'
4674             elsif ( $tok eq 'elsif' ) {
4675 33 0 0     422 if (
      33        
4676              
4677             !$is_if_elsif_unless{$last_nonblank_block_type}
4678              
4679             # Allow isolated blocks of any kind during editing
4680             # by checking for a last noblank token of ';' and no
4681             # sequence numbers having been issued (c272). The check
4682             # on sequence number is not perfect but good enough.
4683             && !(
4684             $last_nonblank_token eq ';'
4685             && $next_sequence_number == SEQ_ROOT + 1
4686             )
4687              
4688             )
4689             {
4690             ## prevent formatting and avoid instability (b1553)
4691 0         0 $self->warning_do_not_format(
4692             "expecting '$tok' to follow one of 'if|elsif|unless'\n");
4693             }
4694             }
4695              
4696             # Check for unexpected 'else'
4697             elsif ( $tok eq 'else' ) {
4698              
4699             # patched for SWITCH/CASE
4700 49 0 66     231 if (
      0        
      33        
4701              
4702             !$is_if_elsif_unless_case_when{$last_nonblank_block_type}
4703              
4704             # patch to avoid an unwanted error message for
4705             # the case of a parenless 'case' (RT 105484):
4706             # switch ( 1 ) { case x { 2 } else { } }
4707             && !$is_if_elsif_unless_case_when{$statement_type}
4708              
4709             # Allow isolated blocks of any kind during editing (c272)
4710             && !(
4711             $last_nonblank_token eq ';'
4712             && $next_sequence_number == SEQ_ROOT + 1
4713             )
4714              
4715             )
4716             {
4717             ## prevent formatting and avoid instability (b1553)
4718 0         0 $self->warning_do_not_format(
4719             "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
4720             );
4721             }
4722             }
4723              
4724             # patch for SWITCH/CASE if 'case' and 'when are
4725             # treated as keywords. Also 'default' for Switch::Plain
4726             elsif ($tok eq 'when'
4727             || $tok eq 'case'
4728             || $tok eq 'default' )
4729             {
4730 70         109 $statement_type = $tok; # next '{' is block
4731             }
4732              
4733             # feature 'err' was removed in Perl 5.10. So mark this as
4734             # a bareword unless an operator is expected (see c158).
4735             elsif ( $tok eq 'err' ) {
4736 1 50       3 if ( $expecting != OPERATOR ) { $type = 'w' }
  1         2  
4737             }
4738             else {
4739             ## no special treatment needed
4740             }
4741              
4742 3060         5052 return;
4743             } ## end sub do_KEYWORD
4744              
4745             sub do_QUOTE_OPERATOR {
4746              
4747 232     232 0 365 my $self = shift;
4748              
4749             # We have arrived at a quote operator: q, qq, qw, qx, qr, s, y, tr, m
4750              
4751 232 50       596 if ( $expecting == OPERATOR ) {
4752              
4753             # Be careful not to call an error for a qw quote
4754             # where a parenthesized list is allowed. For example,
4755             # it could also be a for/foreach construct such as
4756             #
4757             # foreach my $key qw\Uno Due Tres Quadro\ {
4758             # print "Set $key\n";
4759             # }
4760             #
4761              
4762             # Or it could be a function call.
4763             # NOTE: Braces in something like &{ xxx } are not
4764             # marked as a block, we might have a method call.
4765             # &method(...), $method->(..), &{method}(...),
4766             # $ref[2](list) is ok & short for $ref[2]->(list)
4767             #
4768             # See notes in 'sub code_block_type' and
4769             # 'sub is_non_structural_brace'
4770              
4771             my $paren_list_possible = $tok eq 'qw'
4772             && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/
4773 0   0     0 || $is_for_foreach{$want_paren} );
4774              
4775 0 0       0 if ( !$paren_list_possible ) {
4776 0         0 $self->error_if_expecting_OPERATOR();
4777             }
4778             }
4779 232         478 $in_quote = $quote_items{$tok};
4780 232         466 $allowed_quote_modifiers = $quote_modifiers{$tok};
4781 232         349 $quote_starting_tok = $tok;
4782 232         368 $quote_here_target_2 = undef;
4783              
4784             # All quote types are 'Q' except possibly qw quotes.
4785             # qw quotes are special in that they may generally be trimmed
4786             # of leading and trailing whitespace. So they are given a
4787             # separate type, 'q', unless requested otherwise.
4788 232 100 66     966 $type =
4789             ( $tok eq 'qw' && $rOpts_trim_qw )
4790             ? 'q'
4791             : 'Q';
4792 232         431 $quote_type = $type;
4793 232         385 return;
4794             } ## end sub do_QUOTE_OPERATOR
4795              
4796             sub do_UNKNOWN_BAREWORD {
4797              
4798 1030     1030 0 2085 my ( $self, $next_nonblank_token ) = @_;
4799              
4800             # We have encountered a bareword which needs more work to classify
4801              
4802 1030         3072 $self->scan_bare_identifier();
4803              
4804 1030 100 100     3060 if ( $statement_type eq 'use'
4805             && $last_nonblank_token eq 'use' )
4806             {
4807 111         356 $rsaw_use_module->{$current_package}->{$tok} = 1;
4808             }
4809              
4810 1030 100       2159 if ( $type eq 'w' ) {
4811              
4812 1005 100       2149 if ( $expecting == OPERATOR ) {
4813              
4814             # Patch to avoid error message for RPerl overloaded
4815             # operator functions: use overload
4816             # '+' => \&sse_add,
4817             # '-' => \&sse_sub,
4818             # '*' => \&sse_mul,
4819             # '/' => \&sse_div;
4820             # TODO: this could eventually be generalized
4821 2 50 33     18 if ( $rsaw_use_module->{$current_package}->{'RPerl'}
    50 33        
    0          
    0          
4822             && $tok =~ /^sse_(mul|div|add|sub)$/ )
4823             {
4824              
4825             }
4826              
4827             # patch for Syntax::Operator::In, git #162
4828             elsif ( $tok eq 'in' && $next_nonblank_token eq ':' ) {
4829              
4830             }
4831              
4832             # Fix part 1 for git #63 in which a comment falls
4833             # between an -> and the following word. An
4834             # alternate fix would be to change operator_expected
4835             # to return an UNKNOWN for this type.
4836             elsif ( $last_nonblank_type eq '->' ) {
4837              
4838             }
4839              
4840             # don't complain about possible indirect object
4841             # notation.
4842             # For example:
4843             # package main;
4844             # sub new($) { ... }
4845             # $b = new A::; # calls A::new
4846             # $c = new A; # same thing but suspicious
4847             # This will call A::new but we have a 'new' in
4848             # main:: which looks like a constant.
4849             #
4850             elsif ( $last_nonblank_type eq 'C' ) {
4851 0 0       0 if ( $tok !~ /::$/ ) {
4852 0         0 $self->complain(<<EOM);
4853             Expecting operator after '$last_nonblank_token' but found bare word '$tok'
4854             Maybe indirect object notation?
4855             EOM
4856             }
4857             }
4858             else {
4859 0         0 $self->error_if_expecting_OPERATOR("bareword");
4860             }
4861             }
4862              
4863             # mark bare words immediately followed by a paren as
4864             # functions
4865 1005         1793 $next_tok = $rtokens->[ $i + 1 ];
4866 1005 100       1987 if ( $next_tok eq '(' ) {
4867              
4868             # Patch for issue c151, where we are processing a snippet and
4869             # have not seen that SPACE is a constant. In this case 'x' is
4870             # probably an operator. The only disadvantage with an incorrect
4871             # guess is that the space after it may be incorrect. For example
4872             # $str .= SPACE x ( 16 - length($str) ); See also b1410.
4873 294 50 33     1185 if ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { $type = 'x' }
  0 50       0  
4874              
4875             # Fix part 2 for git #63. Leave type as 'w' to keep
4876             # the type the same as if the -> were not separated
4877 294         615 elsif ( $last_nonblank_type ne '->' ) { $type = 'U' }
4878              
4879             # not a special case
4880             else { }
4881              
4882             }
4883              
4884             # underscore after file test operator is file handle
4885 1005 50 66     2445 if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
4886 0         0 $type = 'Z';
4887             }
4888              
4889             # patch for SWITCH/CASE if 'case' and 'when are
4890             # not treated as keywords:
4891 1005 50 33     4397 if (
      33        
      33        
4892             ( $tok eq 'case' && $rbrace_type->[$brace_depth] eq 'switch' )
4893             || ( $tok eq 'when'
4894             && $rbrace_type->[$brace_depth] eq 'given' )
4895             )
4896             {
4897 0         0 $statement_type = $tok; # next '{' is block
4898 0         0 $type = 'k'; # for keyword syntax coloring
4899             }
4900 1005 100       2096 if ( $next_nonblank_token eq '(' ) {
4901              
4902             # patch for SWITCH/CASE if switch and given not keywords
4903             # Switch is not a perl 5 keyword, but we will gamble
4904             # and mark switch followed by paren as a keyword. This
4905             # is only necessary to get html syntax coloring nice,
4906             # and does not commit this as being a switch/case.
4907 259 50 33     1617 if ( $tok eq 'switch' || $tok eq 'given' ) {
    50 33        
4908 0         0 $type = 'k'; # for keyword syntax coloring
4909             }
4910              
4911             # mark 'x' as operator for something like this (see b1410)
4912             # my $line = join( LD_X, map { LD_H x ( $_ + 2 ) } @$widths );
4913             elsif ( $tok eq 'x' && $last_nonblank_type eq 'w' ) {
4914 0         0 $type = 'x';
4915             }
4916             else {
4917             ## not a special case
4918             }
4919             }
4920             }
4921 1030         1654 return;
4922             } ## end sub do_UNKNOWN_BAREWORD
4923              
4924             sub sub_attribute_ok_here {
4925              
4926 37     37 0 108 my ( $self, $tok_kw, $next_nonblank_token, $i_next ) = @_;
4927              
4928             # Decide if a ':' can introduce an attribute. For example,
4929             # something like 'sub :'
4930              
4931             # Given:
4932             # $tok_kw = a bareword token
4933             # $next_nonblank_token = a following ':' being examined
4934             # $i_next = the index of the following ':'
4935              
4936             # We will decide based on if the colon is followed by a bareword
4937             # which is not a keyword. Changed inext+1 to inext to fixed case
4938             # b1190.
4939 37         62 my $sub_attribute_ok_here;
4940 37 50 66     148 if ( $is_sub{$tok_kw}
      66        
4941             && $expecting != OPERATOR
4942             && $next_nonblank_token eq ':' )
4943             {
4944 3         15 my ( $nn_nonblank_token, $i_nn_uu ) =
4945             $self->find_next_nonblank_token( $i_next, $rtokens,
4946             $max_token_index );
4947             $sub_attribute_ok_here =
4948             $nn_nonblank_token =~ /^\w/
4949             && $nn_nonblank_token !~ /^\d/
4950 3   66     46 && !$is_keyword{$nn_nonblank_token};
4951             }
4952 37         226 return $sub_attribute_ok_here;
4953             } ## end sub sub_attribute_ok_here
4954              
4955 44     44   400 use constant DEBUG_BAREWORD => 0;
  44         77  
  44         13070  
4956              
4957             sub saw_bareword_function {
4958 957     957 0 1787 my ( $self, $bareword ) = @_;
4959             $self->[_rbareword_info_]->{$current_package}->{$bareword}
4960 957         3723 ->{function_count}++;
4961 957         1702 return;
4962             } ## end sub saw_bareword_function
4963              
4964             sub saw_bareword_constant {
4965 180     180 0 345 my ( $self, $bareword ) = @_;
4966             $self->[_rbareword_info_]->{$current_package}->{$bareword}
4967 180         589 ->{constant_count}++;
4968 180         319 return;
4969             } ## end sub saw_bareword_constant
4970              
4971             sub get_bareword_counts {
4972 0     0 0 0 my ( $self, $bareword ) = @_;
4973              
4974             # Given:
4975             # $bareword = a bareword
4976             # Return:
4977             # $function_count = number of times seen as function taking >0 args
4978             # $constant_count = number of times seen as function taking 0 args
4979             # Note:
4980             # $function_count > 0 implies that a TERM should come next
4981             # $constant_count > 0 implies that an OPERATOR **may** come next,
4982             # but this can be incorrect if $bareword can take 0 or more args.
4983             # This is used to help guess tokenization around unknown barewords.
4984 0         0 my $function_count;
4985             my $constant_count;
4986 0         0 my $rbareword_info_tok = $self->[_rbareword_info_]->{$current_package};
4987 0 0       0 if ($rbareword_info_tok) {
4988 0         0 $rbareword_info_tok = $rbareword_info_tok->{$bareword};
4989 0 0       0 if ($rbareword_info_tok) {
4990 0         0 $function_count = $rbareword_info_tok->{function_count};
4991 0         0 $constant_count = $rbareword_info_tok->{constant_count};
4992              
4993             # a positive function count overrides a constant count
4994 0 0       0 if ($function_count) { $constant_count = 0 }
  0         0  
4995             }
4996             }
4997 0 0       0 if ( !defined($function_count) ) { $function_count = 0 }
  0         0  
4998 0 0       0 if ( !defined($constant_count) ) { $constant_count = 0 }
  0         0  
4999 0         0 return ( $function_count, $constant_count );
5000             } ## end sub get_bareword_counts
5001              
5002             # hashes used to help determine a bareword type
5003             my %is_wiUC;
5004             my %is_function_follower;
5005             my %is_constant_follower;
5006             my %is_use_require_no;
5007              
5008             BEGIN {
5009 44     44   221 my @qz = qw( w i U C );
5010 44         286 $is_wiUC{$_} = 1 for @qz;
5011              
5012 44         110 @qz = qw( use require no );
5013 44         172 $is_use_require_no{$_} = 1 for @qz;
5014              
5015             # These pre-token types after a bareword imply that it
5016             # is not a constant, except when '(' is followed by ')'.
5017 44         139 @qz = qw# ( [ { $ @ " ' m #;
5018 44         301 $is_function_follower{$_} = 1 for @qz;
5019              
5020             # These pre-token types after a bareword imply that it
5021             # MIGHT be a constant, but it also might be a function taking
5022             # 0 or more call args.
5023 44         110 @qz = qw# ; ) ] } if unless #;
5024 44         115 push @qz, COMMA;
5025 44         112595 $is_constant_follower{$_} = 1 for @qz;
5026             }
5027              
5028             sub do_BAREWORD {
5029              
5030 6616     6616 0 10033 my ($self) = @_;
5031              
5032             # handle a bareword token:
5033             # returns
5034             # true if this token ends the current line
5035             # false otherwise
5036              
5037 6616         7704 my $next_nonblank_token;
5038 6616         8340 my $i_next = $i + 1;
5039 6616 100 100     19849 if ( $i_next <= $max_token_index && $rtoken_type->[$i_next] eq 'b' ) {
5040 4001         5505 $i_next++;
5041             }
5042 6616 100       10515 if ( $i_next <= $max_token_index ) {
5043 6532         9768 $next_nonblank_token = $rtokens->[$i_next];
5044             }
5045             else {
5046 84         374 ( $next_nonblank_token, $i_next ) =
5047             $self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
5048             }
5049              
5050             # Fix for git #182: If --use-feature=class is set, then
5051             # a colon between words 'ADJUST' and 'params', and on the same line,
5052             # does not form the label 'ADJUST:'. It will get marked as type 'A'.
5053 6616         7742 my $is_not_label;
5054 6616 100 66     11515 if ( $tok eq 'ADJUST'
      66        
      66        
5055             && $is_code_block_token{$tok}
5056             && $rtokens->[$i_next] eq ':'
5057             && $i_next < $max_token_index )
5058             {
5059 2 50       9 my $i_next2 =
5060             $rtoken_type->[ $i_next + 1 ] eq 'b' ? $i_next + 2 : $i_next + 1;
5061 2   33     18 $is_not_label =
5062             ( $i_next2 <= $max_token_index
5063             && $rtoken_type->[$i_next2] eq 'w'
5064             && $rtokens->[$i_next2] eq 'params' );
5065             }
5066              
5067             # a bare word immediately followed by :: is not a keyword;
5068             # use $tok_kw when testing for keywords to avoid a mistake
5069 6616         8602 my $tok_kw = $tok;
5070 6616 100 100     14446 if ( $rtokens->[ $i + 1 ] eq ':'
5071             && $rtokens->[ $i + 2 ] eq ':' )
5072             {
5073 272         486 $tok_kw .= '::';
5074             }
5075              
5076 6616 100       12161 if ( $self->[_in_attribute_list_] ) {
5077 45         149 my $is_attribute = $self->do_ATTRIBUTE_LIST($next_nonblank_token);
5078 45 50       127 return if ($is_attribute);
5079             }
5080              
5081             #-----------------------------------------
5082             # Preliminary check for a lexical sub name
5083             #-----------------------------------------
5084 6571         7706 my $is_lexical_sub_type;
5085              
5086             # Has this name been seen as a lexical sub?
5087 6571 50       13171 if ( my $rseqno_hash = $ris_lexical_sub->{$tok_kw} ) {
5088              
5089             # Look back up the stack to see if it is still in scope.
5090             # Use the deepest we find if there are multiple versions.
5091 0         0 my @seqno_tested;
5092 0         0 my $cd_aa = $rcurrent_depth->[BRACE];
5093 0         0 foreach my $cd ( reverse( 0 .. $cd_aa ) ) {
5094 0 0       0 my $p_seqno =
5095             $cd
5096             ? $rcurrent_sequence_number->[BRACE]->[$cd]
5097             : SEQ_ROOT;
5098              
5099 0         0 push @seqno_tested, $p_seqno;
5100              
5101             # Lexical subs use their containing sequence number as package
5102 0 0       0 if ( my $seqno_brace = $rseqno_hash->{$p_seqno} ) {
5103              
5104             # This sub is in scope .. lookup its type
5105             $is_lexical_sub_type =
5106             $ris_constant->{$p_seqno}->{$tok_kw} ? 'C'
5107             : $ris_block_function->{$p_seqno}->{$tok_kw} ? 'G'
5108             : $ris_block_list_function->{$p_seqno}->{$tok_kw} ? 'G'
5109 0 0       0 : $ris_user_function->{$p_seqno}->{$tok_kw} ? 'U'
    0          
    0          
    0          
5110             : 'U';
5111              
5112             # But lexical subs do not apply within their defining code
5113 0         0 foreach (@seqno_tested) {
5114 0 0       0 next if ( $_ != $seqno_brace );
5115 0         0 $is_lexical_sub_type = undef;
5116 0         0 last;
5117             }
5118              
5119 0         0 last;
5120             }
5121             }
5122             }
5123              
5124             #----------------------------------------
5125             # Starting final if-elsif- chain of tests
5126             #----------------------------------------
5127              
5128             # This is the return flag:
5129             # true => this is the last token on the line
5130             # false => keep tokenizing the line
5131 6571         7701 my $is_last;
5132              
5133             # The following blocks of code must update these vars:
5134             # $type - the final token type, must always be set
5135              
5136             # In addition, if additional pretokens are added:
5137             # $tok - the final token
5138             # $i - the index of the last pretoken
5139              
5140             # They may also need to check and set various flags
5141              
5142             # Scan a bare word following a -> as an identifier; it could
5143             # have a long package name. Fixes c037, c041.
5144 6571 100 100     86220 if ( $last_nonblank_token eq '->' ) {
    100 66        
    100 100        
    100 66        
    100 66        
    100 100        
    100 66        
    50 66        
    50 100        
    100 100        
    50 33        
    100 0        
    100 0        
    100 33        
    100 0        
    100 0        
    100 66        
    100 100        
    100 100        
      100        
      100        
      100        
      66        
      66        
5145 786         2124 $self->scan_bare_identifier();
5146              
5147             # a bareward after '->' gets type 'i'
5148 786         1127 $type = 'i';
5149             }
5150              
5151             # Quote a word followed by => operator
5152             elsif (
5153             ( $next_nonblank_token eq '=' && $rtokens->[ $i_next + 1 ] eq '>' )
5154              
5155             # unless the word is __END__ or __DATA__ and is the only word on
5156             # the line.
5157             && ( !defined( $is_END_DATA{$tok_kw} )
5158             || $input_line !~ /^\s*__(?:END|DATA)__\s*$/ )
5159             )
5160             {
5161             # Bareword followed by a fat comma - see 'git18.in'
5162             # This code was previously sub do_QUOTED_BAREWORD: see c316, c317
5163              
5164             # Older perl:
5165             # 'v25=>1' is a v-string key!
5166             # '-v25=>1' is also a v-string key!
5167             # Deactivated: this is no longer true; see git #165
5168 812 100       2456 if ( 0 && $tok =~ /^v\d+$/ ) {
5169             $type = 'v';
5170             $self->complain("v-string used as hash key\n");
5171             $self->report_v_string($tok);
5172             }
5173              
5174             # If tok is something like 'x17' then it could
5175             # actually be operator x followed by number 17.
5176             # For example, here:
5177             # 123x17 => [ 792, 1224 ],
5178             # (a key of 123 repeated 17 times, perhaps not
5179             # what was intended). We will mark x17 as type
5180             # 'n' and it will be split. If the previous token
5181             # was also a bareword then it is not very clear is
5182             # going on. In this case we will not be sure that
5183             # an operator is expected, so we just mark it as a
5184             # bareword. Perl is a little murky in what it does
5185             # with stuff like this, and its behavior can change
5186             # over time. Something like
5187             # a x18 => [792, 1224], will compile as
5188             # a key with 18 a's. But something like
5189             # push @array, a x18;
5190             # is a syntax error.
5191 0 100 33     0 elsif (
      66        
5192             $expecting == OPERATOR
5193             && substr( $tok, 0, 1 ) eq 'x'
5194             && ( length($tok) == 1
5195             || substr( $tok, 1, 1 ) =~ /^\d/ )
5196             )
5197             {
5198 3         9 $type = 'n';
5199 3 50       11 if ( $self->split_pretoken(1) ) {
5200 3         5 $type = 'x';
5201 3         6 $tok = 'x';
5202             }
5203 3         12 $self->complain("x operator in hash key\n");
5204             }
5205             else {
5206              
5207             # git #18
5208 809         1157 $type = 'w';
5209 809         1916 $self->error_if_expecting_OPERATOR();
5210             }
5211             }
5212              
5213             # quote a bare word within braces..like xxx->{s}; note that we
5214             # must be sure this is not a structural brace, to avoid
5215             # mistaking {s} in the following for a quoted bare word:
5216             # for(@[){s}bla}BLA}
5217             # Also treat q in something like var{-q} as a bare word, not
5218             # a quote operator
5219             elsif (
5220             $next_nonblank_token eq '}'
5221             && (
5222             $last_nonblank_type eq 'L'
5223             || ( $last_nonblank_type eq 'm'
5224             && $last_last_nonblank_type eq 'L' )
5225             )
5226             )
5227             {
5228 138         253 $type = 'w';
5229             }
5230              
5231             # handle operator x (now we know it isn't $x=)
5232             elsif (
5233             $expecting == OPERATOR
5234             && substr( $tok, 0, 1 ) eq 'x'
5235             && ( length($tok) == 1
5236             || substr( $tok, 1, 1 ) =~ /^\d/ )
5237             )
5238             {
5239 17         79 $self->do_X_OPERATOR();
5240             }
5241             elsif ( $tok_kw eq 'CORE::' ) {
5242 3         5 $type = $tok = $tok_kw;
5243 3         5 $i += 2;
5244             }
5245             elsif ( ( $tok eq 'strict' )
5246             and ( $last_nonblank_token eq 'use' ) )
5247             {
5248 14         40 $self->[_saw_use_strict_] = 1;
5249 14         56 $self->scan_bare_identifier();
5250             }
5251              
5252             elsif ( ( $tok eq 'warnings' )
5253             and ( $last_nonblank_token eq 'use' ) )
5254             {
5255 7         17 $self->[_saw_perl_dash_w_] = 1;
5256              
5257             # scan as identifier, so that we pick up something like:
5258             # use warnings::register
5259 7         20 $self->scan_bare_identifier();
5260             }
5261              
5262             elsif (
5263             $tok eq 'AutoLoader'
5264             && $self->[_look_for_autoloader_]
5265             && (
5266             $last_nonblank_token eq 'use'
5267              
5268             # these regexes are from AutoSplit.pm, which we want
5269             # to mimic
5270             || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
5271             || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
5272             )
5273             )
5274             {
5275 0         0 $self->write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
5276 0         0 $self->[_saw_autoloader_] = 1;
5277 0         0 $self->[_look_for_autoloader_] = 0;
5278 0         0 $self->scan_bare_identifier();
5279             }
5280              
5281             elsif (
5282             $tok eq 'SelfLoader'
5283             && $self->[_look_for_selfloader_]
5284             && ( $last_nonblank_token eq 'use'
5285             || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
5286             || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
5287             )
5288             {
5289 0         0 $self->write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
5290 0         0 $self->[_saw_selfloader_] = 1;
5291 0         0 $self->[_look_for_selfloader_] = 0;
5292 0         0 $self->scan_bare_identifier();
5293             }
5294              
5295             elsif ( ( $tok eq 'constant' )
5296             and ( $last_nonblank_token eq 'use' ) )
5297             {
5298 16         54 $self->do_USE_CONSTANT();
5299             }
5300              
5301             # Lexical sub names override keywords, labels. Based on testing,
5302             # this seems to be the correct location for this check.
5303             elsif ($is_lexical_sub_type) {
5304 0         0 $type = $is_lexical_sub_type;
5305             }
5306              
5307             # various quote operators
5308             elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
5309 232         757 $self->do_QUOTE_OPERATOR();
5310             }
5311              
5312             # check for a statement label
5313             elsif (
5314             ( $next_nonblank_token eq ':' )
5315             && !$is_not_label
5316             && ( $rtokens->[ $i_next + 1 ] ne ':' )
5317             && ( $i_next <= $max_token_index ) # colon on same line
5318              
5319             # like 'sub : lvalue' ?
5320             && !$self->sub_attribute_ok_here( $tok_kw, $next_nonblank_token,
5321             $i_next )
5322             && new_statement_ok()
5323             )
5324             {
5325 33 100       179 if ( $tok !~ /[A-Z]/ ) {
5326 15         26 push @{ $self->[_rlower_case_labels_at_] }, $input_line_number;
  15         45  
5327             }
5328 33         56 $type = 'J';
5329 33         78 $tok .= ':';
5330 33         53 $i = $i_next;
5331             }
5332              
5333             # 'sub' or other sub alias
5334             elsif ( $is_sub{$tok_kw} ) {
5335              
5336             # Guess what to do for unknown word 'method':
5337             # Updated for --use-feature=class (rt145706):
5338 352 100 100     1955 if ( $tok_kw eq 'method'
      100        
5339             && $guess_if_method
5340             && !$self->method_ok_here($next_nonblank_token) )
5341             {
5342 7         23 $self->do_UNKNOWN_BAREWORD($next_nonblank_token);
5343             }
5344             else {
5345 345 50       829 $self->error_if_expecting_OPERATOR()
5346             if ( $expecting == OPERATOR );
5347 345         2349 initialize_subname();
5348 345         1092 $self->scan_id();
5349             }
5350             }
5351              
5352             # 'package'
5353             elsif ( $is_package{$tok_kw} ) {
5354              
5355             # Update for --use-feature=class (rt145706):
5356             # We have to be extra careful because 'class' may be used for other
5357             # purposes on older code; i.e.
5358             # class($x) - valid sub call
5359             # package($x) - error
5360 54 100       175 if ( $tok_kw eq 'class' ) {
5361 14 100 66     119 if ( $expecting == OPERATOR
      100        
5362             || $next_nonblank_token !~ /^[\w\:]/
5363             || !$self->class_ok_here() )
5364             {
5365 4         12 $self->do_UNKNOWN_BAREWORD($next_nonblank_token);
5366             }
5367 10         24 else { $self->scan_id() }
5368             }
5369             else {
5370 40 50       106 $self->error_if_expecting_OPERATOR()
5371             if ( $expecting == OPERATOR );
5372 40         136 $self->scan_id();
5373             }
5374             }
5375              
5376             # Fix for c035: split 'format' from 'is_format_END_DATA' to be
5377             # more restrictive. Require a new statement to be ok here.
5378             elsif ( $tok_kw eq 'format' && new_statement_ok() ) {
5379 1         2 $type = ';'; # make tokenizer look for TERM next
5380 1         3 $self->[_in_format_] = 1;
5381 1         2 $is_last = 1; ## is last token on this line
5382             }
5383              
5384             # Note on token types for format, __DATA__, __END__:
5385             # It simplifies things to give these type ';', so that when we
5386             # start rescanning we will be expecting a token of type TERM.
5387             # We will switch to type 'k' before outputting the tokens.
5388             elsif ( defined( $is_END_DATA{$tok_kw} ) ) {
5389              
5390             # Warn if this follows an operator expecting a term (c565)
5391 8 50       57 $self->error_if_expecting_TERM()
5392             if ( $expecting == TERM );
5393              
5394 8         17 $type = ';'; # make tokenizer look for TERM next
5395              
5396             # Remember that we are in one of these three sections
5397 8         25 $self->[ $is_END_DATA{$tok_kw} ] = 1;
5398 8         16 $is_last = 1; ## is last token on this line
5399             }
5400             elsif ( $is_keyword{$tok_kw} ) {
5401 3060         8635 $self->do_KEYWORD();
5402             }
5403              
5404             # check for inline label following
5405             # /^(redo|last|next|goto)$/
5406             elsif (( $last_nonblank_type eq 'k' )
5407             && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
5408             {
5409 19         42 $type = 'j';
5410             }
5411              
5412             # something else --
5413             else {
5414 1019         2980 $self->do_UNKNOWN_BAREWORD($next_nonblank_token);
5415             }
5416              
5417             #----------------------------------------------------------------
5418             # Save info for use in later guessing. Even for types 'i' and 'U'
5419             # because those may be marked as type 'w' (barewords) elsewhere.
5420             #----------------------------------------------------------------
5421 6571 100 100     22055 if ( $is_wiUC{$type}
      100        
5422             && $statement_type ne 'use'
5423             && $statement_type ne '_use' )
5424             {
5425 2626         3678 my $result = "unknown";
5426              
5427             # Words are marked 'function' if they appear in a role which
5428             # is not consistent with a constant value. Typically they are
5429             # function calls.
5430 2626 100 100     11941 if ( $type eq 'U'
    100 66        
5431             || $is_function_follower{$next_nonblank_token} )
5432             {
5433              
5434 1015         1556 my $empty_parens = 0;
5435 1015 100 100     3403 if ( $next_nonblank_token eq '(' && $i_next < $max_token_index )
5436             {
5437 592         1173 my $tok_next_p1 = $rtokens->[ $i_next + 1 ];
5438 592 100 100     2329 if ( substr( $tok_next_p1, 0, 1 ) eq SPACE
5439             && $i_next + 2 <= $max_token_index )
5440             {
5441 282         576 $tok_next_p1 = $rtokens->[ $i_next + 2 ];
5442             }
5443 592         1055 $empty_parens = $tok_next_p1 eq ')';
5444             }
5445              
5446 1015 100       2158 if ( !$empty_parens ) {
5447              
5448             # not a constant term - probably a function
5449 957         1418 $result = "function";
5450 957         2420 $self->saw_bareword_function($tok);
5451             }
5452             }
5453              
5454             # Words are marked 'constant' if they appear in a role
5455             # consistent with a constant value. However, they may simply
5456             # be functions which optionally take zero args. So if a word
5457             # appears as both constant and function, it is not a constant.
5458             elsif ($type eq 'C'
5459             || $is_constant_follower{$next_nonblank_token} )
5460             {
5461              
5462 443   100     1780 my $is_hash_key = $next_nonblank_token eq '}'
5463             && (
5464             $last_nonblank_type eq 'L'
5465             || ( $last_nonblank_type eq 'm'
5466             && $last_last_nonblank_type eq 'L' )
5467             );
5468              
5469 443 100 100     2440 if (
      100        
      100        
5470              
5471             # not a hash key like {bareword} or {-bareword}
5472             !$is_hash_key
5473              
5474             # not a package name, etc
5475             && ( $last_nonblank_type ne 'k'
5476             || !$is_use_require_no{$last_nonblank_token} )
5477              
5478             # skip arrow calls, which can go either way
5479             && $last_nonblank_token ne '->'
5480             )
5481             {
5482             # possibly a constant or constant function
5483 180         260 $result = "constant";
5484 180         525 $self->saw_bareword_constant($tok);
5485             }
5486             else {
5487 263         455 $result = "other bareword";
5488             }
5489             }
5490             else {
5491             }
5492              
5493 2626         3412 if ( DEBUG_BAREWORD && $result ne 'other bareword' ) {
5494             print
5495             "$input_line_number: $result: $tok: type=$type : last_tok=$last_nonblank_token : next_tok='$next_nonblank_token'\n";
5496             }
5497             }
5498 6571         11808 return $is_last;
5499              
5500             } ## end sub do_BAREWORD
5501              
5502             # Table of quote types checked for interpolated here targets.
5503             # Issue 310 has extensive test cases.
5504             my %is_interpolated_quote = (
5505             q{'} => 0,
5506             q{`} => 1,
5507             q{"} => 1,
5508             qq => 1,
5509             qx => 1,
5510             m => 1,
5511             qr => 1,
5512             q => 0,
5513             qw => 0,
5514             s => 1,
5515             y => 0,
5516             tr => 0,
5517             );
5518              
5519             sub push_here_targets {
5520 2     2 0 4 my ($rht) = @_;
5521              
5522             # Push here targets found in a quote onto the here target list
5523 2         2 push @{$rhere_target_list}, @{$rht};
  2         4  
  2         4  
5524              
5525             # Change type from 'Q' to 'h' for quotes with here-doc targets so that
5526             # the formatter (see sub process_line_of_CODE) will not make any line
5527             # breaks after this point.
5528 2         3 $type = 'h';
5529 2 100       6 if ( $i_tok < 0 ) {
5530 1         2 my $ilast = $routput_token_list->[-1];
5531 1         2 $routput_token_type->[$ilast] = $type;
5532             }
5533 2         4 return;
5534             } ## end sub push_here_targets
5535              
5536             sub do_FOLLOW_QUOTE {
5537              
5538 3170     3170 0 3883 my $self = shift;
5539              
5540             # Continue following a quote on a new line
5541 3170         3945 $type = $quote_type;
5542              
5543             # initialize if continuation line
5544 3170 100       3514 if ( !@{$routput_token_list} ) {
  3170         5938  
5545 245         337 push( @{$routput_token_list}, $i );
  245         381  
5546 245         425 $routput_token_type->[$i] = $type;
5547             }
5548              
5549             # Save starting lengths for here target search
5550 3170         4360 my $len_qs1 = length($quoted_string_1);
5551 3170         3768 my $len_qs2 = length($quoted_string_2);
5552 3170         3917 my $in_quote_start = $in_quote;
5553              
5554             # scan for the end of the quote or pattern
5555             (
5556 3170         8459 $i,
5557             $in_quote,
5558             $quote_character,
5559             $quote_pos,
5560             $quote_depth,
5561             $quoted_string_1,
5562             $quoted_string_2,
5563              
5564             ) = $self->do_quote(
5565              
5566             $i,
5567             $in_quote,
5568             $quote_character,
5569             $quote_pos,
5570             $quote_depth,
5571             $quoted_string_1,
5572             $quoted_string_2,
5573             $rtokens,
5574             $rtoken_type,
5575             $rtoken_map,
5576             $max_token_index,
5577              
5578             );
5579              
5580             # Save pattern and replacement text for rescanning for /e
5581 3170         5391 my $qs1_for_e_scan = $quoted_string_1;
5582              
5583             # Check for possible here targets in an interpolated quote
5584 3170 100 100     9302 if ( $is_interpolated_quote{$quote_starting_tok}
5585             && $in_quote < $in_quote_start )
5586             {
5587              
5588             # post any saved target of a 2-part quote if the end is reached
5589 1431 100 100     4553 if ( !$in_quote && defined($quote_here_target_2) ) {
5590              
5591             # Safety check
5592 1 50       4 if ( $quote_items{$quote_starting_tok} == 2 ) {
5593 1         3 push_here_targets($quote_here_target_2);
5594             }
5595             else {
5596 0         0 DEVEL_MODE
5597             && Fault(
5598             "unexpected saved here target near line $input_line_number\n"
5599             );
5600             }
5601 1         2 $quote_here_target_2 = undef;
5602             }
5603              
5604             # Single part quotes: use $quoted_string_1, and
5605             # $in_quote drops from 1 to 0 when the end is found
5606             # Dual part quotes ('s'): first part is in $quoted_string_2, and
5607             # $in_quote:
5608             # drops from 2 to 1 when the the first part is found
5609             # drops 1 to 0 when the the second part is found
5610             # drops from 2 to 0 if both parts are found in this call
5611             # The tricky part is that we must search for here targets whenever
5612             # $in_quote drops, but we can only post here targets after the end
5613             # of the last part is found (in_quote==0). See test 'here4.in'.
5614             # Update c310 added interpolated here docs and has many test cases.
5615              
5616             # Initialize for the normal case of a single quote
5617 1431         2052 my $qs = $quoted_string_1;
5618 1431         1834 my $len_qs = $len_qs1;
5619 1431         2119 my $num_quotes = $in_quote_start - $in_quote;
5620              
5621             # Dual part quotes (type 's') have first part in $quoted_string_2
5622 1431 100       2574 if ( $in_quote_start == 2 ) {
5623 31         49 $qs = $quoted_string_2;
5624 31         61 $len_qs = $len_qs2;
5625             }
5626              
5627             # Loop to search 1 or 2 quotes for here targets
5628 1431         3301 foreach ( 1 .. $num_quotes ) {
5629              
5630             # Perform quick tests to avoid a sub call:
5631 1460         2870 my $pos_shift = rindex( $qs, '<<' );
5632 1460 50 100     3329 if (
      33        
      66        
5633              
5634             # '<<' in the last line
5635             $pos_shift >= $len_qs
5636              
5637             # followed by a '}'
5638             && rindex( $qs, '}' ) > $pos_shift
5639              
5640             # preceded by '$' or '@'
5641             && ( rindex( $qs, '$', $pos_shift ) >= 0
5642             || rindex( $qs, '@', $pos_shift ) >= 0 )
5643             )
5644             {
5645              
5646             # scan the quote for here targets
5647 2         9 my ( $rht, $qs_mod ) =
5648             $self->find_interpolated_here_targets( $qs, $len_qs );
5649 2 50       8 if ($rht) {
5650              
5651             # only post here targets when end of quote is found
5652 2 100       5 if ($in_quote) {
5653 1         2 $quote_here_target_2 = $rht;
5654             }
5655             else {
5656 1         5 push_here_targets($rht);
5657              
5658             # Replace the string with the modified version
5659             # in case it is re-scanned due to a /e modifier
5660 1         2 $qs1_for_e_scan = $qs_mod;
5661             }
5662             }
5663             }
5664              
5665             # re-initialize for next pass
5666 1460         1922 $qs = $quoted_string_1;
5667 1460         2637 $len_qs = $len_qs1;
5668             } ## end while ( $num_quotes-- > 0)
5669             }
5670              
5671 3170 100       5352 if ($in_quote) { return }
  244         427  
5672              
5673             # All done with this quote...
5674              
5675             # re-initialize for next search
5676 2926         3677 $quote_character = EMPTY_STRING;
5677 2926         3482 $quote_pos = 0;
5678 2926         3526 $quote_type = 'Q';
5679 2926         3502 $quoted_string_1 = EMPTY_STRING;
5680 2926         3457 $quoted_string_2 = EMPTY_STRING;
5681 2926 100       5050 if ( ++$i > $max_token_index ) { return }
  126         256  
5682              
5683             # look for any modifiers
5684 2800 100       4789 if ($allowed_quote_modifiers) {
5685              
5686             # check for exact quote modifiers
5687 162 100       679 if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) {
5688 31         53 my $str = $rtokens->[$i];
5689 31         47 my $saw_modifier_e;
5690 31         731 while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
5691 49         82 my $pos = pos($str);
5692 49         94 my $char = substr( $str, $pos - 1, 1 );
5693 49   100     255 $saw_modifier_e ||= ( $char eq 'e' );
5694             }
5695              
5696             # For an 'e' quote modifier we must scan the replacement
5697             # text for here-doc targets...
5698             # but if the modifier starts a new line we must skip
5699             # this because either the here doc will be fully
5700             # contained in the replacement text (so we can
5701             # ignore it) or Perl will not find it. The modifier will have a
5702             # pretoken index $i=1 if it starts a new line, so we only look
5703             # for a here doc if $i>1. See test 'here2.in'.
5704 31 50 66     93 if ( $saw_modifier_e && $i > 1 ) {
5705 0         0 my $rht = $self->scan_replacement_text($qs1_for_e_scan);
5706 0 0       0 if ($rht) {
5707 0         0 push_here_targets($rht);
5708             }
5709             }
5710              
5711 31 50       70 if ( defined( pos($str) ) ) {
5712              
5713             # matched
5714 31 50       78 if ( pos($str) == length($str) ) {
5715 31 50       94 if ( ++$i > $max_token_index ) { return }
  0         0  
5716             }
5717              
5718             # Looks like a joined quote modifier
5719             # and keyword, maybe something like
5720             # s/xxx/yyy/gefor @k=...
5721             # Example is "galgen.pl". Would have to split
5722             # the word and insert a new token in the
5723             # pre-token list. This is so rare that I haven't
5724             # done it. Will just issue a warning citation.
5725              
5726             # This error might also be triggered if my quote
5727             # modifier characters are incomplete
5728             else {
5729 0         0 $self->warning(<<EOM);
5730              
5731             Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
5732             Please put a space between quote modifiers and trailing keywords.
5733             EOM
5734              
5735             # print "token $rtokens->[$i]\n";
5736             # my $num = length($str) - pos($str);
5737             # $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num);
5738             # print "continuing with new token $rtokens->[$i]\n";
5739              
5740             # skipping past this token does least damage
5741 0 0       0 if ( ++$i > $max_token_index ) { return }
  0         0  
5742             }
5743             }
5744             else {
5745              
5746             # example file: rokicki4.pl
5747             # This error might also be triggered if my quote
5748             # modifier characters are incomplete
5749 0         0 $self->write_logfile_entry(
5750             "Note: found word $str at quote modifier location\n");
5751             }
5752             }
5753              
5754             # re-initialize
5755 162         261 $allowed_quote_modifiers = EMPTY_STRING;
5756             }
5757 2800         4041 return;
5758             } ## end sub do_FOLLOW_QUOTE
5759              
5760             # ------------------------------------------------------------
5761             # begin hash of code for handling most token types
5762             # ------------------------------------------------------------
5763             my $tokenization_code = {
5764              
5765             '$' => \&do_DOLLAR_SIGN,
5766             '(' => \&do_LEFT_PARENTHESIS,
5767             ')' => \&do_RIGHT_PARENTHESIS,
5768             ',' => \&do_COMMA,
5769             ';' => \&do_SEMICOLON,
5770             '"' => \&do_QUOTATION_MARK,
5771             "'" => \&do_APOSTROPHE,
5772             '`' => \&do_BACKTICK,
5773             '/' => \&do_SLASH,
5774             '{' => \&do_LEFT_CURLY_BRACKET,
5775             '}' => \&do_RIGHT_CURLY_BRACKET,
5776             '&' => \&do_AMPERSAND,
5777             '<' => \&do_LESS_THAN_SIGN,
5778             '?' => \&do_QUESTION_MARK,
5779             '*' => \&do_STAR,
5780             '.' => \&do_DOT,
5781             ':' => \&do_COLON,
5782             '+' => \&do_PLUS_SIGN,
5783             '@' => \&do_AT_SIGN,
5784             '%' => \&do_PERCENT_SIGN,
5785             '[' => \&do_LEFT_SQUARE_BRACKET,
5786             ']' => \&do_RIGHT_SQUARE_BRACKET,
5787             '-' => \&do_MINUS_SIGN,
5788             '^' => \&do_CARAT_SIGN,
5789             '::' => \&do_DOUBLE_COLON,
5790             '<<' => \&do_LEFT_SHIFT,
5791             '<<~' => \&do_NEW_HERE_DOC,
5792             '->' => \&do_POINTER,
5793             '++' => \&do_PLUS_PLUS,
5794             '=>' => \&do_FAT_COMMA,
5795             '--' => \&do_MINUS_MINUS,
5796              
5797             # No special code for these types yet, but syntax checks
5798             # could be added.
5799             ## '&&' => \&do_LOGICAL_AND,
5800             ## '||' => \&do_LOGICAL_OR,
5801             ## '>' => \&do_GREATER_THAN_SIGN,
5802             ## '|' => \&do_VERTICAL_LINE,
5803             ## '//' => \&do_SLASH_SLASH,
5804             ## '!' => undef,
5805             ## '!=' => undef,
5806             ## '!~' => undef,
5807             ## '%=' => undef,
5808             ## '&&=' => undef,
5809             ## '&=' => undef,
5810             ## '+=' => undef,
5811             ## '-=' => undef,
5812             ## '..' => undef,
5813             ## '..' => undef,
5814             ## '...' => undef,
5815             ## '.=' => undef,
5816             ## '<<=' => undef,
5817             ## '<=' => undef,
5818             ## '<=>' => undef,
5819             ## '<>' => undef,
5820             ## '=' => undef,
5821             ## '==' => undef,
5822             ## '=~' => undef,
5823             ## '>=' => undef,
5824             ## '>>' => undef,
5825             ## '>>=' => undef,
5826             ## '\\' => undef,
5827             ## '^=' => undef,
5828             ## '|=' => undef,
5829             ## '||=' => undef,
5830             ## '//=' => undef,
5831             ## '~' => undef,
5832             ## '~~' => undef,
5833             ## '!~~' => undef,
5834              
5835             };
5836              
5837             # ------------------------------------------------------------
5838             # end hash of code for handling individual token types
5839             # ------------------------------------------------------------
5840              
5841 44     44   388 use constant DEBUG_TOKENIZE => 0;
  44         79  
  44         4639  
5842              
5843             my %is_arrow_or_Z;
5844              
5845             BEGIN {
5846 44     44   244 my @qZ = qw( -> Z );
5847 44         161157 $is_arrow_or_Z{$_} = 1 for @qZ;
5848             }
5849              
5850             sub tokenize_this_line {
5851              
5852 8799     8799 0 14822 my ( $self, $line_of_tokens, $trimmed_input_line ) = @_;
5853              
5854             # This routine tokenizes one line. The results are stored in
5855             # the hash ref '$line_of_tokens'.
5856              
5857             # Given:
5858             # $line_of_tokens = ref to hash of values being filled for this line
5859             # $trimmed_input_line
5860             # = the input line without leading whitespace, OR
5861             # = undef if not available
5862             # Returns:
5863             # nothing
5864              
5865 8799         13325 my $untrimmed_input_line = $line_of_tokens->{_line_text};
5866              
5867             # Extract line number for use in error messages
5868 8799         12184 $input_line_number = $line_of_tokens->{_line_number};
5869              
5870             #-------------------------------------
5871             # Check for start of pod documentation
5872             #-------------------------------------
5873 8799 100 100     27581 if ( !$in_quote
      66        
5874             && substr( $untrimmed_input_line, 0, 1 ) eq '='
5875             && $untrimmed_input_line =~ /^=[A-Za-z_]/ )
5876             {
5877              
5878             # Must not be in an equation where an '=' could be expected.
5879             # Perl has additional restrictions which are not checked here.
5880 15         29 my $blank_after_Z = 1;
5881 15         56 $expecting = $self->operator_expected( '=', 'b', $blank_after_Z );
5882 15 50       63 if ( $expecting == TERM ) {
5883 15         28 $self->[_in_pod_] = 1;
5884 15         35 return;
5885             }
5886             }
5887              
5888             #--------------------------
5889             # Trim leading whitespace ?
5890             #--------------------------
5891             # Use untrimmed line if we are continuing in a type 'Q' quote
5892 8784 100 100     17727 if ( $in_quote && $quote_type eq 'Q' ) {
5893 58         102 $line_of_tokens->{_starting_in_quote} = 1;
5894 58         82 $input_line = $untrimmed_input_line;
5895 58         98 chomp $input_line;
5896             }
5897              
5898             # Trim start of this line if we are not continuing a quoted line.
5899             # Do not trim end because we might end in a quote (test: deken4.pl)
5900             # Perl::Tidy::Formatter will delete needless trailing blanks
5901             else {
5902 8726         13600 $line_of_tokens->{_starting_in_quote} = 0;
5903              
5904             # Use the pre-computed trimmed line if defined (most efficient)
5905 8726         11401 $input_line = $trimmed_input_line;
5906              
5907             # otherwise trim the raw input line (much less efficient)
5908 8726 50       19499 if ( !defined($input_line) ) {
5909 0         0 $input_line = $untrimmed_input_line;
5910 0         0 $input_line =~ s/^\s+//;
5911             }
5912              
5913 8726         11869 chomp $input_line;
5914              
5915             # define 'guessed_indentation_level' if logfile will be saved
5916 8726 100 100     17970 if ( $self->[_save_logfile_] && length($input_line) ) {
5917 3         5 my $guess =
5918             $self->guess_old_indentation_level($untrimmed_input_line);
5919 3         5 $line_of_tokens->{_guessed_indentation_level} = $guess;
5920             }
5921             }
5922              
5923             #------------
5924             # Blank lines
5925             #------------
5926 8784 100       15192 if ( !length($input_line) ) {
5927 1040         1856 $line_of_tokens->{_line_type} = 'CODE';
5928 1040         1933 $line_of_tokens->{_rtokens} = [];
5929 1040         1817 $line_of_tokens->{_rtoken_type} = [];
5930 1040         2199 $line_of_tokens->{_rlevels} = [];
5931 1040         1886 $line_of_tokens->{_rblock_type} = [];
5932 1040         1983 $line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
5933 1040         1852 $line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
5934 1040         1795 return;
5935             }
5936              
5937             #---------
5938             # Comments
5939             #---------
5940 7744 100 100     21184 if ( !$in_quote && substr( $input_line, 0, 1 ) eq '#' ) {
5941              
5942             # and check for skipped section
5943 884 50 66     4398 if (
      66        
      66        
5944             (
5945             substr( $input_line, 0, 4 ) eq '#<<V'
5946             || $rOpts_code_skipping_begin
5947             )
5948             && $rOpts_code_skipping
5949              
5950             # note that the code_skipping_patterns require a newline
5951             && ( $input_line . SPACE ) =~ /$code_skipping_pattern_begin/
5952             )
5953             {
5954 2         7 $self->[_in_code_skipping_] = $self->[_last_line_number_];
5955 2         4 return;
5956             }
5957              
5958             # Look for format skipping tags, but just normal mode.
5959             # It will be used for these purposes:
5960             # - to inform the formatter of an end token with no begin token
5961             # - for making a hint when a brace error is detected
5962 882 100 100     8960 if (
    100 100        
      100        
      33        
      66        
      100        
      100        
      100        
5963             (
5964             substr( $input_line, 0, 4 ) eq '#<<<'
5965             || $rOpts_format_skipping_begin
5966             )
5967             && $rOpts_format_skipping
5968              
5969             # note that the format_skipping_patterns require a space
5970             && ( $input_line . SPACE ) =~ /$format_skipping_pattern_begin/
5971              
5972             # allow same token for begin and end
5973             && (
5974             !$self->[_in_format_skipping_]
5975             || ( $format_skipping_pattern_begin ne
5976             $format_skipping_pattern_end )
5977             )
5978             )
5979             {
5980 17         76 my $on_off = 1;
5981 17         33 my $lno = $self->[_last_line_number_];
5982 17         34 my $rformat_skipping_list = $self->[_rformat_skipping_list_];
5983              
5984             # format markers must alternate between on and off
5985 17 50 66     23 if ( @{$rformat_skipping_list}
  17         91  
5986             && $rformat_skipping_list->[-1]->[0] == $on_off )
5987             {
5988 0         0 my $lno_last = $rformat_skipping_list->[-1]->[1];
5989 0         0 $self->warning_do_not_format(
5990             "consecutive format-skipping start markers - see line $lno_last\n"
5991             );
5992             }
5993 17         36 push @{$rformat_skipping_list}, [ $on_off, $lno, $input_line ];
  17         55  
5994 17         38 $self->[_in_format_skipping_] = $lno;
5995             }
5996             elsif (
5997             (
5998             substr( $input_line, 0, 4 ) eq '#>>>'
5999             || $rOpts_format_skipping_end
6000             )
6001             && $rOpts_format_skipping
6002              
6003             # note that the format_skipping_patterns require a newline
6004             && ( $input_line . SPACE ) =~ /$format_skipping_pattern_end/
6005             )
6006             {
6007 20         54 my $lno = $self->[_last_line_number_];
6008 20         40 my $rformat_skipping_list = $self->[_rformat_skipping_list_];
6009 20         31 my $on_off = -1;
6010              
6011             # markers must alternate between on and off
6012 20 50 66     37 if ( @{$rformat_skipping_list}
  20         136  
6013             && $rformat_skipping_list->[-1]->[0] == $on_off )
6014             {
6015 0         0 my $lno_last = $rformat_skipping_list->[-1]->[1];
6016 0         0 $self->warning_do_not_format(
6017             "consecutive format-skipping end markers - see line $lno_last\n"
6018             );
6019             }
6020              
6021 20         38 push @{$rformat_skipping_list}, [ $on_off, $lno, $input_line ];
  20         76  
6022 20         51 $self->[_in_format_skipping_] = 0;
6023             }
6024             else {
6025             # not a format skipping comment
6026             }
6027              
6028             # Optional fast processing of a block comment
6029 882         1617 $line_of_tokens->{_line_type} = 'CODE';
6030 882         2165 $line_of_tokens->{_rtokens} = [$input_line];
6031 882         2026 $line_of_tokens->{_rtoken_type} = ['#'];
6032 882         2534 $line_of_tokens->{_rlevels} = [$level_in_tokenizer];
6033 882         2064 $line_of_tokens->{_rblock_type} = [EMPTY_STRING];
6034 882         1855 $line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
6035 882         1765 $line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
6036 882         1756 return;
6037             }
6038              
6039             #-------------------------------------
6040             # Loop to find all tokens on this line
6041             #-------------------------------------
6042              
6043             # Update the copy of the line for use in error messages
6044             # This must be exactly what we give the pre_tokenizer
6045 6860         9740 $self->[_line_of_text_] = $input_line;
6046              
6047             # re-initialize for the main loop
6048 6860         10029 $routput_token_list = []; # stack of output token indexes
6049 6860         12173 $routput_token_type = []; # token types
6050 6860         17675 $routput_block_type = []; # types of code block
6051 6860         16106 $routput_type_sequence = []; # nesting sequential number
6052              
6053 6860         13960 $rhere_target_list = [];
6054              
6055 6860         8993 $tok = $last_nonblank_token;
6056 6860         8738 $type = $last_nonblank_type;
6057 6860         8267 $prototype = $last_nonblank_prototype;
6058 6860         8205 $last_nonblank_i = -1;
6059 6860         8442 $block_type = $last_nonblank_block_type;
6060 6860         8002 $container_type = $last_nonblank_container_type;
6061 6860         7922 $type_sequence = $last_nonblank_type_sequence;
6062 6860         7547 $indent_flag = 0;
6063 6860         7613 $peeked_ahead = 0;
6064              
6065 6860         15579 $self->tokenizer_main_loop();
6066              
6067             #-------------------------------------------------
6068             # Done tokenizing this line ... package the result
6069             #-------------------------------------------------
6070 6860         18189 $self->tokenizer_wrapup_line($line_of_tokens);
6071              
6072 6860         10690 return;
6073             } ## end sub tokenize_this_line
6074              
6075             sub tokenizer_main_loop {
6076              
6077 6860     6860 0 9953 my ($self) = @_;
6078              
6079             # Break one input line into tokens
6080             # We are working on closure variables.
6081              
6082             # Start by breaking the line into pre-tokens
6083 6860         15521 ( $rtokens, $rtoken_map, $rtoken_type ) = pre_tokenize($input_line);
6084              
6085             # Verify that all leading whitespace has been trimmed
6086             # except for quotes of type 'Q' (c273).
6087 6860 50 66     25658 if ( @{$rtokens}
  6860   33     26494  
      66        
6088             && $rtoken_type->[0] eq 'b'
6089             && !( $in_quote && $quote_type eq 'Q' ) )
6090             {
6091              
6092             # Shouldn't happen if calling sub did trim operation correctly.
6093 0         0 DEVEL_MODE && Fault(<<EOM);
6094             leading blank at line
6095             $input_line
6096             EOM
6097              
6098             # Fix by removing the leading blank token. This fix has been
6099             # tested and works correctly even if no whitespaces was trimmed.
6100             # But it is an inefficient way to do things because, for example,
6101             # it forces all comments to be processed by sub pre_tokenize.
6102             # And it may cause indented code-skipping comments to be missed.
6103 0         0 shift @{$rtokens};
  0         0  
6104 0         0 shift @{$rtoken_map};
  0         0  
6105 0         0 shift @{$rtoken_type};
  0         0  
6106             }
6107              
6108 6860         8330 $max_token_index = scalar( @{$rtokens} ) - 1;
  6860         9551  
6109 6860         8062 push( @{$rtokens}, SPACE, SPACE, SPACE )
  6860         14237  
6110             ; # extra whitespace simplifies logic
6111 6860         7961 push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced
  6860         11613  
6112 6860         7967 push( @{$rtoken_type}, 'b', 'b', 'b' );
  6860         12334  
6113              
6114             # initialize for main loop
6115 6860         7993 if (0) { #<<< this is not necessary
6116             foreach my $ii ( 0 .. $max_token_index + 3 ) {
6117             $routput_token_type->[$ii] = EMPTY_STRING;
6118             $routput_block_type->[$ii] = EMPTY_STRING;
6119             $routput_type_sequence->[$ii] = EMPTY_STRING;
6120             $routput_indent_flag->[$ii] = 0;
6121             }
6122             }
6123              
6124 6860         8018 $i = -1;
6125 6860         8340 $i_tok = -1;
6126              
6127             #-----------------------
6128             # main tokenization loop
6129             #-----------------------
6130              
6131             # we are looking at each pre-token of one line and combining them
6132             # into tokens
6133 6860         12701 while ( ++$i <= $max_token_index ) {
6134              
6135             # continue looking for the end of a quote
6136 59584 100       82393 if ($in_quote) {
6137 3170         7822 $self->do_FOLLOW_QUOTE();
6138 3170 100 100     8844 last if ( $in_quote || $i > $max_token_index );
6139             }
6140              
6141 59214 100 100     124275 if ( $type ne 'b' && $type ne 'CORE::' ) {
6142              
6143             # try to catch some common errors
6144 41499 100 100     69369 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
6145              
6146 2065 100       4637 if ( $last_nonblank_token eq 'eq' ) {
    50          
6147 9         38 $self->complain("Should 'eq' be '==' here ?\n");
6148             }
6149             elsif ( $last_nonblank_token eq 'ne' ) {
6150 0         0 $self->complain("Should 'ne' be '!=' here ?\n");
6151             }
6152             else {
6153             # that's all
6154             }
6155             }
6156              
6157             # fix c090, only rotate vars if a new token will be stored
6158 41499 100       59952 if ( $i_tok >= 0 ) {
6159              
6160 34828         39549 $last_last_nonblank_token = $last_nonblank_token;
6161 34828         37251 $last_last_nonblank_type = $last_nonblank_type;
6162              
6163 34828         38039 $last_nonblank_prototype = $prototype;
6164 34828         38098 $last_nonblank_block_type = $block_type;
6165 34828         37220 $last_nonblank_container_type = $container_type;
6166 34828         38563 $last_nonblank_type_sequence = $type_sequence;
6167 34828         35590 $last_nonblank_i = $i_tok;
6168 34828         35317 $last_nonblank_token = $tok;
6169 34828         36504 $last_nonblank_type = $type;
6170             }
6171              
6172             # Check for patches
6173 41499 100       68796 if ( $is_arrow_or_Z{$last_last_nonblank_type} ) {
6174              
6175             # Patch for c030: Fix things in case a '->' got separated
6176             # from the subsequent identifier by a side comment. We
6177             # need the last_nonblank_token to have a leading -> to
6178             # avoid triggering an operator expected error message at
6179             # the next '('. See also fix for git #63.
6180 1218 100       2511 if ( $last_last_nonblank_type eq '->' ) {
    50          
6181 1172 100 66     4001 if ( $last_nonblank_type eq 'w'
6182             || $last_nonblank_type eq 'i' )
6183             {
6184 793         1351 $last_nonblank_token = '->' . $last_nonblank_token;
6185 793         1186 $last_nonblank_type = 'i';
6186             }
6187             }
6188              
6189             # Fix part #3 for git82: propagate type 'Z' though L-R pair
6190             elsif ( $last_last_nonblank_type eq 'Z' ) {
6191 46 100       138 if ( $last_nonblank_type eq 'R' ) {
6192 1         3 $last_nonblank_type = $last_last_nonblank_type;
6193 1         1 $last_nonblank_token = $last_last_nonblank_token;
6194             }
6195             }
6196             else {
6197             # No other patches
6198             }
6199             }
6200             }
6201              
6202             # store previous token type
6203 59214 100       80553 if ( $i_tok >= 0 ) {
6204 52543         83840 $routput_token_type->[$i_tok] = $type;
6205 52543         74068 $routput_block_type->[$i_tok] = $block_type;
6206 52543         69132 $routput_type_sequence->[$i_tok] = $type_sequence;
6207 52543         62772 $routput_indent_flag->[$i_tok] = $indent_flag;
6208             }
6209              
6210             # get the next pre-token and type
6211             # $tok and $type will be modified to make the output token
6212 59214         77133 my $pre_tok = $tok = $rtokens->[$i]; # get the next pre-token
6213 59214         73829 my $pre_type = $type = $rtoken_type->[$i]; # and type
6214              
6215             # re-initialize various flags for the next output token
6216             (
6217              
6218             # remember the starting index of this token; we will update $i
6219 59214         91052 $i_tok,
6220             $block_type,
6221             $container_type,
6222             $type_sequence,
6223             $indent_flag,
6224             $prototype,
6225             )
6226             = (
6227              
6228             $i,
6229             EMPTY_STRING,
6230             EMPTY_STRING,
6231             EMPTY_STRING,
6232             0,
6233             EMPTY_STRING,
6234             );
6235              
6236             # this pre-token will start an output token
6237 59214         59789 push( @{$routput_token_list}, $i_tok );
  59214         79284  
6238              
6239             #---------------------------------------------------
6240             # The token search leads to one of 5 main END NODES:
6241             #---------------------------------------------------
6242              
6243             #-----------------------
6244             # END NODE 1: whitespace
6245             #-----------------------
6246 59214 100       95378 next if ( $pre_type eq 'b' );
6247              
6248             #----------------------
6249             # END NODE 2: a comment
6250             #----------------------
6251 41354 100       60099 if ( $pre_type eq '#' ) {
6252              
6253             # push non-indenting brace stack Look for a possible
6254             # non-indenting brace. This is only used to give a hint in
6255             # case the file is unbalanced.
6256             # Hardwired to '#<<<' for efficiency. We will not use the
6257             # result later if the pattern has been changed (very unusual).
6258 363 100 66     1653 if ( $last_nonblank_token eq '{'
      66        
      33        
      66        
6259             && $last_nonblank_block_type
6260             && $last_nonblank_type_sequence
6261             && !$self->[_in_format_skipping_]
6262             && $rOpts_non_indenting_braces )
6263             {
6264 46         90 my $offset = $rtoken_map->[$i_tok];
6265 46         122 my $text = substr( $input_line, $offset, 5 );
6266 46         77 my $len = length($text);
6267 46 100 66     358 if ( $len == 4 && $text eq '#<<<'
      66        
      66        
6268             || $len > 4 && $text eq '#<<< ' )
6269             {
6270 6         9 push @{ $self->[_rnon_indenting_brace_stack_] },
  6         16  
6271             $last_nonblank_type_sequence;
6272             }
6273             }
6274 363         651 last;
6275             }
6276              
6277             # continue gathering identifier if necessary
6278 40991 100       59659 if ($id_scan_state) {
6279              
6280 17 100 66     76 if ( $is_sub{$id_scan_state} || $is_package{$id_scan_state} ) {
6281 10         32 $self->scan_id();
6282             }
6283             else {
6284 7         25 $self->scan_identifier();
6285             }
6286              
6287 17 100       49 if ($id_scan_state) {
6288              
6289             # Still scanning ...
6290             # Check for side comment between sub and prototype (c061)
6291              
6292             # done if nothing left to scan on this line
6293 1 50       5 last if ( $i > $max_token_index );
6294              
6295 1         4 my ( $next_nonblank_token_uu, $i_next ) =
6296             find_next_nonblank_token_on_this_line( $i, $rtokens,
6297             $max_token_index );
6298              
6299             # done if it was just some trailing space
6300 1 50       4 last if ( $i_next > $max_token_index );
6301              
6302             # something remains on the line ... must be a side comment
6303 1         6 next;
6304             }
6305              
6306 16 100 100     87 next if ( ( $i > 0 ) || $type );
6307              
6308             # didn't find any token; start over
6309 7         16 $type = $pre_type;
6310 7         12 $tok = $pre_tok;
6311             }
6312              
6313             #-----------------------------------------------------------
6314             # Combine pre-tokens into digraphs and trigraphs if possible
6315             #-----------------------------------------------------------
6316              
6317             # See if we can make a digraph...
6318             # The following tokens are excluded and handled specially:
6319             # '/=' is excluded because the / might start a pattern.
6320             # 'x=' is excluded since it might be $x=, with $ on previous line
6321             # '**' and *= might be typeglobs of punctuation variables
6322             # I have allowed tokens starting with <, such as <=,
6323             # because I don't think these could be valid angle operators.
6324             # test file: storrs4.pl
6325 40981 100 100     97132 if ( $can_start_digraph{$tok}
      100        
6326             && $i < $max_token_index
6327             && $is_digraph{ $tok . $rtokens->[ $i + 1 ] } )
6328             {
6329              
6330 3047         4085 my $combine_ok = 1;
6331 3047         5237 my $test_tok = $tok . $rtokens->[ $i + 1 ];
6332              
6333             # check for special cases which cannot be combined
6334              
6335             # Smartmatch is being deprecated, but may exist in older
6336             # scripts.
6337 3047 100       9021 if ( $test_tok eq '~~' ) {
    100          
    100          
6338              
6339             # Do not combine if a TERM is required
6340 111 100       224 if ( $self->operator_expected( $tok, '~', undef ) == TERM )
6341             {
6342              
6343             # block types ';' may actually be hash refs, c567
6344 1 50       3 if ( $last_nonblank_type eq '}' ) {
6345 1         3 my $blk = $rbrace_type->[ $brace_depth + 1 ];
6346 1 50 33     6 if ( !$blk || $blk ne ';' ) { $combine_ok = 0 }
  0         0  
6347             }
6348             else {
6349 0         0 $combine_ok = 0;
6350             }
6351             }
6352             }
6353              
6354             # '//' must be defined_or operator if an operator is expected.
6355             # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
6356             # could be migrated here for clarity
6357              
6358             # Patch for RT#102371, misparsing a // in the following snippet:
6359             # state $b //= ccc();
6360             # The solution is to always accept the digraph (or trigraph)
6361             # after type 'Z' (possible file handle). The reason is that
6362             # sub operator_expected gives TERM expected here, which is
6363             # wrong in this case.
6364             elsif ( $test_tok eq '//' ) {
6365 16 50       47 if ( $last_nonblank_type ne 'Z' ) {
6366              
6367             # note that here $tok = '/' and the next tok and type
6368             # is '/'
6369 16         29 my $blank_after_Z;
6370 16         53 $expecting =
6371             $self->operator_expected( $tok, '/', $blank_after_Z );
6372              
6373             # Patched for RT#101547, was
6374             # 'unless ($expecting==OPERATOR)'
6375 16 100       43 $combine_ok = 0 if ( $expecting == TERM );
6376             }
6377             }
6378              
6379             # Patch for RT #114359: mis-parsing of "print $x ** 0.5;
6380             # Accept the digraphs '**' only after type 'Z'
6381             # Otherwise postpone the decision.
6382             elsif ( $test_tok eq '**' ) {
6383 45 100       147 if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 }
  43         75  
6384             }
6385             else {
6386             ## no other special cases
6387             }
6388              
6389 3047 100 100     15477 if (
      66        
      100        
6390              
6391             # still ok to combine?
6392             $combine_ok
6393              
6394             && ( $test_tok ne '/=' ) # might be pattern
6395             && ( $test_tok ne 'x=' ) # might be $x
6396             && ( $test_tok ne '*=' ) # typeglob?
6397              
6398             # Moved above as part of fix for
6399             # RT #114359: Missparsing of "print $x ** 0.5;
6400             # && ( $test_tok ne '**' ) # typeglob?
6401             )
6402             {
6403 2994         3762 $tok = $test_tok;
6404 2994         3487 $i++;
6405              
6406             # Now try to assemble trigraphs. Note that all possible
6407             # perl trigraphs can be constructed by appending a character
6408             # to a digraph.
6409 2994         4407 $test_tok = $tok . $rtokens->[ $i + 1 ];
6410              
6411 2994 100       5586 if ( $is_trigraph{$test_tok} ) {
6412 87         141 $tok = $test_tok;
6413 87         147 $i++;
6414             }
6415              
6416             # The only current tetragraph is the double diamond operator
6417             # and its first three characters are NOT a trigraph, so
6418             # we do can do a special test for it
6419             else {
6420 2907 100       5775 if ( $test_tok eq '<<>' ) {
6421 1         3 $test_tok .= $rtokens->[ $i + 2 ];
6422 1 50       4 if ( $is_tetragraph{$test_tok} ) {
6423 1         2 $tok = $test_tok;
6424 1         2 $i += 2;
6425             }
6426             }
6427             }
6428             }
6429             }
6430              
6431 40981         44810 $type = $tok;
6432 40981         53410 $next_tok = $rtokens->[ $i + 1 ];
6433 40981         50179 $next_type = $rtoken_type->[ $i + 1 ];
6434              
6435             # expecting an operator here? first try table lookup, then function
6436 40981         55433 $expecting = $op_expected_table{$last_nonblank_type};
6437 40981 100       59396 if ( !defined($expecting) ) {
6438 11705   100     20925 my $blank_after_Z = $last_nonblank_type eq 'Z'
6439             && ( $i == 0 || $rtoken_type->[ $i - 1 ] eq 'b' );
6440 11705         24970 $expecting =
6441             $self->operator_expected( $tok, $next_type, $blank_after_Z );
6442             }
6443              
6444 40981         42289 DEBUG_TOKENIZE && do {
6445             local $LIST_SEPARATOR = ')(';
6446             my @debug_list = (
6447             $last_nonblank_token, $tok,
6448             $next_tok, $brace_depth,
6449             $rbrace_type->[$brace_depth], $paren_depth,
6450             $rparen_type->[$paren_depth],
6451             );
6452             print {*STDOUT} "TOKENIZE:(@debug_list)\n";
6453             };
6454              
6455             # The next token is '$tok'.
6456             # Now we have to define its '$type'
6457              
6458             #------------------------
6459             # END NODE 3: a bare word
6460             #------------------------
6461 40981 100       58274 if ( $pre_type eq 'w' ) {
6462 6616         16038 my $is_last = $self->do_BAREWORD();
6463 6616 100       11425 last if ($is_last);
6464 6607         14421 next;
6465             }
6466              
6467             # Turn off attribute list on first non-blank, non-bareword,
6468             # and non-comment (added to fix c038)
6469 34365         41814 $self->[_in_attribute_list_] = 0;
6470              
6471             #-------------------------------
6472             # END NODE 4: a string of digits
6473             #-------------------------------
6474 34365 100       48537 if ( $pre_type eq 'd' ) {
6475 2508         6975 $self->do_DIGITS();
6476 2508         4790 next;
6477             }
6478              
6479             #------------------------------------------
6480             # END NODE 5: everything else (punctuation)
6481             #------------------------------------------
6482 31857         47517 my $code = $tokenization_code->{$tok};
6483 31857 100       45635 if ($code) {
6484 29616         63461 $code->($self);
6485 29616 100       44869 redo if ($in_quote);
6486             }
6487              
6488             # Check for a non-TERM where a TERM is expected. Note that this
6489             # checks all symbols, even those without a $code (update c566)
6490 29181 100       54302 if ( $expecting == TERM ) {
6491             my $is_not_term =
6492             $type eq ';'
6493             || $type eq ','
6494 11678   100     40806 || $is_binary_operator_type{$type};
6495 11678 100       26410 if ($is_not_term) {
6496 277         879 $self->error_if_expecting_TERM();
6497             }
6498             }
6499             } ## End main tokenizer loop
6500              
6501             # Store the final token
6502 6860 100       11522 if ( $i_tok >= 0 ) {
6503 6671         12048 $routput_token_type->[$i_tok] = $type;
6504 6671         10184 $routput_block_type->[$i_tok] = $block_type;
6505 6671         10031 $routput_type_sequence->[$i_tok] = $type_sequence;
6506 6671         9167 $routput_indent_flag->[$i_tok] = $indent_flag;
6507             }
6508              
6509             # Remember last nonblank values
6510 6860 100 100     18870 if ( $type ne 'b' && $type ne '#' ) {
6511              
6512 6349         7510 $last_last_nonblank_token = $last_nonblank_token;
6513 6349         7419 $last_last_nonblank_type = $last_nonblank_type;
6514              
6515 6349         7726 $last_nonblank_prototype = $prototype;
6516 6349         7265 $last_nonblank_block_type = $block_type;
6517 6349         7336 $last_nonblank_container_type = $container_type;
6518 6349         7440 $last_nonblank_type_sequence = $type_sequence;
6519 6349         6990 $last_nonblank_token = $tok;
6520 6349         7406 $last_nonblank_type = $type;
6521             }
6522              
6523             # reset indentation level if necessary at a sub or package
6524             # in an attempt to recover from a nesting error
6525 6860 50       11466 if ( $level_in_tokenizer < 0 ) {
6526 0 0       0 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
6527 0         0 reset_indentation_level(0);
6528 0         0 $self->brace_warning("resetting level to 0 at $1 $2\n");
6529             }
6530             }
6531              
6532 6860         9063 $self->[_in_quote_] = $in_quote;
6533             $self->[_quote_target_] =
6534             $in_quote
6535             ? (
6536             $matching_end_token{$quote_character}
6537 6860 100       12334 ? $matching_end_token{$quote_character}
    100          
6538             : $quote_character
6539             )
6540             : EMPTY_STRING;
6541 6860         10782 $self->[_rhere_target_list_] = $rhere_target_list;
6542              
6543 6860         9698 return;
6544             } ## end sub tokenizer_main_loop
6545              
6546             sub tokenizer_wrapup_line {
6547 6860     6860 0 10675 my ( $self, $line_of_tokens ) = @_;
6548              
6549             #---------------------------------------------------------
6550             # Package a line of tokens for shipping back to the caller
6551             #---------------------------------------------------------
6552              
6553             # Arrays to hold token values for this line:
6554             my (
6555 6860         9899 @output_levels, @output_block_type, @output_type_sequence,
6556             @output_token_type, @output_tokens,
6557             );
6558              
6559 6860         14666 $line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
6560              
6561             # Remember starting nesting block string
6562 6860         9195 my $nesting_block_string_0 = $nesting_block_string;
6563              
6564             #-----------------
6565             # Loop over tokens
6566             #-----------------
6567             # $i is the index of the pretoken which starts this full token
6568 6860         8009 foreach my $ii ( @{$routput_token_list} ) {
  6860         11601  
6569              
6570 59459         68915 my $type_i = $routput_token_type->[$ii];
6571              
6572             #----------------------------------------
6573             # Section 1. Handle a non-sequenced token
6574             #----------------------------------------
6575 59459 100       75378 if ( !$routput_type_sequence->[$ii] ) {
6576              
6577             #-------------------------------
6578             # Section 1.1. types ';' and 't'
6579             #-------------------------------
6580             # - output anonymous 'sub' as keyword (type 'k')
6581             # - output __END__, __DATA__, and format as type 'k' instead
6582             # of ';' to make html colors correct, etc.
6583 48567 100       82490 if ( $is_semicolon_or_t{$type_i} ) {
    50          
6584 3153         4593 my $tok_i = $rtokens->[$ii];
6585 3153 100       6759 if ( $is_END_DATA_format_sub{$tok_i} ) {
6586 187         337 $type_i = 'k';
6587             }
6588             }
6589              
6590             #----------------------------------------------
6591             # Section 1.2. Check for an invalid token type.
6592             #----------------------------------------------
6593             # This can happen by running perltidy on non-scripts although
6594             # it could also be bug introduced by programming change. Perl
6595             # silently accepts a 032 (^Z) and takes it as the end
6596             elsif ( !$is_valid_token_type{$type_i} ) {
6597 0 0       0 if ( !$self->[_in_error_] ) {
6598 0         0 my $val = ord($type_i);
6599 0         0 $self->warning(
6600             "unexpected character decimal $val ($type_i) in script\n"
6601             );
6602 0         0 $self->[_in_error_] = 1;
6603             }
6604             }
6605             else {
6606             # valid token type other than ; and t
6607             }
6608              
6609             #----------------------------------------------------
6610             # Section 1.3. Store values for a non-sequenced token
6611             #----------------------------------------------------
6612 48567         63421 push( @output_levels, $level_in_tokenizer );
6613 48567         59820 push( @output_block_type, EMPTY_STRING );
6614 48567         58213 push( @output_type_sequence, EMPTY_STRING );
6615 48567         74844 push( @output_token_type, $type_i );
6616              
6617             }
6618              
6619             #------------------------------------
6620             # Section 2. Handle a sequenced token
6621             # One of { [ ( ? : ) ] }
6622             #------------------------------------
6623             else {
6624              
6625             # $level_i is the level we will store. Levels of braces are
6626             # set so that the leading braces have a HIGHER level than their
6627             # CONTENTS, which is convenient for indentation.
6628 10892         12269 my $level_i = $level_in_tokenizer;
6629              
6630             # $tok_i is the PRE-token. It only equals the token for symbols
6631 10892         13367 my $tok_i = $rtokens->[$ii];
6632              
6633             # $routput_indent_flag->[$ii] indicates that we need a change
6634             # in level at a nested ternary, as follows
6635             # 1 => at a nested ternary ?
6636             # -1 => at a nested ternary :
6637             # 0 => otherwise
6638              
6639             #--------------------------------------------
6640             # Section 2.1 Handle a level-increasing token
6641             #--------------------------------------------
6642 10892 100       21627 if ( $is_opening_or_ternary_type{$type_i} ) {
    50          
6643              
6644 5446 100       8571 if ( $type_i eq '?' ) {
6645              
6646 193 100       570 if ( $routput_indent_flag->[$ii] > 0 ) {
6647 8         15 $level_in_tokenizer++;
6648              
6649             # break BEFORE '?' in a nested ternary
6650 8         11 $level_i = $level_in_tokenizer;
6651 8         18 $nesting_block_string .= "$nesting_block_flag";
6652              
6653             }
6654             }
6655             else {
6656              
6657 5253         6773 $nesting_token_string .= $tok_i;
6658              
6659 5253 100 100     11342 if ( $type_i eq '{' || $type_i eq 'L' ) {
6660              
6661 4813         5801 $level_in_tokenizer++;
6662              
6663 4813 100       7709 if ( $routput_block_type->[$ii] ) {
6664 1115         1569 $nesting_block_flag = 1;
6665 1115         1668 $nesting_block_string .= '1';
6666             }
6667             else {
6668 3698         4774 $nesting_block_flag = 0;
6669 3698         5321 $nesting_block_string .= '0';
6670             }
6671             }
6672             }
6673             }
6674              
6675             #---------------------------------------------
6676             # Section 2.2. Handle a level-decreasing token
6677             #---------------------------------------------
6678             elsif ( $is_closing_or_ternary_type{$type_i} ) {
6679              
6680 5446 100       9582 if ( $type_i ne ':' ) {
6681 5253         8346 my $char = chop $nesting_token_string;
6682 5253 50       11465 if ( $char ne $matching_start_token{$tok_i} ) {
6683 0         0 $nesting_token_string .= $char . $tok_i;
6684             }
6685             }
6686              
6687 5446 100 100     14059 if (
      100        
      100        
6688             $type_i eq '}'
6689             || $type_i eq 'R'
6690              
6691             # only the second and higher ? : have levels
6692             || $type_i eq ':' && $routput_indent_flag->[$ii] < 0
6693             )
6694             {
6695              
6696 4821         5970 $level_i = --$level_in_tokenizer;
6697              
6698 4821 50       7827 if ( $level_in_tokenizer < 0 ) {
6699 0 0       0 if ( !$self->[_saw_negative_indentation_] ) {
6700 0         0 $self->[_saw_negative_indentation_] = 1;
6701 0         0 $self->warning(
6702             "Starting negative indentation\n");
6703             }
6704             }
6705              
6706             # restore previous level values
6707 4821 50       8149 if ( length($nesting_block_string) > 1 )
6708             { # true for valid script
6709 4821         5791 chop $nesting_block_string;
6710 4821         8489 $nesting_block_flag =
6711             substr( $nesting_block_string, -1 ) eq '1';
6712             }
6713              
6714             }
6715             }
6716              
6717             #-----------------------------------------------------
6718             # Section 2.3. Unexpected sequenced token type - error
6719             #-----------------------------------------------------
6720             else {
6721              
6722             # The tokenizer should only be assigning sequence numbers
6723             # to types { [ ( ? ) ] } :
6724 0         0 DEVEL_MODE && Fault(<<EOM);
6725             unexpected sequence number on token type $type_i with pre-tok=$tok_i
6726             EOM
6727             }
6728              
6729             #------------------------------------------------
6730             # Section 2.4. Store values for a sequenced token
6731             #------------------------------------------------
6732              
6733             # The starting nesting block string, which is used in any .LOG
6734             # output, should include the first token of the line
6735 10892 100       16503 if ( !@output_levels ) {
6736 1776         2674 $nesting_block_string_0 = $nesting_block_string;
6737             }
6738              
6739             # Store values for a sequenced token
6740 10892         15435 push( @output_levels, $level_i );
6741 10892         17567 push( @output_block_type, $routput_block_type->[$ii] );
6742 10892         15407 push( @output_type_sequence, $routput_type_sequence->[$ii] );
6743 10892         19292 push( @output_token_type, $type_i );
6744              
6745             }
6746             } ## End loop to over tokens
6747              
6748             #---------------------
6749             # Post-loop operations
6750             #---------------------
6751              
6752 6860         14186 $line_of_tokens->{_nesting_blocks_0} = $nesting_block_string_0;
6753              
6754             # Form and store the tokens
6755 6860 50       11565 if (@output_levels) {
6756              
6757 6860         7467 my $im = shift @{$routput_token_list};
  6860         10777  
6758 6860         10229 my $offset = $rtoken_map->[$im];
6759 6860         7674 foreach my $ii ( @{$routput_token_list} ) {
  6860         9672  
6760 52599         55642 my $numc = $rtoken_map->[$ii] - $offset;
6761 52599         77440 push( @output_tokens, substr( $input_line, $offset, $numc ) );
6762 52599         52524 $offset += $numc;
6763              
6764             # programming note: it seems most efficient to 'next' out of
6765             # a critical loop like this as early as possible. So instead
6766             # of 'if ( DEVEL_MODE && $numc < 0 )' we write:
6767 52599         55888 next unless (DEVEL_MODE);
6768 0 0       0 next if ( $numc > 0 );
6769              
6770             # Should not happen unless @{$rtoken_map} is corrupted
6771 0         0 Fault("number of characters is '$numc' but should be >0\n");
6772             }
6773              
6774             # Form and store the final token of this line
6775 6860         9653 my $numc = length($input_line) - $offset;
6776 6860         12043 push( @output_tokens, substr( $input_line, $offset, $numc ) );
6777              
6778 6860         8315 if (DEVEL_MODE) {
6779             if ( $numc <= 0 ) {
6780              
6781             # check '$rtoken_map' and '$routput_token_list'
6782             Fault("Number of Characters is '$numc' but should be >0\n");
6783             }
6784              
6785             # Make sure we didn't gain or lose any characters
6786             my $test_line = join EMPTY_STRING, @output_tokens;
6787             if ( $test_line ne $input_line ) {
6788             my $len_input = length($input_line);
6789             my $len_test = length($test_line);
6790              
6791             # check '$rtoken_map' and '$routput_token_list'
6792             Fault(<<EOM);
6793             Reconstructed line difers from input; input_length=$len_input test_length=$len_test
6794             input:'$input_line'
6795             test :'$test_line'
6796             EOM
6797             }
6798             }
6799             }
6800              
6801             # Wrap up this line of tokens for shipping to the Formatter
6802 6860         17468 $line_of_tokens->{_rtoken_type} = \@output_token_type;
6803 6860         14048 $line_of_tokens->{_rtokens} = \@output_tokens;
6804 6860         13291 $line_of_tokens->{_rblock_type} = \@output_block_type;
6805 6860         11445 $line_of_tokens->{_rtype_sequence} = \@output_type_sequence;
6806 6860         11259 $line_of_tokens->{_rlevels} = \@output_levels;
6807              
6808             #-----------------------------------------------------------------
6809             # Compare input indentation with computed levels at closing braces
6810             #-----------------------------------------------------------------
6811             # This may provide a useful hint for error location if the file
6812             # is not balanced in braces. Closing braces are used because they
6813             # have a well-defined indentation and can be processed efficiently.
6814 6860 100       12596 if ( $output_tokens[0] eq '}' ) {
6815              
6816 737         1218 my $blk = $output_block_type[0];
6817 737 100 100     4171 if (
      66        
6818             (
6819             # builtin block types without continuation indentation
6820             $is_zero_continuation_block_type{$blk}
6821              
6822             # or a named sub, but skip sub aliases for efficiency,
6823             # since this is just for diagnostic info
6824             || substr( $blk, 0, 4 ) eq 'sub '
6825             )
6826              
6827             # and we are not in format skipping
6828             && !$self->[_in_format_skipping_]
6829             )
6830             {
6831              
6832             # subtract 1 space for newline in untrimmed line
6833 395         811 my $untrimmed_input_line = $line_of_tokens->{_line_text};
6834 395         886 my $space_count =
6835             length($untrimmed_input_line) - length($input_line) - 1;
6836              
6837             # check for tabs
6838 395 100 100     1540 if ( $space_count
6839             && ord( substr( $untrimmed_input_line, 0, 1 ) ) == ORD_TAB )
6840             {
6841 15 50       125 if ( $untrimmed_input_line =~ /^(\t+)?(\s+)?/ ) {
6842 15 50       65 if ($1) { $space_count += length($1) * $tabsize }
  15         38  
6843 15 100       49 if ($2) { $space_count += length($2) }
  1         4  
6844             }
6845             }
6846              
6847             # '$guess' = the level according to indentation
6848 395         968 my $guess = int( $space_count / $rOpts_indent_columns );
6849              
6850             # subtract 1 level from guess for --indent-closing-brace
6851 395 100       1001 $guess -= 1 if ($rOpts_indent_closing_brace);
6852              
6853             # subtract 1 from $level for each non-indenting brace level
6854 395         561 my $adjust = @{ $self->[_rnon_indenting_brace_stack_] };
  395         692  
6855              
6856 395         655 my $level = $output_levels[0];
6857              
6858             # find the difference between expected and indentation guess
6859 395         665 my $level_diff = $level - $adjust - $guess;
6860              
6861 395         601 my $rhash = $self->[_rclosing_brace_indentation_hash_];
6862              
6863             # results are only valid if we guess correctly at the
6864             # first spaced brace
6865 395 100 100     1503 if ( $space_count && !defined( $rhash->{valid} ) ) {
6866 79         199 $rhash->{valid} = !$level_diff;
6867             }
6868              
6869             # save the result
6870 395         771 my $rhistory_line_number = $rhash->{rhistory_line_number};
6871 395         707 my $rhistory_level_diff = $rhash->{rhistory_level_diff};
6872 395         718 my $rhistory_anchor_point = $rhash->{rhistory_anchor_point};
6873              
6874 395 100       960 if ( $rhistory_level_diff->[-1] != $level_diff ) {
6875              
6876             # Patch for non-indenting-braces: if we guess zero and
6877             # match before all non-indenting braces have been found,
6878             # it means that we would need negative indentation to
6879             # match if/when the brace is found. So we have a problem
6880             # from here on. We indicate this with a value 2 instead
6881             # of 1 as a signal to stop outputting the table here.
6882 55         95 my $anchor = 1;
6883 55 50 66     277 if ( $guess == 0 && $adjust > 0 ) { $anchor = 2 }
  0         0  
6884              
6885             # add an anchor point
6886 55         89 push @{$rhistory_level_diff}, $level_diff;
  55         119  
6887 55         95 push @{$rhistory_line_number}, $input_line_number;
  55         120  
6888 55         94 push @{$rhistory_anchor_point}, $anchor;
  55         132  
6889             }
6890             else {
6891              
6892             # add a movable point following an anchor point
6893 340 100       766 if ( $rhistory_anchor_point->[-1] ) {
6894 152         291 push @{$rhistory_level_diff}, $level_diff;
  152         316  
6895 152         276 push @{$rhistory_line_number}, $input_line_number;
  152         288  
6896 152         244 push @{$rhistory_anchor_point}, 0;
  152         367  
6897             }
6898              
6899             # extend a movable point
6900             else {
6901 188         443 $rhistory_line_number->[-1] = $input_line_number;
6902             }
6903             }
6904             }
6905             }
6906              
6907 6860         13713 return;
6908             } ## end sub tokenizer_wrapup_line
6909              
6910             } ## end tokenize_this_line
6911              
6912             #######################################################################
6913             # Tokenizer routines which assist in identifying token types
6914             #######################################################################
6915              
6916             # Define Global '%op_expected_table'
6917             # = hash table of operator expected values based on last nonblank token
6918              
6919             # exceptions to perl's weird parsing rules after type 'Z'
6920             my %is_weird_parsing_rule_exception;
6921              
6922             my %is_paren_dollar;
6923              
6924             my %is_n_v;
6925              
6926             BEGIN {
6927              
6928             # Always expecting TERM following these types:
6929             # note: this is identical to '@value_requestor_type' defined later.
6930             # Fix for c250: add new type 'P' for package (expecting VERSION or {}
6931             # after package NAMESPACE, so expecting TERM)
6932             # Fix for c250: add new type 'S' for sub (not expecting operator)
6933 44     44   867 my @q = qw#
6934             ; ! + x & ? F J - p / Y : % f U ~ A G j L P S * . | ^ < = [ m { > t
6935             || >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /=
6936             &= // >> ~. &. |. ^.
6937             ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
6938             #;
6939 44         174 push @q, BACKSLASH;
6940 44         96 push @q, COMMA;
6941 44         83 push @q, '('; # for completeness, not currently a token type
6942 44         94 push @q, '->'; # was previously in UNKNOWN
6943 44         2435 $op_expected_table{$_} = TERM for @q;
6944              
6945             # No UNKNOWN table types:
6946             # removed '->' for c030, now always TERM
6947             # removed 'w' for c392 to allow use of 'function_count' info in the sub
6948              
6949             # Always expecting OPERATOR ...
6950             # 'n' and 'v' are currently excluded because they might be VERSION numbers
6951             # 'i' is currently excluded because it might be a package
6952             # 'q' is currently excluded because it might be a prototype
6953             # Fix for c030: removed '->' from this list:
6954             # Fix for c250: added 'i' because new type 'P' was added
6955 44         258 @q = qw( -- C h R ++ ] Q <> i );
6956 44         80 push @q, ')';
6957 44         585 $op_expected_table{$_} = OPERATOR for @q;
6958              
6959             # Fix for git #62: added '*' and '%'
6960 44         122 @q = qw( < ? * % );
6961 44         115 $is_weird_parsing_rule_exception{$_} = 1 for @q;
6962              
6963 44         85 @q = qw<) $>;
6964 44         100 $is_paren_dollar{$_} = 1 for @q;
6965              
6966 44         83 @q = qw( n v );
6967 44         1377 $is_n_v{$_} = 1 for @q;
6968              
6969             } ## end BEGIN
6970              
6971 44     44   288 use constant DEBUG_OPERATOR_EXPECTED => 0;
  44         67  
  44         97167  
6972              
6973             sub operator_expected {
6974              
6975 11847     11847 0 21236 my ( $self, $tok, $next_type, $blank_after_Z ) = @_;
6976              
6977             # Returns a parameter indicating what types of tokens can occur next
6978              
6979             # Call format:
6980             # $op_expected =
6981             # $self->operator_expected( $tok, $next_type, $blank_after_Z );
6982             # where
6983             # $tok is the current token
6984             # $next_type is the type of the next token (blank or not)
6985             # $blank_after_Z = flag for guessing after a type 'Z':
6986             # true if $tok follows type 'Z' with intermediate blank
6987             # false if $tok follows type 'Z' with no intermediate blank
6988             # ignored if $tok does not follow type 'Z'
6989              
6990             # Many perl symbols have two or more meanings. For example, '<<'
6991             # can be a shift operator or a here-doc operator. The
6992             # interpretation of these symbols depends on the current state of
6993             # the tokenizer, which may either be expecting a term or an
6994             # operator. For this example, a << would be a shift if an OPERATOR
6995             # is expected, and a here-doc if a TERM is expected. This routine
6996             # is called to make this decision for any current token. It returns
6997             # one of three possible values:
6998             #
6999             # OPERATOR - operator expected (or at least, not a term)
7000             # UNKNOWN - can't tell
7001             # TERM - a term is expected (or at least, not an operator)
7002             #
7003             # The decision is based on what has been seen so far. This
7004             # information is stored in the "$last_nonblank_type" and
7005             # "$last_nonblank_token" variables. For example, if the
7006             # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
7007             # if $last_nonblank_type is 'n' (numeric), we are expecting an
7008             # OPERATOR.
7009             #
7010             # If a UNKNOWN is returned, the calling routine must guess. A major
7011             # goal of this tokenizer is to minimize the possibility of returning
7012             # UNKNOWN, because a wrong guess can spoil the formatting of a
7013             # script.
7014             #
7015             # Adding NEW_TOKENS: it is critically important that this routine be
7016             # updated to allow it to determine if an operator or term is to be
7017             # expected after the new token. Doing this simply involves adding
7018             # the new token character to one of the regexes in this routine or
7019             # to one of the hash lists
7020             # that it uses, which are initialized in the BEGIN section.
7021             # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
7022             # $statement_type
7023              
7024             # When possible, token types should be selected such that we can determine
7025             # the 'operator_expected' value by a simple hash lookup. If there are
7026             # exceptions, that is an indication that a new type is needed.
7027              
7028             #--------------------------------------------
7029             # Section 1: Table lookup will get most cases
7030             #--------------------------------------------
7031              
7032             # Many types are can be obtained by a table lookup. This typically handles
7033             # more than half of the calls. For speed, the caller may try table lookup
7034             # first before calling this sub.
7035 11847         15396 my $op_expected = $op_expected_table{$last_nonblank_type};
7036 11847 100       17770 if ( defined($op_expected) ) {
7037             DEBUG_OPERATOR_EXPECTED
7038 78         111 && print {*STDOUT}
7039             "OPERATOR_EXPECTED: Table Lookup; returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
7040 78         193 return $op_expected;
7041             }
7042              
7043             DEBUG_OPERATOR_EXPECTED
7044 11769         12596 && print {*STDOUT}
7045             "OPERATOR_EXPECTED: in hardwired table for last type $last_nonblank_type token $last_nonblank_token\n";
7046              
7047             #---------------------------------------------
7048             # Section 2: Handle special cases if necessary
7049             #---------------------------------------------
7050              
7051             # Types 'k', '}' and 'Z' depend on context
7052             # Types 'n', 'v', 'q' also depend on context.
7053              
7054             # identifier...
7055             # Fix for c250: removed coding for type 'i' because 'i' and new type 'P'
7056             # are now done by hash table lookup
7057              
7058             #--------------------
7059             # Section 2A: keyword
7060             #--------------------
7061 11769 100       18699 if ( $last_nonblank_type eq 'k' ) {
7062              
7063             # keywords expecting TERM:
7064 3063 100       7700 if ( $expecting_term_token{$last_nonblank_token} ) {
7065              
7066             # Exceptions from TERM:
7067              
7068             # // may follow perl functions which may be unary operators
7069             # see test file dor.t (defined or);
7070 2943 100 100     6011 if (
      100        
7071             $tok eq '/'
7072             && $next_type eq '/'
7073             && $is_keyword_rejecting_slash_as_pattern_delimiter{
7074             $last_nonblank_token}
7075             )
7076             {
7077 1         3 return OPERATOR;
7078             }
7079              
7080             # Patch to allow a ? following 'split' to be a deprecated pattern
7081             # delimiter. This patch is coordinated with the omission of split
7082             # from the list
7083             # %is_keyword_rejecting_question_as_pattern_delimiter. This patch
7084             # will force perltidy to guess.
7085 2942 50 66     5916 if ( $tok eq '?'
7086             && $last_nonblank_token eq 'split' )
7087             {
7088 0         0 return UNKNOWN;
7089             }
7090              
7091 2942         5593 return TERM;
7092             }
7093              
7094             # keywords expecting OPERATOR:
7095 120 100       362 if ( $expecting_operator_token{$last_nonblank_token} ) {
7096 7         19 return OPERATOR;
7097             }
7098              
7099 113         299 return TERM;
7100              
7101             } ## end type 'k'
7102              
7103             #------------------------------------
7104             # Section 2B: Closing container token
7105             #------------------------------------
7106              
7107             # Note that the actual token for type '}' may also be a ')'.
7108              
7109             # Also note that $last_nonblank_token is not the token corresponding to
7110             # $last_nonblank_type when the type is a closing container. In that
7111             # case it is the token before the corresponding opening container token.
7112             # So for example, for this snippet
7113             # $a = do { BLOCK } / 2;
7114             # the $last_nonblank_token is 'do' when $last_nonblank_type eq '}'.
7115              
7116 8706 100       15110 if ( $last_nonblank_type eq '}' ) {
7117              
7118             #-------------------------------------------
7119             # Section 2B1: Closing structural ')' or ']'
7120             #-------------------------------------------
7121 4140 100 100     10520 if ( $last_nonblank_token eq ')' || $last_nonblank_token eq ']' ) {
7122 2794         5600 return OPERATOR;
7123             }
7124              
7125             #-------------------------------------
7126             # Section 2B2: Closing block brace '}'
7127             #-------------------------------------
7128 1346         2381 my $blk = $rbrace_type->[ $brace_depth + 1 ];
7129              
7130             # Non-blocks
7131 1346 100       2576 if ( !defined($blk) ) {
7132 2         4 return OPERATOR;
7133             }
7134              
7135             # Unidentified block type
7136 1344 100       2921 if ( !$blk ) {
7137 367         776 return UNKNOWN;
7138             }
7139              
7140             # Blocks followed by a TERM
7141 977 100 100     9842 if ( $is_zero_continuation_block_type{$blk}
      100        
      100        
      100        
      100        
      100        
      66        
      100        
7142             || $is_sort_map_grep{$blk}
7143             || $is_grep_alias{$blk}
7144             || substr( $blk, -1, 1 ) eq ':' && $blk =~ /^\w+:$/
7145             || substr( $blk, 0, 3 ) eq 'sub' && $blk =~ /^sub\s/
7146             || substr( $blk, 0, 7 ) eq 'package' && $blk =~ /^package\s/ )
7147             {
7148 635         1563 return TERM;
7149             }
7150              
7151             # Blocks followed by an OPERATOR
7152             # do eval sub
7153 342 100 100     1360 if ( $is_block_operator{$blk}
7154             || $is_sub{$blk} )
7155             {
7156 274         667 return OPERATOR;
7157             }
7158              
7159             # Any other block type is marked UNKNOWN to be safe (c566).
7160             # For example, a block type marked ';' could be a hash ref:
7161             # { map { $_ => 'x' } keys %main:: } ~~ \%main::;
7162             # The tokenizer would have to analyze the contents to distinguish
7163             # between a block closure and a hash ref brace in this case. So mark
7164             # this as UNKNOWN and let the lower level routines figure it out.
7165 68         196 return UNKNOWN;
7166             }
7167              
7168             #-------------------------------
7169             # Section 2C: number or v-string
7170             #-------------------------------
7171             # An exception is for VERSION numbers a 'use' statement. It has the format
7172             # use Module VERSION LIST
7173             # We could avoid this exception by writing a special sub to parse 'use'
7174             # statements and perhaps mark these numbers with a new type V (for VERSION)
7175 4566 100       9462 if ( $is_n_v{$last_nonblank_type} ) {
7176 2621 100       4533 if ( $statement_type eq 'use' ) {
7177 11         32 return UNKNOWN;
7178             }
7179 2610         4756 return OPERATOR;
7180             }
7181              
7182             #---------------------
7183             # Section 2D: qw quote
7184             #---------------------
7185             # TODO: labeled prototype words would better be given type 'A' or maybe
7186             # 'J'; not 'q'; or maybe mark as type 'Y'?
7187 1945 100       3962 if ( $last_nonblank_type eq 'q' ) {
7188 158 50       400 if ( $last_nonblank_token eq 'prototype' ) {
7189 0         0 return TERM;
7190             }
7191              
7192             # update for --use-feature=class (rt145706):
7193             # Look for class VERSION after possible attribute, as in
7194             # class Example::Subclass : isa(Example::Base) 1.345 { ... }
7195 158 100       396 if ( $statement_type =~ /^package\b/ ) {
7196 3         5 return TERM;
7197             }
7198              
7199             # everything else
7200 155         323 return OPERATOR;
7201             }
7202              
7203             #---------------------
7204             # Section 2E: bareword
7205             #---------------------
7206 1787 100       3607 if ( $last_nonblank_type eq 'w' ) {
7207              
7208             # It is safest to return UNKNOWN if a possible ? pattern delimiter may
7209             # follow (git #32, c469) and let the guess algorithm handle it.
7210 1738 100       3193 if ( $tok eq '?' ) { return UNKNOWN }
  7         18  
7211              
7212             # see if this has been seen in the role of a function taking args
7213 1731         3232 my $rinfo = $self->[_rbareword_info_]->{$current_package};
7214 1731 100       3140 if ($rinfo) {
7215 1228         1974 $rinfo = $rinfo->{$last_nonblank_token};
7216 1228 100       2440 if ($rinfo) {
7217 317         537 my $function_count = $rinfo->{function_count};
7218 317 100 66     1108 if ( $function_count && $function_count > 0 ) { return TERM }
  135         363  
7219             }
7220             }
7221 1596         3251 return UNKNOWN;
7222             }
7223              
7224             #-----------------------------------
7225             # Section 2F: file handle or similar
7226             #-----------------------------------
7227 49 100       134 if ( $last_nonblank_type eq 'Z' ) {
7228              
7229             # angle.t
7230 45 100       193 if ( $last_nonblank_token =~ /^\w/ ) {
7231 2         5 return UNKNOWN;
7232             }
7233              
7234             # Exception to weird parsing rules for 'x(' ... see case b1205:
7235             # In something like 'print $vv x(...' the x is an operator;
7236             # Likewise in 'print $vv x$ww' the x is an operator (case b1207)
7237             # otherwise x follows the weird parsing rules.
7238 43 50 33     154 if ( $tok eq 'x' && $next_type =~ /^[\(\$\@\%]$/ ) {
7239 0         0 return OPERATOR;
7240             }
7241              
7242             # The 'weird parsing rules' of next section do not work for '<' and '?'
7243             # It is best to mark them as unknown. Test case:
7244             # print $fh <DATA>;
7245 43 100       132 if ( $is_weird_parsing_rule_exception{$tok} ) {
7246 4         8 return UNKNOWN;
7247             }
7248              
7249             # For possible file handle like "$a", Perl uses weird parsing rules.
7250             # For example:
7251             # print $a/2,"/hi"; - division
7252             # print $a / 2,"/hi"; - division
7253             # print $a/ 2,"/hi"; - division
7254             # print $a /2,"/hi"; - pattern (and error)!
7255             # Some examples where this logic works okay, for '&','*','+':
7256             # print $fh &xsi_protos(@mods);
7257             # my $x = new $CompressClass *FH;
7258             # print $OUT +( $count % 15 ? ", " : "\n\t" );
7259 39 50 66     142 if ( $blank_after_Z
7260             && $next_type ne 'b' )
7261             {
7262 0         0 return TERM;
7263             }
7264              
7265             # Note that '?' and '<' have been moved above
7266             # ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
7267 39 100       206 if ( $tok =~ /^([x\/\+\-\*\%\&\.]|\>\>)$/ ) {
7268              
7269             # Do not complain in 'use' statements, which have special syntax.
7270             # For example, from RT#130344:
7271             # use lib $FindBin::Bin . '/lib';
7272 9 50       20 if ( $statement_type ne 'use' ) {
7273 9         29 $self->complain(
7274             "operator in possible indirect object location not recommended\n"
7275             );
7276             }
7277 9         19 return OPERATOR;
7278             }
7279              
7280             # all other cases
7281              
7282 30         81 return UNKNOWN;
7283             }
7284              
7285             #--------------------------
7286             # Section 2F: anything else
7287             #--------------------------
7288 4         9 return UNKNOWN;
7289              
7290             } ## end sub operator_expected
7291              
7292             sub new_statement_ok {
7293              
7294             # Returns:
7295             # true if a new statement can begin here
7296             # false otherwise
7297              
7298             # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
7299             # $brace_depth, $rbrace_type
7300              
7301             # Uses:
7302             # - See if a 'class' statement can occur here
7303             # - See if a keyword begins at a new statement; i.e. is an 'if' a
7304             # block if or a trailing if? Also see if 'format' starts a statement.
7305             # - Decide if a ':' is part of a statement label (not a ternary)
7306              
7307             # Curly braces are tricky because some small blocks do not get marked as
7308             # blocks..
7309              
7310             # if it follows an opening curly brace..
7311 545 100 100 545 0 2305 if ( $last_nonblank_token eq '{' ) {
    100          
7312              
7313             # The safe thing is to return true in all cases because:
7314             # - a ternary ':' cannot occur here
7315             # - an 'if' here, for example, cannot be a trailing if
7316             # See test case c231 for an example.
7317             # This works but could be improved, if necessary, by returning
7318             # 'false' at obvious non-blocks.
7319 66         239 return 1;
7320             }
7321              
7322             # if it follows a closing code block curly brace..
7323             elsif ($last_nonblank_token eq '}'
7324             && $last_nonblank_type eq $last_nonblank_token )
7325             {
7326              
7327             # A new statement can follow certain closing block braces ...
7328             # Previously, a true was always returned, and this worked ok.
7329             # Update c443: now we return false for certain blocks which must be
7330             # followed by a ';'. See comments elsewhere on
7331             # '%is_zero_continuation_block_type'. The value of $brace_depth has
7332             # also been corrected, it was off by 1.
7333 115         244 my $block_type = $rbrace_type->[ $brace_depth + 1 ];
7334             return $block_type
7335 115   66     685 && !$is_sort_map_grep_eval_do_sub{$block_type};
7336             }
7337              
7338             # otherwise, it is a label if and only if it follows a ';' (real or fake)
7339             # or another label
7340             else {
7341 364   100     1789 return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' );
7342             }
7343             } ## end sub new_statement_ok
7344              
7345             sub code_block_type {
7346              
7347 1444     1444 0 3236 my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
7348              
7349             # Decide if this is a block of code, and its type.
7350             # Must be called only when $type = $token = '{'
7351             # The problem is to distinguish between the start of a block of code
7352             # and the start of an anonymous hash reference
7353             # Returns "" if not code block, otherwise returns 'last_nonblank_token'
7354             # to indicate the type of code block. (For example, 'last_nonblank_token'
7355             # might be 'if' for an if block, 'else' for an else block, etc).
7356             # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
7357             # $last_nonblank_block_type, $brace_depth, $rbrace_type
7358              
7359             # handle case of multiple '{'s
7360              
7361             #print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
7362              
7363 1444 100 66     17736 if ( $last_nonblank_token eq '{'
    100 66        
    100 66        
    100 100        
    100 66        
    100 66        
    50          
    50          
    100          
    100          
    100          
7364             && $last_nonblank_type eq $last_nonblank_token )
7365             {
7366              
7367             # opening brace where a statement may appear is probably
7368             # a code block but might be and anonymous hash reference
7369 98 50       274 if ( $rbrace_type->[$brace_depth] ) {
7370 98         281 return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
7371             $max_token_index );
7372             }
7373              
7374             # cannot start a code block within an anonymous hash
7375             else {
7376 0         0 return EMPTY_STRING;
7377             }
7378             }
7379              
7380             elsif ( $last_nonblank_token eq ';' ) {
7381              
7382             # an opening brace where a statement may appear is probably
7383             # a code block but might be and anonymous hash reference
7384 51         180 return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
7385             $max_token_index );
7386             }
7387              
7388             # handle case of '}{'
7389             elsif ($last_nonblank_token eq '}'
7390             && $last_nonblank_type eq $last_nonblank_token )
7391             {
7392              
7393             # a } { situation ...
7394             # could be hash reference after code block..(blktype1.t)
7395 10 50       34 if ($last_nonblank_block_type) {
7396 10         37 return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
7397             $max_token_index );
7398             }
7399              
7400             # must be a block if it follows a closing hash reference
7401             else {
7402 0         0 return $last_nonblank_token;
7403             }
7404             }
7405              
7406             #--------------------------------------------------------------
7407             # NOTE: braces after type characters start code blocks, but for
7408             # simplicity these are not identified as such. See also
7409             # sub is_non_structural_brace.
7410             #--------------------------------------------------------------
7411              
7412             ## elsif ( $last_nonblank_type eq 't' ) {
7413             ## return $last_nonblank_token;
7414             ## }
7415              
7416             # brace after label:
7417             elsif ( $last_nonblank_type eq 'J' ) {
7418 34         95 return $last_nonblank_token;
7419             }
7420              
7421             # otherwise, see if a block must follow the previous token (such as 'if'):
7422             elsif ($is_code_block_token{$last_nonblank_token}
7423             || $is_grep_alias{$last_nonblank_token} )
7424             {
7425              
7426             # Bug Patch: Note that the opening brace after the 'if' in the following
7427             # snippet is an anonymous hash ref and not a code block!
7428             # print 'hi' if { x => 1, }->{x};
7429             # We can identify this situation because the last nonblank type
7430             # will be a keyword (instead of a closing paren)
7431 547 50 33     2321 if (
      66        
7432             $last_nonblank_type eq 'k'
7433             && ( $last_nonblank_token eq 'if'
7434             || $last_nonblank_token eq 'unless' )
7435             )
7436             {
7437 0         0 return EMPTY_STRING;
7438             }
7439             else {
7440 547         1531 return $last_nonblank_token;
7441             }
7442             }
7443              
7444             # or a sub or package BLOCK
7445             # Fixed for c250 to include new package type 'P', and change 'i' to 'S'
7446             elsif (
7447             $last_nonblank_type eq 'P'
7448             || $last_nonblank_type eq 'S'
7449             || ( $last_nonblank_type eq 't'
7450             && substr( $last_nonblank_token, 0, 3 ) eq 'sub' )
7451             )
7452             {
7453 346         999 return $last_nonblank_token;
7454             }
7455              
7456             elsif ( $statement_type =~ /^(sub|package)\b/ ) {
7457 0         0 return $statement_type;
7458             }
7459              
7460             # user-defined subs with block parameters (like grep/map/eval)
7461             elsif ( $last_nonblank_type eq 'G' ) {
7462 0         0 return $last_nonblank_token;
7463             }
7464              
7465             # check bareword
7466             elsif ( $last_nonblank_type eq 'w' ) {
7467              
7468             # check for syntax 'use MODULE LIST'
7469             # This fixes b1022 b1025 b1027 b1028 b1029 b1030 b1031
7470 24 100       92 return EMPTY_STRING if ( $statement_type eq 'use' );
7471              
7472 23         94 return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
7473             $max_token_index );
7474             }
7475              
7476             # Patch for bug # RT #94338 reported by Daniel Trizen
7477             # for-loop in a parenthesized block-map triggering an error message:
7478             # map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
7479             # Check for a code block within a parenthesized function call
7480             elsif ( $last_nonblank_token eq '(' ) {
7481 87         173 my $paren_type = $rparen_type->[$paren_depth];
7482              
7483             # /^(map|grep|sort)$/
7484 87 100 66     395 if ( $paren_type && $is_sort_map_grep{$paren_type} ) {
7485              
7486             # We will mark this as a code block but use type 't' instead
7487             # of the name of the containing function. This will allow for
7488             # correct parsing but will usually produce better formatting.
7489             # Braces with block type 't' are not broken open automatically
7490             # in the formatter as are other code block types, and this usually
7491             # works best.
7492 1         3 return 't'; # (Not $paren_type)
7493             }
7494             else {
7495 86         213 return EMPTY_STRING;
7496             }
7497             }
7498              
7499             # handle unknown syntax ') {'
7500             # we previously appended a '()' to mark this case
7501             elsif ( $last_nonblank_token =~ /\(\)$/ ) {
7502 16         92 return $last_nonblank_token;
7503             }
7504              
7505             # anything else must be anonymous hash reference
7506             else {
7507 231         555 return EMPTY_STRING;
7508             }
7509             } ## end sub code_block_type
7510              
7511             sub decide_if_code_block {
7512              
7513             # USES GLOBAL VARIABLES: $last_nonblank_token
7514 182     182 0 416 my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
7515              
7516 182         607 my ( $next_nonblank_token, $i_next_uu ) =
7517             $self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
7518              
7519             # we are at a '{' where a statement may appear.
7520             # We must decide if this brace starts an anonymous hash or a code
7521             # block.
7522             # return "" if anonymous hash, and $last_nonblank_token otherwise
7523              
7524             # initialize to be code BLOCK
7525 182         341 my $code_block_type = $last_nonblank_token;
7526              
7527             # Check for the common case of an empty anonymous hash reference:
7528             # Maybe something like sub { { } }
7529 182 100       448 if ( $next_nonblank_token eq '}' ) {
7530 5         8 $code_block_type = EMPTY_STRING;
7531             }
7532              
7533             else {
7534              
7535             # To guess if this '{' is an anonymous hash reference, look ahead
7536             # and test as follows:
7537             #
7538             # it is a hash reference if next come:
7539             # - a string or digit followed by a comma or =>
7540             # - bareword followed by =>
7541             # otherwise it is a code block
7542             #
7543             # Examples of anonymous hash ref:
7544             # {'aa',};
7545             # {1,2}
7546             #
7547             # Examples of code blocks:
7548             # {1; print "hello\n", 1;}
7549             # {$a,1};
7550              
7551             # We are only going to look ahead one more (nonblank/comment) line.
7552             # Strange formatting could cause a bad guess, but that's unlikely.
7553 177         279 my @pre_types;
7554             my @pre_tokens;
7555              
7556             # Ignore the rest of this line if it is a side comment
7557 177 100       450 if ( $next_nonblank_token ne '#' ) {
7558 152         471 @pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ];
  152         829  
7559 152         346 @pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ];
  152         923  
7560             }
7561              
7562             # Here 20 is arbitrary but generous, and prevents wasting lots of time
7563             # in mangled files
7564 177         648 my ( $rpre_tokens, $rpre_types ) =
7565             $self->peek_ahead_for_n_nonblank_pre_tokens(20);
7566 177 100 66     539 if ( defined($rpre_types) && @{$rpre_types} ) {
  169         510  
7567 169         251 push @pre_types, @{$rpre_types};
  169         642  
7568 169         297 push @pre_tokens, @{$rpre_tokens};
  169         780  
7569             }
7570              
7571             # put a sentinel token to simplify stopping the search
7572 177         366 push @pre_types, '}';
7573 177         301 push @pre_types, '}';
7574              
7575 177         247 my $jbeg = 0;
7576 177 100       469 $jbeg = 1 if ( $pre_types[0] eq 'b' );
7577              
7578             # first look for one of these
7579             # - bareword
7580             # - bareword with leading -
7581             # - digit
7582             # - quoted string
7583 177         294 my $j = $jbeg;
7584 177 100 33     1045 if ( $pre_types[$j] =~ /^[\'\"]/ ) {
    100          
    100          
    50          
7585              
7586             # find the closing quote; don't worry about escapes
7587 1         3 my $quote_mark = $pre_types[$j];
7588 1         4 foreach my $k ( $j + 1 .. @pre_types - 2 ) {
7589 1 50       3 if ( $pre_types[$k] eq $quote_mark ) {
7590 1         3 $j = $k + 1;
7591 1         2 last;
7592             }
7593             }
7594             }
7595             elsif ( $pre_types[$j] eq 'd' ) {
7596 8         11 $j++;
7597             }
7598             elsif ( $pre_types[$j] eq 'w' ) {
7599 74         155 $j++;
7600             }
7601             elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
7602 0         0 $j++;
7603             }
7604             else {
7605             # none of the above
7606             }
7607 177 100       451 if ( $j > $jbeg ) {
7608              
7609 83 100       256 $j++ if ( $pre_types[$j] eq 'b' );
7610              
7611             # Patched for RT #95708
7612 83 100 33     576 if (
      66        
      66        
7613              
7614             # it is a comma which is not a pattern delimiter except for qw
7615             (
7616             $pre_types[$j] eq COMMA
7617             && !$is_q_qq_qx_qr_s_y_tr_m{ $pre_tokens[$jbeg] }
7618             )
7619              
7620             # or a =>
7621             || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
7622             )
7623             {
7624 18         29 $code_block_type = EMPTY_STRING;
7625             }
7626             }
7627              
7628 177 100       487 if ($code_block_type) {
7629              
7630             # Patch for cases b1085 b1128: It is uncertain if this is a block.
7631             # If this brace follows a bareword, then append a space as a signal
7632             # to the formatter that this may not be a block brace. To find the
7633             # corresponding code in Formatter.pm search for 'b1085'.
7634             # But not for the word 'method': fixes c534; this will cause the
7635             # formatter to mark an asub block instead of a sub block.
7636 159 100 66     1324 if ( $code_block_type =~ /^\w/ && $code_block_type ne 'method' ) {
7637 20         123 $code_block_type .= SPACE;
7638             }
7639             }
7640             }
7641              
7642 182         550 return $code_block_type;
7643             } ## end sub decide_if_code_block
7644              
7645             sub report_unexpected {
7646              
7647             # report unexpected token type and show where it is
7648             # USES GLOBAL VARIABLES: (none)
7649 0     0 0 0 my ( $self, $rcall_hash ) = @_;
7650              
7651 0         0 my $found = $rcall_hash->{found};
7652 0         0 my $expecting = $rcall_hash->{expecting};
7653 0         0 my $i_tok = $rcall_hash->{i_tok};
7654 0         0 my $last_nonblank_i = $rcall_hash->{last_nonblank_i};
7655 0         0 my $rpretoken_map = $rcall_hash->{rpretoken_map};
7656 0         0 my $rpretoken_type = $rcall_hash->{rpretoken_type};
7657 0         0 my $input_line = $rcall_hash->{input_line};
7658              
7659 0 0       0 if ( ++$self->[_unexpected_error_count_] <= MAX_NAG_MESSAGES ) {
7660 0         0 my $msg = "found $found where $expecting expected";
7661 0         0 my $pos = $rpretoken_map->[$i_tok];
7662 0         0 $self->interrupt_logfile();
7663 0         0 my $input_line_number = $self->[_last_line_number_];
7664 0         0 my ( $offset, $numbered_line, $underline ) =
7665             make_numbered_line( $input_line_number, $input_line, $pos );
7666 0         0 $underline = write_on_underline( $underline, $pos - $offset, '^' );
7667              
7668 0         0 my $trailer = EMPTY_STRING;
7669 0 0 0     0 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
7670 0         0 my $pos_prev = $rpretoken_map->[$last_nonblank_i];
7671 0         0 my $num;
7672 0 0       0 if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) {
7673 0         0 $num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev;
7674             }
7675             else {
7676 0         0 $num = $pos - $pos_prev;
7677             }
7678 0 0       0 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
  0         0  
  0         0  
7679              
7680             $underline =
7681 0         0 write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
7682 0         0 $trailer = " (previous token underlined)";
7683             }
7684 0         0 $underline =~ s/\s+$//;
7685 0         0 $self->warning( $numbered_line . "\n" );
7686 0         0 $self->warning( $underline . "\n" );
7687 0         0 $self->warning( $msg . $trailer . "\n" );
7688 0         0 $self->resume_logfile();
7689             }
7690 0         0 return;
7691             } ## end sub report_unexpected
7692              
7693             my %is_sigil_or_paren;
7694             my %is_R_closing_sb;
7695              
7696             BEGIN {
7697              
7698 44     44   278 my @q = qw< $ & % * @ ) >;
7699 44         298 $is_sigil_or_paren{$_} = 1 for @q;
7700              
7701 44         114 @q = qw( R ] );
7702 44         71695 $is_R_closing_sb{$_} = 1 for @q;
7703             } ## end BEGIN
7704              
7705             sub is_non_structural_brace {
7706              
7707             # Decide if a brace or bracket is structural or non-structural
7708             # by looking at the previous token and type
7709             # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
7710              
7711             # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
7712             # Tentatively deactivated because it caused the wrong operator expectation
7713             # for this code:
7714             # $user = @vars[1] / 100;
7715             # Must update sub operator_expected before re-implementing.
7716             # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
7717             # return 0;
7718             # }
7719              
7720             #--------------------------------------------------------------
7721             # NOTE: braces after type characters start code blocks, but for
7722             # simplicity these are not identified as such. See also
7723             # sub code_block_type
7724             #--------------------------------------------------------------
7725              
7726             ##if ($last_nonblank_type eq 't') {return 0}
7727              
7728             # otherwise, it is non-structural if it is decorated
7729             # by type information.
7730             # For example, the '{' here is non-structural: ${xxx}
7731             # Removed '::' to fix c074
7732             ## $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
7733             return (
7734             ## $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->)/
7735             $is_sigil_or_paren{ substr( $last_nonblank_token, 0, 1 ) }
7736             || substr( $last_nonblank_token, 0, 2 ) eq '->'
7737              
7738             # or if we follow a hash or array closing curly brace or bracket
7739             # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
7740             # because the first '}' would have been given type 'R'
7741             ##|| $last_nonblank_type =~ /^([R\]])$/
7742 2822   66 2822 0 14523 || $is_R_closing_sb{$last_nonblank_type}
7743             );
7744             } ## end sub is_non_structural_brace
7745              
7746             #######################################################################
7747             # Tokenizer routines for tracking container nesting depths
7748             #######################################################################
7749              
7750             # The following routines keep track of nesting depths of the nesting
7751             # types, ( [ { and ?. This is necessary for determining the indentation
7752             # level, and also for debugging programs. Not only do they keep track of
7753             # nesting depths of the individual brace types, but they check that each
7754             # of the other brace types is balanced within matching pairs. For
7755             # example, if the program sees this sequence:
7756             #
7757             # { ( ( ) }
7758             #
7759             # then it can determine that there is an extra left paren somewhere
7760             # between the { and the }. And so on with every other possible
7761             # combination of outer and inner brace types. For another
7762             # example:
7763             #
7764             # ( [ ..... ] ] )
7765             #
7766             # which has an extra ] within the parens.
7767             #
7768             # The brace types have indexes 0 .. 3 which are indexes into
7769             # the matrices.
7770             #
7771             # The pair ? : are treated as just another nesting type, with ? acting
7772             # as the opening brace and : acting as the closing brace.
7773             #
7774             # The matrix
7775             #
7776             # $rdepth_array->[$a][$b][ $rcurrent_depth->[$a] ] = $rcurrent_depth->[$b];
7777             #
7778             # saves the nesting depth of brace type $b (where $b is either of the other
7779             # nesting types) when brace type $a enters a new depth. When this depth
7780             # decreases, a check is made that the current depth of brace types $b is
7781             # unchanged, or otherwise there must have been an error. This can
7782             # be very useful for localizing errors, particularly when perl runs to
7783             # the end of a large file (such as this one) and announces that there
7784             # is a problem somewhere.
7785             #
7786             # A numerical sequence number is maintained for every nesting type,
7787             # so that each matching pair can be uniquely identified in a simple
7788             # way.
7789              
7790             sub increase_nesting_depth {
7791 5446     5446 0 9085 my ( $self, $aa, $pos ) = @_;
7792              
7793             # Given:
7794             # $aa = integer code of container type, 0-3
7795             # $pos = position of character, for error message
7796              
7797             # USES GLOBAL VARIABLES: $rcurrent_depth,
7798             # $rcurrent_sequence_number, $rdepth_array,
7799             # $rstarting_line_of_current_depth, $statement_type
7800 5446         7951 my $cd_aa = ++$rcurrent_depth->[$aa];
7801 5446         6305 $total_depth++;
7802 5446         8998 $rtotal_depth->[$aa]->[$cd_aa] = $total_depth;
7803 5446         7428 my $input_line_number = $self->[_last_line_number_];
7804 5446         7376 my $input_line = $self->[_line_of_text_];
7805              
7806             # Sequence numbers increment by number of items. This keeps
7807             # a unique set of numbers but still allows the relative location
7808             # of any type to be determined.
7809              
7810             # make a new unique sequence number
7811 5446         7540 my $seqno = $next_sequence_number++;
7812              
7813 5446         8693 $rcurrent_sequence_number->[$aa]->[$cd_aa] = $seqno;
7814              
7815 5446         15655 $rstarting_line_of_current_depth->[$aa]->[$cd_aa] =
7816             [ $input_line_number, $input_line, $pos ];
7817              
7818 5446         13554 for my $bb ( 0 .. @closing_brace_names - 1 ) {
7819 21784 100       30670 next if ( $bb == $aa );
7820 16338         27812 $rdepth_array->[$aa]->[$bb]->[$cd_aa] = $rcurrent_depth->[$bb];
7821             }
7822              
7823             # set a flag for indenting a nested ternary statement
7824 5446         7390 my $indent = 0;
7825 5446 100       8998 if ( $aa == QUESTION_COLON ) {
7826 193         406 $rnested_ternary_flag->[$cd_aa] = 0;
7827 193 100       497 if ( $cd_aa > 1 ) {
7828 17 100       58 if ( $rnested_ternary_flag->[ $cd_aa - 1 ] == 0 ) {
7829 16         37 my $pdepth = $rtotal_depth->[$aa]->[ $cd_aa - 1 ];
7830 16 100       56 if ( $pdepth == $total_depth - 1 ) {
7831 8         13 $indent = 1;
7832 8         20 $rnested_ternary_flag->[ $cd_aa - 1 ] = -1;
7833             }
7834             }
7835             }
7836             }
7837              
7838             # Fix part #1 for git82: save last token type for propagation of type 'Z'
7839 5446         17219 $rnested_statement_type->[$aa]->[$cd_aa] =
7840             [ $statement_type, $last_nonblank_type, $last_nonblank_token ];
7841 5446         7333 $statement_type = EMPTY_STRING;
7842 5446         11617 return ( $seqno, $indent );
7843             } ## end sub increase_nesting_depth
7844              
7845             sub is_balanced_closing_container {
7846              
7847 47     47 0 93 my ($aa) = @_;
7848              
7849             # Return true if a closing container can go here without error
7850             # Return false if not
7851             # Given:
7852             # $aa = integer code of container type, 0-3
7853              
7854             # cannot close if there was no opening
7855 47         65 my $cd_aa = $rcurrent_depth->[$aa];
7856 47 100       150 return if ( $cd_aa <= 0 );
7857              
7858             # check that any other brace types $bb contained within would be balanced
7859 8         51 for my $bb ( 0 .. @closing_brace_names - 1 ) {
7860 8 50       20 next if ( $bb == $aa );
7861             return
7862             if (
7863 8 50       39 $rdepth_array->[$aa]->[$bb]->[$cd_aa] != $rcurrent_depth->[$bb] );
7864             }
7865              
7866             # OK, everything will be balanced
7867 0         0 return 1;
7868             } ## end sub is_balanced_closing_container
7869              
7870             sub decrease_nesting_depth {
7871              
7872 5446     5446 0 8923 my ( $self, $aa, $pos ) = @_;
7873              
7874             # Given:
7875             # $aa = integer code of container type, 0-3
7876             # $pos = position of character, for error message
7877              
7878             # USES GLOBAL VARIABLES: $rcurrent_depth,
7879             # $rcurrent_sequence_number, $rdepth_array, $rstarting_line_of_current_depth
7880             # $statement_type
7881 5446         6319 my $seqno = 0;
7882 5446         11775 my $input_line_number = $self->[_last_line_number_];
7883 5446         7527 my $input_line = $self->[_line_of_text_];
7884              
7885 5446         6191 my $outdent = 0;
7886 5446         6360 $total_depth--;
7887 5446         7821 my $cd_aa = $rcurrent_depth->[$aa];
7888 5446 50       8957 if ( $cd_aa > 0 ) {
7889              
7890             # set a flag for un-indenting after seeing a nested ternary statement
7891 5446         8249 $seqno = $rcurrent_sequence_number->[$aa]->[$cd_aa];
7892 5446 100       9281 if ( $aa == QUESTION_COLON ) {
7893 193         376 $outdent = $rnested_ternary_flag->[$cd_aa];
7894             }
7895              
7896             # Fix part #2 for git82: use saved type for propagation of type 'Z'
7897             # through type L-R braces. Perl seems to allow ${bareword}
7898             # as an indirect object, but nothing much more complex than that.
7899             ( $statement_type, my $saved_type, my $saved_token_uu ) =
7900 5446         6181 @{ $rnested_statement_type->[$aa]->[ $rcurrent_depth->[$aa] ] };
  5446         13257  
7901 5446 50 100     14477 if ( $aa == BRACE
      66        
      66        
7902             && $saved_type eq 'Z'
7903             && $last_nonblank_type eq 'w'
7904             && $rbrace_structural_type->[$brace_depth] eq 'L' )
7905             {
7906 1         2 $last_nonblank_type = $saved_type;
7907             }
7908              
7909             # check that any brace types $bb contained within are balanced
7910 5446         12043 for my $bb ( 0 .. @closing_brace_names - 1 ) {
7911 21784 100       30593 next if ( $bb == $aa );
7912              
7913 16338 50       31569 if ( $rdepth_array->[$aa]->[$bb]->[$cd_aa] !=
7914             $rcurrent_depth->[$bb] )
7915             {
7916 0         0 my $diff =
7917             $rcurrent_depth->[$bb] -
7918             $rdepth_array->[$aa]->[$bb]->[$cd_aa];
7919              
7920             # don't whine too many times
7921 0         0 my $saw_brace_error = $self->get_saw_brace_error();
7922 0 0 0     0 if (
      0        
7923             $saw_brace_error <= MAX_NAG_MESSAGES
7924              
7925             # if too many closing types have occurred, we probably
7926             # already caught this error
7927             && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
7928             )
7929             {
7930 0         0 $self->interrupt_logfile();
7931 0         0 my $rsl = $rstarting_line_of_current_depth->[$aa]->[$cd_aa];
7932 0         0 my $sl = $rsl->[0];
7933 0         0 my $rel = [ $input_line_number, $input_line, $pos ];
7934 0         0 my $el = $rel->[0];
7935 0         0 my ($ess);
7936              
7937 0 0 0     0 if ( $diff == 1 || $diff == -1 ) {
7938 0         0 $ess = EMPTY_STRING;
7939             }
7940             else {
7941 0         0 $ess = 's';
7942             }
7943 0 0       0 my $bname =
7944             ( $diff > 0 )
7945             ? $opening_brace_names[$bb]
7946             : $closing_brace_names[$bb];
7947 0         0 $self->write_error_indicator_pair( @{$rsl}, '^' );
  0         0  
7948 0         0 my $msg = <<"EOM";
7949             Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
7950             EOM
7951              
7952 0 0       0 if ( $diff > 0 ) {
7953 0         0 my $rml =
7954             $rstarting_line_of_current_depth->[$bb]
7955             ->[ $rcurrent_depth->[$bb] ];
7956 0         0 my $ml = $rml->[0];
7957 0         0 $msg .=
7958             " The most recent un-matched $bname is on line $ml\n";
7959 0         0 $self->write_error_indicator_pair( @{$rml}, '^' );
  0         0  
7960             }
7961 0         0 $self->write_error_indicator_pair( @{$rel}, '^' );
  0         0  
7962 0         0 $self->warning($msg);
7963 0         0 $self->resume_logfile();
7964             }
7965 0         0 $self->increment_brace_error();
7966 0 0       0 if ( $bb eq BRACE ) { $self->[_show_indentation_table_] = 1 }
  0         0  
7967             }
7968             }
7969 5446         8153 $rcurrent_depth->[$aa]--;
7970             }
7971             else {
7972              
7973 0         0 my $saw_brace_error = $self->get_saw_brace_error();
7974 0 0       0 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
7975 0         0 my $msg = <<"EOM";
7976             There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
7977             EOM
7978 0         0 $self->indicate_error( $msg, $input_line_number, $input_line, $pos,
7979             '^' );
7980             }
7981 0         0 $self->increment_brace_error();
7982 0 0       0 if ( $aa eq BRACE ) { $self->[_show_indentation_table_] = 1 }
  0         0  
7983              
7984             # keep track of errors in braces alone (ignoring ternary nesting errors)
7985 0 0       0 $self->[_true_brace_error_count_]++
7986             if ( $closing_brace_names[$aa] ne "':'" );
7987             }
7988 5446         11244 return ( $seqno, $outdent );
7989             } ## end sub decrease_nesting_depth
7990              
7991             sub check_final_nesting_depths {
7992              
7993             # USES GLOBAL VARIABLES: $rcurrent_depth, $rstarting_line_of_current_depth
7994 649     649 0 1110 my $self = shift;
7995              
7996 649         2239 for my $aa ( 0 .. @closing_brace_names - 1 ) {
7997              
7998 2596         3415 my $cd_aa = $rcurrent_depth->[$aa];
7999 2596 50       4519 if ($cd_aa) {
8000 0         0 my $rsl = $rstarting_line_of_current_depth->[$aa]->[$cd_aa];
8001 0         0 my $sl = $rsl->[0];
8002              
8003             # Add hint for something like a missing terminal ':' of a ternary
8004 0         0 my $hint = EMPTY_STRING;
8005 0 0       0 if ( $cd_aa == 1 ) {
8006 0         0 $hint =
8007             " .. did not find its closing $closing_brace_names[$aa]";
8008             }
8009 0         0 my $msg = <<"EOM";
8010             Final nesting depth of $opening_brace_names[$aa]s is $cd_aa
8011             The most recent un-matched $opening_brace_names[$aa] is on line $sl$hint
8012             EOM
8013 0         0 $self->indicate_error( $msg, @{$rsl}, '^' );
  0         0  
8014 0         0 $self->increment_brace_error();
8015 0 0       0 if ( $aa eq BRACE ) { $self->[_show_indentation_table_] = 1 }
  0         0  
8016             }
8017             }
8018 649         1063 return;
8019             } ## end sub check_final_nesting_depths
8020              
8021             #######################################################################
8022             # Tokenizer routines for looking ahead in input stream
8023             #######################################################################
8024              
8025             sub peek_ahead_for_n_nonblank_pre_tokens {
8026              
8027 184     184 0 389 my ( $self, $max_pretokens ) = @_;
8028              
8029             # Given:
8030             # $max_pretokens = number of pretokens wanted
8031             # Return:
8032             # next $max_pretokens pretokens if they exist
8033             # undef's if hits eof without seeing any pretokens
8034              
8035             # USES GLOBAL VARIABLES: (none)
8036 184         281 my $line;
8037 184         287 my $i = 0;
8038 184         331 my ( $rpre_tokens, $rmap, $rpre_types );
8039              
8040 184         562 while ( defined( $line = $self->peek_ahead( $i++ ) ) ) {
8041 202         1337 $line =~ s/^\s+//; # trim leading blanks
8042 202 100       512 next if ( length($line) <= 0 ); # skip blank
8043 196 100       607 next if ( $line =~ /^#/ ); # skip comment
8044 176         404 ( $rpre_tokens, $rmap, $rpre_types ) =
8045             pre_tokenize( $line, $max_pretokens );
8046 176         321 last;
8047             } ## end while ( defined( $line = ...))
8048 184         511 return ( $rpre_tokens, $rpre_types );
8049             } ## end sub peek_ahead_for_n_nonblank_pre_tokens
8050              
8051             # look ahead for next non-blank, non-comment line of code
8052             sub peek_ahead_for_nonblank_token {
8053              
8054 139     139 0 342 my ( $self, $rtokens, $max_token_index ) = @_;
8055              
8056             # Given:
8057             # $rtokens = ref to token array
8058             # $max_token_index = index of last token in $rtokens
8059             # Task:
8060             # Update $rtokens with next nonblank token
8061              
8062             # USES GLOBAL VARIABLES: (none)
8063 139         240 my $line;
8064 139         227 my $i = 0;
8065              
8066 139         537 while ( defined( $line = $self->peek_ahead( $i++ ) ) ) {
8067 190         843 $line =~ s/^\s+//; # trim leading blanks
8068 190 100       554 next if ( length($line) <= 0 ); # skip blank
8069 161 100       526 next if ( $line =~ /^#/ ); # skip comment
8070              
8071             # Updated from 2 to 3 to get trigraphs, added for case b1175
8072 137         400 my ( $rtok, $rmap_uu, $rtype_uu ) = pre_tokenize( $line, 3 );
8073 137         286 my $j = $max_token_index + 1;
8074              
8075 137         231 foreach my $tok ( @{$rtok} ) {
  137         315  
8076 394 100       815 last if ( $tok =~ "\n" );
8077 351         717 $rtokens->[ ++$j ] = $tok;
8078             }
8079 137         468 last;
8080             } ## end while ( defined( $line = ...))
8081 139         303 return;
8082             } ## end sub peek_ahead_for_nonblank_token
8083              
8084             #######################################################################
8085             # Tokenizer guessing routines for ambiguous situations
8086             #######################################################################
8087              
8088             my %is_non_ternary_pretok;
8089              
8090             BEGIN {
8091              
8092             # Some pre-tokens which cannot immediately follow a ternary '?'
8093 44     44   295 my @q = qw# ; ? : ) } ] = > #;
8094 44         152 push @q, COMMA;
8095 44         181 %is_non_ternary_pretok = map { $_ => 1 } @q;
  396         21911  
8096             }
8097              
8098             sub guess_if_pattern_or_conditional {
8099              
8100 12     12 0 37 my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map_uu, $max_token_index )
8101             = @_;
8102              
8103             # This routine is called when we have encountered a ? following an
8104             # unknown bareword, and we must decide if it starts a pattern or not
8105             # Given:
8106             # $i - token index of the ? starting possible pattern
8107             # $rtokens ... = the token arrays
8108             # Return:
8109             # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
8110             # msg = a warning or diagnostic message
8111              
8112             # USES GLOBAL VARIABLES: $last_nonblank_token
8113              
8114 12         22 my $is_pattern = 0;
8115 12         31 my $msg =
8116             "guessing that ? after type='$last_nonblank_type' token='$last_nonblank_token' starts a ";
8117              
8118 12 50       65 if ( $i >= $max_token_index ) {
8119 0         0 $msg .= "conditional (no end to pattern found on the line)\n";
8120 0         0 $is_pattern = 0;
8121 0         0 return ( $is_pattern, $msg );
8122             }
8123              
8124             # See if we can rule out a ternary operator here before proceeding. c547.
8125 12         43 my ( $next_nonblank_token, $i_next_uu ) =
8126             find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
8127 12 50 33     103 if (
      33        
8128             !(
8129             $is_non_ternary_pretok{$next_nonblank_token}
8130             || ( $last_nonblank_type eq 'k' && $last_nonblank_token eq 'split' )
8131             )
8132             )
8133             {
8134             # A ternary cannot be ruled out here. We will assume this is a ternary
8135             # operator since '?' as pattern delimiter is deprecated.
8136 12         21 $is_pattern = 0;
8137 12         48 $msg .= "conditional (cannot rule it out)";
8138 12         43 return ( $is_pattern, $msg );
8139             }
8140              
8141             # If we get here, then we either have a ternary with a syntax error or some
8142             # ancient code which uses ? as a pattern delimiter. We will only select the
8143             # pattern delimiter if we can find its matching closing delimiter.
8144              
8145 0         0 my $ibeg = $i;
8146 0         0 $i = $ibeg + 1;
8147             ##my $next_token = $rtokens->[$i]; # first token after ?
8148              
8149             # look for a possible ending ? on this line..
8150 0         0 my $in_quote = 1;
8151 0         0 my $quote_depth = 0;
8152 0         0 my $quote_character = EMPTY_STRING;
8153 0         0 my $quote_pos = 0;
8154 0         0 my $quoted_string;
8155             (
8156              
8157 0         0 $i,
8158             $in_quote,
8159             $quote_character,
8160             $quote_pos,
8161             $quote_depth,
8162             $quoted_string,
8163              
8164             ) = $self->follow_quoted_string(
8165              
8166             $ibeg,
8167             $in_quote,
8168             $rtokens,
8169             $rtoken_type,
8170             $quote_character,
8171             $quote_pos,
8172             $quote_depth,
8173             $max_token_index,
8174              
8175             );
8176              
8177 0 0       0 if ($in_quote) {
8178              
8179             # we didn't find an ending ? on this line,
8180             # so we bias towards conditional
8181 0         0 $is_pattern = 0;
8182 0         0 $msg .= "conditional (no ending ? on this line)\n";
8183 0         0 return ( $is_pattern, $msg );
8184             }
8185              
8186             # we found an ending ?, so we bias towards a pattern
8187              
8188             # Watch out for an ending ? in quotes, like this
8189             # my $case_flag = File::Spec->case_tolerant ? '(?i)' : '';
8190 0         0 my $s_quote = 0;
8191 0         0 my $d_quote = 0;
8192 0         0 my $colons = 0;
8193 0         0 foreach my $ii ( $ibeg + 1 .. $i - 1 ) {
8194 0         0 my $tok = $rtokens->[$ii];
8195 0 0       0 if ( $tok eq ":" ) { $colons++ }
  0         0  
8196 0 0       0 if ( $tok eq "'" ) { $s_quote++ }
  0         0  
8197 0 0       0 if ( $tok eq '"' ) { $d_quote++ }
  0         0  
8198             }
8199 0 0 0     0 if ( $s_quote % 2 || $d_quote % 2 || $colons ) {
      0        
8200 0         0 $is_pattern = 0;
8201 0         0 $msg .= "conditional: found ending ? but unbalanced quote chars\n";
8202 0         0 return ( $is_pattern, $msg );
8203             }
8204 0 0       0 if ( $self->pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
8205 0         0 $is_pattern = 1;
8206 0         0 $msg .= "pattern (found ending ? and pattern expected)\n";
8207 0         0 return ( $is_pattern, $msg );
8208             }
8209              
8210             # NOTE: An ultimate decision could be made on version, since ? is a ternary
8211             # after version 5.22. But we may be formatting an ancient script with a
8212             # newer perl, and it might run on an older perl, so we cannot be certain.
8213             # if ($] >=5.022) {$is_pattern=0} else { ... not sure
8214              
8215 0         0 $msg .= "conditional (but uncertain)\n";
8216 0         0 return ( $is_pattern, $msg );
8217             } ## end sub guess_if_pattern_or_conditional
8218              
8219             my %is_known_constant;
8220             my %is_known_function;
8221              
8222             BEGIN {
8223              
8224             # Constants like 'pi' in Trig.pm are common
8225 44     44   244 my @q = qw( pi pi2 pi4 pip2 pip4 );
8226 44         353 $is_known_constant{$_} = 1 for @q;
8227              
8228             # parenless calls of 'ok' are common
8229 44         108 @q = qw( ok );
8230 44         76040 $is_known_function{$_} = 1 for @q;
8231             } ## end BEGIN
8232              
8233             sub guess_if_pattern_or_division {
8234              
8235 0     0 0 0 my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map, $max_token_index ) =
8236             @_;
8237              
8238             # This routine is called when we have encountered a / following an
8239             # unknown bareword, and we must decide if it starts a pattern or is a
8240             # division.
8241             # Given:
8242             # $i - token index of the / starting possible pattern
8243             # $rtokens ... = the token arrays
8244             # Return:
8245             # $is_pattern = 0 if probably division, =1 if probably a pattern
8246             # msg = a warning or diagnostic message
8247             # USES GLOBAL VARIABLES: $last_nonblank_token
8248 0         0 my $msg = "guessing that / after '$last_nonblank_token' starts a ";
8249 0         0 my $ibeg = $i;
8250 0         0 my $is_pattern = 0;
8251              
8252 0         0 my $divide_possible =
8253             $self->is_possible_numerator( $i, $rtokens, $max_token_index );
8254              
8255 0 0       0 if ( $divide_possible < 0 ) {
8256 0         0 $msg .= "pattern (division not possible here)\n";
8257 0         0 $is_pattern = 1;
8258 0         0 $self->saw_bareword_function($last_nonblank_token);
8259 0         0 return ( $is_pattern, $msg );
8260             }
8261 0 0       0 if ( $divide_possible == 4 ) {
8262 0         0 $msg .= "division (pattern not possible here)\n";
8263 0         0 $is_pattern = 0;
8264 0         0 return ( $is_pattern, $msg );
8265             }
8266              
8267             # anything left on line?
8268 0 0       0 if ( $i >= $max_token_index ) {
8269 0         0 $msg .= "division (line ends with this /)\n";
8270 0         0 $is_pattern = 0;
8271 0         0 return ( $is_pattern, $msg );
8272             }
8273              
8274             # quick check for no pattern-ending slash on this line
8275 0         0 my $pos_beg = $rtoken_map->[$ibeg];
8276 0         0 my $input_line = $self->[_line_of_text_];
8277 0 0       0 if ( index( $input_line, '/', $pos_beg + 1 ) < 0 ) {
8278 0         0 $msg .= "division (no ending / on this line)\n";
8279 0         0 $is_pattern = 0;
8280 0         0 return ( $is_pattern, $msg );
8281             }
8282              
8283             # Setup spacing rule before we change $i below..
8284 0         0 $i = $ibeg + 1;
8285 0         0 my $next_token = $rtokens->[$i]; # first token after slash
8286              
8287             # There are four possible spacings around the first slash:
8288             #
8289             # return pi/two;#/; -/-
8290             # return pi/ two;#/; -/+
8291             # return pi / two;#/; +/+
8292             # return pi /two;#/; +/- <-- possible pattern
8293             #
8294             # Spacing rule: a space before the slash but not after the slash
8295             # usually indicates a pattern. We can use this to break ties.
8296             # Note: perl seems to take a newline as a space in this rule (c243)
8297 0   0     0 my $space_before = $i < 2 || $rtokens->[ $i - 2 ] =~ m/^\s/;
8298 0         0 my $space_after = $next_token =~ m/^\s/;
8299 0   0     0 my $is_pattern_by_spacing = $space_before && !$space_after;
8300              
8301             # Make an accurate search for a possible terminating / on this line..
8302 0         0 my $in_quote = 1;
8303 0         0 my $quote_depth = 0;
8304 0         0 my $quote_character = EMPTY_STRING;
8305 0         0 my $quote_pos = 0;
8306 0         0 my $quoted_string;
8307             (
8308              
8309 0         0 $i,
8310             $in_quote,
8311             $quote_character,
8312             $quote_pos,
8313             $quote_depth,
8314             $quoted_string,
8315             )
8316             = $self->follow_quoted_string(
8317              
8318             $ibeg,
8319             $in_quote,
8320             $rtokens,
8321             $rtoken_type,
8322             $quote_character,
8323             $quote_pos,
8324             $quote_depth,
8325             $max_token_index,
8326             );
8327              
8328             # if we didn't find an ending / on this line ..
8329 0 0       0 if ($in_quote) {
8330 0         0 $is_pattern = 0;
8331 0         0 $msg .= "division (no ending / on this line)\n";
8332 0         0 return ( $is_pattern, $msg );
8333             }
8334              
8335             # we found an ending /, see if it might terminate a pattern
8336 0         0 my $pattern_expected =
8337             $self->pattern_expected( $i, $rtokens, $max_token_index );
8338              
8339 0 0       0 if ( $pattern_expected < 0 ) {
8340 0         0 $is_pattern = 0;
8341 0         0 $msg .= "division (pattern not possible)\n";
8342 0         0 return ( $is_pattern, $msg );
8343             }
8344              
8345             # Both pattern and divide can work here...
8346             # Check for known constants in the numerator, like 'pi'
8347 0 0       0 if ( $is_known_constant{$last_nonblank_token} ) {
8348 0         0 $msg .=
8349             "division (pattern works too but saw known constant '$last_nonblank_token')\n";
8350 0         0 $is_pattern = 0;
8351 0         0 return ( $is_pattern, $msg );
8352             }
8353              
8354             # Check for known functions like 'ok'
8355 0 0       0 if ( $is_known_function{$last_nonblank_token} ) {
8356 0         0 $msg .= "pattern (division works too but saw '$last_nonblank_token')\n";
8357 0         0 $is_pattern = 1;
8358 0         0 return ( $is_pattern, $msg );
8359             }
8360              
8361             # If one rule is more probable, use it
8362 0 0       0 if ( $divide_possible > $pattern_expected ) {
8363 0         0 $msg .= "division (more likely based on following tokens)\n";
8364 0         0 $is_pattern = 0;
8365 0         0 return ( $is_pattern, $msg );
8366             }
8367              
8368             # finally, we have to use the spacing rule
8369 0 0       0 if ($is_pattern_by_spacing) {
8370 0         0 $msg .= "pattern (guess on spacing, but division possible too)\n";
8371 0         0 $is_pattern = 1;
8372             }
8373             else {
8374 0         0 $msg .= "division (guess on spacing, but pattern is possible too)\n";
8375 0         0 $is_pattern = 0;
8376             }
8377              
8378 0         0 return ( $is_pattern, $msg );
8379             } ## end sub guess_if_pattern_or_division
8380              
8381             sub guess_if_here_doc {
8382              
8383 0     0 0 0 my ( $self, $next_token ) = @_;
8384              
8385             # Try to resolve here-doc vs. shift by looking ahead for
8386             # non-code or the end token (currently only looks for end token)
8387              
8388             # Given:
8389             # $next_token = the next token after '<<'
8390              
8391             # Return:
8392             # 1 if it is probably a here doc
8393             # 0 if not
8394              
8395             # USES GLOBAL VARIABLES: $current_package $ris_constant,
8396              
8397             # This is how many lines we will search for a target as part of the
8398             # guessing strategy. There is probably little reason to change it.
8399 0         0 my $HERE_DOC_WINDOW = 40;
8400              
8401 0         0 my $here_doc_expected = 0;
8402 0         0 my $line;
8403 0         0 my $k = 0;
8404 0         0 my $msg = "checking <<";
8405              
8406 0         0 while ( defined( $line = $self->peek_ahead( $k++ ) ) ) {
8407 0         0 chomp $line;
8408 0 0       0 if ( $line eq $next_token ) {
8409 0         0 $msg .= " -- found target $next_token ahead $k lines\n";
8410 0         0 $here_doc_expected = 1; # got it
8411 0         0 last;
8412             }
8413 0 0       0 last if ( $k >= $HERE_DOC_WINDOW );
8414             } ## end while ( defined( $line = ...))
8415              
8416 0 0       0 if ( !$here_doc_expected ) {
8417              
8418 0 0       0 if ( !defined($line) ) {
8419 0         0 $here_doc_expected = -1; # hit eof without seeing target
8420 0         0 $msg .= " -- must be shift; target $next_token not in file\n";
8421             }
8422             else { # still unsure..taking a wild guess
8423              
8424 0 0       0 if ( !$ris_constant->{$current_package}->{$next_token} ) {
8425 0         0 $here_doc_expected = 1;
8426 0         0 $msg .=
8427             " -- guessing it's a here-doc ($next_token not a constant)\n";
8428             }
8429             else {
8430 0         0 $msg .=
8431             " -- guessing it's a shift ($next_token is a constant)\n";
8432             }
8433 0         0 if (DEBUG_GUESS_MODE) {
8434             $self->warning("DEBUG_GUESS_MODE message:\n$msg\n");
8435             }
8436             }
8437             }
8438 0         0 $self->write_logfile_entry($msg);
8439 0         0 return $here_doc_expected;
8440             } ## end sub guess_if_here_doc
8441              
8442             #######################################################################
8443             # Tokenizer Routines for scanning identifiers and related items
8444             #######################################################################
8445              
8446             sub scan_bare_identifier_do {
8447              
8448             my (
8449              
8450 1862     1862 0 4844 $self,
8451              
8452             $input_line,
8453             $i,
8454             $tok,
8455             $type,
8456             $prototype,
8457             $rtoken_map,
8458             $max_token_index,
8459              
8460             ) = @_;
8461              
8462             # This routine is called to scan a token starting with an alphanumeric
8463             # variable or package separator, :: or '.
8464              
8465             # Given:
8466             # current scan state variables
8467              
8468             # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
8469             # $last_nonblank_type, $rparen_type, $paren_depth
8470              
8471 1862         2713 my $package = undef;
8472              
8473 1862         2475 my $i_beg = $i;
8474              
8475             # we have to back up one pretoken at a :: since each : is one pretoken
8476 1862 100       3909 if ( $tok eq '::' ) { $i_beg-- }
  9         10  
8477 1862         2769 my $pos_beg = $rtoken_map->[$i_beg];
8478 1862         5602 pos($input_line) = $pos_beg;
8479              
8480             # Examples:
8481             # A::B::C
8482             # A::
8483             # ::A
8484             # A'B
8485 1862 50       11714 if (
8486             $input_line =~ m{
8487             \G\s* # start at pos
8488             ( (?:\w*(?:'|::))* ) # $1 = maybe package name like A:: A::B:: or A'
8489             (\w+)? # $2 = maybe followed by sub name
8490             }gcx
8491             )
8492             {
8493 1862         2974 my $pos = pos($input_line);
8494 1862         2743 my $numc = $pos - $pos_beg;
8495 1862         3602 $tok = substr( $input_line, $pos_beg, $numc );
8496              
8497             # type 'w' includes anything without leading type info
8498             # ($,%,@,*) including something like abc::def::ghi
8499 1862         2627 $type = 'w';
8500              
8501 1862         2617 my $sub_name = EMPTY_STRING;
8502 1862 100       4470 if ( defined($2) ) { $sub_name = $2; }
  1857         3191  
8503 1862 100 66     6790 if ( defined($1) && length($1) ) {
8504 280         513 $package = $1;
8505              
8506             # patch: check for package call A::B::C->
8507             # in this case, C is part of the package name
8508 280 100       681 if ($sub_name) {
8509 275 100       1075 if ( $input_line =~ m{ \G\s*(?:->) }gcx ) {
8510 117         235 $package .= $sub_name;
8511 117         222 $sub_name = EMPTY_STRING;
8512             }
8513 275         615 pos($input_line) = $pos;
8514             }
8515              
8516             # patch: don't allow isolated package name which just ends
8517             # in the old style package separator (single quote). Example:
8518             # use CGI':all';
8519 280 50 66     1122 if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
8520 0         0 $pos--;
8521             }
8522              
8523 280         595 $package =~ s/\'/::/g;
8524 280 100       678 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
  9         15  
8525 280         903 $package =~ s/::$//;
8526             }
8527             else {
8528 1582         2358 $package = $current_package;
8529              
8530             # patched for c043, part 1: keyword does not follow '->'
8531 1582 50 66     4708 if ( $is_keyword{$tok} && $last_nonblank_type ne '->' ) {
8532 0         0 $type = 'k';
8533             }
8534             }
8535              
8536             # if it is a bareword.. patched for c043, part 2: not following '->'
8537 1862 100 66     6342 if ( $type eq 'w' && $last_nonblank_type ne '->' ) {
8538              
8539             # check for v-string with leading 'v' type character
8540             # (This seems to have precedence over filehandle, type 'Y')
8541 1076 100 100     14320 if ( substr( $tok, 0, 1 ) eq 'v' && $tok =~ /^v\d[_\d]*$/ ) {
    100 66        
    100 66        
    50 66        
    50          
    100          
    100          
8542              
8543             # we only have the first part - something like 'v101' -
8544             # look for more
8545 2 50       10 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
8546 2         4 $pos = pos($input_line);
8547 2         6 $numc = $pos - $pos_beg;
8548 2         5 $tok = substr( $input_line, $pos_beg, $numc );
8549             }
8550 2         6 $type = 'v';
8551 2         14 $self->report_v_string($tok);
8552             }
8553              
8554             # bareword after sort has implied empty prototype; for example:
8555             # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
8556             # This has priority over whatever the user has specified.
8557             elsif ($last_nonblank_token eq 'sort'
8558             && $last_nonblank_type eq 'k' )
8559             {
8560 1         2 $type = 'Z';
8561             }
8562              
8563             # issue c382: this elsif statement moved from above because
8564             # previous check for type 'Z' after sort has priority.
8565             elsif ( $ris_constant->{$package}->{$sub_name} ) {
8566 12         25 $type = 'C';
8567             }
8568              
8569             # Note: strangely, perl does not seem to really let you create
8570             # functions which act like eval and do, in the sense that eval
8571             # and do may have operators following the final }, but any operators
8572             # that you create with prototype (&) apparently do not allow
8573             # trailing operators, only terms. This seems strange.
8574             # If this ever changes, here is the update
8575             # to make perltidy behave accordingly:
8576              
8577             # elsif ( $ris_block_function->{$package}{$tok} ) {
8578             # $tok='eval'; # patch to do braces like eval - doesn't work
8579             # $type = 'k';
8580             #}
8581             # TODO: This could become a separate type to allow for different
8582             # future behavior:
8583             elsif ( $ris_block_function->{$package}->{$sub_name} ) {
8584 0         0 $type = 'G';
8585             }
8586             elsif ( $ris_block_list_function->{$package}->{$sub_name} ) {
8587 0         0 $type = 'G';
8588             }
8589             elsif ( $ris_user_function->{$package}->{$sub_name} ) {
8590 6         16 $type = 'U';
8591 6         17 $prototype = $ruser_function_prototype->{$package}->{$sub_name};
8592             }
8593              
8594             # check for indirect object
8595             elsif (
8596              
8597             # added 2001-03-27: must not be followed immediately by '('
8598             # see fhandle.t
8599             ( $input_line !~ m/\G\(/gc )
8600              
8601             # and
8602             && (
8603              
8604             # preceded by keyword like 'print', 'printf' and friends
8605             $is_indirect_object_taker{$last_nonblank_token}
8606              
8607             # or preceded by something like 'print(' or 'printf('
8608             || (
8609             ( $last_nonblank_token eq '(' )
8610             && $is_indirect_object_taker{
8611             $rparen_type->[$paren_depth]
8612             }
8613              
8614             )
8615             )
8616             )
8617             {
8618              
8619             # may not be indirect object unless followed by a space;
8620             # updated 2021-01-16 to consider newline to be a space.
8621             # updated for case b990 to look for either ';' or space
8622 4 50 33     39 if ( pos($input_line) == length($input_line)
8623             || $input_line =~ m/\G[;\s]/gc )
8624             {
8625 4         8 $type = 'Y';
8626              
8627             # Abandon Hope ...
8628             # Perl's indirect object notation is a very bad
8629             # thing and can cause subtle bugs, especially for
8630             # beginning programmers. And I haven't even been
8631             # able to figure out a sane warning scheme which
8632             # doesn't get in the way of good scripts.
8633              
8634             # Complain if a filehandle has any lower case
8635             # letters. This is suggested good practice.
8636             # Use 'sub_name' because something like
8637             # main::MYHANDLE is ok for filehandle
8638 4 100       16 if ( $sub_name =~ /[a-z]/ ) {
8639              
8640             # could be bug caused by older perltidy if
8641             # followed by '('
8642 1 50       20 if ( $input_line =~ m/\G\s*\(/gc ) {
8643 1         6 $self->complain(
8644             "Caution: unknown word '$tok' in indirect object slot\n"
8645             );
8646             }
8647             }
8648             }
8649              
8650             # bareword not followed by a space -- may not be filehandle
8651             # (may be function call defined in a 'use' statement)
8652             else {
8653 0         0 $type = 'Z';
8654             }
8655             }
8656              
8657             # none of the above special types
8658             else {
8659             }
8660             }
8661              
8662             # Now we must convert back from character position
8663             # to pre_token index.
8664             # I don't think an error flag can occur here ..but who knows
8665 1862         2537 my $error;
8666 1862         4679 ( $i, $error ) =
8667             inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
8668 1862 50       4058 if ($error) {
8669 0         0 $self->warning(
8670             "scan_bare_identifier: Possibly invalid tokenization\n");
8671             }
8672             }
8673              
8674             # no match but line not blank - could be syntax error
8675             # perl will take '::' alone without complaint
8676             else {
8677 0         0 $type = 'w';
8678              
8679             # change this warning to log message if it becomes annoying
8680 0         0 $self->warning("didn't find identifier after leading ::\n");
8681             }
8682 1862         7171 return ( $i, $tok, $type, $prototype );
8683             } ## end sub scan_bare_identifier_do
8684              
8685             sub scan_id_do {
8686              
8687             my (
8688              
8689 405     405 0 1245 $self,
8690              
8691             $input_line,
8692             $i,
8693             $tok,
8694             $rtokens,
8695             $rtoken_map,
8696             $id_scan_state,
8697             $max_token_index,
8698              
8699             ) = @_;
8700              
8701             # Scan identifier following a type token.
8702             # Given:
8703             # current scan state variables
8704              
8705             # This is the new scanner and may eventually replace scan_identifier.
8706             # Only type 'sub' and 'package' are implemented.
8707             # Token types $ * % @ & -> are not yet implemented.
8708             #
8709             # The type of call depends on $id_scan_state: $id_scan_state = ''
8710             # for starting call, in which case $tok must be the token defining
8711             # the type.
8712             #
8713             # If the type token is the last nonblank token on the line, a value
8714             # of $id_scan_state = $tok is returned, indicating that further
8715             # calls must be made to get the identifier. If the type token is
8716             # not the last nonblank token on the line, the identifier is
8717             # scanned and handled and a value of '' is returned.
8718              
8719 44     44   380 use constant DEBUG_NSCAN => 0;
  44         99  
  44         57837  
8720 405         658 my $type = EMPTY_STRING;
8721 405         560 my $i_beg;
8722              
8723             #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
8724             #my ($a,$b,$c) = caller;
8725             #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
8726              
8727             # on re-entry, start scanning at first token on the line
8728 405 100       801 if ($id_scan_state) {
8729 10         19 $i_beg = $i;
8730 10         19 $type = EMPTY_STRING;
8731             }
8732              
8733             # on initial entry, start scanning just after type token
8734             else {
8735 395         588 $i_beg = $i + 1;
8736 395         528 $id_scan_state = $tok;
8737 395         668 $type = 't';
8738             }
8739              
8740             # find $i_beg = index of next nonblank token,
8741             # and handle empty lines
8742 405         600 my $blank_line = 0;
8743 405         586 my $is_lexical_method = 0;
8744 405         741 my $next_nonblank_token = $rtokens->[$i_beg];
8745 405 100       937 if ( $i_beg > $max_token_index ) {
8746 2         4 $blank_line = 1;
8747             }
8748             else {
8749              
8750             # only a '#' immediately after a '$' is not a comment
8751 403 50       1024 if ( $next_nonblank_token eq '#' ) {
8752 0 0       0 if ( $tok ne '$' ) {
8753 0         0 $blank_line = 1;
8754             }
8755             }
8756              
8757 403 100       1576 if ( $next_nonblank_token =~ /^\s/ ) {
8758 383         1136 ( $next_nonblank_token, $i_beg ) =
8759             find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
8760             $max_token_index );
8761 383 100       1897 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
8762 4         13 $blank_line = 1;
8763             }
8764             }
8765              
8766             # Patch for Object::Pad lexical method like 'method $var {':
8767             # Skip past a '$'
8768 403 100 100     1991 if ( !$blank_line
      66        
8769             && $next_nonblank_token eq '$'
8770             && $id_scan_state eq 'method' )
8771             {
8772 2         6 ( $next_nonblank_token, $i_beg ) =
8773             find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
8774             $max_token_index );
8775 2         4 $is_lexical_method = 1;
8776             }
8777             }
8778              
8779             # handle non-blank line; identifier, if any, must follow
8780 405 100       1028 if ( !$blank_line ) {
8781              
8782 399 100       970 if ( $is_sub{$id_scan_state} ) {
    50          
8783 349         3770 ( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub(
8784             {
8785             input_line => $input_line,
8786             i => $i,
8787             i_beg => $i_beg,
8788             tok => $tok,
8789             type => $type,
8790             rtokens => $rtokens,
8791             rtoken_map => $rtoken_map,
8792             id_scan_state => $id_scan_state,
8793             max_token_index => $max_token_index,
8794             is_lexical_method => $is_lexical_method,
8795             }
8796             );
8797             }
8798              
8799             elsif ( $is_package{$id_scan_state} ) {
8800 50         447 ( $i, $tok, $type ) = $self->do_scan_package(
8801             {
8802             input_line => $input_line,
8803             i => $i,
8804             i_beg => $i_beg,
8805             tok => $tok,
8806             type => $type,
8807             rtokens => $rtokens,
8808             rtoken_map => $rtoken_map,
8809             max_token_index => $max_token_index,
8810             }
8811             );
8812 50         193 $id_scan_state = EMPTY_STRING;
8813             }
8814              
8815             else {
8816 0         0 $self->warning("invalid token in scan_id: $tok\n");
8817 0         0 $id_scan_state = EMPTY_STRING;
8818             }
8819             }
8820              
8821 405 50 33     2111 if ( $id_scan_state && ( !defined($type) || !$type ) ) {
      66        
8822              
8823             # shouldn't happen:
8824 0         0 if (DEVEL_MODE) {
8825             Fault(<<EOM);
8826             Program bug in scan_id: undefined type but scan_state=$id_scan_state
8827             EOM
8828             }
8829             $self->warning(
8830 0         0 "Possible program bug in sub scan_id: undefined type but scan_state=$id_scan_state\n"
8831             );
8832 0         0 $self->report_definite_bug();
8833             }
8834              
8835 405         530 DEBUG_NSCAN && do {
8836             print {*STDOUT}
8837             "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
8838             };
8839 405         1513 return ( $i, $tok, $type, $id_scan_state );
8840             } ## end sub scan_id_do
8841              
8842             sub check_prototype {
8843 173     173 0 386 my ( $proto, $package, $subname ) = @_;
8844              
8845             # Classify a sub based on its prototype
8846 173 50       402 return if ( !defined($package) );
8847 173 50       424 return if ( !defined($subname) );
8848 173 100       533 if ( defined($proto) ) {
8849 34         127 $proto =~ s/^\s*\(\s*//;
8850 34         111 $proto =~ s/\s*\)$//;
8851 34 100       90 if ($proto) {
8852 5         18 $ris_user_function->{$package}->{$subname} = 1;
8853 5         17 $ruser_function_prototype->{$package}->{$subname} = "($proto)";
8854              
8855             # prototypes containing '&' must be treated specially..
8856 5 100       18 if ( $proto =~ /\&/ ) {
8857              
8858             # right curly braces of prototypes ending in
8859             # '&' may be followed by an operator
8860 1 50       2 if ( $proto =~ /\&$/ ) {
8861 0         0 $ris_block_function->{$package}->{$subname} = 1;
8862             }
8863              
8864             # right curly braces of prototypes NOT ending in
8865             # '&' may NOT be followed by an operator
8866             else {
8867 1         3 $ris_block_list_function->{$package}->{$subname} = 1;
8868             }
8869             }
8870             }
8871             else {
8872 29         69 $ris_constant->{$package}->{$subname} = 1;
8873             }
8874             }
8875             else {
8876 139         369 $ris_user_function->{$package}->{$subname} = 1;
8877             }
8878 173         334 return;
8879             } ## end sub check_prototype
8880              
8881             sub do_scan_package {
8882              
8883 50     50 0 110 my ( $self, $rcall_hash ) = @_;
8884              
8885             # Parse a package name.
8886              
8887 50         100 my $input_line = $rcall_hash->{input_line};
8888 50         82 my $i = $rcall_hash->{i};
8889 50         75 my $i_beg = $rcall_hash->{i_beg};
8890 50         90 my $tok = $rcall_hash->{tok};
8891 50         79 my $type = $rcall_hash->{type};
8892 50         70 my $rtokens = $rcall_hash->{rtokens};
8893 50         84 my $rtoken_map = $rcall_hash->{rtoken_map};
8894 50         62 my $max_token_index = $rcall_hash->{max_token_index};
8895              
8896             # This is called with $i_beg equal to the index of the first nonblank
8897             # token following a 'package' token.
8898             # USES GLOBAL VARIABLES: $current_package,
8899              
8900             # package NAMESPACE
8901             # package NAMESPACE VERSION
8902             # package NAMESPACE BLOCK
8903             # package NAMESPACE VERSION BLOCK
8904             #
8905             # If VERSION is provided, package sets the $VERSION variable in the given
8906             # namespace to a version object with the VERSION provided. VERSION must be
8907             # a "strict" style version number as defined by the version module: a
8908             # positive decimal number (integer or decimal-fraction) without
8909             # exponentiation or else a dotted-decimal v-string with a leading 'v'
8910             # character and at least three components.
8911             # reference http://perldoc.perl.org/functions/package.html
8912              
8913 50         80 my $package = undef;
8914 50         92 my $pos_beg = $rtoken_map->[$i_beg];
8915 50         163 pos($input_line) = $pos_beg;
8916              
8917             # handle non-blank line; package name, if any, must follow
8918 50 50       278 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w*)/gc ) {
8919 50         110 $package = $1;
8920 50 50 33     243 $package = ( defined($1) && $1 ) ? $1 : 'main';
8921 50         114 $package =~ s/\'/::/g;
8922 50 50       134 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
  0         0  
8923 50         86 $package =~ s/::$//;
8924 50         78 my $pos = pos($input_line);
8925 50         83 my $numc = $pos - $pos_beg;
8926 50         122 $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
8927 50         70 $type = 'P'; # Fix for c250, previously 'i'
8928              
8929             # Now we must convert back from character position
8930             # to pre_token index.
8931             # I don't think an error flag can occur here ..but ?
8932 50         67 my $error;
8933 50         182 ( $i, $error ) =
8934             inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
8935 50 50       175 if ($error) { $self->warning("Possibly invalid package\n") }
  0         0  
8936 50         79 $current_package = $package;
8937              
8938             # we should now have package NAMESPACE
8939             # now expecting VERSION, BLOCK, or ; to follow ...
8940             # package NAMESPACE VERSION
8941             # package NAMESPACE BLOCK
8942             # package NAMESPACE VERSION BLOCK
8943 50         167 my ( $next_nonblank_token, $i_next_uu ) =
8944             $self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
8945              
8946             # check that something recognizable follows, but do not parse.
8947             # A VERSION number will be parsed later as a number or v-string in the
8948             # normal way. What is important is to set the statement type if
8949             # everything looks okay so that the operator_expected() routine
8950             # knows that the number is in a package statement.
8951             # Examples of valid primitive tokens that might follow are:
8952             # 1235 . ; { } v3 v
8953             # FIX: added a '#' since a side comment may also follow
8954             # Added ':' for class attributes (for --use-feature=class, rt145706)
8955 50 50       244 if ( $next_nonblank_token =~ /^([v\.\d;\{\}\#\:])|v\d|\d+$/ ) {
8956 50         118 $statement_type = $tok;
8957             }
8958             else {
8959 0         0 $self->warning(
8960             "Unexpected '$next_nonblank_token' after package name '$tok'\n"
8961             );
8962             }
8963             }
8964              
8965             # no match but line not blank --
8966             # could be a label with name package, like package: , for example.
8967             else {
8968 0         0 $type = 'k';
8969             }
8970              
8971 50         188 return ( $i, $tok, $type );
8972             } ## end sub do_scan_package
8973              
8974             { ## begin closure for sub scan_complex_identifier
8975              
8976 44     44   358 use constant DEBUG_SCAN_ID => 0;
  44         80  
  44         5339  
8977              
8978             # Constant hash:
8979             my %is_special_variable_char;
8980              
8981             BEGIN {
8982              
8983             # These are the only characters which can (currently) form special
8984             # variables, like $^W: (issue c066).
8985 44     44   297 my @q = qw{
8986             ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ ] ^ _
8987             };
8988 44         110 push @q, BACKSLASH;
8989 44         149469 $is_special_variable_char{$_} = 1 for @q;
8990             } ## end BEGIN
8991              
8992             # These are the possible states for this scanner:
8993             my $scan_state_SIGIL = '$';
8994             my $scan_state_ALPHA = 'A';
8995             my $scan_state_COLON = ':';
8996             my $scan_state_LPAREN = '(';
8997             my $scan_state_RPAREN = ')';
8998             my $scan_state_AMPERSAND = '&';
8999             my $scan_state_SPLIT = '^';
9000              
9001             # Only these non-blank states may be returned to caller:
9002             my %is_returnable_scan_state = (
9003             $scan_state_SIGIL => 1,
9004             $scan_state_AMPERSAND => 1,
9005             );
9006              
9007             # USES GLOBAL VARIABLES:
9008             # $context, $last_nonblank_token, $last_nonblank_type
9009              
9010             #-----------
9011             # call args:
9012             #-----------
9013             my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
9014             $expecting, $container_type );
9015              
9016             #-------------------------------------------
9017             # my variables, re-initialized on each call:
9018             #-------------------------------------------
9019             my $i_begin; # starting index $i
9020             my $type; # returned identifier type
9021             my $tok_begin; # starting token
9022             my $tok; # returned token
9023             my $id_scan_state_begin; # starting scan state
9024             my $identifier_begin; # starting identifier
9025             my $i_save; # a last good index, in case of error
9026             my $message; # hold error message for log file
9027             my $tok_is_blank;
9028             my $last_tok_is_blank;
9029             my $in_prototype_or_signature;
9030             my $saw_alpha;
9031             my $saw_type;
9032             my $allow_tick;
9033              
9034             sub initialize_my_scan_id_vars {
9035              
9036             # Initialize all 'my' vars on entry
9037 551     551 0 892 $i_begin = $i;
9038 551         834 $type = EMPTY_STRING;
9039 551         908 $tok_begin = $rtokens->[$i_begin];
9040 551         754 $tok = $tok_begin;
9041 551 50       1352 if ( $tok_begin eq ':' ) { $tok_begin = '::' }
  0         0  
9042 551         777 $id_scan_state_begin = $id_scan_state;
9043 551         761 $identifier_begin = $identifier;
9044 551         746 $i_save = undef;
9045              
9046 551         784 $message = EMPTY_STRING;
9047 551         706 $tok_is_blank = undef; # a flag to speed things up
9048 551         660 $last_tok_is_blank = undef;
9049              
9050 551   100     1570 $in_prototype_or_signature =
9051             $container_type && $container_type =~ /^sub\b/;
9052              
9053             # these flags will be used to help figure out the type:
9054 551         711 $saw_alpha = undef;
9055 551         763 $saw_type = undef;
9056              
9057             # allow old package separator (') except in 'use' statement
9058 551         890 $allow_tick = ( $last_nonblank_token ne 'use' );
9059 551         810 return;
9060             } ## end sub initialize_my_scan_id_vars
9061              
9062             #----------------------------------
9063             # Routines for handling scan states
9064             #----------------------------------
9065             sub do_id_scan_state_dollar {
9066              
9067 609     609 0 824 my $self = shift;
9068              
9069             # We saw a sigil, now looking to start a variable name
9070 609 100 66     3855 if ( $tok eq '$' ) {
    100 33        
    100          
    50          
    50          
    100          
    100          
    100          
    100          
9071              
9072 59         126 $identifier .= $tok;
9073              
9074             # we've got a punctuation variable if end of line (punct.t)
9075 59 50       170 if ( $i == $max_token_index ) {
9076 0         0 $type = 'i';
9077 0         0 $id_scan_state = EMPTY_STRING;
9078             }
9079             }
9080             elsif ( $tok =~ /^\w/ ) { # alphanumeric ..
9081 310         432 $saw_alpha = 1;
9082 310         451 $identifier .= $tok;
9083              
9084             # now need :: except for special digit vars like '$1' (c208)
9085 310 100       890 $id_scan_state = $tok =~ /^\d/ ? EMPTY_STRING : $scan_state_COLON;
9086             }
9087             elsif ( $tok eq '::' ) {
9088 16         42 $id_scan_state = $scan_state_ALPHA;
9089 16         37 $identifier .= $tok;
9090             }
9091              
9092             # POSTDEFREF ->@ ->% ->& ->*
9093             elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
9094 0         0 $identifier .= $tok;
9095             }
9096             elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
9097 0         0 $saw_alpha = 1;
9098 0         0 $id_scan_state = $scan_state_COLON; # now need ::
9099 0         0 $identifier .= $tok;
9100              
9101             # Perl will accept leading digits in identifiers,
9102             # although they may not always produce useful results.
9103             # Something like $main::0 is ok. But this also works:
9104             #
9105             # sub howdy::123::bubba{ print "bubba $54321!\n" }
9106             # howdy::123::bubba();
9107             #
9108             }
9109             elsif ( $tok eq '#' ) {
9110              
9111 100         177 my $is_punct_var = $identifier eq '$$';
9112              
9113             # side comment or identifier?
9114 100 100 66     946 if (
      66        
      66        
      33        
9115              
9116             # A '#' starts a comment if it follows a space. For example,
9117             # the following is equivalent to $ans=40.
9118             # my $ #
9119             # ans = 40;
9120             !$last_tok_is_blank
9121              
9122             # a # inside a prototype or signature can only start a
9123             # comment
9124             && !$in_prototype_or_signature
9125              
9126             # these are valid punctuation vars: *# %# @# $#
9127             # May also be '$#array' or POSTDEFREF ->$#
9128             && ( $identifier =~ /^[\%\@\$\*]$/
9129             || $identifier =~ /\$$/ )
9130              
9131             # but a '#' after '$$' is a side comment; see c147
9132             && !$is_punct_var
9133              
9134             )
9135             {
9136 96         228 $identifier .= $tok; # keep same state, a $ could follow
9137             }
9138             else {
9139              
9140             # otherwise it is a side comment
9141 4 50       16 if ( $identifier eq '->' ) { }
    50          
    50          
9142 0         0 elsif ($is_punct_var) { $type = 'i' }
9143 4         7 elsif ( $id_scan_state eq $scan_state_SIGIL ) { $type = 't' }
9144 0         0 else { $type = 'i' }
9145 4         6 $i = $i_save;
9146 4         6 $id_scan_state = EMPTY_STRING;
9147             }
9148             }
9149              
9150             elsif ( $tok eq '{' ) {
9151              
9152             # check for something like ${#} or ${?}, where ? is a special char
9153 47 100 100     520 if (
      66        
      100        
      100        
9154             (
9155             $identifier eq '$'
9156             || $identifier eq '@'
9157             || $identifier eq '$#'
9158             )
9159             && $i + 2 <= $max_token_index
9160             && $rtokens->[ $i + 2 ] eq '}'
9161             && $rtokens->[ $i + 1 ] !~ /[\s\w]/
9162             )
9163             {
9164 1         3 my $next2 = $rtokens->[ $i + 2 ];
9165 1         2 my $next1 = $rtokens->[ $i + 1 ];
9166 1         4 $identifier .= $tok . $next1 . $next2;
9167 1         1 $i += 2;
9168 1         2 $id_scan_state = EMPTY_STRING;
9169             }
9170             else {
9171              
9172             # skip something like ${xxx} or ->{
9173 46         74 $id_scan_state = EMPTY_STRING;
9174              
9175             # if this is the first token of a line, any tokens for this
9176             # identifier have already been accumulated
9177 46 100 66     165 if ( $identifier eq '$' || $i == 0 ) {
9178 31         85 $identifier = EMPTY_STRING;
9179             }
9180 46         77 $i = $i_save;
9181             }
9182             }
9183              
9184             # space ok after leading $ % * & @
9185             elsif ( $tok =~ /^\s*$/ ) {
9186              
9187 20         40 $tok_is_blank = 1;
9188              
9189             # note: an id with a leading '&' does not actually come this way
9190 20 50       72 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
    0          
9191              
9192 20 100       53 if ( length($identifier) > 1 ) {
9193 8         17 $id_scan_state = EMPTY_STRING;
9194 8         13 $i = $i_save;
9195 8         19 $type = 'i'; # probably punctuation variable
9196             }
9197             else {
9198              
9199             # fix c139: trim line-ending type 't'
9200 12 100       53 if ( $i == $max_token_index ) {
    100          
9201 1         2 $i = $i_save;
9202 1         2 $type = 't';
9203             }
9204              
9205             # spaces after $'s are common, and space after @
9206             # is harmless, so only complain about space
9207             # after other type characters. Space after $ and
9208             # @ will be removed in formatting. Report space
9209             # after % and * because they might indicate a
9210             # parsing error. In other words '% ' might be a
9211             # modulo operator. Delete this warning if it
9212             # gets annoying.
9213             elsif ( $identifier !~ /^[\@\$]$/ ) {
9214 1         2 $message =
9215             "Space in identifier, following $identifier\n";
9216             }
9217             else {
9218             # silently accept space after '$' and '@' sigils
9219             }
9220             }
9221             }
9222              
9223             elsif ( $identifier eq '->' ) {
9224              
9225             # space after '->' is ok except at line end ..
9226             # so trim line-ending in type '->' (fixes c139)
9227 0 0       0 if ( $i == $max_token_index ) {
9228 0         0 $i = $i_save;
9229 0         0 $type = '->';
9230             }
9231             }
9232              
9233             # stop at space after something other than -> or sigil
9234             # Example of what can arrive here:
9235             # eval { $MyClass->$$ };
9236             else {
9237 0         0 $id_scan_state = EMPTY_STRING;
9238 0         0 $i = $i_save;
9239 0         0 $type = 'i';
9240             }
9241             }
9242             elsif ( $tok eq '^' ) {
9243              
9244             # check for some special variables like $^ $^W
9245 11 50       36 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
9246 11         26 $identifier .= $tok;
9247 11         16 $type = 'i';
9248              
9249             # There may be one more character, not a space, after the ^
9250 11         17 my $next1 = $rtokens->[ $i + 1 ];
9251 11         22 my $chr = substr( $next1, 0, 1 );
9252 11 100       31 if ( $is_special_variable_char{$chr} ) {
9253              
9254             # It is something like $^W
9255             # Test case (c066) : $^Oeq'linux'
9256 9         13 $i++;
9257 9         15 $identifier .= $next1;
9258              
9259             # If pretoken $next1 is more than one character long,
9260             # set a flag indicating that it needs to be split.
9261 9 100       24 $id_scan_state =
9262             ( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING;
9263             }
9264             else {
9265              
9266             # it is just $^
9267             # Simple test case (c065): '$aa=$^if($bb)';
9268 2         3 $id_scan_state = EMPTY_STRING;
9269             }
9270             }
9271             else {
9272 0         0 $id_scan_state = EMPTY_STRING;
9273 0         0 $i = $i_save;
9274             }
9275             }
9276             else { # something else
9277              
9278 46 100 66     376 if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) {
    100 66        
    100          
    50          
    0          
    0          
9279              
9280             # We might be in an extrusion of
9281             # sub foo2 ( $first, $, $third ) {
9282             # looking at a line starting with a comma, like
9283             # $
9284             # ,
9285             # in this case the comma ends the signature variable
9286             # '$' which will have been previously marked type 't'
9287             # rather than 'i'.
9288 3 100       8 if ( $i == $i_begin ) {
9289 1         3 $identifier = EMPTY_STRING;
9290 1         1 $type = EMPTY_STRING;
9291             }
9292              
9293             # at a # we have to mark as type 't' because more may
9294             # follow, otherwise, in a signature we can let '$' be an
9295             # identifier here for better formatting.
9296             # See 'mangle4.in' for a test case.
9297             else {
9298 2         3 $type = 'i';
9299 2 50 33     10 if ( $id_scan_state eq $scan_state_SIGIL && $tok eq '#' ) {
9300 0         0 $type = 't';
9301             }
9302 2         2 $i = $i_save;
9303             }
9304 3         6 $id_scan_state = EMPTY_STRING;
9305             }
9306              
9307             # check for various punctuation variables
9308             elsif ( $identifier =~ /^[\$\*\@\%]$/ ) {
9309 35         79 $identifier .= $tok;
9310             }
9311              
9312             # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
9313             elsif ($tok eq '*'
9314             && $identifier =~ /\-\>([\@\%\$\*\&]|\$\#)$/ )
9315             {
9316 6         9 $identifier .= $tok;
9317             }
9318              
9319             elsif ( $identifier eq '$#' ) {
9320              
9321 2 50       9 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
  0 50       0  
  0         0  
9322              
9323             # perl seems to allow just these: $#: $#- $#+
9324             elsif ( $tok =~ /^[\:\-\+]$/ ) {
9325 0         0 $type = 'i';
9326 0         0 $identifier .= $tok;
9327             }
9328             else {
9329 2         4 $i = $i_save;
9330 2         6 $self->write_logfile_entry(
9331             'Use of $# is deprecated' . "\n" );
9332             }
9333             }
9334             elsif ( $identifier eq '$$' ) {
9335              
9336             # perl does not allow references to punctuation
9337             # variables without braces. For example, this
9338             # won't work:
9339             # $:=\4;
9340             # $a = $$:;
9341             # You would have to use
9342             # $a = ${$:};
9343              
9344             # '$$' alone is punctuation variable for PID
9345 0         0 $i = $i_save;
9346 0 0       0 if ( $tok eq '{' ) { $type = 't' }
  0         0  
9347 0         0 else { $type = 'i' }
9348             }
9349             elsif ( $identifier eq '->' ) {
9350 0         0 $i = $i_save;
9351             }
9352             else {
9353 0         0 $i = $i_save;
9354 0 0       0 if ( length($identifier) == 1 ) {
9355 0         0 $identifier = EMPTY_STRING;
9356             }
9357             }
9358 46         76 $id_scan_state = EMPTY_STRING;
9359             }
9360 609         897 return;
9361             } ## end sub do_id_scan_state_dollar
9362              
9363             sub do_id_scan_state_alpha {
9364              
9365 119     119 0 182 my $self = shift;
9366              
9367             # looking for alphanumeric after ::
9368 119         293 $tok_is_blank = $tok =~ /^\s*$/;
9369              
9370 119 100 33     369 if ( $tok =~ /^\w/ ) { # found it
    50 66        
    50 33        
    50          
9371 106         161 $identifier .= $tok;
9372 106         173 $id_scan_state = $scan_state_COLON; # now need ::
9373 106         177 $saw_alpha = 1;
9374             }
9375             elsif ( $tok eq "'" && $allow_tick ) {
9376 0         0 $identifier .= $tok;
9377 0         0 $id_scan_state = $scan_state_COLON; # now need ::
9378 0         0 $saw_alpha = 1;
9379             }
9380             elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
9381 0         0 $id_scan_state = $scan_state_LPAREN;
9382 0         0 $identifier .= $tok;
9383             }
9384             elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
9385 0         0 $id_scan_state = $scan_state_RPAREN;
9386 0         0 $identifier .= $tok;
9387             }
9388             else {
9389 13         21 $id_scan_state = EMPTY_STRING;
9390 13         14 $i = $i_save;
9391             }
9392 119         176 return;
9393             } ## end sub do_id_scan_state_alpha
9394              
9395             sub do_id_scan_state_colon {
9396              
9397 470     470 0 659 my $self = shift;
9398              
9399             # looking for possible :: after alphanumeric
9400              
9401 470         1402 $tok_is_blank = $tok =~ /^\s*$/;
9402              
9403 470 100 66     3078 if ( $tok eq '::' ) { # got it
    100 66        
    100 66        
    50          
    50          
9404 103         167 $identifier .= $tok;
9405 103         142 $id_scan_state = $scan_state_ALPHA; # now require alpha
9406             }
9407             elsif ( $tok =~ /^\w/ ) { # more alphanumeric is ok here
9408 20         39 $identifier .= $tok;
9409 20         33 $id_scan_state = $scan_state_COLON; # now need ::
9410 20         39 $saw_alpha = 1;
9411             }
9412             elsif ( $tok eq "'" && $allow_tick ) { # tick
9413              
9414 12 50       35 if ( $is_keyword{$identifier} ) {
9415 0         0 $id_scan_state = EMPTY_STRING; # that's all
9416 0         0 $i = $i_save;
9417             }
9418             else {
9419 12         47 $identifier .= $tok;
9420             }
9421             }
9422             elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
9423 0         0 $id_scan_state = $scan_state_LPAREN;
9424 0         0 $identifier .= $tok;
9425             }
9426             elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
9427 0         0 $id_scan_state = $scan_state_RPAREN;
9428 0         0 $identifier .= $tok;
9429             }
9430             else {
9431 335         488 $id_scan_state = EMPTY_STRING; # that's all
9432 335         469 $i = $i_save;
9433             }
9434 470         659 return;
9435             } ## end sub do_id_scan_state_colon
9436              
9437             sub do_id_scan_state_left_paren {
9438              
9439 0     0 0 0 my $self = shift;
9440              
9441             # looking for possible '(' of a prototype
9442              
9443 0 0       0 if ( $tok eq '(' ) { # got it
    0          
9444 0         0 $identifier .= $tok;
9445 0         0 $id_scan_state = $scan_state_RPAREN; # now find the end of it
9446             }
9447             elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
9448 0         0 $identifier .= $tok;
9449 0         0 $tok_is_blank = 1;
9450             }
9451             else {
9452 0         0 $id_scan_state = EMPTY_STRING; # that's all - no prototype
9453 0         0 $i = $i_save;
9454             }
9455 0         0 return;
9456             } ## end sub do_id_scan_state_left_paren
9457              
9458             sub do_id_scan_state_right_paren {
9459              
9460 0     0 0 0 my $self = shift;
9461              
9462             # looking for a ')' of prototype to close a '('
9463              
9464 0         0 $tok_is_blank = $tok =~ /^\s*$/;
9465              
9466 0 0       0 if ( $tok eq ')' ) { # got it
    0          
9467 0         0 $identifier .= $tok;
9468 0         0 $id_scan_state = EMPTY_STRING; # all done
9469             }
9470             elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
9471 0         0 $identifier .= $tok;
9472             }
9473             else { # probable error in script, but keep going
9474 0         0 $self->warning(
9475             "Unexpected '$tok' while seeking end of prototype\n");
9476 0         0 $identifier .= $tok;
9477             }
9478 0         0 return;
9479             } ## end sub do_id_scan_state_right_paren
9480              
9481             sub do_id_scan_state_ampersand {
9482              
9483 104     104 0 142 my $self = shift;
9484              
9485             # Starting sub call after seeing an '&'
9486 104 100 33     620 if ( $tok =~ /^[\$\w]/ ) { # alphanumeric ..
    50          
    100          
    50          
    50          
    0          
9487 87         152 $id_scan_state = $scan_state_COLON; # now need ::
9488 87         119 $saw_alpha = 1;
9489 87         131 $identifier .= $tok;
9490             }
9491             elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
9492 0         0 $id_scan_state = $scan_state_COLON; # now need ::
9493 0         0 $saw_alpha = 1;
9494 0         0 $identifier .= $tok;
9495             }
9496             elsif ( $tok =~ /^\s*$/ ) { # allow space
9497 2         3 $tok_is_blank = 1;
9498              
9499             # fix c139: trim line-ending type 't'
9500 2 50 33     9 if ( length($identifier) == 1 && $i == $max_token_index ) {
9501 2         4 $i = $i_save;
9502 2         3 $type = 't';
9503             }
9504             }
9505             elsif ( $tok eq '::' ) { # leading ::
9506 0         0 $id_scan_state = $scan_state_ALPHA; # accept alpha next
9507 0         0 $identifier .= $tok;
9508             }
9509             elsif ( $tok eq '{' ) {
9510 15 50 33     72 if ( $identifier eq '&' || $i == 0 ) {
9511 15         27 $identifier = EMPTY_STRING;
9512             }
9513 15         25 $i = $i_save;
9514 15         25 $id_scan_state = EMPTY_STRING;
9515             }
9516             elsif ( $tok eq '^' ) {
9517 0 0       0 if ( $identifier eq '&' ) {
9518              
9519             # Special variable (c066)
9520 0         0 $identifier .= $tok;
9521 0         0 $type = 'i';
9522              
9523             # To be a special $^ variable, there may be one more character,
9524             # not a space, after the ^
9525 0         0 my $next1 = $rtokens->[ $i + 1 ];
9526 0         0 my $chr = substr( $next1, 0, 1 );
9527 0 0       0 if ( $is_special_variable_char{$chr} ) {
9528              
9529             # It is something like &^O
9530 0         0 $i++;
9531 0         0 $identifier .= $next1;
9532              
9533             # If pretoken $next1 is more than one character long,
9534             # set a flag indicating that it needs to be split.
9535 0 0       0 $id_scan_state =
9536             ( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING;
9537             }
9538             else {
9539              
9540             # It is &^. This is parsed by perl as a call to sub '^',
9541             # even though it would be difficult to create a sub '^'.
9542             # So we mark it as an identifier (c068).
9543 0         0 $id_scan_state = EMPTY_STRING;
9544             }
9545             }
9546             else {
9547 0         0 $identifier = EMPTY_STRING;
9548 0         0 $i = $i_save;
9549             }
9550             }
9551             else {
9552              
9553             # punctuation variable?
9554             # testfile: cunningham4.pl
9555             #
9556             # We have to be careful here. If we are in an unknown state,
9557             # we will reject the punctuation variable. In the following
9558             # example the '&' is a binary operator but we are in an unknown
9559             # state because there is no sigil on 'Prima', so we don't
9560             # know what it is. But it is a bad guess that
9561             # '&~' is a function variable.
9562             # $self->{text}->{colorMap}->[
9563             # Prima::PodView::COLOR_CODE_FOREGROUND
9564             # & ~tb::COLOR_INDEX ] =
9565             # $sec->{ColorCode}
9566              
9567             # Fix for case c033: a '#' here starts a side comment
9568 0 0 0     0 if ( $identifier eq '&' && $expecting && $tok ne '#' ) {
      0        
9569 0         0 $identifier .= $tok;
9570             }
9571             else {
9572 0         0 $identifier = EMPTY_STRING;
9573 0         0 $i = $i_save;
9574 0         0 $type = '&';
9575             }
9576 0         0 $id_scan_state = EMPTY_STRING;
9577             }
9578 104         150 return;
9579             } ## end sub do_id_scan_state_ampersand
9580              
9581             #-------------------
9582             # hash of scanner subs
9583             #-------------------
9584             my $scan_identifier_code = {
9585             $scan_state_SIGIL => \&do_id_scan_state_dollar,
9586             $scan_state_ALPHA => \&do_id_scan_state_alpha,
9587             $scan_state_COLON => \&do_id_scan_state_colon,
9588             $scan_state_LPAREN => \&do_id_scan_state_left_paren,
9589             $scan_state_RPAREN => \&do_id_scan_state_right_paren,
9590             $scan_state_AMPERSAND => \&do_id_scan_state_ampersand,
9591             };
9592              
9593             sub scan_complex_identifier {
9594              
9595             (
9596 551     551 0 1673 my $self,
9597              
9598             $i,
9599             $id_scan_state,
9600             $identifier,
9601             $rtokens,
9602             $max_token_index,
9603             $expecting,
9604             $container_type,
9605              
9606             ) = @_;
9607              
9608             # This routine assembles tokens into identifiers. It maintains a
9609             # scan state, id_scan_state. It updates id_scan_state based upon
9610             # current id_scan_state and token, and returns an updated
9611             # id_scan_state and the next index after the identifier.
9612              
9613             # This routine now serves a backup for sub scan_simple_identifier
9614             # which handles most identifiers.
9615              
9616             # Note that $self must be a 'my' variable and not be a closure
9617             # variables like the other args. Otherwise it will not get
9618             # deleted by a DESTROY call at the end of a file. Then an
9619             # attempt to create multiple tokenizers can occur when multiple
9620             # files are processed, causing an error.
9621              
9622             # return flag telling caller to split the pretoken
9623 551         2570 my $split_pretoken_flag;
9624              
9625             #-------------------
9626             # Initialize my vars
9627             #-------------------
9628              
9629 551         1601 initialize_my_scan_id_vars();
9630              
9631             #--------------------------------------------------------
9632             # get started by defining a type and a state if necessary
9633             #--------------------------------------------------------
9634              
9635 551 100       1156 if ( !$id_scan_state ) {
9636 544         722 $context = UNKNOWN_CONTEXT;
9637              
9638             # fixup for digraph
9639 544 50       1112 if ( $tok eq '>' ) {
9640 0         0 $tok = '->';
9641 0         0 $tok_begin = $tok;
9642             }
9643 544         796 $identifier = $tok;
9644              
9645 544 100 100     2580 if ( $last_nonblank_token eq '->' ) {
    100 100        
    100 0        
    50          
    0          
    0          
    0          
    0          
9646 8         13 $identifier = '->' . $identifier;
9647 8         13 $id_scan_state = $scan_state_SIGIL;
9648             }
9649             elsif ( $tok eq '$' || $tok eq '*' ) {
9650 355         645 $id_scan_state = $scan_state_SIGIL;
9651 355         541 $context = SCALAR_CONTEXT;
9652             }
9653             elsif ( $tok eq '%' || $tok eq '@' ) {
9654 79         124 $id_scan_state = $scan_state_SIGIL;
9655 79         129 $context = LIST_CONTEXT;
9656             }
9657             elsif ( $tok eq '&' ) {
9658 102         166 $id_scan_state = $scan_state_AMPERSAND;
9659             }
9660             elsif ( $tok eq 'sub' or $tok eq 'package' ) {
9661 0         0 $saw_alpha = 0; # 'sub' is considered type info here
9662 0         0 $id_scan_state = $scan_state_SIGIL;
9663 0         0 $identifier .=
9664             SPACE; # need a space to separate sub from sub name
9665             }
9666             elsif ( $tok eq '::' ) {
9667 0         0 $id_scan_state = $scan_state_ALPHA;
9668             }
9669             elsif ( $tok =~ /^\w/ ) {
9670 0         0 $id_scan_state = $scan_state_COLON;
9671 0         0 $saw_alpha = 1;
9672             }
9673             elsif ( $tok eq '->' ) {
9674 0         0 $id_scan_state = $scan_state_SIGIL;
9675             }
9676             else {
9677              
9678             # shouldn't happen: bad call parameter
9679 0         0 my $msg =
9680             "Program bug detected: scan_complex_identifier received bad starting token = '$tok'\n";
9681 0         0 if (DEVEL_MODE) { Fault($msg) }
9682 0 0       0 if ( !$self->[_in_error_] ) {
9683 0         0 $self->warning($msg);
9684 0         0 $self->[_in_error_] = 1;
9685             }
9686 0         0 $id_scan_state = EMPTY_STRING;
9687              
9688             # emergency return
9689 0         0 goto RETURN;
9690             }
9691 544         818 $saw_type = !$saw_alpha;
9692             }
9693             else {
9694 7         18 $i--;
9695 7         19 $saw_alpha = ( $tok =~ /^\w/ );
9696 7         19 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
9697              
9698             # check for a valid starting state
9699 7         10 if ( DEVEL_MODE && !$is_returnable_scan_state{$id_scan_state} ) {
9700             Fault(<<EOM);
9701             Unexpected starting scan state in sub scan_complex_identifier: '$id_scan_state'
9702             EOM
9703             }
9704             }
9705              
9706             #------------------------------
9707             # loop to gather the identifier
9708             #------------------------------
9709              
9710 551         794 $i_save = $i;
9711              
9712 551   100     2043 while ( $i < $max_token_index && $id_scan_state ) {
9713              
9714             # Be sure we have code to handle this state before we proceed
9715 1305         2182 my $code = $scan_identifier_code->{$id_scan_state};
9716 1305 100       2232 if ( !$code ) {
9717              
9718 3 50       7 if ( $id_scan_state eq $scan_state_SPLIT ) {
9719             ## OK: this is the signal to exit and split the pretoken
9720             }
9721              
9722             # unknown state - should not happen
9723             else {
9724 0         0 if (DEVEL_MODE) {
9725             Fault(<<EOM);
9726             Unknown scan state in sub scan_complex_identifier: '$id_scan_state'
9727             Scan state at sub entry was '$id_scan_state_begin'
9728             EOM
9729             }
9730 0         0 $id_scan_state = EMPTY_STRING;
9731 0         0 $i = $i_save;
9732             }
9733 3         5 last;
9734             }
9735              
9736             # Remember the starting index for progress check below
9737 1302         1452 my $i_start_loop = $i;
9738              
9739 1302         1573 $last_tok_is_blank = $tok_is_blank;
9740 1302 100       1905 if ($tok_is_blank) { $tok_is_blank = undef }
  11         20  
9741 1291         1467 else { $i_save = $i }
9742              
9743 1302         1829 $tok = $rtokens->[ ++$i ];
9744              
9745             # patch to make digraph :: if necessary
9746 1302 100 100     2670 if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
9747 119         161 $tok = '::';
9748 119         177 $i++;
9749             }
9750              
9751 1302         2678 $code->($self);
9752              
9753             # check for forward progress: a decrease in the index $i
9754             # implies that scanning has finished
9755 1302 100       3467 last if ( $i <= $i_start_loop );
9756              
9757             } ## end while ( $i < $max_token_index...)
9758              
9759             #-------------
9760             # Check result
9761             #-------------
9762              
9763             # Be sure a valid state is returned
9764 551 100       1102 if ($id_scan_state) {
9765              
9766 24 100       112 if ( !$is_returnable_scan_state{$id_scan_state} ) {
9767              
9768 17 100       54 if ( $id_scan_state eq $scan_state_SPLIT ) {
9769 3         5 $split_pretoken_flag = 1;
9770             }
9771              
9772 17 50       53 if ( $id_scan_state eq $scan_state_RPAREN ) {
9773 0         0 $self->warning(
9774             "Hit end of line while seeking ) to end prototype\n");
9775             }
9776              
9777 17         33 $id_scan_state = EMPTY_STRING;
9778             }
9779              
9780             # Patch: the deprecated variable $# does not combine with anything
9781             # on the next line.
9782 24 50       65 if ( $identifier eq '$#' ) { $id_scan_state = EMPTY_STRING }
  0         0  
9783             }
9784              
9785             # Be sure the token index is valid
9786 551 50       1206 if ( $i < 0 ) { $i = 0 }
  0         0  
9787              
9788             # Be sure a token type is defined
9789 551 100       1182 if ( !$type ) {
9790              
9791 523 100       1015 if ($saw_type) {
    100          
9792              
9793 517 100 66     2035 if ($saw_alpha) {
    50 100        
    100 66        
      66        
9794              
9795             # The type without the -> should be the same as with the -> so
9796             # that if they get separated we get the same bond strengths,
9797             # etc. See b1234
9798 404 50 66     1372 if ( $identifier =~ /^->/
      33        
9799             && $last_nonblank_type eq 'w'
9800             && substr( $identifier, 2, 1 ) =~ /^\w/ )
9801             {
9802 0         0 $type = 'w';
9803             }
9804 404         776 else { $type = 'i' }
9805             }
9806             elsif ( $identifier eq '->' ) {
9807 0         0 $type = '->';
9808             }
9809             elsif (
9810             ( length($identifier) > 1 )
9811              
9812             # In something like '@$=' we have an identifier '@$'
9813             # In something like '$${' we have type '$$' (and only
9814             # part of an identifier)
9815             && !( $identifier =~ /\$$/ && $tok eq '{' )
9816             && $identifier ne 'sub '
9817             && $identifier ne 'package '
9818             )
9819             {
9820 53         105 $type = 'i';
9821             }
9822 60         114 else { $type = 't' }
9823             }
9824             elsif ($saw_alpha) {
9825              
9826             # type 'w' includes anything without leading type info
9827             # ($,%,@,*) including something like abc::def::ghi
9828 5         8 $type = 'w';
9829              
9830             # Fix for b1337, if restarting scan after line break between
9831             # '->' or sigil and identifier name, use type 'i'
9832 5 50 33     25 if ( $id_scan_state_begin
9833             && $identifier =~ /^([\$\%\@\*\&]|->)/ )
9834             {
9835 5         6 $type = 'i';
9836             }
9837             }
9838             else {
9839 1         2 $type = EMPTY_STRING;
9840             } # this can happen on a restart
9841             }
9842              
9843             # See if we formed an identifier...
9844 551 100       1089 if ($identifier) {
9845 504         744 $tok = $identifier;
9846 504 100       1101 if ($message) { $self->write_logfile_entry($message) }
  1         6  
9847             }
9848              
9849             # did not find an identifier, back up
9850             else {
9851 47         78 $tok = $tok_begin;
9852 47         76 $i = $i_begin;
9853             }
9854              
9855             RETURN:
9856              
9857 551         665 DEBUG_SCAN_ID && do {
9858             my ( $a, $b, $c ) = caller();
9859             print {*STDOUT}
9860             "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
9861             print {*STDOUT}
9862             "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
9863             };
9864              
9865             return (
9866              
9867 551         2324 $i,
9868             $tok,
9869             $type,
9870             $id_scan_state,
9871             $identifier,
9872             $split_pretoken_flag,
9873             );
9874             } ## end sub scan_complex_identifier
9875             } ## end closure for sub scan_complex_identifier
9876              
9877             { ## closure for sub do_scan_sub
9878              
9879             # saved package and subnames in case prototype is on separate line
9880             my ( $package_saved, $subname_saved );
9881              
9882             # initialize subname each time a new 'sub' keyword is encountered
9883             sub initialize_subname {
9884 345     345 0 613 $package_saved = EMPTY_STRING;
9885 345         529 $subname_saved = EMPTY_STRING;
9886 345         520 return;
9887             }
9888              
9889             use constant {
9890 44         98728 SUB_CALL => 1,
9891             PAREN_CALL => 2,
9892             PROTOTYPE_CALL => 3,
9893 44     44   349 };
  44         75  
9894              
9895             sub do_scan_sub {
9896              
9897 351     351 0 723 my ( $self, $rcall_hash ) = @_;
9898              
9899             # Parse a sub name and prototype.
9900              
9901 351         744 my $input_line = $rcall_hash->{input_line};
9902 351         615 my $i = $rcall_hash->{i};
9903 351         589 my $i_beg = $rcall_hash->{i_beg};
9904 351         615 my $tok = $rcall_hash->{tok};
9905 351         587 my $type = $rcall_hash->{type};
9906 351         622 my $rtokens = $rcall_hash->{rtokens};
9907 351         561 my $rtoken_map = $rcall_hash->{rtoken_map};
9908 351         567 my $id_scan_state = $rcall_hash->{id_scan_state};
9909 351         537 my $max_token_index = $rcall_hash->{max_token_index};
9910              
9911 351 100       772 my $id_prefix = $rcall_hash->{is_lexical_method} ? '$' : EMPTY_STRING;
9912              
9913             # At present there are three basic CALL TYPES which are
9914             # distinguished by the starting value of '$tok':
9915             # 1. $tok='sub', id_scan_state='sub'
9916             # it is called with $i_beg equal to the index of the first nonblank
9917             # token following a 'sub' token.
9918             # 2. $tok='(', id_scan_state='sub',
9919             # it is called with $i_beg equal to the index of a '(' which may
9920             # start a prototype.
9921             # 3. $tok='prototype', id_scan_state='prototype'
9922             # it is called with $i_beg equal to the index of a '(' which is
9923             # preceded by ': prototype' and has $id_scan_state eq 'prototype'
9924              
9925             # Examples:
9926              
9927             # A single type 1 call will get both the sub and prototype
9928             # sub foo1 ( $$ ) { }
9929             # ^
9930              
9931             # The subname will be obtained with a 'sub' call
9932             # The prototype on line 2 will be obtained with a '(' call
9933             # sub foo1
9934             # ^ <---call type 1
9935             # ( $$ ) { }
9936             # ^ <---call type 2
9937              
9938             # The subname will be obtained with a 'sub' call
9939             # The prototype will be obtained with a 'prototype' call
9940             # sub foo1 ( $x, $y ) : prototype ( $$ ) { }
9941             # ^ <---type 1 ^ <---type 3
9942              
9943             # TODO: add future error checks to be sure we have a valid
9944             # sub name. For example, 'sub &doit' is wrong. Also, be sure
9945             # a name is given if and only if a non-anonymous sub is
9946             # appropriate.
9947             # USES GLOBAL VARS: $current_package, $last_nonblank_token,
9948             # $rsaw_function_definition,
9949             # $statement_type
9950              
9951 351         500 my $i_entry = $i;
9952              
9953             # Determine the CALL TYPE
9954             # 1=sub
9955             # 2=(
9956             # 3=prototype
9957 351 100       1065 my $call_type =
    100          
9958             $tok eq 'prototype' ? PROTOTYPE_CALL
9959             : $tok eq '(' ? PAREN_CALL
9960             : SUB_CALL;
9961              
9962 351         576 $id_scan_state = EMPTY_STRING; # normally we get everything in one call
9963 351         520 my $subname = $subname_saved;
9964 351         562 my $package = $package_saved;
9965 351         556 my $proto = undef;
9966 351         486 my $attrs = undef;
9967 351         463 my $match;
9968              
9969 351         560 my $pos_beg = $rtoken_map->[$i_beg];
9970 351         1125 pos($input_line) = $pos_beg;
9971              
9972             # Look for the sub NAME if this is a SUB call
9973 351 100 100     2695 if (
9974             $call_type == SUB_CALL
9975             && $input_line =~ m{\G\s*
9976             ((?:\w*(?:'|::))*) # package - something that ends in :: or '
9977             (\w+) # NAME - required
9978             }gcx
9979             )
9980             {
9981 158         283 $match = 1;
9982 158         339 $subname = $2;
9983             my $is_lexical_sub = $last_nonblank_type eq 'k'
9984 158   33     495 && $is_my_our_state{$last_nonblank_token};
9985 158 0 33     1924 if ( $is_lexical_sub && $1 ) {
9986 0         0 $self->warning(
9987             "'$last_nonblank_token' sub $subname cannot be in package '$1'\n"
9988             );
9989 0         0 $is_lexical_sub = 0;
9990             }
9991              
9992 158 50       383 if ($is_lexical_sub) {
9993              
9994             # Lexical subs use the containing block sequence number as a
9995             # package name.
9996 0         0 my $seqno =
9997             $rcurrent_sequence_number->[BRACE]
9998             ->[ $rcurrent_depth->[BRACE] ];
9999 0 0       0 $seqno = SEQ_ROOT if ( !defined($seqno) );
10000 0         0 $package = $seqno;
10001              
10002             # The value will eventually be the sequence number of the
10003             # opening curly brace of the definition (if any). We use -1
10004             # until we find it.
10005 0         0 $ris_lexical_sub->{$subname}->{$package} = -1;
10006              
10007             # Set a special signal to tell sub do_LEFT_CURLY_BRACKET to
10008             # update this value if the next opening sub block brace is for
10009             # this sub. The reason we need this value is to avoid applying
10010             # this new sub in its own definition block. Note that '911' is
10011             # not a possible sub name. Search for '911' for related code.
10012 0         0 $ris_lexical_sub->{911} = [ $subname, $package ];
10013              
10014             # Complain if lexical sub name hides a quote operator
10015 0 0       0 if ( $is_q_qq_qw_qx_qr_s_y_tr_m{$subname} ) {
10016 0         0 $self->complain(
10017             "'my' sub '$subname' matches a builtin quote operator\n"
10018             );
10019             ## OLD CODING, before improved handling of lexical subs:
10020             ## This may end badly, it is safest to avoid formatting.
10021             ## For an example, see perl527/lexsub.t (issue c203)
10022             ## $self->[_do_not_format_] = 1;
10023             }
10024             }
10025             else {
10026 158 100 66     827 $package = ( defined($1) && $1 ) ? $1 : $current_package;
10027 158         390 $package =~ s/\'/::/g;
10028 158 50       440 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
  0         0  
10029 158         349 $package =~ s/::$//;
10030             }
10031              
10032 158         280 my $pos = pos($input_line);
10033 158         271 my $numc = $pos - $pos_beg;
10034 158         393 $tok = 'sub ' . $id_prefix . substr( $input_line, $pos_beg, $numc );
10035 158         248 $type = 'S'; ## Fix for c250, was 'i';
10036              
10037             # remember the sub name in case another call is needed to
10038             # get the prototype
10039 158         244 $package_saved = $package;
10040 158         296 $subname_saved = $subname;
10041             }
10042              
10043             # Now look for PROTO ATTRS for all call types
10044             # Look for prototype/attributes which are usually on the same
10045             # line as the sub name but which might be on a separate line.
10046             # For example, we might have an anonymous sub with attributes,
10047             # or a prototype on a separate line from its sub name
10048              
10049             # NOTE: We only want to parse PROTOTYPES here. If we see anything that
10050             # does not look like a prototype, we assume it is a SIGNATURE and we
10051             # will stop and let the standard tokenizer handle it. In
10052             # particular, we stop if we see any nested parens, braces, or commas.
10053             # Also note, a valid prototype cannot contain any alphabetic character
10054             # -- see https://perldoc.perl.org/perlsub
10055             # But it appears that an underscore is valid in a prototype, so the
10056             # regex below uses [A-Za-z] rather than \w
10057             # This is the old regex which has been replaced:
10058             # $input_line =~ m/\G(\s*\([^\)\(\}\{\,#]*\))? # PROTO
10059             # Added '=' for issue c362
10060 351         1020 my $saw_opening_paren = $input_line =~ /\G\s*\(/;
10061 351 100 100     3222 if (
      66        
10062             $input_line =~ m{\G(\s*\([^\)\(\}\{\,#A-Za-z=]*\))? # PROTO
10063             (\s*:)? # ATTRS leading ':'
10064             }gcx
10065             && ( $1 || $2 )
10066             )
10067             {
10068 45         84 $proto = $1;
10069 45         77 $attrs = $2;
10070              
10071             # Append the prototype to the starting token if it is 'sub' or
10072             # 'prototype'. This is not necessary but for compatibility with
10073             # previous versions when the -csc flag is used:
10074 45 100 100     245 if ( $proto && ( $match || $call_type == PROTOTYPE_CALL ) ) {
    100 100        
10075 24         39 $tok .= $proto;
10076             }
10077              
10078             # If we just entered the sub at an opening paren on this call, not
10079             # a following :prototype, label it with the previous token. This is
10080             # necessary to propagate the sub name to its opening block.
10081             elsif ( $call_type == PAREN_CALL ) {
10082 2         4 $tok = $last_nonblank_token;
10083             }
10084             else {
10085             }
10086              
10087 45   100     139 $match ||= 1;
10088              
10089             # Patch part #1 to fixes cases b994 and b1053:
10090             # Mark an anonymous sub keyword without prototype as type 'k', i.e.
10091             # 'sub : lvalue { ...'
10092 45         67 $type = 'S'; ## C250, was 'i';
10093 45 100 100     229 if ( $tok eq 'sub' && !$proto ) { $type = 'k' }
  2         5  
10094             }
10095              
10096 351 100       770 if ($match) {
10097              
10098             # ATTRS: if there are attributes, back up and let the ':' be
10099             # found later by the scanner.
10100 173         285 my $pos = pos($input_line);
10101 173 100       432 if ($attrs) {
10102 15         27 $pos -= length($attrs);
10103             }
10104              
10105 173         335 my $next_nonblank_token = $tok;
10106              
10107             # catch case of line with leading ATTR ':' after anonymous sub
10108 173 100 100     548 if ( $pos == $pos_beg && $tok eq ':' ) {
10109 1         2 $type = 'A';
10110 1         3 $self->[_in_attribute_list_] = 1;
10111             }
10112              
10113             # Otherwise, if we found a match we must convert back from
10114             # string position to the pre_token index for continued parsing.
10115             else {
10116              
10117             # I don't think an error flag can occur here ..but ?
10118 172         266 my $error;
10119 172         569 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
10120             $max_token_index );
10121 172 50       438 if ($error) { $self->warning("Possibly invalid sub\n") }
  0         0  
10122              
10123             # Patch part #2 to fixes cases b994 and b1053:
10124             # Do not let spaces be part of the token of an anonymous sub
10125             # keyword which we marked as type 'k' above...i.e. for
10126             # something like:
10127             # 'sub : lvalue { ...'
10128             # Back up and let it be parsed as a blank
10129 172 50 66     600 if ( $type eq 'k'
      66        
      33        
10130             && $attrs
10131             && $i > $i_entry
10132             && substr( $rtokens->[$i], 0, 1 ) =~ m/\s/ )
10133             {
10134 2         6 $i--;
10135             }
10136              
10137             # check for multiple definitions of a sub
10138 172         370 ( $next_nonblank_token, my $i_next_uu ) =
10139             find_next_nonblank_token_on_this_line( $i, $rtokens,
10140             $max_token_index );
10141             }
10142              
10143 173 100       724 if ( $next_nonblank_token =~ /^(\s*|#)$/ )
10144             { # skip blank or side comment
10145 7         38 my ( $rpre_tokens, $rpre_types_uu ) =
10146             $self->peek_ahead_for_n_nonblank_pre_tokens(1);
10147 7 50 33     29 if ( defined($rpre_tokens) && @{$rpre_tokens} ) {
  7         27  
10148 7         19 $next_nonblank_token = $rpre_tokens->[0];
10149             }
10150             else {
10151 0         0 $next_nonblank_token = '}';
10152             }
10153             }
10154              
10155             # See what's next...
10156 173 100       700 if ( $next_nonblank_token eq '{' ) {
    100          
    50          
    100          
    50          
    0          
10157 135 100       346 if ($subname) {
10158              
10159             # Check for multiple definitions of a sub, but
10160             # it is ok to have multiple sub BEGIN, etc,
10161             # so we do not complain if name is all caps
10162 125 50 33     635 if ( $rsaw_function_definition->{$subname}->{$package}
10163             && $subname !~ /^[A-Z]+$/ )
10164             {
10165             my $lno =
10166 0         0 $rsaw_function_definition->{$subname}->{$package};
10167 0 0       0 if ( $package =~ /^\d/ ) {
10168 0         0 $self->warning(
10169             "already saw definition of lexical 'sub $subname' at line $lno\n"
10170             );
10171              
10172             }
10173             else {
10174 0         0 if ( !DEVEL_MODE ) {
10175 0         0 $self->warning(
10176             "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
10177             );
10178             }
10179             }
10180             }
10181 125         383 $rsaw_function_definition->{$subname}->{$package} =
10182             $self->[_last_line_number_];
10183             }
10184             }
10185             elsif ( $next_nonblank_token eq ';' ) {
10186             }
10187             elsif ( $next_nonblank_token eq '}' ) {
10188             }
10189              
10190             # ATTRS - if an attribute list follows, remember the name
10191             # of the sub so the next opening brace can be labeled.
10192             # Setting 'statement_type' causes any ':'s to introduce
10193             # attributes.
10194             elsif ( $next_nonblank_token eq ':' ) {
10195 16 100       48 if ( $call_type == SUB_CALL ) {
10196 14 100       66 $statement_type =
10197             substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
10198             }
10199             }
10200              
10201             # if we stopped before an open paren ...
10202             elsif ( $next_nonblank_token eq '(' ) {
10203              
10204             # If we DID NOT see this paren above then it must be on the
10205             # next line so we will set a flag to come back here and see if
10206             # it is a PROTOTYPE
10207              
10208             # Otherwise, we assume it is a SIGNATURE rather than a
10209             # PROTOTYPE and let the normal tokenizer handle it as a list
10210 21 100       91 if ( !$saw_opening_paren ) {
10211 4         8 $id_scan_state = 'sub'; # we must come back to get proto
10212             }
10213 21 50       64 if ( $call_type == SUB_CALL ) {
10214 21 50       73 $statement_type =
10215             substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
10216             }
10217             }
10218              
10219             # something else..
10220             elsif ($next_nonblank_token) {
10221              
10222 0 0 0     0 if ( $rcall_hash->{tok} eq 'method' && $call_type == SUB_CALL )
10223             {
10224             # For a method call, silently ignore this error (rt145706)
10225             # to avoid needless warnings. Example which can produce it:
10226             # test(method Pack (), "method");
10227              
10228             # TODO: scan for use feature 'class' and:
10229             # - if we saw 'use feature 'class' then issue the warning.
10230             # - if we did not see use feature 'class' then issue the
10231             # warning and suggest turning off --use-feature=class
10232             }
10233             else {
10234 0 0       0 $subname = EMPTY_STRING unless ( defined($subname) );
10235 0         0 $self->warning(
10236             "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
10237             );
10238             }
10239             }
10240              
10241             # EOF technically ok
10242             else {
10243             }
10244              
10245 173         585 check_prototype( $proto, $package, $subname );
10246             }
10247              
10248             # no match to either sub name or prototype, but line not blank
10249             else {
10250              
10251             }
10252 351         1792 return ( $i, $tok, $type, $id_scan_state );
10253             } ## end sub do_scan_sub
10254             }
10255              
10256             #########################################################################
10257             # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
10258             #########################################################################
10259              
10260             sub find_next_nonblank_token {
10261 458     458 0 1183 my ( $self, $i, $rtokens, $max_token_index ) = @_;
10262              
10263             # Returns the next nonblank token after the token at index $i
10264             # To skip past a side comment, and any subsequent block comments
10265             # and blank lines, call with i=$max_token_index
10266              
10267             # Skip any ending blank (fix c258). It would be cleaner if caller passed
10268             # $rtoken_map, so we could check for type 'b', and avoid a regex test, but
10269             # benchmarking shows that this test does not take significant time. So
10270             # that would be a nice update but not essential. Also note that ending
10271             # blanks will not occur for text previously processed by perltidy.
10272 458 100 100     1832 if ( $i == $max_token_index - 1
10273             && $rtokens->[$max_token_index] =~ /^\s+$/ )
10274             {
10275 9         22 $i++;
10276             }
10277              
10278 458 100       1095 if ( $i >= $max_token_index ) {
10279 141 100       459 if ( !peeked_ahead() ) {
10280 139         380 peeked_ahead(1);
10281 139         540 $self->peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
10282             }
10283             }
10284              
10285 458         910 my $next_nonblank_token = $rtokens->[ ++$i ];
10286              
10287             # Any more tokens?
10288 458 50 33     1852 return ( SPACE, $i )
10289             if ( !defined($next_nonblank_token) || !length($next_nonblank_token) );
10290              
10291             # Skip over whitespace
10292 458         936 my $ord = ord( substr( $next_nonblank_token, 0, 1 ) );
10293 458 0 66     2523 if (
      33        
      66        
10294              
10295             ( $ord <= ORD_PRINTABLE_MIN || $ord >= ORD_PRINTABLE_MAX )
10296              
10297             # Quick test for ascii space or tab
10298             && (
10299             ( $ord == ORD_SPACE || $ord == ORD_TAB )
10300              
10301             # Slow test to for something else identified as whitespace
10302             || $next_nonblank_token =~ /^\s+$/
10303             )
10304             )
10305             {
10306 301         563 $next_nonblank_token = $rtokens->[ ++$i ];
10307 301 50       747 return ( SPACE, $i ) unless ( defined($next_nonblank_token) );
10308             }
10309              
10310             # We should be at a nonblank now
10311 458         1422 return ( $next_nonblank_token, $i );
10312              
10313             } ## end sub find_next_nonblank_token
10314              
10315             sub find_next_noncomment_token {
10316 104     104 0 258 my ( $self, $i, $rtokens, $max_token_index ) = @_;
10317              
10318             # Given the current character position, look ahead past any comments
10319             # and blank lines and return the next token, including digraphs and
10320             # trigraphs.
10321              
10322 104         379 my ( $next_nonblank_token, $i_next ) =
10323             $self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
10324              
10325             # skip past any side comment
10326 104 50       318 if ( $next_nonblank_token eq '#' ) {
10327 0         0 ( $next_nonblank_token, $i_next ) =
10328             $self->find_next_nonblank_token( $i_next, $rtokens,
10329             $max_token_index );
10330             }
10331              
10332             # check for a digraph
10333 104 50 33     701 if ( $next_nonblank_token
      33        
10334             && $next_nonblank_token ne SPACE
10335             && defined( $rtokens->[ $i_next + 1 ] ) )
10336             {
10337 104         237 my $test2 = $next_nonblank_token . $rtokens->[ $i_next + 1 ];
10338 104 100       320 if ( $is_digraph{$test2} ) {
10339 15         24 $next_nonblank_token = $test2;
10340 15         22 $i_next = $i_next + 1;
10341              
10342             # check for a trigraph
10343 15 50       48 if ( defined( $rtokens->[ $i_next + 1 ] ) ) {
10344 15         32 my $test3 = $next_nonblank_token . $rtokens->[ $i_next + 1 ];
10345 15 50       46 if ( $is_trigraph{$test3} ) {
10346 0         0 $next_nonblank_token = $test3;
10347 0         0 $i_next = $i_next + 1;
10348             }
10349             }
10350             }
10351             }
10352              
10353 104         257 return ( $next_nonblank_token, $i_next );
10354             } ## end sub find_next_noncomment_token
10355              
10356             sub is_possible_numerator {
10357              
10358 0     0 0 0 my ( $self, $i, $rtokens, $max_token_index ) = @_;
10359              
10360             # Look at the next non-comment character and decide if it could be a
10361             # numerator. Returns the following code:
10362             # -1 - division not possible
10363             # 0 - can't tell if division possible
10364             # 1 - division possible
10365             # 2 - division probable: number follows
10366             # 3 - division very probable: number and one of ; ] } follow
10367             # 4 - is division, not pattern: number and ) follow
10368              
10369 0         0 my $divide_possible_code = 0;
10370              
10371 0         0 my $next_token = $rtokens->[ $i + 1 ];
10372 0 0       0 if ( $next_token eq '=' ) { $i++; } # handle /=
  0         0  
10373 0         0 my ( $next_nonblank_token, $i_next ) =
10374             $self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
10375              
10376 0 0       0 if ( $next_nonblank_token eq '#' ) {
10377 0         0 ( $next_nonblank_token, $i_next ) =
10378             $self->find_next_nonblank_token( $max_token_index, $rtokens,
10379             $max_token_index );
10380             }
10381              
10382 0 0       0 if ( $next_nonblank_token =~ / [ \( \$ \w \. \@ ] /x ) {
    0          
10383 0         0 $divide_possible_code = 1;
10384              
10385             # look ahead one more token for some common patterns, such as
10386             # pi/2) pi/2; pi/2}
10387 0 0       0 if ( $next_nonblank_token =~ /^\d/ ) {
10388 0         0 my ( $next_next_nonblank_token, $i_next_next_uu ) =
10389             $self->find_next_nonblank_token( $i_next, $rtokens,
10390             $max_token_index );
10391 0 0 0     0 if ( $next_next_nonblank_token eq ')' ) {
    0 0        
10392 0         0 $divide_possible_code = 4;
10393             }
10394             elsif ($next_next_nonblank_token eq ';'
10395             || $next_next_nonblank_token eq ']'
10396             || $next_next_nonblank_token eq '}' )
10397             {
10398 0         0 $divide_possible_code = 3;
10399             }
10400             else {
10401 0         0 $divide_possible_code = 2;
10402             }
10403             }
10404             }
10405             elsif ( $next_nonblank_token =~ /^\s*$/ ) {
10406 0         0 $divide_possible_code = 0;
10407             }
10408             else {
10409 0         0 $divide_possible_code = -1;
10410             }
10411              
10412 0         0 return $divide_possible_code;
10413             } ## end sub is_possible_numerator
10414              
10415             { ## closure for sub pattern_expected
10416             my %pattern_test;
10417              
10418             BEGIN {
10419              
10420             # List of tokens which may follow a pattern. Note that we will not
10421             # have formed digraphs at this point, so we will see '&' instead of
10422             # '&&' and '|' instead of '||'
10423              
10424             # /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/
10425 44     44   285 my @q = qw( & && | || ? : + - * and or while if unless );
10426 44         207 push @q, ')', '}', ']', '>', COMMA, ';';
10427 44         118924 $pattern_test{$_} = 1 for @q;
10428             } ## end BEGIN
10429              
10430             sub pattern_expected {
10431              
10432 0     0 0 0 my ( $self, $i, $rtokens, $max_token_index ) = @_;
10433              
10434             # This a filter for a possible pattern.
10435             # It looks at the token after a possible pattern and tries to
10436             # determine if that token could end a pattern.
10437             # returns -
10438             # 1 - yes
10439             # 0 - can't tell
10440             # -1 - no
10441 0         0 my $is_pattern = 0;
10442              
10443 0         0 my $next_token = $rtokens->[ $i + 1 ];
10444              
10445             # skip a possible quote modifier
10446 0         0 my $possible_modifiers = $quote_modifiers{'m'};
10447 0 0       0 if ( $next_token =~ /^$possible_modifiers/ ) {
10448 0         0 $i++;
10449             }
10450              
10451 0         0 my ( $next_nonblank_token, $i_next_uu ) =
10452             $self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
10453              
10454 0 0       0 if ( $pattern_test{$next_nonblank_token} ) {
10455 0         0 $is_pattern = 1;
10456             }
10457             else {
10458              
10459             # Added '#' to fix issue c044
10460 0 0 0     0 if ( $next_nonblank_token =~ /^\s*$/
10461             || $next_nonblank_token eq '#' )
10462             {
10463 0         0 $is_pattern = 0;
10464             }
10465             else {
10466 0         0 $is_pattern = -1;
10467             }
10468             }
10469 0         0 return $is_pattern;
10470             } ## end sub pattern_expected
10471             }
10472              
10473             sub find_next_nonblank_token_on_this_line {
10474 579     579 0 1060 my ( $i, $rtokens, $max_token_index ) = @_;
10475 579         820 my $next_nonblank_token;
10476              
10477 579 100       1118 if ( $i < $max_token_index ) {
10478 571         948 $next_nonblank_token = $rtokens->[ ++$i ];
10479              
10480 571 100       2275 if ( $next_nonblank_token =~ /^\s*$/ ) {
10481              
10482 164 100       415 if ( $i < $max_token_index ) {
10483 162         358 $next_nonblank_token = $rtokens->[ ++$i ];
10484             }
10485             }
10486             }
10487             else {
10488 8         19 $next_nonblank_token = EMPTY_STRING;
10489             }
10490 579         1538 return ( $next_nonblank_token, $i );
10491             } ## end sub find_next_nonblank_token_on_this_line
10492              
10493             sub find_angle_operator_termination {
10494              
10495 8     8 0 35 my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index )
10496             = @_;
10497              
10498             # We are looking at a '<' and want to know if it is an angle operator.
10499             # Return:
10500             # $i = pretoken index of ending '>' if found, current $i otherwise
10501             # $type = 'Q' if found, '>' otherwise
10502              
10503 8         14 my $i = $i_beg;
10504 8         16 my $type = '<';
10505 8         39 pos($input_line) = 1 + $rtoken_map->[$i];
10506              
10507             # The token sequence '><' implies a markup language
10508 8 50       27 if ( $last_nonblank_token eq '>' ) {
10509 0         0 $self->[_html_tag_count_]++;
10510             }
10511              
10512 8         14 my $filter;
10513              
10514 8         16 my $expecting_TERM = $expecting == TERM;
10515              
10516             # we just have to find the next '>' if a term is expected
10517 8 100       23 if ($expecting_TERM) { $filter = '[\>]' }
  6 50       13  
10518              
10519             # we have to guess if we don't know what is expected
10520 2         3 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
10521              
10522             # shouldn't happen - we shouldn't be here if operator is expected
10523             else {
10524 0         0 if (DEVEL_MODE) {
10525             Fault(<<EOM);
10526             Bad call to find_angle_operator_termination
10527             EOM
10528             }
10529 0         0 return ( $i, $type );
10530             }
10531              
10532             # To illustrate what we might be looking at, in case we are
10533             # guessing, here are some examples of valid angle operators
10534             # (or file globs):
10535             # <tmp_imp/*>
10536             # <FH>
10537             # <$fh>
10538             # <*.c *.h>
10539             # <_>
10540             # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
10541             # <${PREFIX}*img*.$IMAGE_TYPE>
10542             # <img*.$IMAGE_TYPE>
10543             # <Timg*.$IMAGE_TYPE>
10544             # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
10545             #
10546             # Here are some examples of lines which do not have angle operators:
10547             # return unless $self->[2]++ < $#{$self->[1]};
10548             # < 2 || @$t >
10549             #
10550             # the following line from dlister.pl caused trouble:
10551             # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
10552             #
10553             # If the '<' starts an angle operator, it must end on this line and
10554             # it must not have certain characters like ';' and '=' in it. I use
10555             # this to limit the testing. This filter should be improved if
10556             # possible.
10557              
10558 8 50       190 if ( $input_line =~ /($filter)/g ) {
10559              
10560 8 50       35 if ( $1 eq '>' ) {
10561              
10562             # We MAY have found an angle operator termination if we get
10563             # here, but we need to do more to be sure we haven't been
10564             # fooled.
10565 8         15 my $pos = pos($input_line);
10566              
10567 8         16 my $pos_beg = $rtoken_map->[$i];
10568 8         25 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
10569              
10570             # Test for '<' after possible filehandle, issue c103
10571             # print $fh <>; # syntax error
10572             # print $fh <DATA>; # ok
10573             # print $fh < DATA>; # syntax error at '>'
10574             # print STDERR < DATA>; # ok, prints word 'DATA'
10575             # print BLABLA <DATA>; # ok; does nothing unless BLABLA is defined
10576 8 100       21 if ( $last_nonblank_type eq 'Z' ) {
10577              
10578             # $str includes brackets; something like '<DATA>'
10579 1 0 33     10 if ( substr( $last_nonblank_token, 0, 1 ) !~ /[A-Za-z_]/
10580             && substr( $str, 1, 1 ) !~ /[A-Za-z_]/ )
10581             {
10582 0         0 return ( $i, $type );
10583             }
10584             }
10585              
10586             # Reject if the closing '>' follows a '-' as in:
10587             # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
10588 8 100       25 if ( $expecting eq UNKNOWN ) {
10589 2         3 my $check = substr( $input_line, $pos - 2, 1 );
10590 2 100       4 if ( $check eq '-' ) {
10591 1         4 return ( $i, $type );
10592             }
10593             }
10594              
10595             ######################################debug#####
10596             #$self->write_diagnostics( "ANGLE? :$str\n");
10597             #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
10598             ######################################debug#####
10599 7         14 $type = 'Q';
10600 7         12 my $error;
10601 7         22 ( $i, $error ) =
10602             inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
10603              
10604             # It may be possible that a quote ends midway in a pretoken.
10605             # If this happens, it may be necessary to split the pretoken.
10606 7 50       24 if ($error) {
10607 0         0 if (DEVEL_MODE) {
10608             Fault(<<EOM);
10609             unexpected error condition returned by inverse_pretoken_map
10610             EOM
10611             }
10612             $self->warning(
10613 0         0 "Possible tokenization error..please check this line\n");
10614             }
10615              
10616             # Check for accidental formatting of a markup language doc...
10617             # Formatting will be skipped if we set _html_tag_count_ and
10618             # also set a warning of any kind.
10619 7         11 my $is_html_tag;
10620 7   33     23 my $is_first_string =
10621             $i_beg == 0 && $self->[_last_line_number_] == 1;
10622              
10623             # html comment '<!...' of any type
10624 7 50 33     158 if ( $str =~ /^<\s*!/ ) {
    50          
    50          
10625 0         0 $is_html_tag = 1;
10626 0 0       0 if ($is_first_string) {
10627 0         0 $self->warning(
10628             "looks like a markup language, continuing error checks\n"
10629             );
10630             }
10631             }
10632              
10633             # html end tag, something like </h1>
10634             elsif ( $str =~ /^<\s*\/\w+\s*>$/ ) {
10635 0         0 $is_html_tag = 1;
10636             }
10637              
10638             # xml prolog?
10639             elsif ( $str =~ /^<\?xml\s.*\?>$/i && $is_first_string ) {
10640 0         0 $is_html_tag = 1;
10641 0         0 $self->warning(
10642             "looks like a markup language, continuing error checks\n");
10643             }
10644             else {
10645             ## doesn't look like a markup tag
10646             }
10647              
10648 7 50       22 if ($is_html_tag) {
10649 0         0 $self->[_html_tag_count_]++;
10650             }
10651              
10652             # count blanks on inside of brackets
10653 7         15 my $blank_count = 0;
10654 7 100       31 $blank_count++ if ( $str =~ /<\s+/ );
10655 7 100       30 $blank_count++ if ( $str =~ /\s+>/ );
10656              
10657             # Now let's see where we stand....
10658             # OK if math op not possible
10659 7 100       26 if ($expecting_TERM) {
    50          
    50          
    0          
10660             }
10661              
10662             elsif ($is_html_tag) {
10663             }
10664              
10665             # OK if there are no more than 2 non-blank pre-tokens inside
10666             # (not possible to write 2 token math between < and >)
10667             # This catches most common cases
10668             elsif ( $i <= $i_beg + 3 + $blank_count ) {
10669              
10670             # No longer any need to document this common case
10671             ## $self->write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
10672             }
10673              
10674             # OK if there is some kind of identifier inside
10675             # print $fh <tvg::INPUT>;
10676             elsif ( $str =~ /^<\s*\$?(\w|::|\s)+\s*>$/ ) {
10677 0         0 $self->write_diagnostics("ANGLE (contains identifier): $str\n");
10678             }
10679              
10680             # Not sure..
10681             else {
10682              
10683             # Let's try a Brace Test: any braces inside must balance
10684 0         0 my $br = $str =~ tr/\{/{/ - $str =~ tr/\}/}/;
10685 0         0 my $sb = $str =~ tr/\[/[/ - $str =~ tr/\]/]/;
10686 0         0 my $pr = $str =~ tr/\(/(/ - $str =~ tr/\)/)/;
10687              
10688             # if braces do not balance - not angle operator
10689 0 0 0     0 if ( $br || $sb || $pr ) {
      0        
10690 0         0 $i = $i_beg;
10691 0         0 $type = '<';
10692 0         0 $self->write_diagnostics(
10693             "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
10694             }
10695              
10696             # we should keep doing more checks here...to be continued
10697             # Tentatively accepting this as a valid angle operator.
10698             # There are lots more things that can be checked.
10699             else {
10700 0         0 $self->write_diagnostics(
10701             "ANGLE-Guessing yes: $str expecting=$expecting\n");
10702 0         0 $self->write_logfile_entry(
10703             "Guessing angle operator here: $str\n");
10704             }
10705             }
10706             }
10707              
10708             # didn't find ending >
10709             else {
10710 0 0       0 if ($expecting_TERM) {
10711 0         0 $self->warning("No ending > for angle operator\n");
10712             }
10713             }
10714             }
10715 7         27 return ( $i, $type );
10716             } ## end sub find_angle_operator_termination
10717              
10718             sub scan_number_do {
10719              
10720 683     683 0 1488 my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) =
10721             @_;
10722              
10723             # Scan a number in any of the formats that Perl accepts
10724             # Underbars (_) are allowed in decimal numbers.
10725             # Given:
10726             # $input_line - the string to scan
10727             # $i - pre_token index to start scanning
10728             # $rtoken_map - reference to the pre_token map giving starting
10729             # character position in $input_line of token $i
10730             # Return:
10731             # $i - last pre_token index of the number just scanned
10732             # $type - the token type ('v' or 'n')
10733             # number - the number (characters); or undef if not a number
10734              
10735 683         1087 my $pos_beg = $rtoken_map->[$i];
10736 683         848 my $pos;
10737             ##my $i_begin = $i;
10738 683         999 my $number = undef;
10739 683         864 my $type = $input_type;
10740              
10741 683         1363 my $first_char = substr( $input_line, $pos_beg, 1 );
10742              
10743             # Look for bad starting characters; Shouldn't happen..
10744 683 50       2841 if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
10745 0         0 if (DEVEL_MODE) {
10746             Fault(<<EOM);
10747             Program bug - scan_number given bad first character = '$first_char'
10748             EOM
10749             }
10750 0         0 return ( $i, $type, $number );
10751             }
10752              
10753             # handle v-string without leading 'v' character ('Two Dot' rule)
10754             # (vstring.t)
10755             # Here is the format prior to including underscores:
10756             ## if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
10757 683         1828 pos($input_line) = $pos_beg;
10758 683 50       2996 if ( $input_line =~ /\G((\d[_\d]*)?\.[\d_]+(\.[\d_]+)+)/g ) {
10759 0         0 $pos = pos($input_line);
10760 0         0 my $numc = $pos - $pos_beg;
10761 0         0 $number = substr( $input_line, $pos_beg, $numc );
10762 0         0 $type = 'v';
10763             }
10764              
10765             # handle octal, hex, binary
10766 683 50       1408 if ( !defined($number) ) {
10767 683         1129 pos($input_line) = $pos_beg;
10768              
10769             # Perl 5.22 added floating point literals, like '0x0.b17217f7d1cf78p0'
10770             # For reference, the format prior to hex floating point is:
10771             # /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
10772             # (hex) (octal) (binary)
10773 683 100       2157 if (
10774             $input_line =~ m{
10775              
10776             \G[+-]?0( # leading [signed] 0
10777              
10778             # a hex float, i.e. '0x0.b17217f7d1cf78p0'
10779             ([xX][0-9a-fA-F_]* # X and optional leading digits
10780             (\.([0-9a-fA-F][0-9a-fA-F_]*)?)? # optional decimal and fraction
10781             [Pp][+-]?[0-9a-fA-F] # REQUIRED exponent with digit
10782             [0-9a-fA-F_]*) # optional Additional exponent digits
10783              
10784             # or hex integer
10785             |([xX][0-9a-fA-F_]+)
10786              
10787             # or octal fraction
10788             |([oO]?[0-7_]+ # string of octal digits
10789             (\.([0-7][0-7_]*)?)? # optional decimal and fraction
10790             [Pp][+-]?[0-7] # REQUIRED exponent, no underscore
10791             [0-7_]*) # Additional exponent digits with underscores
10792              
10793             # or octal integer
10794             |([oO]?[0-7_]+) # string of octal digits
10795              
10796             # or a binary float
10797             |([bB][01_]* # 'b' with string of binary digits
10798             (\.([01][01_]*)?)? # optional decimal and fraction
10799             [Pp][+-]?[01] # Required exponent indicator, no underscore
10800             [01_]*) # additional exponent bits
10801              
10802             # or binary integer
10803             |([bB][01_]+) # 'b' with string of binary digits
10804              
10805             )}gx
10806             )
10807             {
10808 72         103 $pos = pos($input_line);
10809 72         133 my $numc = $pos - $pos_beg;
10810 72         114 $number = substr( $input_line, $pos_beg, $numc );
10811 72         115 $type = 'n';
10812             }
10813             }
10814              
10815             # handle decimal
10816 683 100       1301 if ( !defined($number) ) {
10817 611         944 pos($input_line) = $pos_beg;
10818              
10819 611 50       2570 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
10820 611         876 $pos = pos($input_line);
10821              
10822             # watch out for things like 0..40 which would give 0. by this;
10823 611 100 100     1869 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
10824             && ( substr( $input_line, $pos, 1 ) eq '.' ) )
10825             {
10826 38         50 $pos--;
10827             }
10828 611         798 my $numc = $pos - $pos_beg;
10829 611         931 $number = substr( $input_line, $pos_beg, $numc );
10830 611         914 $type = 'n';
10831             }
10832             }
10833              
10834             # filter out non-numbers like e + - . e2 .e3 +e6
10835             # the rule: at least one digit, and any 'e' must be preceded by a digit
10836 683 100 66     3027 if (
      66        
10837             $number !~ /\d/ # no digits
10838             || ( $number =~ /^(.*)[eE]/
10839             && $1 !~ /\d/ ) # or no digits before the 'e'
10840             )
10841             {
10842 304         467 $number = undef;
10843 304         399 $type = $input_type;
10844 304         1081 return ( $i, $type, $number );
10845             }
10846              
10847             # Found a number; now we must convert back from character position
10848             # to pre_token index. An error here implies user syntax error.
10849             # An example would be an invalid octal number like '009'.
10850 379         519 my $error;
10851 379         754 ( $i, $error ) =
10852             inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
10853 379 50       709 if ($error) { $self->warning("Possibly invalid number\n") }
  0         0  
10854              
10855 379         1327 return ( $i, $type, $number );
10856             } ## end sub scan_number_do
10857              
10858             sub inverse_pretoken_map {
10859              
10860             # Starting with the current pre_token index $i, scan forward until
10861             # finding the index of the next pre_token whose position is $pos.
10862 2470     2470 0 4736 my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
10863 2470         3437 my $error = 0;
10864              
10865 2470         5200 while ( ++$i <= $max_token_index ) {
10866              
10867 4631 100       8686 if ( $pos <= $rtoken_map->[$i] ) {
10868              
10869             # Let the calling routine handle errors in which we do not
10870             # land on a pre-token boundary. It can happen by running
10871             # perltidy on some non-perl scripts, for example.
10872 2426 50       4695 if ( $pos < $rtoken_map->[$i] ) { $error = 1 }
  0         0  
10873 2426         3097 $i--;
10874 2426         3651 last;
10875             }
10876             } ## end while ( ++$i <= $max_token_index)
10877 2470         5173 return ( $i, $error );
10878             } ## end sub inverse_pretoken_map
10879              
10880             sub find_here_doc {
10881              
10882             my (
10883              
10884 9     9 0 25 $self,
10885              
10886             $expecting,
10887             $i,
10888             $rtokens,
10889             $rtoken_type,
10890             $rtoken_map_uu,
10891             $max_token_index,
10892              
10893             ) = @_;
10894              
10895             # Find the target of a here document, if any
10896             # Given:
10897             # $i - token index of the second < of <<
10898             # ($i must be less than the last token index if this is called)
10899             # Return:
10900             # $found_target = 0 didn't find target; =1 found target
10901             # HERE_TARGET - the target string (may be empty string)
10902             # $i - unchanged if not here doc,
10903             # or index of the last token of the here target
10904             # $saw_error - flag noting unbalanced quote on here target
10905 9         18 my $ibeg = $i;
10906 9         18 my $found_target = 0;
10907 9         19 my $here_doc_target = EMPTY_STRING;
10908 9         18 my $here_quote_character = EMPTY_STRING;
10909 9         14 my $saw_error = 0;
10910 9         20 my ( $next_nonblank_token, $i_next_nonblank, $next_token );
10911 9         21 $next_token = $rtokens->[ $i + 1 ];
10912              
10913             # perl allows a backslash before the target string (heredoc.t)
10914 9         18 my $backslash = 0;
10915 9 50       33 if ( $next_token eq BACKSLASH ) {
10916 0         0 $backslash = 1;
10917 0         0 $next_token = $rtokens->[ $i + 2 ];
10918             }
10919              
10920 9         38 ( $next_nonblank_token, $i_next_nonblank ) =
10921             find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
10922              
10923 9 100 33     57 if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
    50          
    50          
10924              
10925 6         12 my $in_quote = 1;
10926 6         10 my $quote_depth = 0;
10927 6         12 my $quote_pos = 0;
10928 6         7 my $quoted_string;
10929              
10930             (
10931              
10932 6         26 $i,
10933             $in_quote,
10934             $here_quote_character,
10935             $quote_pos,
10936             $quote_depth,
10937             $quoted_string,
10938             )
10939             = $self->follow_quoted_string(
10940              
10941             $i_next_nonblank,
10942             $in_quote,
10943             $rtokens,
10944             $rtoken_type,
10945             $here_quote_character,
10946             $quote_pos,
10947             $quote_depth,
10948             $max_token_index,
10949             );
10950              
10951 6 50       17 if ($in_quote) { # didn't find end of quote, so no target found
10952 0         0 $i = $ibeg;
10953 0 0       0 if ( $expecting == TERM ) {
10954 0         0 $self->warning(
10955             "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
10956             );
10957 0         0 $saw_error = 1;
10958             }
10959             }
10960             else { # found ending quote
10961 6         11 $found_target = 1;
10962              
10963 6         11 my $tokj;
10964 6         22 foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) {
10965 6         13 $tokj = $rtokens->[$j];
10966              
10967             # we have to remove any backslash before the quote character
10968             # so that the here-doc-target exactly matches this string
10969             next
10970 6 0 33     24 if ( $tokj eq BACKSLASH
      33        
10971             && $j < $i - 1
10972             && $rtokens->[ $j + 1 ] eq $here_quote_character );
10973 6         16 $here_doc_target .= $tokj;
10974             }
10975             }
10976             }
10977              
10978             elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
10979 0         0 $found_target = 1;
10980 0         0 $self->write_logfile_entry(
10981             "found blank here-target after <<; suggest using \"\"\n");
10982 0         0 $i = $ibeg;
10983             }
10984             elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
10985              
10986 3         7 my $here_doc_expected;
10987 3 50       25 if ( $expecting == UNKNOWN ) {
10988 0         0 $here_doc_expected = $self->guess_if_here_doc($next_token);
10989             }
10990             else {
10991 3         8 $here_doc_expected = 1;
10992             }
10993              
10994 3 50       10 if ($here_doc_expected) {
10995 3         6 $found_target = 1;
10996 3         5 $here_doc_target = $next_token;
10997 3         6 $i = $ibeg + 1;
10998             }
10999              
11000             }
11001             else {
11002              
11003 0 0       0 if ( $expecting == TERM ) {
11004 0         0 $found_target = 1;
11005 0         0 $self->write_logfile_entry("Note: bare here-doc operator <<\n");
11006             }
11007             else {
11008 0         0 $i = $ibeg;
11009             }
11010             }
11011              
11012             # patch to neglect any prepended backslash
11013 9 50 33     50 if ( $found_target && $backslash ) { $i++ }
  0         0  
11014              
11015 9         48 return ( $found_target, $here_doc_target, $here_quote_character, $i,
11016             $saw_error );
11017             } ## end sub find_here_doc
11018              
11019             sub do_quote {
11020              
11021             my (
11022              
11023 3170     3170 0 8218 $self,
11024              
11025             $i,
11026             $in_quote,
11027             $quote_character,
11028             $quote_pos,
11029             $quote_depth,
11030             $quoted_string_1,
11031             $quoted_string_2,
11032             $rtokens,
11033             $rtoken_type,
11034             $rtoken_map_uu,
11035             $max_token_index,
11036              
11037             ) = @_;
11038              
11039             # Follow (or continue following) quoted string(s)
11040             # $in_quote = return code:
11041             # 0 - ok, found end
11042             # 1 - still must find end of quote whose target is $quote_character
11043             # 2 - still looking for end of first of two quotes
11044             #
11045             # Returns updated strings:
11046             # $quoted_string_1 = quoted string seen while in_quote=1
11047             # $quoted_string_2 = quoted string seen while in_quote=2
11048              
11049 3170         4037 my $quoted_string;
11050 3170 100       5638 if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
11051 31         67 my $ibeg = $i;
11052             (
11053              
11054 31         109 $i,
11055             $in_quote,
11056             $quote_character,
11057             $quote_pos,
11058             $quote_depth,
11059             $quoted_string,
11060             )
11061             = $self->follow_quoted_string(
11062              
11063             $ibeg,
11064             $in_quote,
11065             $rtokens,
11066             $rtoken_type,
11067             $quote_character,
11068             $quote_pos,
11069             $quote_depth,
11070             $max_token_index,
11071             );
11072 31         76 $quoted_string_2 .= $quoted_string;
11073 31 50       76 if ( $in_quote == 1 ) {
11074 31 100       132 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
  1         1  
11075 31         65 $quote_character = EMPTY_STRING;
11076             }
11077             else {
11078 0         0 $quoted_string_2 .= "\n";
11079             }
11080             }
11081              
11082 3170 50       5541 if ( $in_quote == 1 ) { # one (more) quote to follow
11083 3170         3833 my $ibeg = $i;
11084             (
11085              
11086 3170         7816 $i,
11087             $in_quote,
11088             $quote_character,
11089             $quote_pos,
11090             $quote_depth,
11091             $quoted_string,
11092             )
11093             = $self->follow_quoted_string(
11094              
11095             $ibeg,
11096             $in_quote,
11097             $rtokens,
11098             $rtoken_type,
11099             $quote_character,
11100             $quote_pos,
11101             $quote_depth,
11102             $max_token_index,
11103             );
11104 3170         5680 $quoted_string_1 .= $quoted_string;
11105 3170 100       5609 if ( $in_quote == 1 ) {
11106 244         388 $quoted_string_1 .= "\n";
11107             }
11108             }
11109             return (
11110              
11111 3170         9611 $i,
11112             $in_quote,
11113             $quote_character,
11114             $quote_pos,
11115             $quote_depth,
11116             $quoted_string_1,
11117             $quoted_string_2,
11118              
11119             );
11120             } ## end sub do_quote
11121              
11122 44     44   367 use constant DEBUG_FIND_INTERPOLATED_HERE_TARGETS => 0;
  44         87  
  44         27873  
11123              
11124             sub find_interpolated_here_targets {
11125 2     2 0 4 my ( $self, $quoted_string, $len_starting_lines ) = @_;
11126              
11127             # Search for here targets in a quoted string
11128             # Given:
11129             # $quoted_string = the complete string of an interpolated quote
11130             # $len_starting_lines = number of characters of the first n-1 lines
11131             # (=0 if this is a single-line quote)
11132             # Task:
11133             # Find and return a list of all here targets on the last line;
11134             # i.e., if here target is index ii, we only return the
11135             # target if $rmap->[$ii]>=$len_starting_lines
11136              
11137             # The items returned are the format needed for @{$rhere_target_list};
11138             # [ $here_doc_target, $here_quote_character ]
11139             # there can be multiple here targets.
11140              
11141 2         3 my $rht;
11142              
11143             # Break the entire quote into pre-tokens, even if multi-line, because we
11144             # have to determine which parts are in single quotes
11145 2         5 my ( $rtokens, $rmap, $rtoken_type ) = pre_tokenize($quoted_string);
11146 2         4 my $max_ii = @{$rtokens} - 1;
  2         4  
11147              
11148             # Depth of braces controlled by a sigil
11149 2         3 my $code_depth = 0;
11150              
11151             # Loop over pre-tokens
11152 2         4 my $ii = -1;
11153 2         7 while ( ++$ii <= $max_ii ) {
11154 21         24 my $token = $rtokens->[$ii];
11155 21         19 if (DEBUG_FIND_INTERPOLATED_HERE_TARGETS) {
11156             print "i=$ii tok=$token block=$code_depth\n";
11157             }
11158              
11159 21 100       28 if ( $token eq BACKSLASH ) {
11160 4 50       9 if ( !$code_depth ) { $ii++ }
  0         0  
11161 4         5 next;
11162             }
11163              
11164             # Look for start of interpolation code block, '${', '@{', '$var{', etc
11165 17 100       22 if ( !$code_depth ) {
11166 4 50 33     10 if ( $token eq '$' || $token eq '@' ) {
11167              
11168 4 100 66     19 $ii++
11169             if ( $ii < $max_ii && $rtoken_type->[ $ii + 1 ] eq 'b' );
11170              
11171 4   33     23 while (
      33        
11172             $ii < $max_ii
11173             && ( $rtoken_type->[ $ii + 1 ] eq 'w'
11174             || $rtoken_type->[ $ii + 1 ] eq '::' )
11175             )
11176             {
11177 0         0 $ii++;
11178             } ## end while ( $ii < $max_ii && ...)
11179              
11180 4 50 33     13 $ii++
11181             if ( $ii < $max_ii && $rtoken_type->[ $ii + 1 ] eq 'b' );
11182              
11183 4 50 33     18 if ( $ii < $max_ii && $rtokens->[ $ii + 1 ] eq '{' ) {
11184 4         5 $ii++;
11185 4         6 $code_depth++;
11186             }
11187             }
11188 4         6 next;
11189             }
11190              
11191             # Continue interpolating while $code_depth > 0..
11192 13 50       20 if ( $token eq '{' ) {
11193 0         0 $code_depth++;
11194 0         0 next;
11195             }
11196 13 100       18 if ( $token eq '}' ) {
11197 4         4 $code_depth--;
11198 4         7 next;
11199             }
11200              
11201             # Look for '<<'
11202 9 50 66     29 if ( $token ne '<'
      66        
11203             || $ii >= $max_ii - 1
11204             || $rtokens->[ $ii + 1 ] ne '<' )
11205             {
11206 5         7 next;
11207             }
11208              
11209             # Remember the location of the first '<' so it can be modified
11210 4         5 my $ii_left_shift = $ii;
11211              
11212 4         5 $ii++;
11213              
11214             # or '<<~'
11215 4 50 33     14 if ( $rtoken_type->[ $ii + 1 ] eq '~' && $ii < $max_ii - 2 ) {
11216 0         0 $ii++;
11217             }
11218              
11219             # blanks ok before targets in quotes
11220 4         6 my $saw_blank;
11221 4 100 66     13 if ( $rtoken_type->[ $ii + 1 ] eq 'b' && $ii < $max_ii - 2 ) {
11222 2         3 $saw_blank = 1;
11223 2         3 $ii++;
11224             }
11225              
11226 4         5 my $next_type = $rtoken_type->[ $ii + 1 ];
11227              
11228             # Look for unquoted targets like "${ \<<END1 }"
11229 4 100 66     14 if ( $next_type eq 'w' ) {
    50 33        
11230 2 50       5 if ($saw_blank) {
11231             ## error: blank target is deprecated
11232             }
11233             else {
11234 2         4 $ii++;
11235 2 50       7 if ( $rmap->[$ii] >= $len_starting_lines ) {
11236 2         3 push @{$rht}, [ $rtokens->[$ii], EMPTY_STRING ];
  2         7  
11237              
11238             # Modify the string so the target is not found again
11239 2         4 my $pos_left_shift = $rmap->[$ii_left_shift];
11240 2         6 substr( $quoted_string, $pos_left_shift, 1, SPACE );
11241 2         5 substr( $quoted_string, $pos_left_shift + 1, 1, SPACE );
11242             }
11243             }
11244             }
11245              
11246             # Look for quoted targets like "${ \<< 'END1' }${ \<<\"END2\" }";
11247             elsif ( $next_type eq "'" || $next_type eq '"' || $next_type eq '`' ) {
11248 2         3 my $quote_char = $next_type;
11249 2         4 $ii++;
11250 2         2 my $here_target = EMPTY_STRING;
11251 2   66     8 while ( ++$ii <= $max_ii && $rtokens->[$ii] ne $quote_char ) {
11252             next
11253 2 50 66     8 if ( $quote_char ne "'" && $rtokens->[$ii] eq BACKSLASH );
11254 2         48 $here_target .= $rtokens->[$ii];
11255             }
11256 2 50       11 if ( $rmap->[$ii] >= $len_starting_lines ) {
11257 2         3 push @{$rht}, [ $here_target, $quote_char ];
  2         10  
11258             }
11259             }
11260             else {
11261             ## no here target found
11262             }
11263 4         10 next;
11264             } ## end while ( ++$ii <= $max_ii )
11265 2         14 return ( $rht, $quoted_string );
11266             } ## end sub find_interpolated_here_targets
11267              
11268             # Some possible non-word quote delimiters, for preliminary checking
11269             my %is_punct_char;
11270              
11271             BEGIN {
11272              
11273 44     44   339 my @q = qw# / " ' { } ( ) [ ] < > ; + - * | % ! x ~ = ? : . ^ & #;
11274 44         138 push @q, '#';
11275 44         114 push @q, COMMA;
11276 44         131088 $is_punct_char{$_} = 1 for @q;
11277             }
11278              
11279             sub follow_quoted_string {
11280              
11281             my (
11282              
11283 3207     3207 0 6516 $self,
11284              
11285             $i_beg,
11286             $in_quote,
11287             $rtokens,
11288             $rtoken_type,
11289             $beginning_tok,
11290             $quote_pos,
11291             $quote_depth,
11292             $max_token_index,
11293              
11294             ) = @_;
11295              
11296             # Scan for a specific token, skipping escaped characters.
11297             # If the quote character is blank, use the first non-blank character.
11298             # Given:
11299             # $rtokens = reference to the array of tokens
11300             # $i = the token index of the first character to search
11301             # $in_quote = number of quoted strings being followed
11302             # $beginning_tok = the starting quote character
11303             # $quote_pos = index to check next for alphanumeric delimiter
11304             # Return:
11305             # $i = the token index of the ending quote character
11306             # $in_quote = decremented if found end, unchanged if not
11307             # $beginning_tok = the starting quote character
11308             # $quote_pos = index to check next for alphanumeric delimiter
11309             # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
11310             # $quoted_string = the text of the quote (without quotation tokens)
11311 3207         4126 my ( $tok, $end_tok );
11312 3207         4104 my $i = $i_beg - 1;
11313 3207         3997 my $quoted_string = EMPTY_STRING;
11314              
11315 3207         3789 0 && do {
11316             print {*STDOUT}
11317             "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
11318             };
11319              
11320             # for a non-blank token, get the corresponding end token
11321 3207 100 33     11897 if (
      66        
11322             $is_punct_char{$beginning_tok}
11323             || ( length($beginning_tok)
11324             && $beginning_tok !~ /^\s+$/ )
11325             )
11326             {
11327             $end_tok =
11328             $matching_end_token{$beginning_tok}
11329 244 100       637 ? $matching_end_token{$beginning_tok}
11330             : $beginning_tok;
11331             }
11332              
11333             # for a blank token, find and use the first non-blank one
11334             else {
11335 2963 100       5130 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
11336              
11337 2963         5439 while ( $i < $max_token_index ) {
11338 2965         4585 $tok = $rtokens->[ ++$i ];
11339              
11340 2965 100       5371 if ( $rtoken_type->[$i] ne 'b' ) {
11341              
11342 2963 50 66     6324 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
11343 0         0 $i = $max_token_index;
11344             }
11345             else {
11346              
11347 2963 100       5032 if ( length($tok) > 1 ) {
11348 1 50       4 if ( $quote_pos <= 0 ) { $quote_pos = 1 }
  1         1  
11349 1         3 $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
11350             }
11351             else {
11352 2962         3531 $beginning_tok = $tok;
11353 2962         3642 $quote_pos = 0;
11354             }
11355             $end_tok =
11356             $matching_end_token{$beginning_tok}
11357 2963 100       5670 ? $matching_end_token{$beginning_tok}
11358             : $beginning_tok;
11359 2963         3382 $quote_depth = 1;
11360 2963         4290 last;
11361             }
11362             }
11363             else {
11364 2         4 $allow_quote_comments = 1;
11365             }
11366             } ## end while ( $i < $max_token_index)
11367             }
11368              
11369             # There are two different loops which search for the ending quote
11370             # character. In the rare case of an alphanumeric quote delimiter, we
11371             # have to look through alphanumeric tokens character-by-character, since
11372             # the pre-tokenization process combines multiple alphanumeric
11373             # characters, whereas for a non-alphanumeric delimiter, only tokens of
11374             # length 1 can match.
11375              
11376             #----------------------------------------------------------------
11377             # Case 1 (rare): loop for case of alphanumeric quote delimiter..
11378             # "quote_pos" is the position the current word to begin searching
11379             #----------------------------------------------------------------
11380 3207 100 100     7275 if ( !$is_punct_char{$beginning_tok} && $beginning_tok =~ /\w/ ) {
11381              
11382             # Note this because it is not recommended practice except
11383             # for obfuscated perl contests
11384 1 50       3 if ( $in_quote == 1 ) {
11385 1         4 $self->write_logfile_entry(
11386             "Note: alphanumeric quote delimiter ($beginning_tok) \n");
11387             }
11388              
11389             # Note: changed < to <= here to fix c109. Relying on extra end blanks.
11390 1         3 while ( $i <= $max_token_index ) {
11391              
11392 4 100 66     11 if ( $quote_pos == 0 || ( $i < 0 ) ) {
11393 3         4 $tok = $rtokens->[ ++$i ];
11394              
11395 3 100       7 if ( $tok eq BACKSLASH ) {
11396              
11397             # retain backslash unless it hides the end token
11398 1 50       4 $quoted_string .= $tok
11399             unless ( $rtokens->[ $i + 1 ] eq $end_tok );
11400 1         3 $quote_pos++;
11401 1 50       4 last if ( $i >= $max_token_index );
11402 1         2 $tok = $rtokens->[ ++$i ];
11403             }
11404             }
11405 4         6 my $old_pos = $quote_pos;
11406              
11407 4         6 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
11408              
11409 4 100       6 if ( $quote_pos > 0 ) {
11410              
11411 1         2 $quoted_string .=
11412             substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
11413              
11414             # NOTE: any quote modifiers will be at the end of '$tok'. If we
11415             # wanted to check them, this is the place to get them. But
11416             # this quote form is rarely used in practice, so it isn't
11417             # worthwhile.
11418              
11419 1         2 $quote_depth--;
11420              
11421 1 50       2 if ( $quote_depth == 0 ) {
11422 1         2 $in_quote--;
11423 1         2 last;
11424             }
11425             }
11426             else {
11427 3 50       6 if ( $old_pos <= length($tok) ) {
11428 3         6 $quoted_string .= substr( $tok, $old_pos );
11429             }
11430             }
11431             } ## end while ( $i <= $max_token_index)
11432             }
11433              
11434             #-----------------------------------------------------------------------
11435             # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
11436             #-----------------------------------------------------------------------
11437             else {
11438              
11439 3206         5856 while ( $i < $max_token_index ) {
11440 12368         14515 $tok = $rtokens->[ ++$i ];
11441              
11442 12368 100       21230 if ( $tok eq $end_tok ) {
    100          
    100          
11443 2963         3414 $quote_depth--;
11444              
11445 2963 100       4819 if ( $quote_depth == 0 ) {
11446 2962         3338 $in_quote--;
11447 2962         3715 last;
11448             }
11449             }
11450             elsif ( $tok eq $beginning_tok ) {
11451 1         2 $quote_depth++;
11452             }
11453             elsif ( $tok eq BACKSLASH ) {
11454              
11455             # retain backslash unless it hides the beginning or end token
11456 451         684 $tok = $rtokens->[ ++$i ];
11457 451 100 100     1808 $quoted_string .= BACKSLASH
11458             if ( $tok ne $end_tok && $tok ne $beginning_tok );
11459             }
11460             else {
11461             ## nothing special
11462             }
11463 9406         14101 $quoted_string .= $tok;
11464             } ## end while ( $i < $max_token_index)
11465             }
11466 3207 100       5431 if ( $i > $max_token_index ) { $i = $max_token_index }
  10         12  
11467             return (
11468              
11469 3207         11335 $i,
11470             $in_quote,
11471             $beginning_tok,
11472             $quote_pos,
11473             $quote_depth,
11474             $quoted_string,
11475              
11476             );
11477             } ## end sub follow_quoted_string
11478              
11479             sub indicate_error {
11480 0     0 0 0 my ( $self, $msg, $line_number, $input_line, $pos, $caret ) = @_;
11481              
11482             # write input line and line with carat's showing where error was detected
11483 0         0 $self->interrupt_logfile();
11484 0         0 $self->warning($msg);
11485 0         0 $self->write_error_indicator_pair( $line_number, $input_line, $pos,
11486             $caret );
11487 0         0 $self->resume_logfile();
11488 0         0 return;
11489             } ## end sub indicate_error
11490              
11491             sub write_error_indicator_pair {
11492 0     0 0 0 my ( $self, $line_number, $input_line, $pos, $caret ) = @_;
11493 0         0 my ( $offset, $numbered_line, $underline ) =
11494             make_numbered_line( $line_number, $input_line, $pos );
11495 0         0 $underline = write_on_underline( $underline, $pos - $offset, $caret );
11496 0         0 $self->warning( $numbered_line . "\n" );
11497 0         0 $underline =~ s/\s+$//;
11498 0         0 $self->warning( $underline . "\n" );
11499 0         0 return;
11500             } ## end sub write_error_indicator_pair
11501              
11502             sub make_numbered_line {
11503              
11504 0     0 0 0 my ( $lineno, $str, $pos ) = @_;
11505              
11506             # Given:
11507             # $lineno=line number
11508             # $str = an input line
11509             # $pos = character position of interest
11510             # Create a string not longer than 80 characters of the form:
11511             # $lineno: sub_string
11512             # such that the sub_string of $str contains the position of interest
11513             #
11514             # Here is an example of what we want, in this case we add trailing
11515             # '...' because the line is long.
11516             #
11517             # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
11518             #
11519             # Here is another example, this time in which we used leading '...'
11520             # because of excessive length:
11521             #
11522             # 2: ... er of the World Wide Web Consortium's
11523             #
11524             # input parameters are:
11525             # $lineno = line number
11526             # $str = the text of the line
11527             # $pos = position of interest (the error) : 0 = first character
11528             #
11529             # We return :
11530             # - $offset = an offset which corrects the position in case we only
11531             # display part of a line, such that $pos-$offset is the effective
11532             # position from the start of the displayed line.
11533             # - $numbered_line = the numbered line as above,
11534             # - $underline = a blank 'underline' which is all spaces with the same
11535             # number of characters as the numbered line.
11536              
11537 0 0       0 my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
11538 0         0 my $excess = length($str) - $offset - 68;
11539 0 0       0 my $numc = ( $excess > 0 ) ? 68 : undef;
11540              
11541 0 0       0 if ( defined($numc) ) {
11542 0 0       0 if ( $offset == 0 ) {
11543 0         0 $str = substr( $str, $offset, $numc - 4 ) . " ...";
11544             }
11545             else {
11546 0         0 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
11547             }
11548             }
11549             else {
11550              
11551 0 0       0 if ( $offset == 0 ) {
11552             }
11553             else {
11554 0         0 $str = "... " . substr( $str, $offset + 4 );
11555             }
11556             }
11557              
11558 0         0 my $numbered_line = sprintf( "%d: ", $lineno );
11559 0         0 $offset -= length($numbered_line);
11560 0         0 $numbered_line .= $str;
11561 0         0 my $underline = SPACE x length($numbered_line);
11562 0         0 return ( $offset, $numbered_line, $underline );
11563             } ## end sub make_numbered_line
11564              
11565             sub write_on_underline {
11566              
11567 0     0 0 0 my ( $underline, $pos, $pos_chr ) = @_;
11568              
11569             # The "underline" is a string that shows where an error is; it starts
11570             # out as a string of blanks with the same length as the numbered line of
11571             # code above it, and we have to add marking to show where an error is.
11572             # In the example below, we want to write the string '--^' just below
11573             # the line of bad code:
11574             #
11575             # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
11576             # ---^
11577             # We are given the current underline string, plus a position and a
11578             # string to write on it.
11579             #
11580             # In the above example, there will be 2 calls to do this:
11581             # First call: $pos=19, pos_chr=^
11582             # Second call: $pos=16, pos_chr=---
11583             #
11584             # This is a trivial thing to do with substr, but there is some
11585             # checking to do.
11586              
11587             # check for error..shouldn't happen
11588 0 0 0     0 if ( $pos < 0 || $pos > length($underline) ) {
11589 0         0 return $underline;
11590             }
11591 0         0 my $excess = length($pos_chr) + $pos - length($underline);
11592 0 0       0 if ( $excess > 0 ) {
11593 0         0 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
11594             }
11595 0         0 substr( $underline, $pos, length($pos_chr), $pos_chr );
11596 0         0 return ($underline);
11597             } ## end sub write_on_underline
11598              
11599             sub pre_tokenize {
11600              
11601 7175     7175 0 12383 my ( $str, ($max_tokens_wanted) ) = @_;
11602              
11603             # Input parameters:
11604             # $str = string to be parsed
11605             # $max_tokens_wanted > 0 to stop on reaching this many tokens.
11606             # = undef or 0 means get all tokens
11607              
11608             # Break a string, $str, into a sequence of preliminary tokens (pre-tokens).
11609             # We look for these types of tokens:
11610             # words (type='w'), example: 'max_tokens_wanted'
11611             # digits (type = 'd'), example: '0755'
11612             # whitespace (type = 'b'), example: ' '
11613             # single character punct (type = char) example: '='
11614              
11615             # Later operations will combine one or more of these pre-tokens into final
11616             # tokens. We cannot do better than this yet because we might be in a
11617             # quoted string or pattern.
11618              
11619             # An advantage of doing this pre-tokenization step is that it keeps almost
11620             # all of the regex parsing very simple and localized right here. A
11621             # disadvantage is that in some extremely rare instances we will have to go
11622             # back and split a pre-token.
11623              
11624             # Return parameters:
11625 7175         10432 my @tokens = (); # array of the tokens themselves
11626 7175         12657 my @token_map = (0); # string position of start of each token
11627 7175         9194 my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
11628              
11629 7175 100       11981 if ( !$max_tokens_wanted ) { $max_tokens_wanted = -1 }
  6862         8409  
11630              
11631 7175         12727 while ( $max_tokens_wanted-- ) {
11632              
11633 96069 100       200751 if (
11634             $str =~ m{
11635             \G(
11636             (\s+) # type 'b' = whitespace - this must come before \W
11637             | (\W) # or type=char = single-character, non-whitespace punct
11638             | (\d+) # or type 'd' = sequence of digits - must come before \w
11639             | (\w+) # or type 'w' = words not starting with a digit
11640             )
11641             }gcx
11642             )
11643             {
11644 89052         136248 push @tokens, $1;
11645 89052 100       177240 push @type,
    100          
    100          
11646             defined($2) ? 'b' : defined($3) ? $1 : defined($4) ? 'd' : 'w';
11647 89052         137013 push @token_map, pos($str);
11648             }
11649              
11650             # that's all..
11651             else {
11652 7017         38716 return ( \@tokens, \@token_map, \@type );
11653             }
11654             } ## end while ( $max_tokens_wanted...)
11655              
11656 158         617 return ( \@tokens, \@token_map, \@type );
11657             } ## end sub pre_tokenize
11658              
11659             sub show_tokens {
11660              
11661             # This is an uncalled debug routine, saved for reference
11662 0     0 0   my ( $rtokens, $rtoken_map ) = @_;
11663 0           my $num = scalar( @{$rtokens} );
  0            
11664              
11665 0           foreach my $i ( 0 .. $num - 1 ) {
11666 0           my $len = length( $rtokens->[$i] );
11667 0           print {*STDOUT} "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n";
  0            
11668             }
11669 0           return;
11670             } ## end sub show_tokens
11671              
11672             sub dump_token_types {
11673 0     0 0   my ( $class, $fh ) = @_;
11674              
11675             # This should be the latest list of token types in use
11676             # adding NEW_TOKENS: add a comment here
11677 0           $fh->print(<<'END_OF_LIST');
11678              
11679             Here is a list of the token types currently used for lines of type 'CODE'.
11680             For the following tokens, the "type" of a token is just the token itself.
11681              
11682             .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
11683             ( ) <= >= == =~ !~ != ++ -- /= x=
11684             ... **= <<= >>= &&= ||= //= <=>
11685             , + - / * | % ! x ~ = \ ? : . < > ^ & ^^
11686              
11687             The following additional token types are defined:
11688              
11689             type meaning
11690             b blank (white space)
11691             { indent: opening structural curly brace or square bracket or paren
11692             (code block, anonymous hash reference, or anonymous array reference)
11693             } outdent: right structural curly brace or square bracket or paren
11694             [ left non-structural square bracket (enclosing an array index)
11695             ] right non-structural square bracket
11696             ( left non-structural paren (all but a list right of an =)
11697             ) right non-structural paren
11698             L left non-structural curly brace (enclosing a key)
11699             R right non-structural curly brace
11700             ; terminal semicolon
11701             f indicates a semicolon in a "for" statement
11702             h here_doc operator <<
11703             # a comment
11704             Q indicates a quote or pattern
11705             q indicates a qw quote block
11706             k a perl keyword
11707             C user-defined constant or constant function (with void prototype = ())
11708             U user-defined function taking parameters
11709             G user-defined function taking block parameter (like grep/map/eval)
11710             S sub definition (reported as type 'i' in older versions)
11711             P package definition (reported as type 'i' in older versions)
11712             t type indicator such as %,$,@,*,&,sub
11713             w bare word (perhaps a subroutine call)
11714             i identifier of some type (with leading %, $, @, *, &, sub, -> )
11715             n a number
11716             v a v-string
11717             F a file test operator (like -e)
11718             Y File handle
11719             Z identifier in indirect object slot: may be file handle, object
11720             J LABEL: code block label
11721             j LABEL after next, last, redo, goto
11722             p unary +
11723             m unary -
11724             pp pre-increment operator ++
11725             mm pre-decrement operator --
11726             A : used as attribute separator
11727              
11728             Here are the '_line_type' codes used internally:
11729             SYSTEM - system-specific code before hash-bang line
11730             CODE - line of perl code (including comments)
11731             POD_START - line starting pod, such as '=head'
11732             POD - pod documentation text
11733             POD_END - last line of pod section, '=cut'
11734             HERE - text of here-document
11735             HERE_END - last line of here-doc (target word)
11736             FORMAT - format section
11737             FORMAT_END - last line of format section, '.'
11738             SKIP - code skipping section
11739             SKIP_END - last line of code skipping section, '#>>V'
11740             DATA_START - __DATA__ line
11741             DATA - unidentified text following __DATA__
11742             END_START - __END__ line
11743             END - unidentified text following __END__
11744             ERROR - we are in big trouble, probably not a perl script
11745             END_OF_LIST
11746              
11747 0           return;
11748             } ## end sub dump_token_types
11749              
11750             #------------------
11751             # About Token Types
11752             #------------------
11753              
11754             # The array "valid_token_types" in the BEGIN section has an up-to-date list
11755             # of token types. Sub 'dump_token_types' should be kept up to date with
11756             # token types.
11757              
11758             # Ideally, tokens are the smallest pieces of text
11759             # such that a newline may be inserted between any pair of tokens without
11760             # changing or invalidating the program. This version comes close to this,
11761             # although there are necessarily a few exceptions which must be caught by
11762             # the formatter. Many of these involve the treatment of bare words.
11763             #
11764             # To simplify things, token types are either a single character, or they
11765             # are identical to the tokens themselves.
11766             #
11767             # As a debugging aid, the -D flag creates a file containing a side-by-side
11768             # comparison of the input string and its tokenization for each line of a file.
11769             # This is an invaluable debugging aid.
11770             #
11771             # In addition to tokens, and some associated quantities, the tokenizer
11772             # also returns flags indication any special line types. These include
11773             # quotes, here_docs, formats.
11774             #
11775             #------------------
11776             # Adding NEW_TOKENS
11777             #------------------
11778             #
11779             # Here are some notes on the minimal steps. I wrote these notes while
11780             # adding the 'v' token type for v-strings, which are things like version
11781             # numbers 5.6.0, and ip addresses, and will use that as an example. ( You
11782             # can use your editor to search for the string "NEW_TOKENS" to find the
11783             # appropriate sections to change):
11784              
11785             # *. For another example, search for the smartmatch operator '~~'
11786             # with your editor to see where updates were made for it.
11787              
11788             # *. For another example, search for the string 'c250', which shows
11789             # locations where changes for new types 'P' and 'S' were made.
11790              
11791             # *. Think of a new, unused character for the token type, and add to
11792             # the array @valid_token_types in the BEGIN section of this package.
11793             # For example, I used 'v' for v-strings.
11794             #
11795             # *. Implement coding to recognize the $type of the token in this routine.
11796             # This is the hardest part, and is best done by imitating or modifying
11797             # some of the existing coding. For example, to recognize v-strings, I
11798             # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
11799             # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
11800             #
11801             # *. Update sub operator_expected. This update is critically important but
11802             # the coding is trivial. Look at the comments in that routine for help.
11803             # For v-strings, which should behave like numbers, I just added 'v' to the
11804             # regex used to handle numbers and strings (types 'n' and 'Q').
11805             #
11806             # *. Implement a 'bond strength' rule in sub set_bond_strengths in
11807             # Perl::Tidy::Formatter for breaking lines around this token type. You can
11808             # skip this step and take the default at first, then adjust later to get
11809             # desired results. For adding type 'v', I looked at sub bond_strength and
11810             # saw that number type 'n' was using default strengths, so I didn't do
11811             # anything. I may tune it up someday if I don't like the way line
11812             # breaks with v-strings look.
11813             #
11814             # *. Implement a 'whitespace' rule in sub set_whitespace_flags in
11815             # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
11816             # and saw that type 'n' used spaces on both sides, so I just added 'v'
11817             # to the array @spaces_both_sides.
11818             #
11819             # *. Update HtmlWriter package so that users can colorize the token as
11820             # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
11821             # that package. For v-strings, I initially chose to use a default color
11822             # equal to the default for numbers, but it might be nice to change that
11823             # eventually.
11824             #
11825             # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
11826             #
11827             # *. Run lots and lots of debug tests. Start with special files designed
11828             # to test the new token type. Run with the -D flag to create a .DEBUG
11829             # file which shows the tokenization. When these work ok, test as many old
11830             # scripts as possible. Start with all of the '.t' files in the 'test'
11831             # directory of the distribution file. Compare .tdy output with previous
11832             # version and updated version to see the differences. Then include as
11833             # many more files as possible. My own technique has been to collect a huge
11834             # number of perl scripts (thousands!) into one directory and run perltidy
11835             # *, then run diff between the output of the previous version and the
11836             # current version.
11837              
11838             BEGIN {
11839              
11840             # These names are used in error messages
11841 44     44   358 @opening_brace_names = qw# '{' '[' '(' '?' #;
11842 44         159 @closing_brace_names = qw# '}' ']' ')' ':' #;
11843              
11844 44         114 my @q;
11845              
11846 44         278 my @digraphs = qw#
11847             .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
11848             <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^. ^^
11849             #;
11850 44         1201 $is_digraph{$_} = 1 for @digraphs;
11851              
11852 44         179 @q = qw(
11853             . : < > * & | / - = + - % ^ ! x ~
11854             );
11855 44         409 $can_start_digraph{$_} = 1 for @q;
11856              
11857 44         200 my @trigraphs =
11858             qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~ ^^= );
11859 44         532 $is_trigraph{$_} = 1 for @trigraphs;
11860              
11861 44         192 my @tetragraphs = qw( <<>> );
11862 44         161 $is_tetragraph{$_} = 1 for @tetragraphs;
11863              
11864             # make a hash of all valid token types for self-checking the tokenizer
11865             # (adding NEW_TOKENS : select a new character and add to this list)
11866             # fix for c250: added new token type 'P' and 'S'
11867 44         424 my @valid_token_types = qw#
11868             A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P S
11869             { } ( ) [ ] ; + - / * | % ! x ~ = ? : . < > ^ &
11870             #;
11871 44         164 push @valid_token_types, BACKSLASH;
11872 44         331 push( @valid_token_types, @digraphs );
11873 44         180 push( @valid_token_types, @trigraphs );
11874 44         95 push( @valid_token_types, @tetragraphs );
11875 44         96 push( @valid_token_types, ( '#', COMMA, 'CORE::' ) );
11876 44         2363 $is_valid_token_type{$_} = 1 for @valid_token_types;
11877              
11878             # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
11879 44         323 my @file_test_operators =
11880             qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z );
11881 44         631 $is_file_test_operator{$_} = 1 for @file_test_operators;
11882              
11883             # these functions have prototypes of the form (&), so when they are
11884             # followed by a block, that block MAY BE followed by an operator.
11885             # Smartmatch operator ~~ may be followed by anonymous hash or array ref
11886 44         149 @q = qw( do eval );
11887 44         158 $is_block_operator{$_} = 1 for @q;
11888              
11889             # these functions allow an identifier in the indirect object slot
11890 44         135 @q = qw( print printf sort exec system say );
11891 44         281 $is_indirect_object_taker{$_} = 1 for @q;
11892              
11893             # Keywords which definitely produce error if an OPERATOR is expected
11894 44         113 @q = qw( my our state local use require );
11895 44         282 $is_TERM_keyword{$_} = 1 for @q;
11896              
11897             # Note: 'field' will be added by sub check_options if --use-feature=class
11898 44         100 @q = qw( my our state );
11899 44         140 $is_my_our_state{$_} = 1 for @q;
11900              
11901             # These tokens may precede a code block
11902             # patched for SWITCH/CASE/CATCH. Actually these could be removed
11903             # now and we could let the extended-syntax coding handle them.
11904             # Added 'default' for Switch::Plain.
11905             # Note: 'ADJUST' will be added by sub check_options if --use-feature=class
11906 44         271 @q = qw(
11907             BEGIN END CHECK INIT AUTOLOAD DESTROY
11908             UNITCHECK continue if elsif else unless
11909             do while until eval for foreach
11910             map grep sort switch case given
11911             when default catch try finally
11912             );
11913 44         1050 $is_code_block_token{$_} = 1 for @q;
11914              
11915             # These block types terminate statements and do not need a trailing
11916             # semicolon; patched for SWITCH/CASE/; This may be updated in sub
11917             # check_options.
11918 44         266 @q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
11919             if elsif else unless while until for foreach switch case given when );
11920 44         556 $is_zero_continuation_block_type{$_} = 1 for @q;
11921              
11922             # Note: this hash was formerly named '%is_not_zero_continuation_block_type'
11923             # to contrast it with the block types in '%is_zero_continuation_block_type'
11924             # Note: added 'sub' for anonymous sub blocks (c443)
11925 44         144 @q = qw( sort map grep eval do sub );
11926 44         178 $is_sort_map_grep_eval_do_sub{$_} = 1 for @q;
11927              
11928 44         140 @q = qw( sort map grep );
11929 44         112 $is_sort_map_grep{$_} = 1 for @q;
11930              
11931 44         101 %is_grep_alias = ();
11932              
11933             # I'll build the list of keywords incrementally
11934 44         67 my @Keywords = ();
11935              
11936             # keywords and tokens after which a value or pattern is expected,
11937             # but not an operator. In other words, these should consume terms
11938             # to their right, or at least they are not expected to be followed
11939             # immediately by operators.
11940 44         1228 my @value_requestor = qw(
11941             AUTOLOAD BEGIN CHECK DESTROY
11942             END EQ GE GT
11943             INIT LE LT NE
11944             UNITCHECK abs accept alarm
11945             and atan2 bind binmode
11946             bless break caller chdir
11947             chmod chomp chop chown
11948             chr chroot close closedir
11949             cmp connect continue cos
11950             crypt dbmclose dbmopen defined
11951             delete die dump each
11952             else elsif eof eq
11953             evalbytes exec exists exit
11954             exp fc fcntl fileno
11955             flock for foreach formline
11956             ge getc getgrgid getgrnam
11957             gethostbyaddr gethostbyname getnetbyaddr getnetbyname
11958             getpeername getpgrp getpriority getprotobyname
11959             getprotobynumber getpwnam getpwuid getservbyname
11960             getservbyport getsockname getsockopt glob
11961             gmtime goto grep gt
11962             hex if index int
11963             ioctl join keys kill
11964             last lc lcfirst le
11965             length link listen local
11966             localtime lock log lstat
11967             lt map mkdir msgctl
11968             msgget msgrcv msgsnd my
11969             ne next no not
11970             oct open opendir or
11971             ord our pack pipe
11972             pop pos print printf
11973             prototype push quotemeta rand
11974             read readdir readlink readline
11975             readpipe recv redo ref
11976             rename require reset return
11977             reverse rewinddir rindex rmdir
11978             scalar seek seekdir select
11979             semctl semget semop send
11980             sethostent setnetent setpgrp setpriority
11981             setprotoent setservent setsockopt shift
11982             shmctl shmget shmread shmwrite
11983             shutdown sin sleep socket
11984             socketpair sort splice split
11985             sprintf sqrt srand stat
11986             state study substr symlink
11987             syscall sysopen sysread sysseek
11988             system syswrite tell telldir
11989             tie tied truncate uc
11990             ucfirst umask undef unless
11991             unlink unpack unshift untie
11992             until use utime values
11993             vec waitpid warn while
11994             write xor case catch
11995             default err given isa
11996             say switch when
11997             );
11998              
11999             # Note: 'ADJUST', 'field' are added by sub check_options
12000             # if --use-feature=class
12001              
12002             # patched above for SWITCH/CASE given/when err say
12003             # 'err' is a fairly safe addition.
12004             # Added 'default' for Switch::Plain. Note that we could also have
12005             # a separate set of keywords to include if we see 'use Switch::Plain'
12006 44         1429 push( @Keywords, @value_requestor );
12007              
12008             # These are treated the same but are not keywords:
12009 44         128 my @extra_vr = qw( constant vars );
12010 44         215 push( @value_requestor, @extra_vr );
12011              
12012 44         6594 $expecting_term_token{$_} = 1 for @value_requestor;
12013              
12014             # this list contains keywords which do not look for arguments,
12015             # so that they might be followed by an operator, or at least
12016             # not a term.
12017 44         250 my @operator_requestor = qw(
12018             endgrent endhostent endnetent endprotoent
12019             endpwent endservent fork getgrent
12020             gethostent getlogin getnetent getppid
12021             getprotoent getpwent getservent setgrent
12022             setpwent time times wait
12023             wantarray
12024             );
12025              
12026 44         135 push( @Keywords, @operator_requestor );
12027              
12028             # These are treated the same but are not considered keywords:
12029 44         91 my @extra_or = qw( STDERR STDIN STDOUT );
12030              
12031 44         92 push( @operator_requestor, @extra_or );
12032              
12033 44         857 $expecting_operator_token{$_} = 1 for @operator_requestor;
12034              
12035             # these token TYPES expect trailing operator but not a term
12036             # note: ++ and -- are post-increment and decrement, 'C' = constant
12037 44         105 my @operator_requestor_types = qw( ++ -- C <> q );
12038              
12039             # NOTE: This hash is available but not currently used
12040 44         195 $expecting_operator_types{$_} = 1 for @operator_requestor_types;
12041              
12042             # these token TYPES consume values (terms)
12043             # note: pp and mm are pre-increment and decrement
12044             # f=semicolon in for, F=file test operator
12045 44         904 my @value_requestor_type = qw#
12046             L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
12047             **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
12048             <= >= == != => > < % * / ? & | ** <=> ~~ !~~ <<~
12049             f F pp mm Y p m U J G j >> << ^ t
12050             ~. ^. |. &. ^.= |.= &.= ^^
12051             #;
12052 44         163 push @value_requestor_type, BACKSLASH;
12053 44         83 push @value_requestor_type, COMMA;
12054              
12055             # NOTE: This hash is available but not currently used
12056 44         1410 $expecting_term_types{$_} = 1 for @value_requestor_type;
12057              
12058             # Note: the following valid token types are not assigned here to
12059             # hashes requesting to be followed by values or terms, but are
12060             # instead currently hard-coded into sub operator_expected:
12061             # ) -> :: Q R Z ] b h i k n v w } #
12062              
12063             # A syntax error will occur if following operators are not followed by a
12064             # TERM (with an exception made for tokens in sub signatures).
12065             # NOTE: this list does not include unary operator '!'
12066              
12067             # Note the following omissions from the syntax checking operators below
12068             # 'U' = user sub, depends on prototype
12069             # 'F' = file test works on $_ if no following term
12070             # 'Y' = indirect object, too risky to check syntax
12071              
12072 44         443 my @binary_ops = qw#
12073             !~ =~ . .. : && || // = + - x
12074             **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
12075             <= >= == != > < % * / ? & | ** <=> ~~ !~~ <<~
12076             >> << ^
12077             ^. |. &. ^.= |.= &.= ^^
12078             #;
12079 44         772 $is_binary_operator_type{$_} = 1 for @binary_ops;
12080              
12081             # Note: omitting unary file test type 'F' here because it assumes $_
12082 44         129 my @unary_ops = qw# ! ~ ~. m p mm pp #;
12083 44         477 $is_binary_operator_type{$_} = 1 for @binary_ops;
12084 44         89 push @unary_ops, BACKSLASH;
12085 44         1165 $is_binary_or_unary_operator_type{$_} = 1 for ( @binary_ops, @unary_ops );
12086              
12087 44         207 my @binary_keywords = qw( and or err eq ne cmp );
12088 44         192 $is_binary_keyword{$_} = 1 for @binary_keywords;
12089              
12090 44         189 $is_binary_or_unary_keyword{$_} = 1 for ( @binary_keywords, 'not' );
12091              
12092             # A syntax error occurs if a binary operator follows any of these types:
12093             # NOTE: a ',' cannot be included because of parenless calls (c015).
12094             # For example this is valid: print "hello\n", && print "goodbye\n";
12095             # NOTE: the 'not' keyword could be added to a corresponding _keyword list
12096             # NOTE: label 'j' omitted, for example: -f $file ? redo BLOCK : last BLOCK;
12097             # NOTE: Removed 'A': fixes git162.t
12098 44         185 @q = qw< L { [ ( ; f J t >;
12099 44         348 $is_not_a_TERM_producer_type{$_} = 1 for ( @q, @unary_ops );
12100              
12101 44         182 @q = qw( q qq qx qr s y tr m );
12102 44         268 $is_q_qq_qx_qr_s_y_tr_m{$_} = 1 for @q;
12103              
12104             # Note added 'qw' here
12105 44         144 @q = qw( q qq qw qx qr s y tr m );
12106 44         233 $is_q_qq_qw_qx_qr_s_y_tr_m{$_} = 1 for @q;
12107              
12108             # Quote modifiers:
12109             # original ref: camel 3 p 147,
12110             # but perl may accept undocumented flags
12111             # perl 5.10 adds 'p' (preserve)
12112             # Perl version 5.22 added 'n'
12113             # From http://perldoc.perl.org/perlop.html we have
12114             # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
12115             # s/PATTERN/REPLACEMENT/msixpodualngcer
12116             # y/SEARCHLIST/REPLACEMENTLIST/cdsr
12117             # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
12118             # qr/STRING/msixpodualn
12119 44         242 %quote_modifiers = (
12120             's' => '[msixpodualngcer]',
12121             'y' => '[cdsr]',
12122             'tr' => '[cdsr]',
12123             'm' => '[msixpodualngc]',
12124             'qr' => '[msixpodualn]',
12125             'q' => EMPTY_STRING,
12126             'qq' => EMPTY_STRING,
12127             'qw' => EMPTY_STRING,
12128             'qx' => EMPTY_STRING,
12129             );
12130              
12131             # Note: 'class' will be added by sub check_options if -use-feature=class
12132 44         133 @q = qw( package );
12133 44         157 $is_package{$_} = 1 for @q;
12134              
12135 44         100 @q = qw( if elsif unless );
12136 44         118 $is_if_elsif_unless{$_} = 1 for @q;
12137              
12138 44         107 @q = qw( ; t );
12139 44         122 $is_semicolon_or_t{$_} = 1 for @q;
12140              
12141 44         116 @q = qw( if elsif unless case when );
12142 44         147 $is_if_elsif_unless_case_when{$_} = 1 for @q;
12143              
12144             # Hash of other possible line endings which may occur.
12145             # Keep these coordinated with the regex where this is used.
12146             # Note: chr(13) = chr(015)="\r".
12147 44         90 @q = ( chr(13), chr(29), chr(26) );
12148 44         167 $other_line_endings{$_} = 1 for @q;
12149              
12150             # These keywords are handled specially in the tokenizer code:
12151 44         176 my @special_keywords =
12152             qw( do eval format m package q qq qr qw qx s sub tr y );
12153 44         365 push( @Keywords, @special_keywords );
12154              
12155             # Keywords after which list formatting may be used
12156             # WARNING: do not include |map|grep|eval or perl may die on
12157             # syntax errors (map1.t).
12158 44         527 my @keyword_taking_list = qw(
12159             and chmod chomp chop
12160             chown dbmopen die elsif
12161             exec fcntl for foreach
12162             formline getsockopt given if
12163             index ioctl join kill
12164             local msgctl msgrcv msgsnd
12165             my open or our
12166             pack print printf push
12167             read readpipe recv return
12168             reverse rindex seek select
12169             semctl semget send setpriority
12170             setsockopt shmctl shmget shmread
12171             shmwrite socket socketpair sort
12172             splice split sprintf state
12173             substr syscall sysopen sysread
12174             sysseek system syswrite tie
12175             unless unlink unpack unshift
12176             until vec warn when
12177             while
12178             );
12179              
12180             # NOTE: This hash is available but not currently used
12181 44         1306 $is_keyword_taking_list{$_} = 1 for @keyword_taking_list;
12182              
12183             # perl functions which may be unary operators.
12184              
12185             # This list is used to decide if a pattern delimited by slashes, /pattern/,
12186             # can follow one of these keywords.
12187 44         140 @q = qw( chomp eof eval fc lc pop shift uc undef );
12188              
12189 44         276 $is_keyword_rejecting_slash_as_pattern_delimiter{$_} = 1 for @q;
12190              
12191             # These are keywords for which an arg may optionally be omitted. They are
12192             # currently only used to disambiguate a ? used as a ternary from one used
12193             # as a (deprecated) pattern delimiter. In the future, they might be used
12194             # to give a warning about ambiguous syntax before a /.
12195             # Note: split has been omitted (see note below).
12196 44         487 my @keywords_taking_optional_arg = qw(
12197             abs alarm caller chdir chomp chop
12198             chr chroot close cos defined die
12199             eof eval evalbytes exit exp fc
12200             getc glob gmtime hex int last
12201             lc lcfirst length localtime log lstat
12202             mkdir next oct ord pop pos
12203             print printf prototype quotemeta rand readline
12204             readlink readpipe redo ref require reset
12205             reverse rmdir say select shift sin
12206             sleep sqrt srand stat study tell
12207             uc ucfirst umask undef unlink warn
12208             write
12209             );
12210 44         1055 $is_keyword_taking_optional_arg{$_} = 1 for @keywords_taking_optional_arg;
12211              
12212             # This list is used to decide if a pattern delimited by question marks,
12213             # ?pattern?, can follow one of these keywords. Note that from perl 5.22
12214             # on, a ?pattern? is not recognized, so we can be much more strict than
12215             # with a /pattern/. Note that 'split' is not in this list. In current
12216             # versions of perl a question following split must be a ternary, but
12217             # in older versions it could be a pattern. The guessing algorithm will
12218             # decide. We are combining two lists here to simplify the test.
12219 44         810 @q = ( @keywords_taking_optional_arg, @operator_requestor );
12220 44         1695 $is_keyword_rejecting_question_as_pattern_delimiter{$_} = 1 for @q;
12221              
12222             # These are not used in any way yet
12223             # my @unused_keywords = qw(
12224             # __FILE__
12225             # __LINE__
12226             # __PACKAGE__
12227             # );
12228              
12229             # The list of keywords was originally extracted from function 'keyword' in
12230             # perl file toke.c version 5.005.03, using this utility, plus a
12231             # little editing: (file getkwd.pl):
12232             # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
12233             # Add 'get' prefix where necessary, then split into the above lists.
12234             # This list should be updated as necessary.
12235             # The list should not contain these special variables:
12236             # ARGV DATA ENV SIG STDERR STDIN STDOUT
12237             # __DATA__ __END__
12238              
12239 44         4429 $is_keyword{$_} = 1 for @Keywords;
12240              
12241 44         8067 %matching_end_token = (
12242             '{' => '}',
12243             '(' => ')',
12244             '[' => ']',
12245             '<' => '>',
12246             );
12247             } ## end BEGIN
12248              
12249             } ## end package Perl::Tidy::Tokenizer
12250             1;