File Coverage

blib/lib/Perl/Tidy/Tokenizer.pm

Criterion	Covered	Total	%
statement	2672	3695	72.3
branch	1246	1914	65.1
condition	749	1193	62.7
subroutine	168	202	83.1
pod	0	147	0.0
total	4835	7151	67.6

line	stmt	bran	cond	sub	pod	time	code
1							#####################################################################
2							#
3							# Perl::Tidy::Tokenizer reads a source and breaks it into a stream of tokens
4							#
5							# Usage Outline:
6							#
7							# STEP 1: initialize or re-initialize Tokenizer with user options
8							# Perl::Tidy::Tokenizer::check_options($rOpts);
9							#
10							# STEP 2: create a tokenizer for a specific input source object
11							# my $tokenizer = Perl::Tidy::Tokenizer->new(
12							# source_object => $source,
13							# ...
14							# );
15							#
16							# STEP 3: get and process each tokenized 'line' (a hash ref of token info)
17							# while ( my $line = $tokenizer->get_line() ) {
18							# $formatter->write_line($line);
19							# }
20							#
21							# STEP 4: report errors
22							# my $severe_error = $tokenizer->report_tokenization_errors();
23							#
24							# The source object can be a STRING ref, an ARRAY ref, or an object with a
25							# get_line() method which supplies one line (a character string) perl call.
26							#
27							# NOTE: This is not a real class. Only one tokenizer my be used.
28							#
29							########################################################################
30
31							package Perl::Tidy::Tokenizer;
32	44			44		262	use strict;
	44					90
	44					1514
33	44			44		177	use warnings;
	44					72
	44					1886
34	44			44		166	use English qw( -no_match_vars );
	44					69
	44					233
35
36							our $VERSION = '20260204';
37
38	44			44		13408	use Carp;
	44					83
	44					2210
39
40	44			44		183	use constant DEVEL_MODE => 0;
	44					73
	44					2208
41	44			44		171	use constant DEBUG_GUESS_MODE => 0;
	44					72
	44					1818
42	44			44		218	use constant EMPTY_STRING => q{};
	44					74
	44					1600
43	44			44		172	use constant SPACE => q{ };
	44					113
	44					1662
44	44			44		168	use constant COMMA => q{,};
	44					63
	44					1662
45	44			44		177	use constant BACKSLASH => q{\\};
	44					82
	44					2557
46
47							{ #<<< A non-indenting brace to contain all lexical variables
48
49							# List of hash keys to prevent -duk from listing them.
50							# (note the backtick in this list)
51							my @unique_hash_keys_uu = qw( ` RPerl _rtype_sequence _ending_in_quote );
52
53							# Parent sequence number of tree of containers; must be 1
54	44			44		203	use constant SEQ_ROOT => 1;
	44					94
	44					1667
55
56							# Defaults for guessing old indentation
57	44			44		197	use constant INDENT_COLUMNS_DEFAULT => 4;
	44					86
	44					1443
58	44			44		147	use constant TAB_SIZE_DEFAULT => 8;
	44					69
	44					1409
59
60							# Decimal values of some ascii characters for quick checks
61	44			44		147	use constant ORD_TAB => 9;
	44					81
	44					1304
62	44			44		171	use constant ORD_SPACE => 32;
	44					63
	44					1345
63	44			44		190	use constant ORD_PRINTABLE_MIN => 33;
	44					63
	44					1830
64	44			44		198	use constant ORD_PRINTABLE_MAX => 126;
	44					64
	44					7960
65
66							# GLOBAL VARIABLES which change during tokenization:
67							# These could also be stored in $self but it is more convenient and
68							# efficient to make them global lexical variables.
69							# INITIALIZER: sub prepare_for_a_new_file
70							my (
71
72							$brace_depth,
73							$context,
74							$current_package,
75							$last_nonblank_block_type,
76							$last_nonblank_token,
77							$last_nonblank_type,
78							$next_sequence_number,
79							$paren_depth,
80							$rbrace_context,
81							$rbrace_package,
82							$rbrace_structural_type,
83							$rbrace_type,
84							$rcurrent_depth,
85							$rcurrent_sequence_number,
86							$ris_lexical_sub,
87							$rdepth_array,
88							$ris_block_function,
89							$ris_block_list_function,
90							$ris_constant,
91							$ris_user_function,
92							$rnested_statement_type,
93							$rnested_ternary_flag,
94							$rparen_semicolon_count,
95							$rparen_vars,
96							$rparen_type,
97							$rsaw_function_definition,
98							$rsaw_use_module,
99							$rsquare_bracket_structural_type,
100							$rsquare_bracket_type,
101							$rstarting_line_of_current_depth,
102							$rtotal_depth,
103							$ruser_function_prototype,
104							$square_bracket_depth,
105							$statement_type,
106							$total_depth,
107							);
108
109							my (
110
111							# GLOBAL CONSTANTS for routines in this package,
112							# INITIALIZER: BEGIN block.
113							%can_start_digraph,
114							%expecting_operator_token,
115							%expecting_operator_types,
116							%expecting_term_token,
117							%expecting_term_types,
118							%is_block_operator,
119							%is_digraph,
120							%is_file_test_operator,
121							%is_if_elsif_unless,
122							%is_if_elsif_unless_case_when,
123							%is_indirect_object_taker,
124							%is_keyword_rejecting_question_as_pattern_delimiter,
125							%is_keyword_rejecting_slash_as_pattern_delimiter,
126							%is_keyword_taking_list,
127							%is_keyword_taking_optional_arg,
128							%is_q_qq_qw_qx_qr_s_y_tr_m,
129							%is_q_qq_qx_qr_s_y_tr_m,
130							%quote_modifiers,
131							%is_semicolon_or_t,
132							%is_sort_map_grep,
133							%is_sort_map_grep_eval_do_sub,
134							%is_tetragraph,
135							%is_trigraph,
136							%is_valid_token_type,
137							%other_line_endings,
138							%is_binary_operator_type,
139							%is_binary_keyword,
140							%is_binary_or_unary_operator_type,
141							%is_binary_or_unary_keyword,
142							%is_not_a_TERM_producer_type,
143							@closing_brace_names,
144							@opening_brace_names,
145
146							# GLOBAL CONSTANT hash lookup table of operator expected values
147							# INITIALIZER: BEGIN block
148							%op_expected_table,
149
150							# GLOBAL VARIABLES which are constant after being configured.
151							# INITIALIZER: BEGIN block and modified by sub check_options
152							%is_code_block_token,
153							%is_zero_continuation_block_type,
154							%is_keyword,
155							%is_TERM_keyword,
156							%is_my_our_state,
157							%is_package,
158							%matching_end_token,
159
160							# INITIALIZER: sub check_options
161							$code_skipping_pattern_begin,
162							$code_skipping_pattern_end,
163							$format_skipping_pattern_begin,
164							$format_skipping_pattern_end,
165
166							$rOpts_code_skipping,
167							$rOpts_code_skipping_begin,
168							$rOpts_format_skipping,
169							$rOpts_format_skipping_begin,
170							$rOpts_format_skipping_end,
171							$rOpts_starting_indentation_level,
172							$rOpts_indent_columns,
173							$rOpts_look_for_hash_bang,
174							$rOpts_look_for_autoloader,
175							$rOpts_look_for_selfloader,
176							$rOpts_trim_qw,
177							$rOpts_extended_syntax,
178							$rOpts_continuation_indentation,
179							$rOpts_outdent_labels,
180							$rOpts_maximum_level_errors,
181							$rOpts_maximum_unexpected_errors,
182							$rOpts_indent_closing_brace,
183							$rOpts_non_indenting_braces,
184							$rOpts_non_indenting_brace_prefix,
185							$rOpts_whitespace_cycle,
186
187							$tabsize,
188							%is_END_DATA_format_sub,
189							%is_grep_alias,
190							%is_sub,
191							$guess_if_method,
192							);
193
194							# possible values of operator_expected()
195	44			44		183	use constant TERM => -1;
	44					84
	44					1656
196	44			44		155	use constant UNKNOWN => 0;
	44					93
	44					1378
197	44			44		190	use constant OPERATOR => 1;
	44					78
	44					1556
198
199							# possible values of context
200	44			44		165	use constant SCALAR_CONTEXT => -1;
	44					55
	44					1420
201	44			44		182	use constant UNKNOWN_CONTEXT => 0;
	44					72
	44					1420
202	44			44		169	use constant LIST_CONTEXT => 1;
	44					83
	44					1505
203
204							# Maximum number of little messages; probably need not be changed.
205	44			44		159	use constant MAX_NAG_MESSAGES => 6;
	44					63
	44					7876
206
207	0					0	BEGIN {
208
209							# Array index names for $self.
210							# Do not combine with other BEGIN blocks (c101).
211	44			44		299169	my $i = 0;
212							use constant {
213	44					13664	_rhere_target_list_ => $i++,
214							_in_here_doc_ => $i++,
215							_here_doc_target_ => $i++,
216							_here_quote_character_ => $i++,
217							_in_data_ => $i++,
218							_in_end_ => $i++,
219							_in_format_ => $i++,
220							_in_error_ => $i++,
221							_do_not_format_ => $i++,
222							_warning_count_ => $i++,
223							_html_tag_count_ => $i++,
224							_in_pod_ => $i++,
225							_in_code_skipping_ => $i++,
226							_in_format_skipping_ => $i++,
227							_rformat_skipping_list_ => $i++,
228							_in_attribute_list_ => $i++,
229							_in_quote_ => $i++,
230							_quote_target_ => $i++,
231							_line_start_quote_ => $i++,
232							_starting_level_ => $i++,
233							_know_starting_level_ => $i++,
234							_last_line_number_ => $i++,
235							_saw_perl_dash_P_ => $i++,
236							_saw_perl_dash_w_ => $i++,
237							_saw_use_strict_ => $i++,
238							_saw_brace_error_ => $i++,
239							_hit_bug_ => $i++,
240							_look_for_autoloader_ => $i++,
241							_look_for_selfloader_ => $i++,
242							_saw_autoloader_ => $i++,
243							_saw_selfloader_ => $i++,
244							_saw_hash_bang_ => $i++,
245							_saw_end_ => $i++,
246							_saw_data_ => $i++,
247							_saw_negative_indentation_ => $i++,
248							_started_tokenizing_ => $i++,
249							_debugger_object_ => $i++,
250							_diagnostics_object_ => $i++,
251							_logger_object_ => $i++,
252							_save_logfile_ => $i++,
253							_unexpected_error_count_ => $i++,
254							_started_looking_for_here_target_at_ => $i++,
255							_nearly_matched_here_target_at_ => $i++,
256							_line_of_text_ => $i++,
257							_rlower_case_labels_at_ => $i++,
258							_maximum_level_ => $i++,
259							_true_brace_error_count_ => $i++,
260							_rOpts_ => $i++,
261							_rinput_lines_ => $i++,
262							_input_line_index_next_ => $i++,
263							_rtrimmed_input_lines_ => $i++,
264							_rclosing_brace_indentation_hash_ => $i++,
265							_show_indentation_table_ => $i++,
266							_rnon_indenting_brace_stack_ => $i++,
267							_rbareword_info_ => $i++,
268	44			44		273	};
	44					99
269							} ## end BEGIN
270
271							{ ## closure for subs to count instances
272
273							# methods to count instances
274							my $_count = 0;
275	0			0	0	0	sub get_count { return $_count; }
276	649			649		1924	sub _increment_count { return ++$_count }
277	649			649		1059	sub _decrement_count { return --$_count }
278							}
279
280							sub DESTROY {
281	649			649		1250	my $self = shift;
282	649					1996	_decrement_count();
283	649					15288	return;
284							}
285
286							sub AUTOLOAD {
287
288							# Catch any undefined sub calls so that we are sure to get
289							# some diagnostic information. This sub should never be called
290							# except for a programming error.
291	0			0		0	our $AUTOLOAD;
292	0	0				0	return if ( $AUTOLOAD =~ /\bDESTROY$/ );
293	0					0	my ( $pkg, $fname, $lno ) = caller();
294	0					0	my $my_package = __PACKAGE__;
295	0					0	print {*STDERR} <<EOM;
	0					0
296							======================================================================
297							Error detected in package '$my_package', version $VERSION
298							Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
299							Called from package: '$pkg'
300							Called from File '$fname' at line '$lno'
301							This error is probably due to a recent programming change
302							======================================================================
303							EOM
304	0					0	exit 1;
305							} ## end sub AUTOLOAD
306
307							sub Die {
308	0			0	0	0	my ($msg) = @_;
309	0					0	Perl::Tidy::Die($msg);
310	0					0	croak "unexpected return from Perl::Tidy::Die";
311							}
312
313							sub Warn {
314	0			0	0	0	my ($msg) = @_;
315	0					0	Perl::Tidy::Warn($msg);
316	0					0	return;
317							}
318
319							sub Fault {
320	0			0	0	0	my ($msg) = @_;
321
322							# This routine is called for errors that really should not occur
323							# except if there has been a bug introduced by a recent program change.
324							# Please add comments at calls to Fault to explain why the call
325							# should not occur, and where to look to fix it.
326	0					0	my ( $package0_uu, $filename0_uu, $line0, $subroutine0_uu ) = caller(0);
327	0					0	my ( $package1_uu, $filename1, $line1, $subroutine1 ) = caller(1);
328	0					0	my ( $package2_uu, $filename2_uu, $line2_uu, $subroutine2 ) = caller(2);
329	0					0	my $pkg = __PACKAGE__;
330	0					0	my $input_stream_name = Perl::Tidy::get_input_stream_name();
331
332	0					0	Die(<<EOM);
333							==============================================================================
334							While operating on input stream with name: '$input_stream_name'
335							A fault was detected at line $line0 of sub '$subroutine1'
336							in file '$filename1'
337							which was called from line $line1 of sub '$subroutine2'
338							Message: '$msg'
339							This is probably an error introduced by a recent programming change.
340							$pkg reports VERSION='$VERSION'.
341							==============================================================================
342							EOM
343
344	0					0	croak "unexpected return from sub Die";
345							} ## end sub Fault
346
347							sub bad_pattern {
348	2588			2588	0	3758	my ($pattern) = @_;
349
350							# Return true if a regex pattern has an error
351							# Note: Formatter.pm also has a copy of this
352	2588					3217	my $regex_uu = eval { qr/$pattern/ };
	2588					85460
353	2588					8153	return $EVAL_ERROR;
354							} ## end sub bad_pattern
355
356							sub make_skipping_pattern {
357	2588			2588	0	4501	my ( $rOpts, $opt_name, $default ) = @_;
358
359							# Make regex patterns for the format-skipping and code-skipping options
360	2588					3970	my $param = $rOpts->{$opt_name};
361	2588	100				4217	if ( !$param ) { $param = $default }
	2584					3375
362	2588					6117	$param =~ s/^\s+//;
363	2588	50				5913	if ( $param !~ /^#/ ) {
364	0					0	Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
365							}
366
367							# Note that the ending \s will match a newline
368	2588					3926	my $pattern = '^\s*' . $param . '\s';
369	2588	50				4586	if ( bad_pattern($pattern) ) {
370	0					0	Die(
371							"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
372							);
373							}
374	2588					4718	return $pattern;
375							} ## end sub make_skipping_pattern
376
377							sub check_options {
378
379							# Check and pre-process tokenizer parameters
380	647			647	0	1332	my $rOpts = shift;
381
382	647					1768	%is_sub = ();
383	647					1647	$is_sub{'sub'} = 1;
384
385	647					3816	%is_END_DATA_format_sub = (
386							'__END__' => 1,
387							'__DATA__' => 1,
388							'format' => 1,
389							'sub' => 1,
390							);
391
392							# Install any aliases to 'sub'
393	647	100				1928	if ( $rOpts->{'sub-alias-list'} ) {
394
395							# Note that any 'sub-alias-list' has been preprocessed to
396							# be a trimmed, space-separated list which includes 'sub'
397							# for example, it might be 'sub method fun'
398	3					17	my @sub_alias_list = split /\s+/, $rOpts->{'sub-alias-list'};
399	3					8	foreach my $word (@sub_alias_list) {
400	11					20	$is_sub{$word} = 1;
401	11					23	$is_END_DATA_format_sub{$word} = 1;
402							}
403							}
404
405							# Set global flag to say if we have to guess if bareword 'method' is
406							# a sub when 'method' is in %is_sub. This will be true unless:
407							# (1) the user entered 'method' as sub alias, or
408							# (2) the user set --use-feature=class
409							# In these two cases we can assume that 'method' is a sub alias.
410	647					1128	$guess_if_method = 1;
411	647	100				1840	if ( $is_sub{'method'} ) { $guess_if_method = 0 }
	2					4
412
413							#------------------------------------------------
414							# Update hash values for any -use-feature options
415							#------------------------------------------------
416
417	647					1051	my $use_feature_class = 1;
418
419	647					1292	my $str = $rOpts->{'use-feature'};
420	647	50	33			1943	if ( defined($str) && length($str) ) {
421	0					0	$str =~ s/^\s+//;
422	0					0	$str =~ s/\s+$//;
423	0	0				0	if ( !length($str) ) {
		0
		0
424							## all spaces
425							}
426							elsif ( $str =~ /\bnoclass\b/ ) {
427	0					0	$use_feature_class = 0;
428							}
429							elsif ( $str =~ /\bclass\b/ ) {
430	0					0	$guess_if_method = 0;
431							}
432							else {
433							# At present, only 'class' and 'noclass' are valid strings
434							# This is just a Warn, for testing, but will eventually be Die
435	0					0	Warn(
436							"Unexpected text in --use-feature: expecting 'class' or 'noclass'\n"
437							);
438							}
439							}
440
441							# These are the main updates for this option. There are additional
442							# changes elsewhere, usually indicated with a comment 'rt145706'
443
444							# Update hash values for use_feature=class, added for rt145706
445							# see 'perlclass.pod'
446
447							# IMPORTANT: We are changing global hash values initially set in a BEGIN
448							# block. Values must be defined (true or false) for each of these new
449							# words whether true or false. Otherwise, programs using the module which
450							# change options between runs (such as test code) will have
451							# incorrect settings and fail.
452
453							# There are 4 new keywords:
454
455							# 'class' - treated specially as generalization of 'package'
456							# Note: we must not set 'class' to be a keyword to avoid problems
457							# with older uses.
458	647					1707	$is_package{'class'} = $use_feature_class;
459
460							# 'method' - treated like sub using the sub-alias-list option
461							# Note: we must not set 'method' to be a keyword to avoid problems
462							# with older uses.
463	647	50				1558	if ($use_feature_class) {
464	647					1322	$is_sub{'method'} = 1;
465	647					1215	$is_END_DATA_format_sub{'method'} = 1;
466							}
467
468							# 'field' - added as a keyword, and works like 'my'
469							# Setting zero_continuation_block_type allows inclusion in table of level
470							# differences in case of a missing or extra brace (see sub wrapup).
471	647					1586	$is_keyword{'field'} = $use_feature_class;
472	647					1268	$is_my_our_state{'field'} = $use_feature_class;
473	647					1358	$is_zero_continuation_block_type{'field'} = $use_feature_class;
474
475							# 'ADJUST' - added as a keyword and works like 'BEGIN'
476							# See update git #182 for 'ADJUST :params'
477							# Setting zero_continuation_block_type allows inclusion in table of level
478							# differences in case of a missing or extra brace (see sub wrapup).
479	647					1336	$is_keyword{'ADJUST'} = $use_feature_class;
480	647					1280	$is_code_block_token{'ADJUST'} = $use_feature_class;
481	647					1255	$is_zero_continuation_block_type{'ADJUST'} = $use_feature_class;
482
483	647					1857	%is_grep_alias = ();
484	647	50				1874	if ( $rOpts->{'grep-alias-list'} ) {
485
486							# Note that 'grep-alias-list' has been preprocessed to be a trimmed,
487							# space-separated list
488	647					2766	my @q = split /\s+/, $rOpts->{'grep-alias-list'};
489	647					4116	$is_grep_alias{$_} = 1 for @q;
490							}
491
492	647					1334	$rOpts_starting_indentation_level = $rOpts->{'starting-indentation-level'};
493	647					1306	$rOpts_indent_columns = $rOpts->{'indent-columns'};
494	647					1141	$rOpts_look_for_hash_bang = $rOpts->{'look-for-hash-bang'};
495	647					1266	$rOpts_look_for_autoloader = $rOpts->{'look-for-autoloader'};
496	647					1134	$rOpts_look_for_selfloader = $rOpts->{'look-for-selfloader'};
497	647					1171	$rOpts_trim_qw = $rOpts->{'trim-qw'};
498	647					1072	$rOpts_extended_syntax = $rOpts->{'extended-syntax'};
499	647					1120	$rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
500	647					1066	$rOpts_outdent_labels = $rOpts->{'outdent-labels'};
501	647					1108	$rOpts_maximum_level_errors = $rOpts->{'maximum-level-errors'};
502	647					1157	$rOpts_maximum_unexpected_errors = $rOpts->{'maximum-unexpected-errors'};
503	647					1181	$rOpts_code_skipping = $rOpts->{'code-skipping'};
504	647					1018	$rOpts_code_skipping_begin = $rOpts->{'code-skipping-begin'};
505	647					1034	$rOpts_format_skipping = $rOpts->{'format-skipping'};
506	647					1123	$rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
507	647					1167	$rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
508	647					1004	$rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
509	647					1135	$rOpts_non_indenting_braces = $rOpts->{'non-indenting-braces'};
510	647					1152	$rOpts_non_indenting_brace_prefix = $rOpts->{'non-indenting-brace-prefix'};
511	647					1077	$rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
512
513							# In the Tokenizer, --indent-columns is just used for guessing old
514							# indentation, and must be positive. If -i=0 is used for this run (which
515							# is possible) we'll just guess that the old run used 4 spaces per level.
516	647	100				1565	if ( !$rOpts_indent_columns ) {
517	12					23	$rOpts_indent_columns = INDENT_COLUMNS_DEFAULT;
518							}
519
520							# Define $tabsize, the number of spaces per tab for use in
521							# guessing the indentation of source lines with leading tabs.
522							# Assume same as for this run if tabs are used, otherwise assume
523							# a default value, typically 8
524							$tabsize =
525							$rOpts->{'entab-leading-whitespace'}
526							? $rOpts->{'entab-leading-whitespace'}
527							: $rOpts->{'tabs'} ? $rOpts->{'indent-columns'}
528	647	50				2564	: $rOpts->{'default-tabsize'};
		100
529	647	50				1507	if ( !$tabsize ) { $tabsize = TAB_SIZE_DEFAULT }
	0					0
530
531							$code_skipping_pattern_begin =
532	647					2184	make_skipping_pattern( $rOpts, 'code-skipping-begin', '#<<V' );
533	647					1747	$code_skipping_pattern_end =
534							make_skipping_pattern( $rOpts, 'code-skipping-end', '#>>V' );
535
536	647					1899	$format_skipping_pattern_begin =
537							make_skipping_pattern( $rOpts, 'format-skipping-begin', '#<<<' );
538	647					1663	$format_skipping_pattern_end =
539							make_skipping_pattern( $rOpts, 'format-skipping-end', '#>>>' );
540
541	647					1825	return;
542							} ## end sub check_options
543
544							sub new {
545
546	649			649	0	2539	my ( $class, @arglist ) = @_;
547	649	50				1801	if ( @arglist % 2 ) { croak "Odd number of items in arg hash list\n" }
	0					0
548
549	649					5029	my %defaults = (
550							source_object => undef,
551							debugger_object => undef,
552							diagnostics_object => undef,
553							logger_object => undef,
554							starting_level => undef,
555							starting_line_number => 1,
556							rOpts => {},
557							);
558	649					3499	my %args = ( %defaults, @arglist );
559
560							# we are given an object with a get_line() method to supply source lines
561	649					1616	my $source_object = $args{source_object};
562	649					1113	my $rOpts = $args{rOpts};
563
564							# Check call args
565	649	50				1582	if ( !defined($source_object) ) {
566	0					0	Die(
567							"Perl::Tidy::Tokenizer::new called without a 'source_object' parameter\n"
568							);
569							}
570	649	50				2872	if ( !ref($source_object) ) {
571	0					0	Die(<<EOM);
572							sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference;
573							'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method
574							EOM
575							}
576
577	649					1110	my $logger_object = $args{logger_object};
578
579							# Tokenizer state data is as follows:
580							# _rhere_target_list_ reference to list of here-doc targets
581							# _here_doc_target_ the target string for a here document
582							# _here_quote_character_ the type of here-doc quoting (" ' ` or none)
583							# to determine if interpolation is done
584							# _quote_target_ character we seek if chasing a quote
585							# _line_start_quote_ line where we started looking for a long quote
586							# _in_here_doc_ flag indicating if we are in a here-doc
587							# _in_pod_ flag set if we are in pod documentation
588							# _in_code_skipping_ flag set if we are in a code skipping section
589							# _in_format_skipping_ flag set if we are in a format skipping section
590							# _in_error_ flag set if we saw severe error (binary in script)
591							# _do_not_format_ flag set if formatting should be skipped
592							# _warning_count_ number of calls to logger sub warning
593							# _html_tag_count_ number of apparent html tags seen (indicates html)
594							# _in_data_ flag set if we are in __DATA__ section
595							# _in_end_ flag set if we are in __END__ section
596							# _in_format_ flag set if we are in a format description
597							# _in_attribute_list_ flag telling if we are looking for attributes
598							# _in_quote_ flag telling if we are chasing a quote
599							# _starting_level_ indentation level of first line
600							# _diagnostics_object_ place to write debugging information
601							# _unexpected_error_count_ error count used to limit output
602							# _lower_case_labels_at_ line numbers where lower case labels seen
603							# _hit_bug_ program bug detected
604
605	649					1020	my $self = [];
606	649					1389	$self->[_rhere_target_list_] = [];
607	649					1340	$self->[_in_here_doc_] = 0;
608	649					1489	$self->[_here_doc_target_] = EMPTY_STRING;
609	649					1115	$self->[_here_quote_character_] = EMPTY_STRING;
610	649					1221	$self->[_in_data_] = 0;
611	649					1431	$self->[_in_end_] = 0;
612	649					1225	$self->[_in_format_] = 0;
613	649					1174	$self->[_in_error_] = 0;
614	649					1262	$self->[_do_not_format_] = 0;
615	649					1030	$self->[_warning_count_] = 0;
616	649					1229	$self->[_html_tag_count_] = 0;
617	649					1040	$self->[_in_pod_] = 0;
618							$self->[_in_code_skipping_] =
619	649		33			1996	$rOpts->{'code-skipping-from-start'} && $rOpts_code_skipping;
620	649					1077	$self->[_in_format_skipping_] = 0;
621	649					1273	$self->[_rformat_skipping_list_] = [];
622	649					1284	$self->[_in_attribute_list_] = 0;
623	649					1072	$self->[_in_quote_] = 0;
624	649					1162	$self->[_quote_target_] = EMPTY_STRING;
625	649					1541	$self->[_line_start_quote_] = -1;
626	649					1147	$self->[_starting_level_] = $args{starting_level};
627	649					1379	$self->[_know_starting_level_] = defined( $args{starting_level} );
628	649					1365	$self->[_last_line_number_] = $args{starting_line_number} - 1;
629	649					1423	$self->[_saw_perl_dash_P_] = 0;
630	649					1299	$self->[_saw_perl_dash_w_] = 0;
631	649					1006	$self->[_saw_use_strict_] = 0;
632	649					1039	$self->[_saw_brace_error_] = 0;
633	649					1039	$self->[_hit_bug_] = 0;
634	649					1423	$self->[_look_for_autoloader_] = $rOpts_look_for_autoloader;
635	649					1121	$self->[_look_for_selfloader_] = $rOpts_look_for_selfloader;
636	649					1056	$self->[_saw_autoloader_] = 0;
637	649					1084	$self->[_saw_selfloader_] = 0;
638	649					990	$self->[_saw_hash_bang_] = 0;
639	649					981	$self->[_saw_end_] = 0;
640	649					1376	$self->[_saw_data_] = 0;
641	649					1030	$self->[_saw_negative_indentation_] = 0;
642	649					987	$self->[_started_tokenizing_] = 0;
643	649					1061	$self->[_debugger_object_] = $args{debugger_object};
644	649					1048	$self->[_diagnostics_object_] = $args{diagnostics_object};
645	649					1000	$self->[_logger_object_] = $logger_object;
646	649					1232	$self->[_unexpected_error_count_] = 0;
647	649					1003	$self->[_started_looking_for_here_target_at_] = 0;
648	649					969	$self->[_nearly_matched_here_target_at_] = undef;
649	649					1027	$self->[_line_of_text_] = EMPTY_STRING;
650	649					1050	$self->[_rlower_case_labels_at_] = undef;
651	649					1021	$self->[_maximum_level_] = 0;
652	649					940	$self->[_true_brace_error_count_] = 0;
653	649					1430	$self->[_rnon_indenting_brace_stack_] = [];
654	649					1072	$self->[_show_indentation_table_] = 0;
655	649					1152	$self->[_rbareword_info_] = {};
656
657	649					4821	$self->[_rclosing_brace_indentation_hash_] = {
658							valid => undef,
659							rhistory_line_number => [0],
660							rhistory_level_diff => [0],
661							rhistory_anchor_point => [1],
662							};
663
664	649					1318	$self->[_rOpts_] = $rOpts;
665	649		100			3028	$self->[_save_logfile_] =
666							defined($logger_object) && $logger_object->get_save_logfile();
667
668	649					1421	bless $self, $class;
669
670	649					3311	$self->prepare_for_a_new_file($source_object);
671	649					2789	$self->find_starting_indentation_level();
672
673							# This is not a full class yet, so die if an attempt is made to
674							# create more than one object.
675
676	649	50				1992	if ( _increment_count() > 1 ) {
677	0					0	confess
678							"Attempt to create more than 1 object in $class, which is not a true class yet\n";
679							}
680
681	649					4773	return $self;
682
683							} ## end sub new
684
685							# Called externally
686							sub get_unexpected_error_count {
687	4			4	0	10	my ($self) = @_;
688	4					16	return $self->[_unexpected_error_count_];
689							}
690
691							# Called externally
692							sub is_keyword {
693	4			4	0	8	my ($str) = @_;
694	4					16	return $is_keyword{$str};
695							}
696
697							#----------------------------------------------------------------
698							# Line input routines, previously handled by the LineBuffer class
699							#----------------------------------------------------------------
700							sub make_source_array {
701
702	649			649	0	1323	my ( $self, $line_source_object ) = @_;
703
704							# Convert the source into an array of lines
705							# Given:
706							# $line_source_object = the input source stream
707							# Task:
708							# Convert the source to an array ref and store in $self
709
710	649					1006	my $rinput_lines = [];
711
712	649					1224	my $rsource = ref($line_source_object);
713	649					1074	my $source_string;
714
715	649	50				2912	if ( !$rsource ) {
		50
		50
716
717							# shouldn't happen: this should have been checked in sub new
718	0					0	Fault(<<EOM);
719							sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference;
720							'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method
721							EOM
722							}
723
724							# handle an ARRAY ref
725							elsif ( $rsource eq 'ARRAY' ) {
726	0					0	$rinput_lines = $line_source_object;
727	0					0	$source_string = join( EMPTY_STRING, @{$line_source_object} );
	0					0
728							}
729
730							# handle a SCALAR ref
731							elsif ( $rsource eq 'SCALAR' ) {
732	649					947	$source_string = ${$line_source_object};
	649					1350
733	649					4236	my @lines = split /^/, $source_string;
734	649					1668	$rinput_lines = \@lines;
735							}
736
737							# handle an object - must have a get_line method
738							else {
739
740							# This will die if user's object does have a 'get_line' method
741	0					0	my $line;
742	0					0	while ( defined( $line = $line_source_object->get_line() ) ) {
743	0					0	push( @{$rinput_lines}, $line );
	0					0
744							}
745	0					0	$source_string = join( EMPTY_STRING, @{$rinput_lines} );
	0					0
746							}
747
748							# Get trimmed lines. It is much faster to strip leading whitespace from
749							# the whole input file at once than line-by-line.
750
751							# Add a terminal newline if needed to keep line count unchanged:
752							# - avoids problem of losing a last line which is just \r and no \n (c283)
753							# - but check input line count to avoid adding line to an empty file (c286)
754	649	100	100			1000	if ( @{$rinput_lines} && $source_string !~ /\n$/ ) {
	649					5191
755	1					2	$source_string .= "\n";
756							}
757
758							# Remove leading whitespace except newlines
759	649					7505	$source_string =~ s/^ [^\S\n]+ //gxm;
760
761							# Then break the string into lines
762	649					3877	my @trimmed_lines = split /^/, $source_string;
763
764							# Safety check - a change in number of lines would be a disaster
765	649	50				1027	if ( @trimmed_lines != @{$rinput_lines} ) {
	649					1764
766
767							# Shouldn't happen - die in DEVEL_MODE and fix
768	0					0	my $ntr = @trimmed_lines;
769	0					0	my $utr = @{$rinput_lines};
	0					0
770	0					0	DEVEL_MODE
771							&& Fault("trimmed / untrimmed line counts differ: $ntr / $utr\n");
772
773							# Otherwise we can safely continue with undefined trimmed lines. They
774							# will be detected and fixed later.
775	0					0	@trimmed_lines = ();
776							}
777
778	649					1408	$self->[_rinput_lines_] = $rinput_lines;
779	649					1350	$self->[_rtrimmed_input_lines_] = \@trimmed_lines;
780	649					1151	$self->[_input_line_index_next_] = 0;
781	649					1358	return;
782							} ## end sub make_source_array
783
784							sub peek_ahead {
785	1377			1377	0	2425	my ( $self, $buffer_index ) = @_;
786
787							# look $buffer_index lines ahead of the current location in the input
788							# stream without disturbing the input
789	1377					1761	my $line;
790	1377					1969	my $rinput_lines = $self->[_rinput_lines_];
791	1377					2412	my $line_index = $buffer_index + $self->[_input_line_index_next_];
792	1377	100				1739	if ( $line_index < @{$rinput_lines} ) {
	1377					3076
793	1365					2151	$line = $rinput_lines->[$line_index];
794							}
795	1377					3837	return $line;
796							} ## end sub peek_ahead
797
798							#-----------------------------------------
799							# interface to Perl::Tidy::Logger routines
800							#-----------------------------------------
801							sub warning {
802
803	0			0	0	0	my ( $self, $msg ) = @_;
804
805	0					0	my $logger_object = $self->[_logger_object_];
806	0					0	$self->[_warning_count_]++;
807	0	0				0	if ($logger_object) {
808	0					0	my $msg_line_number = $self->[_last_line_number_];
809	0					0	$logger_object->warning( $msg, $msg_line_number );
810							}
811	0					0	return;
812							} ## end sub warning
813
814							sub warning_do_not_format {
815	0			0	0	0	my ( $self, $msg ) = @_;
816
817							# Issue a warning message and set a flag to skip formatting this file.
818	0					0	$self->warning($msg);
819	0					0	$self->[_do_not_format_] = 1;
820	0					0	return;
821							} ## end sub warning_do_not_format
822
823							sub complain {
824
825	35			35	0	80	my ( $self, $msg ) = @_;
826
827	35					68	my $logger_object = $self->[_logger_object_];
828	35	50				96	if ($logger_object) {
829	35					59	my $input_line_number = $self->[_last_line_number_];
830	35					170	$logger_object->complain( $msg, $input_line_number );
831							}
832	35					54	return;
833							} ## end sub complain
834
835							sub write_logfile_entry {
836
837	2162			2162	0	4317	my ( $self, $msg ) = @_;
838
839	2162					3374	my $logger_object = $self->[_logger_object_];
840	2162	100				4197	if ($logger_object) {
841	2156					5744	$logger_object->write_logfile_entry($msg);
842							}
843	2162					3530	return;
844							} ## end sub write_logfile_entry
845
846							sub interrupt_logfile {
847
848	0			0	0	0	my $self = shift;
849
850	0					0	my $logger_object = $self->[_logger_object_];
851	0	0				0	if ($logger_object) {
852	0					0	$logger_object->interrupt_logfile();
853							}
854	0					0	return;
855							} ## end sub interrupt_logfile
856
857							sub resume_logfile {
858
859	0			0	0	0	my $self = shift;
860
861	0					0	my $logger_object = $self->[_logger_object_];
862	0	0				0	if ($logger_object) {
863	0					0	$logger_object->resume_logfile();
864							}
865	0					0	return;
866							} ## end sub resume_logfile
867
868							sub brace_warning {
869	0			0	0	0	my ( $self, $msg ) = @_;
870	0					0	$self->[_saw_brace_error_]++;
871
872	0					0	my $logger_object = $self->[_logger_object_];
873	0	0				0	if ($logger_object) {
874	0					0	my $msg_line_number = $self->[_last_line_number_];
875	0					0	$logger_object->brace_warning( $msg, $msg_line_number );
876							}
877	0					0	return;
878							} ## end sub brace_warning
879
880							sub increment_brace_error {
881
882							# This is same as sub brace_warning but without a message
883	0			0	0	0	my $self = shift;
884	0					0	$self->[_saw_brace_error_]++;
885
886	0					0	my $logger_object = $self->[_logger_object_];
887	0	0				0	if ($logger_object) {
888	0					0	$logger_object->increment_brace_error();
889							}
890	0					0	return;
891							} ## end sub increment_brace_error
892
893							sub get_saw_brace_error {
894	0			0	0	0	my $self = shift;
895	0					0	return $self->[_saw_brace_error_];
896							}
897
898							sub report_definite_bug {
899	0			0	0	0	my $self = shift;
900	0					0	$self->[_hit_bug_] = 1;
901	0					0	my $logger_object = $self->[_logger_object_];
902	0	0				0	if ($logger_object) {
903	0					0	$logger_object->report_definite_bug();
904							}
905	0					0	return;
906							} ## end sub report_definite_bug
907
908							#-------------------------------------
909							# Interface to Perl::Tidy::Diagnostics
910							#-------------------------------------
911							sub write_diagnostics {
912	0			0	0	0	my ( $self, $msg ) = @_;
913	0					0	my $input_line_number = $self->[_last_line_number_];
914	0					0	my $diagnostics_object = $self->[_diagnostics_object_];
915	0	0				0	if ($diagnostics_object) {
916	0					0	$diagnostics_object->write_diagnostics( $msg, $input_line_number );
917							}
918	0					0	return;
919							} ## end sub write_diagnostics
920
921							sub report_tokenization_errors {
922
923	649			649	0	1461	my ($self) = @_;
924
925							# Report any tokenization errors and return a flag '$severe_error'.
926							# Set $severe_error = 1 if the tokenization errors are so severe that
927							# the formatter should not attempt to format the file. Instead, it will
928							# just output the file verbatim.
929
930							# set severe error flag if tokenizer has encountered file reading problems
931							# (i.e. unexpected binary characters)
932							# or code which may not be formatted correctly (such as 'my sub q')
933							# The difference between _in_error_ and _do_not_format_ is that
934							# _in_error_ stops the tokenizer immediately whereas
935							# _do_not_format_ lets the tokenizer finish so that all errors are seen
936							# Both block formatting and cause the input stream to be output verbatim.
937	649		33			3369	my $severe_error = $self->[_in_error_] \|\| $self->[_do_not_format_];
938
939							# And do not format if it looks like an html file (c209)
940	649		33			3442	$severe_error \|\|= $self->[_html_tag_count_] && $self->[_warning_count_];
			33
941
942							# Inform the logger object on length of input stream
943	649					1295	my $logger_object = $self->[_logger_object_];
944	649	100				1760	if ($logger_object) {
945	647					1186	my $last_line_number = $self->[_last_line_number_];
946	647					3205	$logger_object->set_last_input_line_number($last_line_number);
947							}
948
949	649					1193	my $maxle = $rOpts_maximum_level_errors;
950	649					1001	my $maxue = $rOpts_maximum_unexpected_errors;
951	649	50				1523	$maxle = 1 unless ( defined($maxle) );
952	649	50				1494	$maxue = 0 unless ( defined($maxue) );
953
954	649					2007	my $level = get_indentation_level();
955	649	50				1979	if ( $level != $self->[_starting_level_] ) {
956	0					0	$self->warning("final indentation level: $level\n");
957
958	0					0	$self->[_show_indentation_table_] = 1;
959
960	0					0	my $level_diff = $self->[_starting_level_] - $level;
961	0	0				0	if ( $level_diff < 0 ) { $level_diff = -$level_diff }
	0					0
962
963							# Set severe error flag if the level error is greater than 1.
964							# The formatter can function for any level error but it is probably
965							# best not to attempt formatting for a high level error.
966	0	0	0			0	if ( $maxle >= 0 && $level_diff > $maxle ) {
967	0					0	$severe_error = 1;
968	0					0	$self->warning(<<EOM);
969							Formatting will be skipped because level error '$level_diff' exceeds -maxle=$maxle; use -maxle=-1 to force formatting
970							EOM
971							}
972							}
973
974	649					2992	$self->check_final_nesting_depths();
975
976	649	50				1712	if ( $self->[_show_indentation_table_] ) {
977	0					0	$self->show_indentation_table();
978							}
979
980							# Likewise, large numbers of brace errors usually indicate non-perl
981							# scripts, so set the severe error flag at a low number. This is similar
982							# to the level check, but different because braces may balance but be
983							# incorrectly interlaced.
984	649	50				1901	if ( $self->[_true_brace_error_count_] > 2 ) {
985	0					0	$severe_error = 1;
986							}
987
988	649	50	66			1881	if ( $rOpts_look_for_hash_bang
989							&& !$self->[_saw_hash_bang_] )
990							{
991	0					0	$self->warning(
992							"hit EOF without seeing hash-bang line; maybe don't need -x?\n");
993							}
994
995	649	50				1710	if ( $self->[_in_format_] ) {
996	0					0	$self->warning("hit EOF while in format description\n");
997							}
998
999	649	50				1761	if ( $self->[_in_code_skipping_] ) {
1000	0					0	$self->write_logfile_entry(
1001							"hit EOF while in lines skipped with --code-skipping\n");
1002							}
1003
1004	649	50				1718	if ( $self->[_in_pod_] ) {
1005
1006							# Just write log entry if this is after __END__ or __DATA__
1007							# because this happens to often, and it is not likely to be
1008							# a parsing error.
1009	0	0	0			0	if ( $self->[_saw_data_] \|\| $self->[_saw_end_] ) {
1010	0					0	$self->write_logfile_entry(
1011							"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
1012							);
1013							}
1014
1015							else {
1016	0					0	$self->complain(
1017							"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
1018							);
1019							}
1020
1021							}
1022
1023	649	50				1759	if ( $self->[_in_here_doc_] ) {
1024	0					0	$severe_error = 1;
1025	0					0	my $here_doc_target = $self->[_here_doc_target_];
1026	0					0	my $started_looking_for_here_target_at =
1027							$self->[_started_looking_for_here_target_at_];
1028	0	0				0	if ($here_doc_target) {
1029	0					0	$self->warning(
1030							"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
1031							);
1032							}
1033							else {
1034	0					0	$self->warning(<<EOM);
1035							Hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string.
1036							(Perl will match to the end of file but this may not be intended).
1037							EOM
1038							}
1039	0					0	my $nearly_matched_here_target_at =
1040							$self->[_nearly_matched_here_target_at_];
1041	0	0				0	if ($nearly_matched_here_target_at) {
1042	0					0	$self->warning(
1043							"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
1044							);
1045							}
1046							}
1047
1048							# Something is seriously wrong if we ended inside a quote
1049	649	50				1613	if ( $self->[_in_quote_] ) {
1050	0					0	$severe_error = 1;
1051	0					0	my $line_start_quote = $self->[_line_start_quote_];
1052	0					0	my $quote_target = $self->[_quote_target_];
1053	0	0				0	my $what =
1054							( $self->[_in_attribute_list_] )
1055							? "attribute list"
1056							: "quote/pattern";
1057	0					0	$self->warning(
1058							"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
1059							);
1060							}
1061
1062	649	50				1591	if ( $self->[_hit_bug_] ) {
1063	0					0	$severe_error = 1;
1064							}
1065
1066							# Multiple "unexpected" type tokenization errors usually indicate parsing
1067							# non-perl scripts, or that something is seriously wrong, so we should
1068							# avoid formatting them. This can happen for example if we run perltidy on
1069							# a shell script or an html file. But unfortunately this check can
1070							# interfere with some extended syntaxes, such as RPerl, so it has to be off
1071							# by default.
1072	649					1161	my $ue_count = $self->[_unexpected_error_count_];
1073	649	50	33			1969	if ( $maxue > 0 && $ue_count > $maxue ) {
1074	0					0	$self->warning(<<EOM);
1075							Formatting will be skipped since unexpected token count = $ue_count > -maxue=$maxue; use -maxue=0 to force formatting
1076							EOM
1077	0					0	$severe_error = 1;
1078							}
1079
1080	649	100				1722	if ( !$self->[_saw_perl_dash_w_] ) {
1081	632					2140	$self->write_logfile_entry("Suggest including 'use warnings;'\n");
1082							}
1083
1084	649	50				1716	if ( $self->[_saw_perl_dash_P_] ) {
1085	0					0	$self->write_logfile_entry(
1086							"Use of -P parameter for defines is discouraged\n");
1087							}
1088
1089	649	100				1649	if ( !$self->[_saw_use_strict_] ) {
1090	635					1495	$self->write_logfile_entry("Suggest including 'use strict;'\n");
1091							}
1092
1093							# it is suggested that labels have at least one upper case character
1094							# for legibility and to avoid code breakage as new keywords are introduced
1095	649	100				1915	if ( $self->[_rlower_case_labels_at_] ) {
1096	12					19	my @lower_case_labels_at = @{ $self->[_rlower_case_labels_at_] };
	12					35
1097	12					36	$self->write_logfile_entry(
1098							"Suggest using upper case characters in label(s)\n");
1099	12					25	local $LIST_SEPARATOR = ')(';
1100	12					57	$self->write_logfile_entry(
1101							" defined at line(s): (@lower_case_labels_at)\n");
1102							}
1103
1104							# Get the text of any leading format skipping tag
1105	649					1065	my $early_FS_end_marker;
1106	649					1170	my $rformat_skipping_list = $self->[_rformat_skipping_list_];
1107	649	100	100			1010	if ( @{$rformat_skipping_list} && $rformat_skipping_list->[0]->[0] == -1 ) {
	649					2103
1108	3					9	$early_FS_end_marker = $rformat_skipping_list->[0]->[2];
1109							}
1110
1111							return {
1112	649					3348	severe_error => $severe_error,
1113							early_FS_end_marker => $early_FS_end_marker,
1114							};
1115
1116							} ## end sub report_tokenization_errors
1117
1118							sub show_indentation_table {
1119	0			0	0	0	my ($self) = @_;
1120
1121							# Output indentation table made at closing braces. This can be helpful for
1122							# the case of a missing brace in a previously formatted file.
1123
1124							# skip if problem reading file
1125	0	0				0	return if ( $self->[_in_error_] );
1126
1127							# skip if -wc is used (rare); it is too complex to use
1128	0	0				0	return if ($rOpts_whitespace_cycle);
1129
1130							# skip if non-indenting-brace-prefix (very rare, but could be fixed)
1131	0	0				0	return if ($rOpts_non_indenting_brace_prefix);
1132
1133							# skip if starting level is not zero (probably in editor)
1134	0	0				0	return if ($rOpts_starting_indentation_level);
1135
1136							# skip if indentation analysis is not valid
1137	0					0	my $rhash = $self->[_rclosing_brace_indentation_hash_];
1138	0	0				0	return if ( !$rhash->{valid} );
1139
1140	0					0	my $rhistory_line_number = $rhash->{rhistory_line_number};
1141	0					0	my $rhistory_level_diff = $rhash->{rhistory_level_diff};
1142	0					0	my $rhistory_anchor_point = $rhash->{rhistory_anchor_point};
1143
1144							# Remove the first artificial point from the table
1145	0					0	shift @{$rhistory_line_number};
	0					0
1146	0					0	shift @{$rhistory_level_diff};
	0					0
1147	0					0	shift @{$rhistory_anchor_point};
	0					0
1148
1149							# Remove dubious points at an anchor point = 2 and beyond
1150							# These can occur when non-indenting braces are used
1151	0					0	my $num_his = @{$rhistory_level_diff};
	0					0
1152	0					0	foreach my $i ( 0 .. $num_his - 1 ) {
1153	0	0				0	if ( $rhistory_anchor_point->[$i] == 2 ) {
1154	0					0	$num_his = $i;
1155	0					0	last;
1156							}
1157							}
1158	0	0				0	return if ( $num_his <= 1 );
1159
1160							# Ignore an ending non-anchor point
1161	0	0				0	if ( !$rhistory_anchor_point->[-1] ) {
1162	0					0	$num_his -= 1;
1163							}
1164
1165							# Ignore an ending point which is the same as the previous point
1166	0	0				0	if ( $num_his > 1 ) {
1167	0	0				0	if ( $rhistory_level_diff->[ $num_his - 1 ] ==
1168							$rhistory_level_diff->[ $num_his - 2 ] )
1169							{
1170	0					0	$num_his -= 1;
1171							}
1172							}
1173
1174							# Skip if the table does not have at least 2 points to pinpoint an error
1175	0	0				0	return if ( $num_his <= 1 );
1176
1177							# Skip if first point shows a level error - the analysis may not be valid
1178	0	0				0	return if ( $rhistory_level_diff->[0] );
1179
1180							# Remove table points which return from negative to zero; they follow
1181							# an error and may not be correct. c448.
1182	0					0	my $min_lev = $rhistory_level_diff->[0];
1183	0					0	foreach my $ii ( 1 .. $num_his - 1 ) {
1184	0					0	my $lev = $rhistory_level_diff->[$ii];
1185	0	0				0	if ( $lev < $min_lev ) { $min_lev = $lev; next }
	0					0
	0					0
1186	0	0	0			0	if ( $min_lev < 0 && $lev >= 0 ) {
1187	0					0	$num_his = $ii;
1188	0					0	last;
1189							}
1190							}
1191
1192							# Since the table could be arbitrarily large, we will limit the table to N
1193							# lines. If there are more lines than that, we will show N-3 lines, then
1194							# ..., then the last 2 lines. Allow about 3 lines per error, so a table
1195							# limit of 10 can localize up to about 3 errors in a file.
1196	0					0	my $nlines_max = 10;
1197	0					0	my @pre_indexes = ( 0 .. $num_his - 1 );
1198	0					0	my @post_indexes = ();
1199	0	0				0	if ( @pre_indexes > $nlines_max ) {
1200	0	0				0	if ( $nlines_max >= 5 ) {
1201	0					0	@pre_indexes = ( 0 .. $nlines_max - 4 );
1202	0					0	@post_indexes = ( $num_his - 2, $num_his - 1 );
1203							}
1204							else {
1205	0					0	@pre_indexes = ( 0 .. $nlines_max - 1 );
1206							}
1207							}
1208
1209	0					0	my @output_lines;
1210	0					0	push @output_lines, <<EOM;
1211							Table of initial nesting level differences at closing braces.
1212							This might help localize brace errors IF perltidy previously formatted the file.
1213							line: error=[new brace level]-[old indentation level]
1214							EOM
1215	0					0	foreach my $i (@pre_indexes) {
1216	0					0	my $lno = $rhistory_line_number->[$i];
1217	0					0	my $diff = $rhistory_level_diff->[$i];
1218	0					0	push @output_lines, <<EOM;
1219							$lno: $diff
1220							EOM
1221							}
1222	0	0				0	if (@post_indexes) {
1223	0					0	push @output_lines, "...\n";
1224	0					0	foreach my $i (@post_indexes) {
1225	0					0	my $lno = $rhistory_line_number->[$i];
1226	0					0	my $diff = $rhistory_level_diff->[$i];
1227	0					0	push @output_lines, <<EOM;
1228							$lno: $diff
1229							EOM
1230							}
1231							}
1232
1233							# Try to give a hint
1234	0					0	my $level_diff_1 = $rhistory_level_diff->[1];
1235	0					0	my $ln_0 = $rhistory_line_number->[0];
1236	0					0	my $ln_1 = $rhistory_line_number->[1];
1237	0	0				0	if ( $level_diff_1 < 0 ) {
		0
1238	0					0	push @output_lines,
1239							"There may be an extra '}' or missing '{' between lines $ln_0 and $ln_1\n";
1240							}
1241							elsif ( $level_diff_1 > 0 ) {
1242	0					0	push @output_lines,
1243							"There may be a missing '}' or extra '{' between lines $ln_0 and $ln_1\n";
1244							}
1245							else {
1246							## two leading zeros in the table - probably can't happen - no hint
1247							}
1248
1249	0					0	push @output_lines, "\n";
1250	0					0	my $output_str = join EMPTY_STRING, @output_lines;
1251
1252	0					0	$self->interrupt_logfile();
1253	0					0	$self->warning($output_str);
1254	0					0	$self->resume_logfile();
1255
1256	0					0	return;
1257							} ## end sub show_indentation_table
1258
1259							sub report_v_string {
1260
1261							# warn if this version can't handle v-strings
1262	2			2	0	7	my ( $self, $tok ) = @_;
1263	2	50				6	if ( $] < 5.006 ) {
1264	0					0	$self->warning(
1265							"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
1266							);
1267							}
1268	2					6	return;
1269							} ## end sub report_v_string
1270
1271							sub is_valid_token_type {
1272	447			447	0	530	my ($type) = @_;
1273	447					1019	return $is_valid_token_type{$type};
1274							}
1275
1276							sub log_numbered_msg {
1277	208			208	0	446	my ( $self, $msg ) = @_;
1278
1279							# write input line number + message to logfile
1280	208					318	my $input_line_number = $self->[_last_line_number_];
1281	208					785	$self->write_logfile_entry("Line $input_line_number: $msg");
1282	208					438	return;
1283							} ## end sub log_numbered_msg
1284
1285							sub get_line {
1286
1287	9619			9619	0	13520	my $self = shift;
1288
1289							# Read the next input line and tokenize it
1290							# Returns:
1291							# $line_of_tokens = ref to hash of info for the tokenized line
1292
1293							# USES GLOBAL VARIABLES:
1294							# $brace_depth, $square_bracket_depth, $paren_depth
1295
1296							# get the next line from the input array
1297	9619					13025	my $input_line;
1298							my $trimmed_input_line;
1299	9619					13495	my $line_index = $self->[_input_line_index_next_];
1300	9619					12225	my $rinput_lines = $self->[_rinput_lines_];
1301	9619	100				11201	if ( $line_index < @{$rinput_lines} ) {
	9619					16073
1302	8970					16158	$trimmed_input_line = $self->[_rtrimmed_input_lines_]->[$line_index];
1303	8970					15306	$input_line = $rinput_lines->[ $line_index++ ];
1304	8970					12094	$self->[_input_line_index_next_] = $line_index;
1305							}
1306
1307							# End of file .. check if file ends in a binary operator (c565)
1308							else {
1309	649	0	33			3566	if (
			33
1310							$is_binary_or_unary_operator_type{$last_nonblank_type}
1311							\|\| ( $last_nonblank_type eq 'k'
1312							&& $is_binary_or_unary_keyword{$last_nonblank_token} )
1313							)
1314							{
1315	0					0	$self->warning(
1316							"Unexpected EOF at operator '$last_nonblank_token'\n");
1317
1318							# avoid repeating this message
1319	0					0	$last_nonblank_token = ';';
1320	0					0	$last_nonblank_type = ';';
1321							}
1322							}
1323
1324	9619					14196	$self->[_line_of_text_] = $input_line;
1325
1326	9619	100				17462	return if ( !defined($input_line) );
1327
1328	8970					12230	my $input_line_number = ++$self->[_last_line_number_];
1329
1330							# Find and remove what characters terminate this line, including any
1331							# control r
1332	8970					11761	my $input_line_separator = EMPTY_STRING;
1333	8970	100				19802	if ( chomp $input_line ) {
1334	8969					18124	$input_line_separator = $INPUT_RECORD_SEPARATOR;
1335							}
1336
1337							# The first test here very significantly speeds things up, but be sure to
1338							# keep the regex and hash %other_line_endings the same.
1339	8970	50				22354	if ( $other_line_endings{ substr( $input_line, -1 ) } ) {
1340	0	0				0	if ( $input_line =~ s/([\r\035\032])+$// ) {
1341	0					0	$input_line_separator = $1 . $input_line_separator;
1342
1343							# This could make the trimmed input line incorrect, so the
1344							# safe thing to do is to make it undef to force it to be
1345							# recomputed later.
1346	0					0	$trimmed_input_line = undef;
1347							}
1348							}
1349
1350							# For backwards compatibility we keep the line text terminated with
1351							# a newline character
1352	8970					12443	$input_line .= "\n";
1353	8970					12011	$self->[_line_of_text_] = $input_line;
1354
1355							# create a data structure describing this line which will be
1356							# returned to the caller.
1357
1358							# _line_type codes are:
1359							# SYSTEM - system-specific code before hash-bang line
1360							# CODE - line of perl code (including comments)
1361							# POD_START - line starting pod, such as '=head'
1362							# POD - pod documentation text
1363							# POD_END - last line of pod section, '=cut'
1364							# HERE - text of here-document
1365							# HERE_END - last line of here-doc (target word)
1366							# FORMAT - format section
1367							# FORMAT_END - last line of format section, '.'
1368							# SKIP - code skipping section
1369							# SKIP_END - last line of code skipping section, '#>>V'
1370							# DATA_START - __DATA__ line
1371							# DATA - unidentified text following __DATA__
1372							# END_START - __END__ line
1373							# END - unidentified text following __END__
1374							# ERROR - we are in big trouble, probably not a perl script
1375
1376							# Other variables:
1377							# _curly_brace_depth - depth of curly braces at start of line
1378							# _square_bracket_depth - depth of square brackets at start of line
1379							# _paren_depth - depth of parens at start of line
1380							# _starting_in_quote - this line continues a multi-line quote
1381							# (so don't trim leading blanks!)
1382							# _ending_in_quote - this line ends in a multi-line quote
1383							# (so don't trim trailing blanks!)
1384	8970					43010	my $line_of_tokens = {
1385							_line_type => 'EOF',
1386							_line_text => $input_line,
1387							_line_number => $input_line_number,
1388							_guessed_indentation_level => 0,
1389							_curly_brace_depth => $brace_depth,
1390							_square_bracket_depth => $square_bracket_depth,
1391							_paren_depth => $paren_depth,
1392							## Skip these needless initializations for efficiency:
1393							## _rtoken_type => undef,
1394							## _rtokens => undef,
1395							## _rlevels => undef,
1396							## _rblock_type => undef,
1397							## _rtype_sequence => undef,
1398							## _starting_in_quote => 0,
1399							## _ending_in_quote => 0,
1400							};
1401
1402							# must print line unchanged if we are in a here document
1403	8970	100				38527	if ( $self->[_in_here_doc_] ) {
		100
		100
		100
		50
		100
		100
1404
1405	32					49	$line_of_tokens->{_line_type} = 'HERE';
1406	32					46	my $here_doc_target = $self->[_here_doc_target_];
1407	32					54	my $here_quote_character = $self->[_here_quote_character_];
1408	32					46	my $candidate_target = $input_line;
1409	32					42	chomp $candidate_target;
1410
1411							# Handle <<~ targets, which are indicated here by a leading space on
1412							# the here quote character
1413	32	100				84	if ( $here_quote_character =~ /^\s/ ) {
1414	4					12	$candidate_target =~ s/^\s+//;
1415							}
1416	32	100				67	if ( $candidate_target eq $here_doc_target ) {
1417	13					25	$self->[_nearly_matched_here_target_at_] = undef;
1418	13					22	$line_of_tokens->{_line_type} = 'HERE_END';
1419	13					47	$self->log_numbered_msg("Exiting HERE document $here_doc_target\n");
1420
1421	13					24	my $rhere_target_list = $self->[_rhere_target_list_];
1422	13	100				21	if ( @{$rhere_target_list} ) { # there can be multiple here targets
	13					38
1423							( $here_doc_target, $here_quote_character ) =
1424	2					3	@{ shift @{$rhere_target_list} };
	2					3
	2					6
1425	2					4	$self->[_here_doc_target_] = $here_doc_target;
1426	2					2	$self->[_here_quote_character_] = $here_quote_character;
1427	2					6	$self->log_numbered_msg(
1428							"Entering HERE document $here_doc_target\n");
1429	2					4	$self->[_nearly_matched_here_target_at_] = undef;
1430	2					4	$self->[_started_looking_for_here_target_at_] =
1431							$input_line_number;
1432							}
1433							else {
1434	11					34	$self->[_in_here_doc_] = 0;
1435	11					20	$self->[_here_doc_target_] = EMPTY_STRING;
1436	11					23	$self->[_here_quote_character_] = EMPTY_STRING;
1437							}
1438							}
1439
1440							# check for error of extra whitespace
1441							# note for PERL6: leading whitespace is allowed
1442							else {
1443	19					144	$candidate_target =~ s/^ \s+ \| \s+ $//gx; # trim both ends
1444	19	50				52	if ( $candidate_target eq $here_doc_target ) {
1445	0					0	$self->[_nearly_matched_here_target_at_] = $input_line_number;
1446							}
1447							}
1448	32					96	return $line_of_tokens;
1449							}
1450
1451							# Print line unchanged if we are in a format section
1452							elsif ( $self->[_in_format_] ) {
1453
1454	3	100				9	if ( $input_line =~ /^\.[\s#]*$/ ) {
1455
1456							# Decrement format depth count at a '.' after a 'format'
1457	1					2	$self->[_in_format_]--;
1458
1459							# This is the end when count reaches 0
1460	1	50				3	if ( !$self->[_in_format_] ) {
1461	1					4	$self->log_numbered_msg("Exiting format section\n");
1462	1					2	$line_of_tokens->{_line_type} = 'FORMAT_END';
1463
1464							# Make the tokenizer mark an opening brace which follows
1465							# as a code block. Fixes issue c202/t032.
1466	1					2	$last_nonblank_token = ';';
1467	1					1	$last_nonblank_type = ';';
1468							}
1469							}
1470							else {
1471	2					4	$line_of_tokens->{_line_type} = 'FORMAT';
1472	2	50				5	if ( $input_line =~ /^\s*format\s+\w+/ ) {
1473
1474							# Increment format depth count at a 'format' within a 'format'
1475							# This is a simple way to handle nested formats (issue c019).
1476	0					0	$self->[_in_format_]++;
1477							}
1478							}
1479	3					7	return $line_of_tokens;
1480							}
1481
1482							# must print line unchanged if we are in pod documentation
1483							elsif ( $self->[_in_pod_] ) {
1484
1485	51					84	$line_of_tokens->{_line_type} = 'POD';
1486	51	100				141	if ( $input_line =~ /^=cut/ ) {
1487	22					48	$line_of_tokens->{_line_type} = 'POD_END';
1488	22					60	$self->log_numbered_msg("Exiting POD section\n");
1489	22					35	$self->[_in_pod_] = 0;
1490							}
1491	51	50	33			178	if ( $input_line =~ /^\#\!.*perl\b/ && !$self->[_in_end_] ) {
1492	0					0	$self->warning(
1493							"Hash-bang in pod can cause older versions of perl to fail! \n"
1494							);
1495							}
1496
1497	51					177	return $line_of_tokens;
1498							}
1499
1500							# print line unchanged if in skipped section
1501							elsif ( $self->[_in_code_skipping_] ) {
1502
1503	8					12	$line_of_tokens->{_line_type} = 'SKIP';
1504	8	100				118	if ( $input_line =~ /$code_skipping_pattern_end/ ) {
		50
1505	2					6	$line_of_tokens->{_line_type} = 'SKIP_END';
1506	2					5	$self->log_numbered_msg("Exiting code-skipping section\n");
1507	2					3	$self->[_in_code_skipping_] = 0;
1508							}
1509							elsif ( $input_line =~ /$code_skipping_pattern_begin/ ) {
1510
1511							# warn of duplicate starting comment lines, git #118
1512	0					0	my $lno = $self->[_in_code_skipping_];
1513	0					0	$self->warning(
1514							"Already in code-skipping section which started at line $lno\n"
1515							);
1516							}
1517							else {
1518							# not a code-skipping control line
1519							}
1520	8					20	return $line_of_tokens;
1521							}
1522
1523							# must print line unchanged if we have seen a severe error (i.e., we
1524							# are seeing illegal tokens and cannot continue. Syntax errors do
1525							# not pass this route). Calling routine can decide what to do, but
1526							# the default can be to just pass all lines as if they were after __END__
1527							elsif ( $self->[_in_error_] ) {
1528	0					0	$line_of_tokens->{_line_type} = 'ERROR';
1529	0					0	return $line_of_tokens;
1530							}
1531
1532							# print line unchanged if we are __DATA__ section
1533							elsif ( $self->[_in_data_] ) {
1534
1535							# ...but look for POD
1536							# Note that the _in_data and _in_end flags remain set
1537							# so that we return to that state after seeing the
1538							# end of a pod section
1539	1	50	33			11	if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
1540	0					0	$line_of_tokens->{_line_type} = 'POD_START';
1541	0					0	$self->log_numbered_msg("Entering POD section\n");
1542	0					0	$self->[_in_pod_] = 1;
1543	0					0	return $line_of_tokens;
1544							}
1545							else {
1546	1					5	$line_of_tokens->{_line_type} = 'DATA';
1547	1					6	return $line_of_tokens;
1548							}
1549							}
1550
1551							# print line unchanged if we are in __END__ section
1552							elsif ( $self->[_in_end_] ) {
1553
1554							# ...but look for POD
1555							# Note that the _in_data and _in_end flags remain set
1556							# so that we return to that state after seeing the
1557							# end of a pod section
1558	56	100	66			197	if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
1559	7					17	$line_of_tokens->{_line_type} = 'POD_START';
1560	7					25	$self->log_numbered_msg("Entering POD section\n");
1561	7					11	$self->[_in_pod_] = 1;
1562	7					23	return $line_of_tokens;
1563							}
1564							else {
1565	49					69	$line_of_tokens->{_line_type} = 'END';
1566	49					119	return $line_of_tokens;
1567							}
1568							}
1569							else {
1570							# not a special control line
1571							}
1572
1573							# check for a hash-bang line if we haven't seen one
1574	8819	100	100			31965	if ( !$self->[_saw_hash_bang_]
			66
1575							&& substr( $input_line, 0, 2 ) eq '#!'
1576							&& $input_line =~ /^\#\!.*perl\b/ )
1577							{
1578	16					36	$self->[_saw_hash_bang_] = $input_line_number;
1579
1580							# check for -w and -P flags
1581	16	50				78	if ( $input_line =~ /^\#\!.perl\s.-.*P/ ) {
1582	0					0	$self->[_saw_perl_dash_P_] = 1;
1583							}
1584
1585	16	100				82	if ( $input_line =~ /^\#\!.perl\s.-.*w/ ) {
1586	9					18	$self->[_saw_perl_dash_w_] = 1;
1587							}
1588
1589	16	100	33			114	if (
			66
			100
			66
1590							$input_line_number > 1
1591
1592							# leave any hash bang in a BEGIN block alone
1593							# i.e. see 'debugger-duck_type.t'
1594							&& !(
1595							$last_nonblank_block_type
1596							&& $last_nonblank_block_type eq 'BEGIN'
1597							)
1598							&& !$rOpts_look_for_hash_bang
1599
1600							# Try to avoid giving a false alarm at a simple comment.
1601							# These look like valid hash-bang lines:
1602
1603							#!/usr/bin/perl -w
1604							#! /usr/bin/perl -w
1605							#!c:\perl\bin\perl.exe
1606
1607							# These are comments:
1608							#! I love perl
1609							#! sunos does not yet provide a /usr/bin/perl
1610
1611							# Comments typically have multiple spaces, which suggests
1612							# the filter
1613							&& $input_line =~ /^\#\!(\s+)?(\S+)?perl/
1614							)
1615							{
1616
1617							# this is helpful for VMS systems; we may have accidentally
1618							# tokenized some DCL commands
1619	1	50				4	if ( $self->[_started_tokenizing_] ) {
1620	0					0	$self->warning(
1621							"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
1622							);
1623							}
1624							else {
1625	1					4	$self->complain("Useless hash-bang after line 1\n");
1626							}
1627							}
1628
1629							# Report the leading hash-bang as a system line
1630							# This will prevent -dac from deleting it
1631							else {
1632	15					36	$line_of_tokens->{_line_type} = 'SYSTEM';
1633	15					74	return $line_of_tokens;
1634							}
1635							}
1636
1637							# wait for a hash-bang before parsing if the user invoked us with -x
1638	8804	100	100			17387	if ( $rOpts_look_for_hash_bang
1639							&& !$self->[_saw_hash_bang_] )
1640							{
1641	5					8	$line_of_tokens->{_line_type} = 'SYSTEM';
1642	5					12	return $line_of_tokens;
1643							}
1644
1645							# a first line of the form ': #' will be marked as SYSTEM
1646							# since lines of this form may be used by tcsh
1647	8799	50	66			18695	if ( $input_line_number == 1 && $input_line =~ /^\s\:\s\#/ ) {
1648	0					0	$line_of_tokens->{_line_type} = 'SYSTEM';
1649	0					0	return $line_of_tokens;
1650							}
1651
1652							# now we know that it is ok to tokenize the line...
1653							# the line tokenizer will modify any of these private variables:
1654							# _rhere_target_list_
1655							# _in_data_
1656							# _in_end_
1657							# _in_format_
1658							# _in_error_
1659							# _in_code_skipping_
1660							# _in_format_skipping_
1661							# _in_pod_
1662							# _in_quote_
1663
1664	8799					23188	$self->tokenize_this_line( $line_of_tokens, $trimmed_input_line );
1665
1666							# Now finish defining the return structure and return it
1667	8799					16932	$line_of_tokens->{_ending_in_quote} = $self->[_in_quote_];
1668
1669							# handle severe error (binary data in script)
1670	8799	50				15894	if ( $self->[_in_error_] ) {
1671	0					0	$self->[_in_quote_] = 0; # to avoid any more messages
1672	0					0	$self->warning("Giving up after error\n");
1673	0					0	$line_of_tokens->{_line_type} = 'ERROR';
1674	0					0	reset_indentation_level(0); # avoid error messages
1675	0					0	return $line_of_tokens;
1676							}
1677
1678							# handle start of pod documentation
1679	8799	100				14516	if ( $self->[_in_pod_] ) {
1680
1681							# This gets tricky..above a __DATA__ or __END__ section, perl
1682							# accepts '=cut' as the start of pod section. But afterwards,
1683							# only pod utilities see it and they may ignore an =cut without
1684							# leading =head. In any case, this isn't good.
1685	15	50				62	if ( $input_line =~ /^=cut\b/ ) {
1686	0	0	0			0	if ( $self->[_saw_data_] \|\| $self->[_saw_end_] ) {
1687	0					0	$self->complain("=cut while not in pod ignored\n");
1688	0					0	$self->[_in_pod_] = 0;
1689	0					0	$line_of_tokens->{_line_type} = 'POD_END';
1690							}
1691							else {
1692	0					0	$line_of_tokens->{_line_type} = 'POD_START';
1693	0					0	if ( !DEVEL_MODE ) {
1694	0					0	$self->warning(
1695							"=cut starts a pod section .. this can fool pod utilities.\n"
1696							);
1697							}
1698	0					0	$self->log_numbered_msg("Entering POD section\n");
1699							}
1700							}
1701
1702							else {
1703	15					37	$line_of_tokens->{_line_type} = 'POD_START';
1704	15					53	$self->log_numbered_msg("Entering POD section\n");
1705							}
1706
1707	15					52	return $line_of_tokens;
1708							}
1709
1710							# handle start of skipped section
1711	8784	100				14991	if ( $self->[_in_code_skipping_] ) {
1712
1713	2					5	$line_of_tokens->{_line_type} = 'SKIP';
1714	2					9	$self->log_numbered_msg("Entering code-skipping section\n");
1715	2					6	return $line_of_tokens;
1716							}
1717
1718							# see if this line contains here doc targets
1719	8782					11733	my $rhere_target_list = $self->[_rhere_target_list_];
1720	8782	100				9514	if ( @{$rhere_target_list} ) {
	8782					15910
1721
1722							my ( $here_doc_target, $here_quote_character ) =
1723	11					20	@{ shift @{$rhere_target_list} };
	11					20
	11					33
1724	11					29	$self->[_in_here_doc_] = 1;
1725	11					24	$self->[_here_doc_target_] = $here_doc_target;
1726	11					22	$self->[_here_quote_character_] = $here_quote_character;
1727	11					56	$self->log_numbered_msg("Entering HERE document $here_doc_target\n");
1728	11					19	$self->[_started_looking_for_here_target_at_] = $input_line_number;
1729							}
1730
1731							# NOTE: __END__ and __DATA__ statements are written unformatted
1732							# because they can theoretically contain additional characters
1733							# which are not tokenized (and cannot be read with <DATA> either!).
1734	8782	100				18445	if ( $self->[_in_data_] ) {
		100
1735	1					3	$line_of_tokens->{_line_type} = 'DATA_START';
1736	1					7	$self->log_numbered_msg("Starting __DATA__ section\n");
1737	1					2	$self->[_saw_data_] = 1;
1738
1739							# keep parsing after __DATA__ if use SelfLoader was seen
1740	1	50				5	if ( $self->[_saw_selfloader_] ) {
1741	0					0	$self->[_in_data_] = 0;
1742	0					0	$self->log_numbered_msg(
1743							"SelfLoader seen, continuing; -nlsl deactivates\n");
1744							}
1745
1746	1					7	return $line_of_tokens;
1747							}
1748
1749							elsif ( $self->[_in_end_] ) {
1750	7					19	$line_of_tokens->{_line_type} = 'END_START';
1751	7					23	$self->log_numbered_msg("Starting __END__ section\n");
1752	7					13	$self->[_saw_end_] = 1;
1753
1754							# keep parsing after __END__ if use AutoLoader was seen
1755	7	50				31	if ( $self->[_saw_autoloader_] ) {
1756	0					0	$self->[_in_end_] = 0;
1757	0					0	$self->log_numbered_msg(
1758							"AutoLoader seen, continuing; -nlal deactivates\n");
1759							}
1760	7					44	return $line_of_tokens;
1761							}
1762							else {
1763							# not in __END__ or __DATA__
1764							}
1765
1766							# now, finally, we know that this line is type 'CODE'
1767	8774					13732	$line_of_tokens->{_line_type} = 'CODE';
1768
1769							# remember if we have seen any real code
1770	8774	100	100			21974	if ( !$self->[_started_tokenizing_]
			100
1771							&& $input_line !~ /^\s*$/
1772							&& $input_line !~ /^\s*#/ )
1773							{
1774	645					1280	$self->[_started_tokenizing_] = 1;
1775							}
1776
1777	8774	100				14977	if ( $self->[_debugger_object_] ) {
1778	7					21	$self->[_debugger_object_]->write_debug_entry($line_of_tokens);
1779							}
1780
1781							# Note: if keyword 'format' occurs in this line code, it is still CODE
1782							# (keyword 'format' need not start a line)
1783	8774	100				14750	if ( $self->[_in_format_] ) {
1784	1					4	$self->log_numbered_msg("Entering format section\n");
1785							}
1786
1787	8774	100	100			26309	if ( $self->[_in_quote_]
		100	100
1788							and ( $self->[_line_start_quote_] < 0 ) )
1789							{
1790	63	100				399	if ( ( my $quote_target = $self->[_quote_target_] ) !~ /^\s*$/ ) {
1791	62					112	$self->[_line_start_quote_] = $input_line_number;
1792	62					274	$self->log_numbered_msg(
1793							"Start multi-line quote or pattern ending in $quote_target\n");
1794							}
1795							}
1796							elsif ( ( $self->[_line_start_quote_] >= 0 )
1797							&& !$self->[_in_quote_] )
1798							{
1799	62					132	$self->[_line_start_quote_] = -1;
1800	62					183	$self->log_numbered_msg("End of multi-line quote or pattern\n");
1801							}
1802							else {
1803							# not at the edge of a quote
1804							}
1805
1806							# we are returning a line of CODE
1807	8774					31151	return $line_of_tokens;
1808							} ## end sub get_line
1809
1810							sub find_starting_indentation_level {
1811
1812							# We need to find the indentation level of the first line of the
1813							# script being formatted. Often it will be zero for an entire file,
1814							# but if we are formatting a local block of code (within an editor for
1815							# example) it may not be zero. The user may specify this with the
1816							# -sil=n parameter but normally doesn't so we have to guess.
1817							#
1818	649			649	0	1347	my ($self) = @_;
1819	649					1213	my $starting_level = 0;
1820
1821							# use value if given as parameter
1822	649	100				2235	if ( $self->[_know_starting_level_] ) {
		100
1823	1					2	$starting_level = $self->[_starting_level_];
1824							}
1825
1826							# if we know there is a hash_bang line, the level must be zero
1827							elsif ($rOpts_look_for_hash_bang) {
1828	1					2	$self->[_know_starting_level_] = 1;
1829							}
1830
1831							# otherwise figure it out from the input file
1832							else {
1833	647					992	my $line;
1834	647					980	my $i = 0;
1835
1836							# keep looking at lines until we find a hash bang or piece of code
1837							# ( or, for now, an =pod line)
1838	647					1075	my $msg = EMPTY_STRING;
1839	647					1171	my $in_code_skipping;
1840							my $line_for_guess;
1841	647					2607	while ( defined( $line = $self->peek_ahead( $i++ ) ) ) {
1842
1843							# if first line is #! then assume starting level is zero
1844	973	100	100			4286	if ( $i == 1 && $line =~ /^\#\!/ ) {
1845	14					32	$starting_level = 0;
1846	14					31	last;
1847							}
1848
1849							# ignore lines fenced off with code-skipping comments
1850	959	100				3398	if ( $line =~ /^\s*#/ ) {
1851
1852							# use first comment for indentation guess in case of no code
1853	310	100				901	if ( !defined($line_for_guess) ) { $line_for_guess = $line }
	254					507
1854
1855	310	50				774	if ( !$in_code_skipping ) {
1856	310	50	33			3600	if ( $rOpts_code_skipping
1857							&& $line =~ /$code_skipping_pattern_begin/ )
1858							{
1859	0					0	$in_code_skipping = 1;
1860	0					0	next;
1861							}
1862							}
1863							else {
1864	0	0				0	if ( $line =~ /$code_skipping_pattern_end/ ) {
1865	0					0	$in_code_skipping = 0;
1866							}
1867	0					0	next;
1868							}
1869
1870							# Note that we could also ignore format-skipping lines here
1871							# but it isn't clear if that would be best.
1872							# See c326 for example code.
1873
1874	310					808	next;
1875							}
1876	649	50				1437	next if ($in_code_skipping);
1877
1878	649	100				2587	next if ( $line =~ /^\s*$/ ); # skip past blank lines
1879
1880							# use first line of code for indentation guess
1881	631					1031	$line_for_guess = $line;
1882	631					1072	last;
1883							} ## end while ( defined( $line = ...))
1884
1885	647	100				1455	if ( defined($line_for_guess) ) {
1886	631					2060	$starting_level =
1887							$self->guess_old_indentation_level($line_for_guess);
1888							}
1889	647					1510	$msg = "Line $i implies starting-indentation-level = $starting_level\n";
1890	647					2739	$self->write_logfile_entry("$msg");
1891							}
1892	649					1482	$self->[_starting_level_] = $starting_level;
1893	649					2577	reset_indentation_level($starting_level);
1894	649					952	return;
1895							} ## end sub find_starting_indentation_level
1896
1897							sub guess_old_indentation_level {
1898	634			634	0	1312	my ( $self, $line ) = @_;
1899
1900							# Guess the indentation level of an input line.
1901							#
1902							# For the first line of code this result will define the starting
1903							# indentation level. It will mainly be non-zero when perltidy is applied
1904							# within an editor to a local block of code.
1905							#
1906							# This is an impossible task in general because we can't know what tabs
1907							# meant for the old script and how many spaces were used for one
1908							# indentation level in the given input script. For example it may have
1909							# been previously formatted with -i=7 -et=3. But we can at least try to
1910							# make sure that perltidy guesses correctly if it is applied repeatedly to
1911							# a block of code within an editor, so that the block stays at the same
1912							# level when perltidy is applied repeatedly.
1913							#
1914							# USES GLOBAL VARIABLES: (none)
1915	634					1044	my $level = 0;
1916
1917							# find leading tabs, spaces, and any statement label
1918	634					985	my $spaces = 0;
1919	634	50				4065	if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
1920
1921							# If there are leading tabs, we use the tab scheme for this run, if
1922							# any, so that the code will remain stable when editing.
1923	634	100				2221	if ($1) { $spaces += length($1) * $tabsize }
	2					10
1924
1925	634	100				1944	if ($2) { $spaces += length($2) }
	90					236
1926
1927							# correct for outdented labels
1928	634	50	66			2523	if ( $3
			66
1929							&& $rOpts_outdent_labels
1930							&& $rOpts_continuation_indentation > 0 )
1931							{
1932	1					3	$spaces += $rOpts_continuation_indentation;
1933							}
1934							}
1935
1936	634					1893	$level = int( $spaces / $rOpts_indent_columns );
1937	634					1316	return ($level);
1938							} ## end sub guess_old_indentation_level
1939
1940							sub dump_functions {
1941
1942							# This is an unused debug routine, save for future use
1943
1944	0			0	0	0	my $fh = *STDOUT;
1945	0					0	foreach my $pkg ( keys %{$ris_user_function} ) {
	0					0
1946	0					0	$fh->print("\nnon-constant subs in package $pkg\n");
1947
1948	0					0	foreach my $sub ( keys %{ $ris_user_function->{$pkg} } ) {
	0					0
1949	0					0	my $msg = EMPTY_STRING;
1950	0	0				0	if ( $ris_block_list_function->{$pkg}->{$sub} ) {
1951	0					0	$msg = 'block_list';
1952							}
1953
1954	0	0				0	if ( $ris_block_function->{$pkg}->{$sub} ) {
1955	0					0	$msg = 'block';
1956							}
1957	0					0	$fh->print("$sub $msg\n");
1958							}
1959							}
1960
1961	0					0	foreach my $pkg ( keys %{$ris_constant} ) {
	0					0
1962	0					0	$fh->print("\nconstants and constant subs in package $pkg\n");
1963
1964	0					0	foreach my $sub ( keys %{ $ris_constant->{$pkg} } ) {
	0					0
1965	0					0	$fh->print("$sub\n");
1966							}
1967							}
1968	0					0	return;
1969							} ## end sub dump_functions
1970
1971							sub prepare_for_a_new_file {
1972
1973	649			649	0	1492	my ( $self, $source_object ) = @_;
1974
1975							# copy the source object lines to an array of lines
1976	649					2765	$self->make_source_array($source_object);
1977
1978							# previous tokens needed to determine what to expect next
1979	649					1267	$last_nonblank_token = ';'; # the only possible starting state which
1980	649					1142	$last_nonblank_type = ';'; # will make a leading brace a code block
1981	649					1140	$last_nonblank_block_type = EMPTY_STRING;
1982
1983							# scalars for remembering statement types across multiple lines
1984	649					1035	$statement_type = EMPTY_STRING; # '' or 'use' or 'sub..' or 'case..'
1985
1986							# scalars for remembering where we are in the file
1987	649					1129	$current_package = "main";
1988	649					1045	$context = UNKNOWN_CONTEXT;
1989
1990							# hashes used to remember function information
1991	649					1126	$ris_constant = {}; # user-defined constants
1992	649					1535	$ris_user_function = {}; # user-defined functions
1993	649					1443	$ruser_function_prototype = {}; # their prototypes
1994	649					1208	$ris_block_function = {};
1995	649					1229	$ris_block_list_function = {};
1996	649					1307	$rsaw_function_definition = {};
1997	649					1200	$rsaw_use_module = {};
1998
1999							# variables used to track depths of various containers
2000							# and report nesting errors
2001	649					1264	$paren_depth = 0;
2002	649					965	$brace_depth = 0;
2003	649					928	$square_bracket_depth = 0;
2004	649					2643	$rcurrent_depth = [ (0) x scalar(@closing_brace_names) ];
2005	649					1043	$total_depth = 0;
2006	649					970	$rtotal_depth = [];
2007	649					1952	$rcurrent_sequence_number = [];
2008	649					1815	$ris_lexical_sub = {};
2009	649					1055	$next_sequence_number = SEQ_ROOT + 1;
2010
2011	649					992	$rparen_type = [];
2012	649					1452	$rparen_semicolon_count = [];
2013	649					1281	$rparen_vars = [];
2014	649					2097	$rbrace_type = [];
2015	649					1649	$rbrace_structural_type = [];
2016	649					1379	$rbrace_context = [];
2017	649					1344	$rbrace_package = [];
2018	649					1347	$rsquare_bracket_type = [];
2019	649					1395	$rsquare_bracket_structural_type = [];
2020	649					1326	$rdepth_array = [];
2021	649					3491	$rnested_ternary_flag = [];
2022	649					1138	$rnested_statement_type = [];
2023	649					3745	$rstarting_line_of_current_depth = [];
2024
2025	649					3679	$rparen_type->[$paren_depth] = EMPTY_STRING;
2026	649					1418	$rparen_semicolon_count->[$paren_depth] = 0;
2027	649					1352	$rparen_vars->[$paren_depth] = [];
2028	649					1371	$rbrace_type->[$brace_depth] = ';'; # identify opening brace as code block
2029	649					1274	$rbrace_structural_type->[$brace_depth] = EMPTY_STRING;
2030	649					1268	$rbrace_context->[$brace_depth] = UNKNOWN_CONTEXT;
2031	649					1369	$rbrace_package->[$paren_depth] = $current_package;
2032	649					1328	$rsquare_bracket_type->[$square_bracket_depth] = EMPTY_STRING;
2033	649					1194	$rsquare_bracket_structural_type->[$square_bracket_depth] = EMPTY_STRING;
2034
2035	649					2252	initialize_tokenizer_state();
2036	649					1008	return;
2037							} ## end sub prepare_for_a_new_file
2038
2039							{ ## closure for sub tokenize_this_line
2040
2041	44			44		385	use constant BRACE => 0;
	44					77
	44					3055
2042	44			44		256	use constant SQUARE_BRACKET => 1;
	44					82
	44					1939
2043	44			44		216	use constant PAREN => 2;
	44					95
	44					1569
2044	44			44		195	use constant QUESTION_COLON => 3;
	44					78
	44					70916
2045
2046							# TV1: scalars for processing one LINE.
2047							# Re-initialized on each entry to sub tokenize_this_line.
2048							my (
2049							$block_type, $container_type, $expecting,
2050							$i, $i_tok, $input_line,
2051							$input_line_number, $last_nonblank_i, $max_token_index,
2052							$next_tok, $next_type, $peeked_ahead,
2053							$prototype, $rhere_target_list, $rtoken_map,
2054							$rtoken_type, $rtokens, $tok,
2055							$type, $type_sequence, $indent_flag,
2056							);
2057
2058							# TV2: refs to ARRAYS for processing one LINE
2059							# Re-initialized on each call.
2060							my $routput_token_list = []; # stack of output token indexes
2061							my $routput_token_type = []; # token types
2062							my $routput_block_type = []; # types of code block
2063							my $routput_type_sequence = []; # nesting sequential number
2064							my $routput_indent_flag = []; #
2065
2066							# TV3: SCALARS for quote variables. These are initialized with a
2067							# subroutine call and continually updated as lines are processed.
2068							my (
2069							$in_quote, $quote_type,
2070							$quote_character, $quote_pos,
2071							$quote_depth, $quoted_string_1,
2072							$quoted_string_2, $allowed_quote_modifiers,
2073							$quote_starting_tok, $quote_here_target_2,
2074							);
2075
2076							# TV4: SCALARS for multi-line identifiers and
2077							# statements. These are initialized with a subroutine call
2078							# and continually updated as lines are processed.
2079							my ( $id_scan_state, $identifier, $want_paren );
2080
2081							# TV5: SCALARS for tracking indentation level.
2082							# Initialized once and continually updated as lines are
2083							# processed.
2084							my (
2085							$nesting_token_string, $nesting_block_string,
2086							$nesting_block_flag, $level_in_tokenizer,
2087							);
2088
2089							# TV6: SCALARS for remembering several previous
2090							# tokens. Initialized once and continually updated as
2091							# lines are processed.
2092							my (
2093							$last_nonblank_container_type, $last_nonblank_type_sequence,
2094							$last_last_nonblank_token, $last_last_nonblank_type,
2095							$last_nonblank_prototype,
2096							);
2097
2098							# ----------------------------------------------------------------
2099							# beginning of tokenizer variable access and manipulation routines
2100							# ----------------------------------------------------------------
2101
2102							sub initialize_tokenizer_state {
2103
2104							# GV1: initialized once
2105							# TV1: initialized on each call
2106							# TV2: initialized on each call
2107							# TV3:
2108	649			649	0	1066	$in_quote = 0;
2109	649					1192	$quote_type = 'Q';
2110	649					1024	$quote_character = EMPTY_STRING;
2111	649					964	$quote_pos = 0;
2112	649					980	$quote_depth = 0;
2113	649					982	$quoted_string_1 = EMPTY_STRING;
2114	649					960	$quoted_string_2 = EMPTY_STRING;
2115	649					986	$allowed_quote_modifiers = EMPTY_STRING;
2116	649					1033	$quote_starting_tok = EMPTY_STRING;
2117	649					1048	$quote_here_target_2 = undef;
2118
2119							# TV4:
2120	649					969	$id_scan_state = EMPTY_STRING;
2121	649					1080	$identifier = EMPTY_STRING;
2122	649					976	$want_paren = EMPTY_STRING;
2123
2124							# TV5:
2125	649					991	$nesting_token_string = EMPTY_STRING;
2126	649					1012	$nesting_block_string = '1'; # initially in a block
2127	649					1010	$nesting_block_flag = 1;
2128	649					997	$level_in_tokenizer = 0;
2129
2130							# TV6:
2131	649					1021	$last_nonblank_container_type = EMPTY_STRING;
2132	649					963	$last_nonblank_type_sequence = EMPTY_STRING;
2133	649					983	$last_last_nonblank_token = ';';
2134	649					981	$last_last_nonblank_type = ';';
2135	649					1011	$last_nonblank_prototype = EMPTY_STRING;
2136	649					946	return;
2137							} ## end sub initialize_tokenizer_state
2138
2139							sub save_tokenizer_state {
2140
2141							# Global variables:
2142	0			0	0	0	my $rGV1 = [
2143							$brace_depth,
2144							$context,
2145							$current_package,
2146							$last_nonblank_block_type,
2147							$last_nonblank_token,
2148							$last_nonblank_type,
2149							$next_sequence_number,
2150							$paren_depth,
2151							$rbrace_context,
2152							$rbrace_package,
2153							$rbrace_structural_type,
2154							$rbrace_type,
2155							$rcurrent_depth,
2156							$rcurrent_sequence_number,
2157							$ris_lexical_sub,
2158							$rdepth_array,
2159							$ris_block_function,
2160							$ris_block_list_function,
2161							$ris_constant,
2162							$ris_user_function,
2163							$rnested_statement_type,
2164							$rnested_ternary_flag,
2165							$rparen_semicolon_count,
2166							$rparen_vars,
2167							$rparen_type,
2168							$rsaw_function_definition,
2169							$rsaw_use_module,
2170							$rsquare_bracket_structural_type,
2171							$rsquare_bracket_type,
2172							$rstarting_line_of_current_depth,
2173							$rtotal_depth,
2174							$ruser_function_prototype,
2175							$square_bracket_depth,
2176							$statement_type,
2177							$total_depth,
2178
2179							];
2180
2181							# Tokenizer closure variables:
2182	0					0	my $rTV1 = [
2183							$block_type, $container_type, $expecting,
2184							$i, $i_tok, $input_line,
2185							$input_line_number, $last_nonblank_i, $max_token_index,
2186							$next_tok, $next_type, $peeked_ahead,
2187							$prototype, $rhere_target_list, $rtoken_map,
2188							$rtoken_type, $rtokens, $tok,
2189							$type, $type_sequence, $indent_flag,
2190							];
2191
2192	0					0	my $rTV2 = [
2193							$routput_token_list, $routput_token_type,
2194							$routput_block_type, $routput_type_sequence,
2195							$routput_indent_flag,
2196							];
2197
2198	0					0	my $rTV3 = [
2199							$in_quote, $quote_type,
2200							$quote_character, $quote_pos,
2201							$quote_depth, $quoted_string_1,
2202							$quoted_string_2, $allowed_quote_modifiers,
2203							$quote_starting_tok, $quote_here_target_2,
2204							];
2205
2206	0					0	my $rTV4 = [ $id_scan_state, $identifier, $want_paren ];
2207
2208	0					0	my $rTV5 = [
2209							$nesting_token_string, $nesting_block_string,
2210							$nesting_block_flag, $level_in_tokenizer,
2211							];
2212
2213	0					0	my $rTV6 = [
2214							$last_nonblank_container_type, $last_nonblank_type_sequence,
2215							$last_last_nonblank_token, $last_last_nonblank_type,
2216							$last_nonblank_prototype,
2217							];
2218	0					0	return [ $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
2219							} ## end sub save_tokenizer_state
2220
2221							sub restore_tokenizer_state {
2222	0			0	0	0	my ($rstate) = @_;
2223	0					0	my ( $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
	0					0
2224
2225							(
2226							$brace_depth,
2227							$context,
2228							$current_package,
2229							$last_nonblank_block_type,
2230							$last_nonblank_token,
2231							$last_nonblank_type,
2232							$next_sequence_number,
2233							$paren_depth,
2234							$rbrace_context,
2235							$rbrace_package,
2236							$rbrace_structural_type,
2237							$rbrace_type,
2238							$rcurrent_depth,
2239							$rcurrent_sequence_number,
2240							$ris_lexical_sub,
2241							$rdepth_array,
2242							$ris_block_function,
2243							$ris_block_list_function,
2244							$ris_constant,
2245							$ris_user_function,
2246							$rnested_statement_type,
2247							$rnested_ternary_flag,
2248							$rparen_semicolon_count,
2249							$rparen_vars,
2250							$rparen_type,
2251							$rsaw_function_definition,
2252							$rsaw_use_module,
2253							$rsquare_bracket_structural_type,
2254							$rsquare_bracket_type,
2255							$rstarting_line_of_current_depth,
2256							$rtotal_depth,
2257							$ruser_function_prototype,
2258							$square_bracket_depth,
2259							$statement_type,
2260							$total_depth,
2261
2262	0					0	) = @{$rGV1};
	0					0
2263
2264							(
2265							$block_type, $container_type, $expecting,
2266							$i, $i_tok, $input_line,
2267							$input_line_number, $last_nonblank_i, $max_token_index,
2268							$next_tok, $next_type, $peeked_ahead,
2269							$prototype, $rhere_target_list, $rtoken_map,
2270							$rtoken_type, $rtokens, $tok,
2271							$type, $type_sequence, $indent_flag,
2272	0					0	) = @{$rTV1};
	0					0
2273
2274							(
2275							$routput_token_list, $routput_token_type,
2276							$routput_block_type, $routput_type_sequence,
2277							$routput_indent_flag,
2278	0					0	) = @{$rTV2};
	0					0
2279
2280							(
2281							$in_quote, $quote_type,
2282							$quote_character, $quote_pos,
2283							$quote_depth, $quoted_string_1,
2284							$quoted_string_2, $allowed_quote_modifiers,
2285							$quote_starting_tok, $quote_here_target_2,
2286	0					0	) = @{$rTV3};
	0					0
2287
2288	0					0	( $id_scan_state, $identifier, $want_paren ) = @{$rTV4};
	0					0
2289
2290							(
2291							$nesting_token_string, $nesting_block_string,
2292							$nesting_block_flag, $level_in_tokenizer,
2293	0					0	) = @{$rTV5};
	0					0
2294
2295							(
2296							$last_nonblank_container_type, $last_nonblank_type_sequence,
2297							$last_last_nonblank_token, $last_last_nonblank_type,
2298							$last_nonblank_prototype,
2299	0					0	) = @{$rTV6};
	0					0
2300	0					0	return;
2301							} ## end sub restore_tokenizer_state
2302
2303							sub split_pretoken {
2304
2305	8			8	0	17	my ( $self, $numc ) = @_;
2306
2307							# This provides a way to work around the limitations of the
2308							# pre-tokenization scheme upon which perltidy is based. It is rarely
2309							# needed.
2310
2311							# Split the leading $numc characters from the current token (at
2312							# index=$i) which is pre-type 'w' and insert the remainder back into
2313							# the pretoken stream with appropriate settings. Since we are
2314							# splitting a pre-type 'w', there are three cases, depending on if the
2315							# remainder starts with a digit:
2316							# Case 1: remainder is type 'd', all digits
2317							# Case 2: remainder is type 'd' and type 'w': digits & other characters
2318							# Case 3: remainder is type 'w'
2319
2320							# Examples, for $numc=1:
2321							# $tok => $tok_0 $tok_1 $tok_2
2322							# 'x10' => 'x' '10' # case 1
2323							# 'x10if' => 'x' '10' 'if' # case 2
2324							# '0ne => 'O' 'ne' # case 3
2325
2326							# where:
2327							# $tok_1 is a possible string of digits (pre-type 'd')
2328							# $tok_2 is a possible word (pre-type 'w')
2329
2330							# return 1 if successful
2331							# return undef if error (shouldn't happen)
2332
2333							# Calling routine should update '$type' and '$tok' if successful.
2334
2335	8					15	my $pretoken = $rtokens->[$i];
2336	8	50	33			79	if ( $pretoken
			33
2337							&& length($pretoken) > $numc
2338							&& substr( $pretoken, $numc ) =~ /^(\d)(.)$/ )
2339							{
2340
2341							# Split $tok into up to 3 tokens:
2342	8					18	my $tok_0 = substr( $pretoken, 0, $numc );
2343	8	50				31	my $tok_1 = defined($1) ? $1 : EMPTY_STRING;
2344	8	50				22	my $tok_2 = defined($2) ? $2 : EMPTY_STRING;
2345
2346	8					16	my $len_0 = length($tok_0);
2347	8					11	my $len_1 = length($tok_1);
2348	8					13	my $len_2 = length($tok_2);
2349
2350							##my $pre_type_0 = 'w';
2351	8					13	my $pre_type_1 = 'd';
2352	8					12	my $pre_type_2 = 'w';
2353
2354	8					15	my $pos_0 = $rtoken_map->[$i];
2355	8					11	my $pos_1 = $pos_0 + $len_0;
2356	8					12	my $pos_2 = $pos_1 + $len_1;
2357
2358	8					13	my $isplice = $i + 1;
2359
2360							# Splice in any digits
2361	8	100				17	if ($len_1) {
2362	5					8	splice @{$rtoken_map}, $isplice, 0, $pos_1;
	5					16
2363	5					10	splice @{$rtokens}, $isplice, 0, $tok_1;
	5					13
2364	5					9	splice @{$rtoken_type}, $isplice, 0, $pre_type_1;
	5					10
2365	5					8	$max_token_index++;
2366	5					8	$isplice++;
2367							}
2368
2369							# Splice in any trailing word
2370	8	100				22	if ($len_2) {
2371	4					5	splice @{$rtoken_map}, $isplice, 0, $pos_2;
	4					9
2372	4					7	splice @{$rtokens}, $isplice, 0, $tok_2;
	4					7
2373	4					5	splice @{$rtoken_type}, $isplice, 0, $pre_type_2;
	4					6
2374	4					6	$max_token_index++;
2375							}
2376
2377	8					16	$rtokens->[$i] = $tok_0;
2378	8					27	return 1;
2379							}
2380
2381							# Shouldn't get here - bad call parameters
2382	0					0	if (DEVEL_MODE) {
2383							Fault(<<EOM);
2384							While working near line number $input_line_number, bad arg '$tok' passed to sub split_pretoken()
2385							EOM
2386							}
2387	0					0	return;
2388							} ## end sub split_pretoken
2389
2390							sub get_indentation_level {
2391	649			649	0	1429	return $level_in_tokenizer;
2392							}
2393
2394							sub reset_indentation_level {
2395	649			649	0	1204	$level_in_tokenizer = shift;
2396	649					1014	return;
2397							}
2398
2399							sub peeked_ahead {
2400	280			280	0	477	( ( my $flag ) ) = @_;
2401
2402							# get or set the closure flag '$peeked_ahead':
2403							# - set $peeked_ahead to $flag if given, then
2404							# - return current value
2405	280	100				556	$peeked_ahead = defined($flag) ? $flag : $peeked_ahead;
2406	280					886	return $peeked_ahead;
2407							} ## end sub peeked_ahead
2408
2409							# ------------------------------------------------------------
2410							# end of tokenizer variable access and manipulation routines
2411							# ------------------------------------------------------------
2412
2413							#------------------------------
2414							# beginning of tokenizer hashes
2415							#------------------------------
2416
2417							my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
2418
2419							my @q;
2420
2421							# 'L' is token for opening { at hash key
2422							my %is_opening_type;
2423							@q = qw< L { ( [ >;
2424							$is_opening_type{$_} = 1 for @q;
2425
2426							my %is_opening_or_ternary_type;
2427							push @q, '?';
2428							$is_opening_or_ternary_type{$_} = 1 for @q;
2429
2430							# 'R' is token for closing } at hash key
2431							my %is_closing_type;
2432							@q = qw< R } ) ] >;
2433							$is_closing_type{$_} = 1 for @q;
2434
2435							my %is_closing_or_ternary_type;
2436							push @q, ':';
2437							$is_closing_or_ternary_type{$_} = 1 for @q;
2438
2439							my %is_redo_last_next_goto;
2440							@q = qw( redo last next goto );
2441							$is_redo_last_next_goto{$_} = 1 for @q;
2442
2443							my %is_use_require;
2444							@q = qw( use require );
2445							$is_use_require{$_} = 1 for @q;
2446
2447							# This hash holds the array index in $self for these keywords:
2448							# Fix for issue c035: removed 'format' from this hash
2449							my %is_END_DATA = (
2450							'__END__' => _in_end_,
2451							'__DATA__' => _in_data_,
2452							);
2453
2454							# table showing how many quoted things to look for after quote operator..
2455							# s, y, tr have 2 (pattern and replacement)
2456							# others have 1 (pattern only)
2457							my %quote_items = (
2458							's' => 2,
2459							'y' => 2,
2460							'tr' => 2,
2461							'm' => 1,
2462							'qr' => 1,
2463							'q' => 1,
2464							'qq' => 1,
2465							'qw' => 1,
2466							'qx' => 1,
2467							);
2468
2469							my %is_for_foreach;
2470							@q = qw( for foreach );
2471							$is_for_foreach{$_} = 1 for @q;
2472
2473							# These keywords may introduce blocks after parenthesized expressions,
2474							# in the form:
2475							# keyword ( .... ) { BLOCK }
2476							# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
2477							my %is_blocktype_with_paren;
2478							@q =
2479							qw(if elsif unless while until for foreach switch case given when catch);
2480							$is_blocktype_with_paren{$_} = 1 for @q;
2481
2482							my %is_case_default;
2483							@q = qw( case default );
2484							$is_case_default{$_} = 1 for @q;
2485
2486							#------------------------
2487							# end of tokenizer hashes
2488							#------------------------
2489
2490							# ------------------------------------------------------------
2491							# beginning of various scanner interface routines
2492							# ------------------------------------------------------------
2493							sub scan_replacement_text {
2494
2495							# check for here-docs in replacement text invoked by
2496							# a substitution operator with executable modifier 'e'.
2497							#
2498							# given:
2499							# $replacement_text
2500							# return:
2501							# $rht = reference to any here-doc targets
2502	0			0	0	0	my ( $self, $replacement_text ) = @_;
2503
2504							# quick check
2505	0	0				0	return if ( $replacement_text !~ /<</ );
2506
2507	0					0	$self->write_logfile_entry(
2508							"scanning replacement text for here-doc targets\n");
2509
2510							# save the logger object for error messages
2511	0					0	my $logger_object = $self->[_logger_object_];
2512
2513							# save all lexical variables
2514	0					0	my $rstate = save_tokenizer_state();
2515	0					0	_decrement_count(); # avoid error check for multiple tokenizers
2516
2517							# make a new tokenizer
2518	0					0	my $tokenizer = Perl::Tidy::Tokenizer->new(
2519							source_object => \$replacement_text,
2520							logger_object => $logger_object,
2521							starting_line_number => $input_line_number,
2522							);
2523
2524							# scan the replacement text
2525	0					0	while ( $tokenizer->get_line() ) { }
2526
2527							# remove any here doc targets
2528	0					0	my $rht = undef;
2529	0	0				0	if ( $tokenizer->[_in_here_doc_] ) {
2530	0					0	$rht = [];
2531	0					0	push @{$rht},
	0					0
2532							[
2533							$tokenizer->[_here_doc_target_],
2534							$tokenizer->[_here_quote_character_],
2535							];
2536	0	0				0	if ( $tokenizer->[_rhere_target_list_] ) {
2537	0					0	push @{$rht}, @{ $tokenizer->[_rhere_target_list_] };
	0					0
	0					0
2538	0					0	$tokenizer->[_rhere_target_list_] = undef;
2539							}
2540	0					0	$tokenizer->[_in_here_doc_] = undef;
2541							}
2542
2543							# now its safe to report errors
2544	0					0	my $rtokenization_info_uu = $tokenizer->report_tokenization_errors();
2545
2546							# TODO: Could propagate a severe error up
2547
2548							# restore all tokenizer lexical variables
2549	0					0	restore_tokenizer_state($rstate);
2550
2551							# return the here doc targets
2552	0					0	return $rht;
2553							} ## end sub scan_replacement_text
2554
2555							sub scan_bare_identifier {
2556	1862			1862	0	2568	my $self = shift;
2557
2558							# Scan a token starting with an alphanumeric variable or package
2559							# separator, :: or '.
2560
2561	1862					5653	( $i, $tok, $type, $prototype ) = $self->scan_bare_identifier_do(
2562
2563							$input_line,
2564							$i,
2565							$tok,
2566							$type,
2567							$prototype,
2568							$rtoken_map,
2569							$max_token_index,
2570							);
2571	1862					3623	return;
2572							} ## end sub scan_bare_identifier
2573
2574							sub scan_identifier {
2575
2576							# Scan for an identifier following a sigil or -> or other
2577							# identifier prefix, such as '::'
2578
2579	551			551	0	782	my $self = shift;
2580
2581							(
2582
2583	551					2096	$i,
2584							$tok,
2585							$type,
2586							$id_scan_state,
2587							$identifier,
2588							my $split_pretoken_flag,
2589
2590							) = $self->scan_complex_identifier(
2591
2592							$i,
2593							$id_scan_state,
2594							$identifier,
2595							$rtokens,
2596							$max_token_index,
2597							$expecting,
2598							$rparen_type->[$paren_depth],
2599							);
2600
2601							# Check for signal to fix a special variable adjacent to a keyword,
2602							# such as '$^One$0'.
2603	551	100				1330	if ($split_pretoken_flag) {
2604
2605							# Try to fix it by splitting the pretoken
2606	3	50	33			17	if ( $i > 0
			33
2607							&& $rtokens->[ $i - 1 ] eq '^'
2608							&& $self->split_pretoken(1) )
2609							{
2610	3					4	$identifier = substr( $identifier, 0, 3 );
2611	3					4	$tok = $identifier;
2612							}
2613							else {
2614
2615							# This shouldn't happen ...
2616	0					0	my $var = substr( $tok, 0, 3 );
2617	0					0	my $excess = substr( $tok, 3 );
2618	0					0	$self->interrupt_logfile();
2619	0					0	$self->warning(<<EOM);
2620							$input_line_number: Trouble parsing at characters '$excess' after special variable '$var'.
2621							A space may be needed after '$var'.
2622							EOM
2623	0					0	$self->resume_logfile();
2624							}
2625							}
2626	551					846	return;
2627							} ## end sub scan_identifier
2628
2629	44			44		310	use constant VERIFY_FASTSCAN => 0;
	44					102
	44					4439
2630							my %fast_scan_context;
2631
2632							BEGIN {
2633	44			44		55109	%fast_scan_context = (
2634							'$' => SCALAR_CONTEXT,
2635							'*' => SCALAR_CONTEXT,
2636							'@' => LIST_CONTEXT,
2637							'%' => LIST_CONTEXT,
2638							'&' => UNKNOWN_CONTEXT,
2639							);
2640							} ## end BEGIN
2641
2642							sub scan_simple_identifier {
2643
2644	5685			5685	0	7019	my $self = shift;
2645
2646							# This is a wrapper for sub scan_identifier. It does a fast preliminary
2647							# scan for certain common identifiers:
2648							# '$var', '@var', %var, *var, &var, '@{...}', '%{...}'
2649							# If it does not find one of these, or this is a restart, it calls the
2650							# original scanner directly.
2651
2652							# This gives the same results as the full scanner in about 1/4 the
2653							# total runtime for a typical input stream.
2654
2655							# Notation:
2656							# $var * 2
2657							# ^^ ^
2658							# \|\| \|
2659							# \|\| ---- $i_next [= next nonblank pretoken ]
2660							# \|----$i_plus_1 [= a bareword ]
2661							# ---$i_begin [= a sigil]
2662
2663	5685					6790	my $i_begin = $i;
2664	5685					6939	my $tok_begin = $tok;
2665	5685					7237	my $i_plus_1 = $i + 1;
2666	5685					6494	my $fast_scan_type;
2667
2668							#-------------------------------------------------------
2669							# Do full scan for anything following a pointer, such as
2670							# $cref->&*; # a postderef
2671							#-------------------------------------------------------
2672	5685	100	66			25465	if ( $last_nonblank_token eq '->' ) {
		100	66
		50	33
			0
			33
2673
2674							}
2675
2676							#------------------------------
2677							# quick scan with leading sigil
2678							#------------------------------
2679							elsif ( !$id_scan_state
2680							&& $i_plus_1 <= $max_token_index
2681							&& $fast_scan_context{$tok} )
2682							{
2683	5571					7818	$context = $fast_scan_context{$tok};
2684
2685							# look for $var, @var, ...
2686	5571	100	100			10583	if ( $rtoken_type->[$i_plus_1] eq 'w' ) {
		100	66
2687	5214					6880	my $pretype_next = EMPTY_STRING;
2688	5214	100				8842	if ( $i_plus_1 < $max_token_index ) {
2689	5092					6413	my $i_next = $i_plus_1 + 1;
2690	5092	100	100			12986	if ( $rtoken_type->[$i_next] eq 'b'
2691							&& $i_next < $max_token_index )
2692							{
2693	2045					2735	$i_next += 1;
2694							}
2695	5092					7232	$pretype_next = $rtoken_type->[$i_next];
2696							}
2697	5214	100	100			14391	if ( $pretype_next ne ':' && $pretype_next ne "'" ) {
2698
2699							# Found type 'i' like '$var', '@var', or '%var'
2700	5098					8402	$identifier = $tok . $rtokens->[$i_plus_1];
2701	5098					5908	$tok = $identifier;
2702	5098					6028	$type = 'i';
2703	5098					5800	$i = $i_plus_1;
2704	5098					6821	$fast_scan_type = $type;
2705							}
2706							}
2707
2708							# Look for @{ or %{ .
2709							# But we must let the full scanner handle things ${ because it may
2710							# keep going to get a complete identifier like '${#}' .
2711							elsif (
2712							$rtoken_type->[$i_plus_1] eq '{'
2713							&& ( $tok_begin eq '@'
2714							\|\| $tok_begin eq '%' )
2715							)
2716							{
2717
2718	43					79	$identifier = $tok;
2719	43					76	$type = 't';
2720	43					97	$fast_scan_type = $type;
2721							}
2722							else {
2723							## out of tricks
2724							}
2725							}
2726
2727							#---------------------------
2728							# Quick scan with leading ->
2729							# Look for ->[ and ->{
2730							#---------------------------
2731							elsif (
2732							$tok eq '->'
2733							&& $i < $max_token_index
2734							&& ( $rtokens->[$i_plus_1] eq '{'
2735							\|\| $rtokens->[$i_plus_1] eq '[' )
2736							)
2737							{
2738	0					0	$type = $tok;
2739	0					0	$fast_scan_type = $type;
2740	0					0	$identifier = $tok;
2741	0					0	$context = UNKNOWN_CONTEXT;
2742							}
2743							else {
2744							## out of tricks
2745							}
2746
2747							#--------------------------------------
2748							# Verify correctness during development
2749							#--------------------------------------
2750	5685					6365	if ( VERIFY_FASTSCAN && $fast_scan_type ) {
2751
2752							# We will call the full method
2753							my $identifier_simple = $identifier;
2754							my $tok_simple = $tok;
2755							my $i_simple = $i;
2756							my $context_simple = $context;
2757
2758							$tok = $tok_begin;
2759							$i = $i_begin;
2760							$self->scan_identifier();
2761
2762							if ( $tok ne $tok_simple
2763							\|\| $type ne $fast_scan_type
2764							\|\| $i != $i_simple
2765							\|\| $identifier ne $identifier_simple
2766							\|\| $id_scan_state
2767							\|\| $context ne $context_simple )
2768							{
2769							print {*STDERR} <<EOM;
2770							scan_simple_identifier differs from scan_identifier:
2771							simple: i=$i_simple, tok=$tok_simple, type=$fast_scan_type, ident=$identifier_simple, context='$context_simple
2772							full: i=$i, tok=$tok, type=$type, ident=$identifier, context='$context state=$id_scan_state
2773							EOM
2774							}
2775							}
2776
2777							#-------------------------------------------------
2778							# call full scanner if fast method did not succeed
2779							#-------------------------------------------------
2780	5685	100				9389	if ( !$fast_scan_type ) {
2781	544					1416	$self->scan_identifier();
2782							}
2783	5685					8424	return;
2784							} ## end sub scan_simple_identifier
2785
2786							sub method_ok_here {
2787
2788	14			14	0	32	my ( $self, $next_nonblank_token ) = @_;
2789
2790							# Return:
2791							# false if this is definitely an invalid method declaration
2792							# true otherwise (even if not sure)
2793
2794							# We are trying to avoid problems with old uses of 'method'
2795							# when --use-feature=class is set (rt145706).
2796							# For example, this should cause a return of 'false':
2797
2798							# method paint => sub {
2799							# return;
2800							# };
2801
2802							# Assume non-method if an error would occur
2803	14	50				35	return if ( $expecting == OPERATOR );
2804
2805							# Currently marking a line-ending 'method' as a bareword (fix c532)
2806	14	50				32	return if ( $i_tok >= $max_token_index );
2807
2808							# If a '$' follows 'method'...
2809							# Check for possible Object::Pad lexical method like
2810							# 'method $var {'
2811							# TODO: maybe merge this with the code below by increasing pos by 1
2812	14	100	66			50	if ( $next_nonblank_token eq '$' && new_statement_ok() ) {
2813	2					7	return 1;
2814							}
2815
2816							# Otherwise, not a method if non-word follows ..
2817	12	100				46	if ( $next_nonblank_token !~ /^[\w\:]/ ) { return }
	4					14
2818
2819							# from do_scan_sub:
2820	8					16	my $i_beg = $i + 1;
2821	8					16	my $pos_beg = $rtoken_map->[$i_beg];
2822	8					25	pos($input_line) = $pos_beg;
2823
2824							# TEST 1: look a valid sub NAME
2825	8	50				44	if (
2826							$input_line =~ m{\G\s*
2827							((?:\w(?:'\|::))) # package - something that ends in :: or '
2828							(\w+) # NAME - required
2829							}gcx
2830							)
2831							{
2832							# For possible future use..
2833							##my $subname = $2;
2834							##my $package = $1 ? $1 : EMPTY_STRING;
2835							}
2836							else {
2837	0					0	return;
2838							}
2839
2840							# TEST 2: look for invalid characters after name, such as here:
2841							# method paint => sub {
2842							# ...
2843							# }
2844	8					15	my $next_char = EMPTY_STRING;
2845	8	100				29	if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 }
	7					17
2846	8	100	66			36	if ( !$next_char \|\| $next_char eq '#' ) {
2847	1					6	( $next_char, my $i_next_uu ) =
2848							$self->find_next_nonblank_token( $max_token_index,
2849							$rtokens, $max_token_index );
2850							}
2851
2852	8	50				18	if ( !$next_char ) {
2853
2854							# out of characters - give up
2855	0					0	return;
2856							}
2857
2858							# Possibly valid next token types:
2859							# '(' could start prototype or signature
2860							# ':' could start ATTRIBUTE
2861							# '{' cold start BLOCK
2862							# ';' or '}' could end a statement
2863	8	100				26	if ( $next_char !~ /^[\(\:\{\;\}]/ ) {
2864
2865							# This does not match use feature 'class' syntax
2866	3					10	return;
2867							}
2868
2869							# We will stop here and assume that this is valid syntax for
2870							# use feature 'class'.
2871	5					19	return 1;
2872							} ## end sub method_ok_here
2873
2874							sub class_ok_here {
2875
2876	12			12	0	19	my $self = shift;
2877
2878							# Return:
2879							# false if this is definitely an invalid class declaration
2880							# true otherwise (even if not sure)
2881
2882							# We are trying to avoid problems with old uses of 'class'
2883							# when --use-feature=class is set (rt145706). We look ahead
2884							# see if this use of 'class' is obviously inconsistent with
2885							# the syntax of use feature 'class'. This allows the default
2886							# setting --use-feature=class to work for old syntax too.
2887
2888							# Valid class declarations look like
2889							# class NAME ?ATTRS ?VERSION ?BLOCK
2890							# where ATTRS VERSION and BLOCK are optional
2891
2892							# For example, this should produce a return of 'false':
2893							#
2894							# class ExtendsBasicAttributes is BasicAttributes{
2895
2896							# TEST 1: class stmt can only go where a new statement can start
2897	12	50				37	if ( !new_statement_ok() ) { return }
	0					0
2898
2899	12					21	my $i_beg = $i + 1;
2900	12					25	my $pos_beg = $rtoken_map->[$i_beg];
2901	12					32	pos($input_line) = $pos_beg;
2902
2903							# TEST 2: look for a valid NAME
2904	12	50				70	if (
2905							$input_line =~ m{\G\s*
2906							((?:\w(?:'\|::))) # package - something that ends in :: or '
2907							(\w+) # NAME - required
2908							}gcx
2909							)
2910							{
2911							# For possible future use..
2912							##my $subname = $2;
2913							##my $package = $1 ? $1 : EMPTY_STRING;
2914							}
2915							else {
2916	0					0	return;
2917							}
2918
2919							# TEST 3: look for valid characters after NAME
2920	12					22	my $next_char = EMPTY_STRING;
2921	12	100				44	if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 }
	11					22
2922	12	100	66			57	if ( !$next_char \|\| $next_char eq '#' ) {
2923	1					4	( $next_char, my $i_next_uu ) =
2924							$self->find_next_nonblank_token( $max_token_index,
2925							$rtokens, $max_token_index );
2926							}
2927	12	50				27	if ( !$next_char ) {
2928
2929							# out of characters - give up
2930	0					0	return;
2931							}
2932
2933							# Must see one of: ATTRIBUTE, VERSION, BLOCK, or end stmt
2934
2935							# Possibly valid next token types:
2936							# ':' could start ATTRIBUTE
2937							# '\d' could start VERSION
2938							# '{' cold start BLOCK
2939							# ';' could end a statement
2940							# '}' could end statement but would be strange
2941
2942	12	100				44	if ( $next_char !~ /^[\:\d\{\;\}]/ ) {
2943
2944							# This does not match use feature 'class' syntax
2945	2					7	return;
2946							}
2947
2948							# We will stop here and assume that this is valid syntax for
2949							# use feature 'class'.
2950	10					28	return 1;
2951							} ## end sub class_ok_here
2952
2953							sub scan_id {
2954	405			405	0	623	my $self = shift;
2955
2956							# Scan for a sub or package name
2957
2958	405					1634	( $i, $tok, $type, $id_scan_state ) = $self->scan_id_do(
2959
2960							$input_line,
2961							$i, $tok,
2962							$rtokens,
2963							$rtoken_map,
2964							$id_scan_state,
2965							$max_token_index,
2966							);
2967	405					910	return;
2968							} ## end sub scan_id
2969
2970							sub scan_number {
2971	683			683	0	957	my $self = shift;
2972	683					861	my $number;
2973	683					1627	( $i, $type, $number ) =
2974							$self->scan_number_do( $input_line, $i, $rtoken_map, $type,
2975							$max_token_index );
2976	683					1393	return $number;
2977							} ## end sub scan_number
2978
2979	44			44		364	use constant VERIFY_FASTNUM => 0;
	44					77
	44					26216
2980
2981							sub scan_number_fast {
2982
2983	2900			2900	0	3538	my $self = shift;
2984
2985							# This is a wrapper for sub scan_number. It does a fast preliminary
2986							# scan for a simple integer. It calls the original scan_number if it
2987							# does not find one.
2988
2989	2900					3555	my $i_begin = $i;
2990	2900					3623	my $tok_begin = $tok;
2991	2900					3257	my $number;
2992
2993							#---------------------------------
2994							# Quick check for (signed) integer
2995							#---------------------------------
2996
2997							# This will be the string of digits:
2998	2900					3405	my $i_d = $i;
2999	2900					3459	my $tok_d = $tok;
3000	2900					4357	my $typ_d = $rtoken_type->[$i_d];
3001
3002							# check for signed integer
3003	2900					3850	my $sign = EMPTY_STRING;
3004	2900	50	66			7288	if ( $typ_d ne 'd'
			66
			33
3005							&& ( $typ_d eq '+' \|\| $typ_d eq '-' )
3006							&& $i_d < $max_token_index )
3007							{
3008	392					555	$sign = $tok_d;
3009	392					499	$i_d++;
3010	392					551	$tok_d = $rtokens->[$i_d];
3011	392					576	$typ_d = $rtoken_type->[$i_d];
3012							}
3013
3014							# Handle integers
3015	2900	100	100			17047	if (
			100
3016							$typ_d eq 'd'
3017							&& (
3018							$i_d == $max_token_index
3019							\|\| ( $i_d < $max_token_index
3020							&& $rtoken_type->[ $i_d + 1 ] ne '.'
3021							&& $rtoken_type->[ $i_d + 1 ] ne 'w' )
3022							)
3023							)
3024							{
3025							# Let the full scanner handle multi-digit integers beginning with
3026							# '0' because there could be error messages. For example, '009' is
3027							# not a valid number.
3028
3029	2284	100	100			7411	if ( $tok_d eq '0' \|\| substr( $tok_d, 0, 1 ) ne '0' ) {
3030	2227					3502	$number = $sign . $tok_d;
3031	2227					2826	$type = 'n';
3032	2227					2953	$i = $i_d;
3033							}
3034							}
3035
3036							#--------------------------------------
3037							# Verify correctness during development
3038							#--------------------------------------
3039	2900					3175	if ( VERIFY_FASTNUM && defined($number) ) {
3040
3041							# We will call the full method
3042							my $type_simple = $type;
3043							my $i_simple = $i;
3044							my $number_simple = $number;
3045
3046							$tok = $tok_begin;
3047							$i = $i_begin;
3048							$number = $self->scan_number();
3049
3050							if ( $type ne $type_simple
3051							\|\| ( $i != $i_simple && $i <= $max_token_index )
3052							\|\| $number ne $number_simple )
3053							{
3054							print {*STDERR} <<EOM;
3055							scan_number_fast differs from scan_number:
3056							simple: i=$i_simple, type=$type_simple, number=$number_simple
3057							full: i=$i, type=$type, number=$number
3058							EOM
3059							}
3060							}
3061
3062							#----------------------------------------
3063							# call full scanner if may not be integer
3064							#----------------------------------------
3065	2900	100				5086	if ( !defined($number) ) {
3066	673					1530	$number = $self->scan_number();
3067							}
3068	2900					6146	return $number;
3069							} ## end sub scan_number_fast
3070
3071							sub error_if_expecting_TERM {
3072	285			285	0	542	my ($self) = @_;
3073
3074							# Issue a warning if a binary operator is missing a term to operate on
3075
3076							# This should only be called if a term is expected here
3077	285	50				685	if ( $expecting != TERM ) { return }
	0					0
3078
3079							# Be sure a TERM is definitely required ..
3080	285	50	66			2290	if (
			33
			66
			33
3081
3082							# .. following a binary operator token type, like '='
3083							$is_binary_or_unary_operator_type{$last_nonblank_type}
3084
3085							# .. or following a binary keyword operator, like 'and'
3086							\|\| ( $last_nonblank_type eq 'k'
3087							&& $is_binary_or_unary_keyword{$last_nonblank_token} )
3088
3089							# .. or for a binary operator following something like a ';'
3090							\|\| ( $is_not_a_TERM_producer_type{$last_nonblank_type}
3091							&& $is_binary_operator_type{$tok} )
3092							)
3093							{
3094
3095							# We must exclude error checking in sub signatures which have some
3096							# unusual syntax. For example the following syntax is okay within
3097							# a signature:
3098							# sub mysub ($=,) {...}
3099							# $ = undef
3100	0					0	my $ct = $rparen_type->[$paren_depth];
3101	0	0	0			0	if ( $ct && $ct =~ /^sub\b/ ) { return }
	0					0
3102
3103							$self->report_unexpected(
3104							{
3105	0					0	found => $tok,
3106							expecting => "term",
3107							i_tok => $i_tok,
3108							last_nonblank_i => $last_nonblank_i,
3109							rpretoken_map => $rtoken_map,
3110							rpretoken_type => $rtoken_type,
3111							input_line => $input_line,
3112							}
3113							);
3114	0					0	return 1;
3115							}
3116	285					736	return;
3117							} ## end sub error_if_expecting_TERM
3118
3119							# a sub to warn if token found where operator expected
3120							sub error_if_expecting_OPERATOR {
3121
3122	809			809	0	1368	my ( $self, ($thing) ) = @_;
3123
3124							# Issue warning on error if expecting operator
3125							# Given:
3126							# $thing = the unexpected token or issue
3127							# = undef to use current pre-token
3128
3129	809	50				1588	if ( $expecting == OPERATOR ) {
3130	0	0				0	if ( !defined($thing) ) { $thing = $tok }
	0					0
3131							$self->report_unexpected(
3132							{
3133	0					0	found => $thing,
3134							expecting => "operator",
3135							i_tok => $i_tok,
3136							last_nonblank_i => $last_nonblank_i,
3137							rpretoken_map => $rtoken_map,
3138							rpretoken_type => $rtoken_type,
3139							input_line => $input_line,
3140							}
3141							);
3142	0	0				0	if ( $i_tok == 0 ) {
3143	0					0	$self->interrupt_logfile();
3144	0					0	$self->warning("Missing ';' or ',' above?\n");
3145	0					0	$self->resume_logfile();
3146							}
3147	0					0	return 1;
3148							}
3149	809					1272	return;
3150							} ## end sub error_if_expecting_OPERATOR
3151
3152							# ------------------------------------------------------------
3153							# end scanner interfaces
3154							# ------------------------------------------------------------
3155
3156							#------------------
3157							# Tokenization subs
3158							#------------------
3159							# An identifier in possible indirect object location followed by any of
3160							# these tokens: -> , ; } (plus others) is not an indirect object. Fix c257.
3161							my %Z_test_hash;
3162
3163							BEGIN {
3164	44			44		313	my @qZ = qw#
3165							-> ; } ) ]
3166							=> =~ = == !~ \|\| >= != *= .. && \|= .= -= += <= %=
3167							^= &&= \|\|= //= <=>
3168							#;
3169	44					115	push @qZ, COMMA;
3170	44					252358	$Z_test_hash{$_} = 1 for @qZ;
3171							}
3172
3173							sub do_DOLLAR_SIGN {
3174
3175	4833			4833	0	6389	my $self = shift;
3176
3177							# '$'
3178							# start looking for a scalar
3179	4833	50				8469	$self->error_if_expecting_OPERATOR("Scalar")
3180							if ( $expecting == OPERATOR );
3181	4833					13402	$self->scan_simple_identifier();
3182
3183	4833	100				8641	if ( $identifier eq '$^W' ) {
3184	1					3	$self->[_saw_perl_dash_w_] = 1;
3185							}
3186
3187							# Check for identifier in indirect object slot
3188							# (vorboard.pl, sort.t). Something like:
3189							# /^(print\|printf\|sort\|exec\|system)$/
3190	4833	100	66			30514	if (
			100
			100
			66
			66
3191							$is_indirect_object_taker{$last_nonblank_token}
3192							&& $last_nonblank_type eq 'k'
3193							\|\| ( ( $last_nonblank_token eq '(' )
3194							&& $is_indirect_object_taker{ $rparen_type->[$paren_depth] } )
3195							\|\| ( $last_nonblank_type eq 'w'
3196							\|\| $last_nonblank_type eq 'U' ) # possible object
3197							)
3198							{
3199
3200							# An identifier followed by '->' is not indirect object;
3201							# fixes b1175, b1176. Fix c257: Likewise for other tokens like
3202							# comma, semicolon, closing brace, and single space.
3203	104					384	my ( $next_nonblank_token, $i_next_uu ) =
3204							$self->find_next_noncomment_token( $i, $rtokens,
3205							$max_token_index );
3206	104	100				343	$type = 'Z' if ( !$Z_test_hash{$next_nonblank_token} );
3207							}
3208	4833					6561	return;
3209							} ## end sub do_DOLLAR_SIGN
3210
3211							sub do_LEFT_PARENTHESIS {
3212
3213	2423			2423	0	3332	my $self = shift;
3214
3215							# '('
3216	2423					3116	++$paren_depth;
3217
3218							# variable to enable check for brace after closing paren (c230)
3219	2423					3473	my $want_brace = EMPTY_STRING;
3220
3221	2423	100	66			7900	if ($want_paren) {
		100
3222	289					462	$container_type = $want_paren;
3223	289					449	$want_brace = $want_paren;
3224	289					458	$want_paren = EMPTY_STRING;
3225							}
3226							elsif ( substr( $statement_type, 0, 3 ) eq 'sub'
3227							&& $statement_type =~ /^sub\b/ )
3228							{
3229	20					38	$container_type = $statement_type;
3230							}
3231							else {
3232	2114					2871	$container_type = $last_nonblank_token;
3233
3234							# We can check for a syntax error here of unexpected '(',
3235							# but this is going to get messy...
3236	2114	100	100			7134	if (
3237							$expecting == OPERATOR
3238
3239							# Be sure this is not a method call of the form
3240							# &method(...), $method->(..), &{method}(...),
3241							# $ref[2](list) is ok & short for $ref[2]->(list)
3242							# NOTE: at present, braces in something like &{ xxx }
3243							# are not marked as a block, we might have a method call.
3244							# Added ')' to fix case c017, something like ()()()
3245							&& $last_nonblank_token !~ /^(?:[\]\}\)\&]\|\-\>)/
3246							)
3247							{
3248
3249							# ref: camel 3 p 703.
3250	3	50				14	if ( $last_last_nonblank_token eq 'do' ) {
3251	0					0	$self->complain(
3252							"do SUBROUTINE is deprecated; consider & or -> notation\n"
3253							);
3254							}
3255							else {
3256
3257							# if this is an empty list, (), then it is not an
3258							# error; for example, we might have a constant pi and
3259							# invoke it with pi() or just pi;
3260	3					15	my ( $next_nonblank_token, $i_next_uu ) =
3261							$self->find_next_nonblank_token( $i, $rtokens,
3262							$max_token_index );
3263
3264							# Patch for c029: give up error check if
3265							# a side comment follows
3266	3	50	33			20	if ( $next_nonblank_token ne ')'
3267							&& $next_nonblank_token ne '#' )
3268							{
3269	0					0	my $hint;
3270
3271	0					0	$self->error_if_expecting_OPERATOR('(');
3272
3273	0	0				0	if ( $last_nonblank_type eq 'C' ) {
		0
3274	0					0	$hint =
3275							"$last_nonblank_token has a void prototype\n";
3276							}
3277							elsif ( $last_nonblank_type eq 'i' ) {
3278	0	0	0			0	if ( $i_tok > 0
3279							&& $last_nonblank_token =~ /^\$/ )
3280							{
3281	0					0	$hint =
3282							"Do you mean '$last_nonblank_token->(' ?\n";
3283							}
3284							}
3285							else {
3286							## no hint
3287							}
3288	0	0				0	if ($hint) {
3289	0					0	$self->interrupt_logfile();
3290	0					0	$self->warning($hint);
3291	0					0	$self->resume_logfile();
3292							}
3293							} ## end if ( $next_nonblank_token...
3294							} ## end else [ if ( $last_last_nonblank_token...
3295							} ## end if ( $expecting == OPERATOR...
3296							}
3297
3298	2423					7611	( $type_sequence, $indent_flag ) =
3299							$self->increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
3300
3301							# propagate types down through nested parens
3302							# for example: the second paren in 'if ((' would be structural
3303							# since the first is.
3304
3305	2423	100				4524	if ( $last_nonblank_token eq '(' ) {
3306	61					111	$type = $last_nonblank_type;
3307							}
3308
3309							# We exclude parens as structural after a ',' because it
3310							# causes subtle problems with continuation indentation for
3311							# something like this, where the first 'or' will not get
3312							# indented.
3313							#
3314							# assert(
3315							# __LINE__,
3316							# ( not defined $check )
3317							# or ref $check
3318							# or $check eq "new"
3319							# or $check eq "old",
3320							# );
3321							#
3322							# Likewise, we exclude parens where a statement can start
3323							# because of problems with continuation indentation, like
3324							# these:
3325							#
3326							# ($firstline =~ /^#\!.*perl/)
3327							# and (print $File::Find::name, "\n")
3328							# and (return 1);
3329							#
3330							# (ref($usage_fref) =~ /CODE/)
3331							# ? &$usage_fref
3332							# : (&blast_usage, &blast_params, &blast_general_params);
3333
3334							else {
3335	2362					3226	$type = '{';
3336							}
3337
3338	2423	50				4557	if ( $last_nonblank_type eq ')' ) {
3339	0					0	$self->warning(
3340							"Syntax error? found token '$last_nonblank_type' then '('\n");
3341							}
3342
3343							# git #105: Copy container type and want-brace flag at ') (';
3344							# propagate the container type onward so that any subsequent brace gets
3345							# correctly marked. I have implemented this as a general rule, which
3346							# should be safe, but if necessary it could be restricted to certain
3347							# container statement types such as 'for'.
3348	2423	100				4780	if ( $last_nonblank_token eq ')' ) {
3349	1					2	my $rvars = $rparen_vars->[$paren_depth];
3350	1	50				3	if ( defined($rvars) ) {
3351	1					2	$container_type = $rparen_type->[$paren_depth];
3352	1					2	( my $type_lp_uu, $want_brace ) = @{$rvars};
	1					3
3353							}
3354							}
3355
3356	2423					4125	$rparen_type->[$paren_depth] = $container_type;
3357	2423					5795	$rparen_vars->[$paren_depth] = [ $type, $want_brace ];
3358	2423					4030	$rparen_semicolon_count->[$paren_depth] = 0;
3359
3360	2423					3495	return;
3361
3362							} ## end sub do_LEFT_PARENTHESIS
3363
3364							sub do_RIGHT_PARENTHESIS {
3365
3366	2423			2423	0	3216	my $self = shift;
3367
3368							# ')'
3369	2423					6700	( $type_sequence, $indent_flag ) =
3370							$self->decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
3371
3372	2423					4147	my $rvars = $rparen_vars->[$paren_depth];
3373	2423	50				4755	if ( defined($rvars) ) {
3374	2423					2911	my ( $type_lp, $want_brace_uu ) = @{$rvars};
	2423					4642
3375	2423	50	33			8328	if ( $type_lp && $type_lp eq '{' ) {
3376	2423					3702	$type = '}';
3377							}
3378							}
3379
3380	2423					3698	$container_type = $rparen_type->[$paren_depth];
3381
3382							# restore statement type as 'sub' at closing paren of a signature
3383							# so that a subsequent ':' is identified as an attribute
3384	2423	100	100			6635	if ( substr( $container_type, 0, 3 ) eq 'sub'
3385							&& $container_type =~ /^sub\b/ )
3386							{
3387	30					60	$statement_type = $container_type;
3388							}
3389
3390	2423	100				5947	if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) {
3391	79					141	my $num_sc = $rparen_semicolon_count->[$paren_depth];
3392	79	50	66			345	if ( $num_sc > 0 && $num_sc != 2 ) {
3393	0					0	$self->warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
3394							}
3395							}
3396
3397	2423	50				4373	if ( $paren_depth > 0 ) { $paren_depth-- }
	2423					3007
3398	2423					3680	return;
3399							} ## end sub do_RIGHT_PARENTHESIS
3400
3401							sub do_COMMA {
3402
3403	3688			3688	0	4602	my $self = shift;
3404
3405							# ','
3406	3688	100	33			9756	if ( $last_nonblank_type eq COMMA ) {
		50
3407	10					22	$self->complain("Repeated ','s \n");
3408							}
3409
3410							# Note that we have to check both token and type here because a
3411							# comma following a qw list can have last token='(' but type = 'q'
3412							elsif ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' ) {
3413	0					0	$self->warning("Unexpected leading ',' after a '('\n");
3414							}
3415							else {
3416							## Error check added in update c565, moved to end of $code loop.
3417							}
3418
3419							# patch for operator_expected: note if we are in the list (use.t)
3420	3688	100				6242	if ( $statement_type eq 'use' ) { $statement_type = '_use' }
	6					11
3421	3688					4531	return;
3422
3423							} ## end sub do_COMMA
3424
3425							sub do_SEMICOLON {
3426
3427	2890			2890	0	3869	my $self = shift;
3428
3429							# ';'
3430	2890					3719	$context = UNKNOWN_CONTEXT;
3431	2890					3759	$statement_type = EMPTY_STRING;
3432	2890					3744	$want_paren = EMPTY_STRING;
3433
3434	2890	100				7004	if ( $is_for_foreach{ $rparen_type->[$paren_depth] } )
3435							{ # mark ; in for loop
3436
3437							# Be careful: we do not want a semicolon such as the
3438							# following to be included:
3439							#
3440							# for (sort {strcoll($a,$b);} keys %investments) {
3441
3442	35	100	66			197	if ( $brace_depth == $rdepth_array->[PAREN]->[BRACE]->[$paren_depth]
3443							&& $square_bracket_depth ==
3444							$rdepth_array->[PAREN]->[SQUARE_BRACKET]->[$paren_depth] )
3445							{
3446
3447	34					54	$type = 'f';
3448	34					52	$rparen_semicolon_count->[$paren_depth]++;
3449							}
3450							}
3451							else {
3452							## Error check added in update c565, moved to end of $code loop.
3453							}
3454	2890					3868	return;
3455							} ## end sub do_SEMICOLON
3456
3457							sub do_QUOTATION_MARK {
3458
3459	1253			1253	0	1727	my $self = shift;
3460
3461							# '"'
3462	1253	50				2474	$self->error_if_expecting_OPERATOR("String")
3463							if ( $expecting == OPERATOR );
3464	1253					1711	$in_quote = 1;
3465	1253					1639	$type = 'Q';
3466	1253					1601	$allowed_quote_modifiers = EMPTY_STRING;
3467	1253					1556	$quote_starting_tok = $tok;
3468	1253					1644	$quote_here_target_2 = undef;
3469	1253					1666	return;
3470							} ## end sub do_QUOTATION_MARK
3471
3472							sub do_APOSTROPHE {
3473
3474	1335			1335	0	1817	my $self = shift;
3475
3476							# "'"
3477	1335	50				2555	$self->error_if_expecting_OPERATOR("String")
3478							if ( $expecting == OPERATOR );
3479	1335					1698	$in_quote = 1;
3480	1335					1725	$type = 'Q';
3481	1335					1636	$allowed_quote_modifiers = EMPTY_STRING;
3482	1335					1612	$quote_starting_tok = $tok;
3483	1335					1745	$quote_here_target_2 = undef;
3484	1335					1778	return;
3485							} ## end sub do_APOSTROPHE
3486
3487							sub do_BACKTICK {
3488
3489	0			0	0	0	my $self = shift;
3490
3491							# '`'
3492	0	0				0	$self->error_if_expecting_OPERATOR("String")
3493							if ( $expecting == OPERATOR );
3494	0					0	$in_quote = 1;
3495	0					0	$type = 'Q';
3496	0					0	$allowed_quote_modifiers = EMPTY_STRING;
3497	0					0	$quote_starting_tok = $tok;
3498	0					0	$quote_here_target_2 = undef;
3499	0					0	return;
3500							} ## end sub do_BACKTICK
3501
3502							sub do_SLASH {
3503
3504	225			225	0	351	my $self = shift;
3505
3506							# '/'
3507	225					380	my $is_pattern;
3508
3509							# a pattern cannot follow certain keywords which take optional
3510							# arguments, like 'shift' and 'pop'. See also '?'.
3511	225	50	66			1033	if (
		50
3512							$last_nonblank_type eq 'k'
3513							&& $is_keyword_rejecting_slash_as_pattern_delimiter{
3514							$last_nonblank_token}
3515							)
3516							{
3517	0					0	$is_pattern = 0;
3518							}
3519							elsif ( $expecting == UNKNOWN ) { # indeterminate, must guess..
3520							(
3521	0					0	$is_pattern,
3522							my $msg,
3523
3524							) = $self->guess_if_pattern_or_division(
3525
3526							$i,
3527							$rtokens,
3528							$rtoken_type,
3529							$rtoken_map,
3530							$max_token_index,
3531							);
3532
3533	0	0				0	if ($msg) {
3534	0					0	if ( 0 && DEBUG_GUESS_MODE ) {
3535							$self->warning("DEBUG_GUESS_MODE message:\n$msg\n");
3536							}
3537	0					0	$self->write_diagnostics("DIVIDE:$msg\n");
3538	0					0	$self->write_logfile_entry($msg);
3539							}
3540							}
3541	225					460	else { $is_pattern = ( $expecting == TERM ) }
3542
3543	225	100				508	if ($is_pattern) {
3544	88					134	$in_quote = 1;
3545	88					137	$type = 'Q';
3546	88					233	$allowed_quote_modifiers = $quote_modifiers{'m'};
3547	88					129	$quote_starting_tok = 'm';
3548	88					149	$quote_here_target_2 = undef;
3549							}
3550							else { # not a pattern; check for a /= token
3551
3552	137	100				436	if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /=
3553	4					7	$i++;
3554	4					9	$tok = '/=';
3555	4					8	$type = $tok;
3556							}
3557
3558							#DEBUG - collecting info on what tokens follow a divide
3559							# for development of guessing algorithm
3560							## if (
3561							## $self->is_possible_numerator( $i, $rtokens,
3562							## $max_token_index ) < 0
3563							## )
3564							## {
3565							## $self->write_diagnostics("DIVIDE? $input_line\n");
3566							## }
3567							}
3568	225					381	return;
3569							} ## end sub do_SLASH
3570
3571							sub do_LEFT_CURLY_BRACKET {
3572
3573	2016			2016	0	2856	my $self = shift;
3574
3575							# '{'
3576							# if we just saw a ')', we will label this block with
3577							# its type. We need to do this to allow sub
3578							# code_block_type to determine if this brace starts a
3579							# code block or anonymous hash. (The type of a paren
3580							# pair is the preceding token, such as 'if', 'else',
3581							# etc).
3582	2016					2890	$container_type = EMPTY_STRING;
3583
3584							# ATTRS: for a '{' following an attribute list, reset
3585							# things to look like we just saw a sub name
3586							# Added 'package' (can be 'class') for --use-feature=class (rt145706)
3587	2016	100	100			15044	if ( substr( $statement_type, 0, 3 ) eq 'sub' ) {
		100	66
		50	33
		100
		50
3588	36					70	$last_nonblank_token = $statement_type;
3589	36					50	$last_nonblank_type = 'S'; # c250 change
3590	36					64	$statement_type = EMPTY_STRING;
3591							}
3592							elsif ( substr( $statement_type, 0, 7 ) eq 'package' ) {
3593	10					16	$last_nonblank_token = $statement_type;
3594	10					13	$last_nonblank_type = 'P'; # c250 change
3595	10					14	$statement_type = EMPTY_STRING;
3596							}
3597
3598							# patch for SWITCH/CASE: hide these keywords from an immediately
3599							# following opening brace
3600							elsif ( ( $statement_type eq 'case' \|\| $statement_type eq 'when' )
3601							&& $statement_type eq $last_nonblank_token )
3602							{
3603	0					0	$last_nonblank_token = ";";
3604							}
3605
3606							elsif ( $last_nonblank_token eq ')' ) {
3607	295					629	$last_nonblank_token = $rparen_type->[ $paren_depth + 1 ];
3608
3609							# defensive move in case of a nesting error (pbug.t)
3610							# in which this ')' had no previous '('
3611							# this nesting error will have been caught
3612	295	50				732	if ( !defined($last_nonblank_token) ) {
3613	0					0	$last_nonblank_token = 'if';
3614							}
3615
3616							# Syntax check at '){'
3617	295	100				818	if ( $is_blocktype_with_paren{$last_nonblank_token} ) {
3618
3619	279					538	my $rvars = $rparen_vars->[ $paren_depth + 1 ];
3620	279	50				689	if ( defined($rvars) ) {
3621	279					429	my ( $type_lp_uu, $want_brace ) = @{$rvars};
	279					596
3622
3623							# OLD: Now verify that this is not a trailing form
3624							# FIX for git #124: we have to skip this check because
3625							# the 'gather' keyword of List::Gather can operate on
3626							# a full statement, so it isn't possible to be sure
3627							# this is a trailing form.
3628	279					571	if ( 0 && !$want_brace ) {
3629							$self->warning(
3630							"syntax error at ') {', unexpected '{' after closing ')' of a trailing '$last_nonblank_token'\n"
3631							);
3632							}
3633							}
3634							}
3635							else {
3636	16	50				46	if ($rOpts_extended_syntax) {
3637
3638							# we append a trailing () to mark this as an unknown
3639							# block type. This allows perltidy to format some
3640							# common extensions of perl syntax.
3641							# This is used by sub code_block_type
3642	16					63	$last_nonblank_token .= '()';
3643							}
3644							else {
3645	0					0	my $list =
3646							join( SPACE, sort keys %is_blocktype_with_paren );
3647	0					0	$self->warning(
3648							"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
3649							);
3650							}
3651							}
3652							}
3653
3654							# patch for paren-less for/foreach glitch, part 2.
3655							# see note below under 'qw'
3656							elsif ($last_nonblank_token eq 'qw'
3657							&& $is_for_foreach{$want_paren} )
3658							{
3659	0					0	$last_nonblank_token = $want_paren;
3660	0	0				0	if ( $last_last_nonblank_token eq $want_paren ) {
3661	0					0	$self->warning(
3662							"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
3663							);
3664
3665							}
3666	0					0	$want_paren = EMPTY_STRING;
3667							}
3668							else {
3669							# not special
3670							}
3671
3672							# now identify which of the three possible types of
3673							# curly braces we have: hash index container, anonymous
3674							# hash reference, or code block.
3675
3676							# Patch for Object::Pad "field $var BLOCK"
3677	2016	100	66			8111	if ( $statement_type eq 'field'
		100	66
			33
3678							&& $last_last_nonblank_token eq 'field'
3679							&& $last_nonblank_type eq 'i'
3680							&& $last_last_nonblank_type eq 'k' )
3681							{
3682	8					9	$type = '{';
3683	8					10	$block_type = $statement_type;
3684							}
3685
3686							# non-structural (hash index) curly brace pair
3687							# get marked 'L' and 'R'
3688							elsif ( is_non_structural_brace() ) {
3689	564					875	$type = 'L';
3690
3691							# patch for SWITCH/CASE:
3692							# allow paren-less identifier after 'when'
3693							# if the brace is preceded by a space
3694	564	0	33			1506	if ( $statement_type eq 'when'
			33
			0
			0
3695							&& $last_nonblank_type eq 'i'
3696							&& $last_last_nonblank_type eq 'k'
3697							&& ( $i_tok == 0 \|\| $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
3698							{
3699	0					0	$type = '{';
3700	0					0	$block_type = $statement_type;
3701							}
3702							}
3703
3704							# code and anonymous hash have the same type, '{', but are
3705							# distinguished by 'block_type',
3706							# which will be blank for an anonymous hash
3707							else {
3708	1444					4176	$block_type =
3709							$self->code_block_type( $i_tok, $rtokens, $rtoken_type,
3710							$max_token_index );
3711
3712							# Is a new lexical sub looking for its block sequence number?
3713							# This is indicated with a special '911' signal.
3714	1444	0	66			5687	if ( $block_type
			33
			33
3715							&& $ris_lexical_sub->{911}
3716							&& $last_nonblank_type eq 'S'
3717							&& substr( $block_type, 0, 3 ) eq 'sub' )
3718							{
3719	0					0	my ( $subname, $package ) = @{ $ris_lexical_sub->{911} };
	0					0
3720	0	0	0			0	if ( $block_type =~ /^sub $subname/
3721							&& $is_my_our_state{$last_last_nonblank_token} )
3722							{
3723	0					0	$ris_lexical_sub->{$subname}->{$package} =
3724							$next_sequence_number;
3725							}
3726
3727							# Turn the signal off, even if we did not find the block being
3728							# sought - it may not exist if the sub statement was a simple
3729							# declaration without a block definition.
3730	0					0	$ris_lexical_sub->{911} = undef;
3731							}
3732
3733							# patch to promote bareword type to function taking block
3734	1444	100	100			5065	if ( $block_type
			66
3735							&& $last_nonblank_type eq 'w'
3736							&& $last_nonblank_i >= 0 )
3737							{
3738	36	50				105	if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
3739							$routput_token_type->[$last_nonblank_i] =
3740	36	100				131	$is_grep_alias{$block_type} ? 'k' : 'G';
3741							}
3742							}
3743
3744							# patch for SWITCH/CASE: if we find a stray opening block brace
3745							# where we might accept a 'case' or 'when' block, then take it
3746	1444	100	100			4736	if ( $statement_type eq 'case'
3747							\|\| $statement_type eq 'when' )
3748							{
3749	48	100	66			201	if ( !$block_type \|\| $block_type eq '}' ) {
3750	4					6	$block_type = $statement_type;
3751							}
3752							}
3753							}
3754
3755	2016					3852	$rbrace_type->[ ++$brace_depth ] = $block_type;
3756
3757							# Patch for CLASS BLOCK definitions: do not update the package for the
3758							# current depth if this is a BLOCK type definition.
3759							# TODO: should make 'class' separate from 'package' and only do
3760							# this for 'class'
3761	2016	100				5519	$rbrace_package->[$brace_depth] = $current_package
3762							if ( substr( $block_type, 0, 8 ) ne 'package ' );
3763
3764	2016					3342	$rbrace_structural_type->[$brace_depth] = $type;
3765	2016					3251	$rbrace_context->[$brace_depth] = $context;
3766	2016					5086	( $type_sequence, $indent_flag ) =
3767							$self->increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
3768
3769	2016					3212	return;
3770							} ## end sub do_LEFT_CURLY_BRACKET
3771
3772							sub do_RIGHT_CURLY_BRACKET {
3773
3774	2016			2016	0	2900	my $self = shift;
3775
3776							# '}'
3777	2016					3388	$block_type = $rbrace_type->[$brace_depth];
3778	2016	100				4072	if ($block_type) { $statement_type = EMPTY_STRING }
	1115					1635
3779	2016	100				3903	if ( defined( $rbrace_package->[$brace_depth] ) ) {
3780	2006					3290	$current_package = $rbrace_package->[$brace_depth];
3781							}
3782
3783							# can happen on brace error (caught elsewhere)
3784							else {
3785							}
3786	2016					5579	( $type_sequence, $indent_flag ) =
3787							$self->decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
3788
3789	2016	100				4764	if ( $rbrace_structural_type->[$brace_depth] eq 'L' ) {
3790	564					831	$type = 'R';
3791							}
3792
3793							# propagate type information for 'do' and 'eval' blocks, and also
3794							# for smartmatch operator. This is necessary to enable us to know
3795							# if an operator or term is expected next.
3796	2016	100				4724	if ( $is_block_operator{$block_type} ) {
3797	85					138	$tok = $block_type;
3798							}
3799
3800							# pop non-indenting brace stack if sequence number matches
3801	2016	100	100			2451	if ( @{ $self->[_rnon_indenting_brace_stack_] }
	2016					5180
3802							&& $self->[_rnon_indenting_brace_stack_]->[-1] eq $type_sequence )
3803							{
3804	6					14	pop @{ $self->[_rnon_indenting_brace_stack_] };
	6					14
3805							}
3806
3807	2016					3220	$context = $rbrace_context->[$brace_depth];
3808	2016	50				3885	if ( $brace_depth > 0 ) { $brace_depth--; }
	2016					2693
3809	2016					2816	return;
3810							} ## end sub do_RIGHT_CURLY_BRACKET
3811
3812							sub do_AMPERSAND {
3813
3814	126			126	0	190	my $self = shift;
3815
3816							# '&' = maybe sub call? start looking
3817							# We have to check for sub call unless we are sure we
3818							# are expecting an operator. This example from s2p
3819							# got mistaken as a q operator in an early version:
3820							# print BODY &q(<<'EOT');
3821	126	100				306	if ( $expecting != OPERATOR ) {
3822
3823							# But only look for a sub call if we are expecting a term or
3824							# if there is no existing space after the &.
3825							# For example we probably don't want & as sub call here:
3826							# Fcntl::S_IRUSR & $mode;
3827	106	100	66			450	if ( $expecting == TERM \|\| $next_type ne 'b' ) {
3828	103					271	$self->scan_simple_identifier();
3829							}
3830							}
3831							else {
3832							}
3833	126					195	return;
3834							} ## end sub do_AMPERSAND
3835
3836							sub do_LESS_THAN_SIGN {
3837
3838	33			33	0	69	my $self = shift;
3839
3840							# '<' - angle operator or less than?
3841	33	100				109	if ( $expecting != OPERATOR ) {
3842	8					39	( $i, $type ) = $self->find_angle_operator_termination(
3843
3844							$input_line,
3845							$i,
3846							$rtoken_map,
3847							$expecting,
3848							$max_token_index,
3849							);
3850	8					15	if ( DEBUG_GUESS_MODE && $expecting == UNKNOWN ) {
3851							my $msg = "guessing that '<' is ";
3852							$msg .=
3853							$type eq 'Q' ? "an angle operator" : "a less than symbol";
3854							if ( $type eq 'Q' ) {
3855							$self->warning("DEBUG_GUESS_MODE message:\n$msg\n");
3856							}
3857							}
3858							}
3859							else {
3860							}
3861	33					64	return;
3862							} ## end sub do_LESS_THAN_SIGN
3863
3864							sub do_QUESTION_MARK {
3865
3866	193			193	0	340	my $self = shift;
3867
3868							# '?' = conditional or starting pattern?
3869	193					335	my $is_pattern;
3870
3871							# Patch for rt #126965
3872							# a pattern cannot follow certain keywords which take optional
3873							# arguments, like 'shift' and 'pop'. See also '/'.
3874	193	100	66			1474	if (
		100
		100
3875							$last_nonblank_type eq 'k'
3876							&& $is_keyword_rejecting_question_as_pattern_delimiter{
3877							$last_nonblank_token}
3878							)
3879							{
3880	1					2	$is_pattern = 0;
3881							}
3882
3883							# patch for RT#131288, user constant function without prototype
3884							# last type is 'U' followed by ?.
3885							elsif ( $last_nonblank_type =~ /^[FUY]$/ ) {
3886	1					2	$is_pattern = 0;
3887							}
3888							elsif ( $expecting == UNKNOWN ) {
3889
3890							# In older versions of Perl, a bare ? can be a pattern
3891							# delimiter. In perl version 5.22 this was
3892							# dropped, but we have to support it in order to format
3893							# older programs. See:
3894							## https://perl.developpez.com/documentations/en/5.22.0/perl5211delta.html
3895							# For example, the following line worked
3896							# at one time:
3897							# ?(.*)? && (print $1,"\n");
3898							# In current versions it would have to be written with slashes:
3899							# /(.*)/ && (print $1,"\n");
3900							(
3901	12					60	$is_pattern,
3902							my $msg,
3903
3904							) = $self->guess_if_pattern_or_conditional(
3905
3906							$i,
3907							$rtokens,
3908							$rtoken_type,
3909							$rtoken_map,
3910							$max_token_index,
3911							);
3912
3913	12	50				39	if ($msg) {
3914	12					54	$self->write_logfile_entry($msg);
3915	12					24	if ( DEBUG_GUESS_MODE && $is_pattern ) {
3916							$self->warning("DEBUG_GUESS_MODE message:\n$msg\n");
3917							}
3918							}
3919							}
3920	179					430	else { $is_pattern = ( $expecting == TERM ) }
3921
3922	193	50				488	if ($is_pattern) {
3923	0					0	$in_quote = 1;
3924	0					0	$type = 'Q';
3925	0					0	$allowed_quote_modifiers = $quote_modifiers{'m'};
3926	0					0	$quote_starting_tok = 'm';
3927	0					0	$quote_here_target_2 = undef;
3928							}
3929							else {
3930	193					620	( $type_sequence, $indent_flag ) =
3931							$self->increase_nesting_depth( QUESTION_COLON,
3932							$rtoken_map->[$i_tok] );
3933							}
3934	193					355	return;
3935							} ## end sub do_QUESTION_MARK
3936
3937							sub do_STAR {
3938
3939	254			254	0	395	my $self = shift;
3940
3941							# '*' = typeglob, or multiply?
3942
3943							# Guess based on next token. See also c036, and versions before 2026-
3944	254	100	100			877	if ( $expecting == UNKNOWN && $next_type ne 'b' ) {
3945
3946							# Check for a normal glob, like *OUT:
3947	6	50				32	if ( $next_tok =~ /^[_A-Za-z]/ ) {
3948	0					0	$expecting = TERM;
3949							}
3950							else {
3951							## Could check for glob of a simple punctuation variable here
3952							## by looking ahead one more character
3953							}
3954							}
3955
3956	254	100				688	if ( $expecting == TERM ) {
3957	21					72	$self->scan_simple_identifier();
3958							}
3959							else {
3960
3961	233	100				844	if ( $rtokens->[ $i + 1 ] eq '=' ) {
		100
3962	2					3	$tok = '*=';
3963	2					4	$type = $tok;
3964	2					4	$i++;
3965							}
3966							elsif ( $rtokens->[ $i + 1 ] eq '*' ) {
3967	42					72	$tok = '**';
3968	42					71	$type = $tok;
3969	42					67	$i++;
3970	42	100				144	if ( $rtokens->[ $i + 1 ] eq '=' ) {
3971	2					3	$tok = '**=';
3972	2					3	$type = $tok;
3973	2					4	$i++;
3974							}
3975							}
3976							else {
3977							## not multiple characters
3978							}
3979							}
3980	254					421	return;
3981							} ## end sub do_STAR
3982
3983							sub do_DOT {
3984
3985	168			168	0	252	my $self = shift;
3986
3987							# '.' = what kind of . ?
3988	168	100				386	if ( $expecting != OPERATOR ) {
3989	10					37	$self->scan_number();
3990							}
3991	168					1463	return;
3992							} ## end sub do_DOT
3993
3994							sub do_COLON {
3995
3996	285			285	0	516	my $self = shift;
3997
3998							# ':' = label, ternary, attribute, ?
3999
4000							# if this is the first nonblank character, call it a label
4001							# since perl seems to just swallow it
4002	285	50	66			3403	if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
		100	66
		100	66
		100	66
		100
		100
4003	0					0	$type = 'J';
4004							}
4005
4006							# ATTRS: check for a ':' which introduces an attribute list
4007							# either after a 'sub' keyword or within a paren list
4008							# Added 'package' (can be 'class') for --use-feature=class (rt145706)
4009							elsif ( $statement_type =~ /^(sub\|package)\b/ ) {
4010	22					41	$type = 'A';
4011	22					36	$self->[_in_attribute_list_] = 1;
4012							}
4013
4014							# Within a signature, unless we are in a ternary. For example,
4015							# from 't/filter_example.t':
4016							# method foo4 ( $class: $bar ) { $class->bar($bar) }
4017							elsif ( $rparen_type->[$paren_depth] =~ /^sub\b/
4018							&& !is_balanced_closing_container(QUESTION_COLON) )
4019							{
4020	1					2	$type = 'A';
4021	1					1	$self->[_in_attribute_list_] = 1;
4022							}
4023
4024							# check for scalar attribute, such as
4025							# my $foo : shared = 1;
4026							elsif ($is_my_our_state{$statement_type}
4027							&& $rcurrent_depth->[QUESTION_COLON] == 0 )
4028							{
4029	17					31	$type = 'A';
4030	17					27	$self->[_in_attribute_list_] = 1;
4031							}
4032
4033							# Look for Switch::Plain syntax if an error would otherwise occur
4034							# here. Note that we do not need to check if the extended syntax
4035							# flag is set because otherwise an error would occur, and we would
4036							# then have to output a message telling the user to set the
4037							# extended syntax flag to avoid the error.
4038							# case 1: {
4039							# default: {
4040							# default:
4041							# Note that the line 'default:' will be parsed as a label elsewhere.
4042							elsif ( $is_case_default{$statement_type}
4043							&& !is_balanced_closing_container(QUESTION_COLON) )
4044							{
4045							# mark it as a perltidy label type
4046	46					80	$type = 'J';
4047							}
4048
4049							# mark colon as attribute if an error would occur otherwise; git #162
4050							elsif ( !$rcurrent_depth->[QUESTION_COLON] ) {
4051	6					9	$type = 'A';
4052	6					9	$self->[_in_attribute_list_] = 1;
4053							}
4054
4055							# otherwise, it should be part of a ?/: operator
4056							else {
4057	193					657	( $type_sequence, $indent_flag ) =
4058							$self->decrease_nesting_depth( QUESTION_COLON,
4059							$rtoken_map->[$i_tok] );
4060	193	50				561	if ( $last_nonblank_token eq '?' ) {
4061	0					0	$self->warning("Syntax error near ? :\n");
4062							}
4063							}
4064	285					486	return;
4065							} ## end sub do_COLON
4066
4067							sub do_PLUS_SIGN {
4068
4069	240			240	0	399	my $self = shift;
4070
4071							# '+' = what kind of plus?
4072	240	100				988	if ( $expecting == TERM ) {
		100
4073	14					53	my $number = $self->scan_number_fast();
4074
4075							# unary plus is safest assumption if not a number
4076	14	50				41	if ( !defined($number) ) { $type = 'p'; }
	14					25
4077							}
4078							elsif ( $expecting == OPERATOR ) {
4079							}
4080							else {
4081	2	50	33			7	if ( $next_type eq 'w' \|\| $next_type eq '{' ) { $type = 'p' }
	2					3
4082							}
4083	240					386	return;
4084							} ## end sub do_PLUS_SIGN
4085
4086							sub do_AT_SIGN {
4087
4088	524			524	0	892	my $self = shift;
4089
4090							# '@' = sigil for array?
4091	524	50				1362	$self->error_if_expecting_OPERATOR("Array")
4092							if ( $expecting == OPERATOR );
4093	524					1771	$self->scan_simple_identifier();
4094	524					747	return;
4095							} ## end sub do_AT_SIGN
4096
4097							sub do_PERCENT_SIGN {
4098
4099	214			214	0	352	my $self = shift;
4100
4101							# '%' = hash or modulo?
4102							# first guess is hash if no following blank or paren
4103	214	50				588	if ( $expecting == UNKNOWN ) {
4104	0	0	0			0	if ( $next_type ne 'b' && $next_type ne '(' ) {
4105	0					0	$expecting = TERM;
4106							}
4107							}
4108	214	100				537	if ( $expecting == TERM ) {
4109	204					662	$self->scan_simple_identifier();
4110							}
4111	214					341	return;
4112							} ## end sub do_PERCENT_SIGN
4113
4114							sub do_LEFT_SQUARE_BRACKET {
4115
4116	814			814	0	1111	my $self = shift;
4117
4118							# '['
4119	814					1539	$rsquare_bracket_type->[ ++$square_bracket_depth ] =
4120							$last_nonblank_token;
4121	814					2200	( $type_sequence, $indent_flag ) =
4122							$self->increase_nesting_depth( SQUARE_BRACKET,
4123							$rtoken_map->[$i_tok] );
4124
4125							# It may seem odd, but structural square brackets have
4126							# type '{' and '}'. This simplifies the indentation logic.
4127	814	100				1868	if ( !is_non_structural_brace() ) {
4128	374					597	$type = '{';
4129							}
4130	814					1475	$rsquare_bracket_structural_type->[$square_bracket_depth] = $type;
4131	814					1188	return;
4132							} ## end sub do_LEFT_SQUARE_BRACKET
4133
4134							sub do_RIGHT_SQUARE_BRACKET {
4135
4136	814			814	0	1147	my $self = shift;
4137
4138							# ']'
4139	814					2251	( $type_sequence, $indent_flag ) =
4140							$self->decrease_nesting_depth( SQUARE_BRACKET,
4141							$rtoken_map->[$i_tok] );
4142
4143	814	100				1961	if ( $rsquare_bracket_structural_type->[$square_bracket_depth] eq '{' )
4144							{
4145	374					552	$type = '}';
4146							}
4147
4148							# propagate type information for smartmatch operator. This is
4149							# necessary to enable us to know if an operator or term is expected
4150							# next.
4151	814	100				1828	if ( $rsquare_bracket_type->[$square_bracket_depth] eq '~~' ) {
4152	20					32	$tok = $rsquare_bracket_type->[$square_bracket_depth];
4153							}
4154
4155	814	50				1695	if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
	814					1099
4156	814					1191	return;
4157							} ## end sub do_RIGHT_SQUARE_BRACKET
4158
4159							sub do_MINUS_SIGN {
4160
4161	491			491	0	730	my $self = shift;
4162
4163							# '-' = what kind of minus?
4164	491	100	100			2744	if ( ( $expecting != OPERATOR )
		100
		100
4165							&& $is_file_test_operator{$next_tok} )
4166							{
4167	12					61	my ( $next_nonblank_token, $i_next_uu ) =
4168							$self->find_next_nonblank_token( $i + 1, $rtokens,
4169							$max_token_index );
4170
4171							# check for a quoted word like "-w=>xx";
4172							# it is sufficient to just check for a following '='
4173	12	50				39	if ( $next_nonblank_token eq '=' ) {
4174	0					0	$type = 'm';
4175							}
4176							else {
4177	12					20	$i++;
4178	12					25	$tok .= $next_tok;
4179	12					24	$type = 'F';
4180							}
4181							}
4182							elsif ( $expecting == TERM ) {
4183	378					1020	my $number = $self->scan_number_fast();
4184
4185							# maybe part of bareword token? unary is safest
4186	378	100				799	if ( !defined($number) ) { $type = 'm'; }
	288					402
4187
4188							}
4189							elsif ( $expecting == OPERATOR ) {
4190							}
4191							else {
4192	4	50				13	if ( $next_type eq 'w' ) {
4193	4					8	$type = 'm';
4194							}
4195							}
4196	491					732	return;
4197							} ## end sub do_MINUS_SIGN
4198
4199							sub do_CARAT_SIGN {
4200
4201	12			12	0	20	my $self = shift;
4202
4203							# '^'
4204							# check for special variables like ${^WARNING_BITS}
4205	12	100				27	if ( $expecting == TERM ) {
4206
4207	5	50	33			45	if ( $last_nonblank_token eq '{'
			33
4208							&& ( $next_tok !~ /^\d/ )
4209							&& ( $next_tok =~ /^\w/ ) )
4210							{
4211
4212	5	100				14	if ( $next_tok eq 'W' ) {
4213	1					2	$self->[_saw_perl_dash_w_] = 1;
4214							}
4215	5					9	$tok = $tok . $next_tok;
4216	5					10	$i = $i + 1;
4217	5					8	$type = 'w';
4218
4219							# Optional coding to try to catch syntax errors. This can
4220							# be removed if it ever causes incorrect warning messages.
4221							# The '{^' should be preceded by either by a type or '$#'
4222							# Examples:
4223							# $#{^CAPTURE} ok
4224							# *${^LAST_FH}{NAME} ok
4225							# @{^HOWDY} ok
4226							# $hash{^HOWDY} error
4227
4228							# Note that a type sigil '$' may be tokenized as 'Z'
4229							# after something like 'print', so allow type 'Z'
4230	5	0	33			15	if ( $last_last_nonblank_type ne 't'
			33
4231							&& $last_last_nonblank_type ne 'Z'
4232							&& $last_last_nonblank_token ne '$#' )
4233							{
4234	0					0	$self->warning("Possible syntax error near '{^'\n");
4235							}
4236							}
4237							}
4238	12					20	return;
4239							} ## end sub do_CARAT_SIGN
4240
4241							sub do_DOUBLE_COLON {
4242
4243	9			9	0	12	my $self = shift;
4244
4245							# '::' = probably a sub call
4246	9					20	$self->scan_bare_identifier();
4247	9					13	return;
4248							} ## end sub do_DOUBLE_COLON
4249
4250							sub do_LEFT_SHIFT {
4251
4252	7			7	0	18	my $self = shift;
4253
4254							# '<<' = maybe a here-doc?
4255	7	50				117	if ( $expecting != OPERATOR ) {
4256							my (
4257	7					41	$found_target,
4258							$here_doc_target,
4259							$here_quote_character,
4260							$i_return,
4261							$saw_error,
4262
4263							) = $self->find_here_doc(
4264
4265							$expecting,
4266							$i,
4267							$rtokens,
4268							$rtoken_type,
4269							$rtoken_map,
4270							$max_token_index,
4271							);
4272	7					16	$i = $i_return;
4273
4274	7	50				20	if ($found_target) {
		0
4275	7					14	push @{$rhere_target_list},
	7					21
4276							[ $here_doc_target, $here_quote_character ];
4277	7					15	$type = 'h';
4278	7	50				71	if ( length($here_doc_target) > 80 ) {
		50
		100
4279	0					0	my $truncated = substr( $here_doc_target, 0, 80 );
4280	0					0	$self->complain("Long here-target: '$truncated' ...\n");
4281							}
4282							elsif ( !$here_doc_target ) {
4283	0	0				0	$self->warning(
4284							'Use of bare << to mean <<"" is deprecated' . "\n" )
4285							if ( !$here_quote_character );
4286							}
4287							elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
4288	2					9	$self->complain(
4289							"Unconventional here-target: '$here_doc_target'\n");
4290							}
4291							else {
4292							# nothing to complain about
4293							}
4294							}
4295							elsif ( $expecting == TERM ) {
4296	0	0				0	if ( !$saw_error ) {
4297
4298							# shouldn't happen..arriving here implies an error in
4299							# the logic in sub 'find_here_doc'
4300	0					0	if (DEVEL_MODE) {
4301							Fault(<<EOM);
4302							Program bug; didn't find here doc target
4303							EOM
4304							}
4305							$self->warning(
4306	0					0	"Possible program error: didn't find here doc target\n"
4307							);
4308	0					0	$self->report_definite_bug();
4309							}
4310							}
4311
4312							# target not found, expecting == UNKNOWN
4313							else {
4314							# assume it is a shift
4315							}
4316							}
4317							else {
4318							}
4319	7					12	return;
4320							} ## end sub do_LEFT_SHIFT
4321
4322							sub do_NEW_HERE_DOC {
4323
4324							# '<<~' = a here-doc, new type added in v26
4325
4326	2			2	0	5	my $self = shift;
4327
4328							return
4329	2	50				6	if ( $i >= $max_token_index ); # here-doc not possible if end of line
4330	2	50				7	if ( $expecting != OPERATOR ) {
4331							my (
4332	2					12	$found_target,
4333							$here_doc_target,
4334							$here_quote_character,
4335							$i_return,
4336							$saw_error,
4337
4338							) = $self->find_here_doc(
4339
4340							$expecting,
4341							$i,
4342							$rtokens,
4343							$rtoken_type,
4344							$rtoken_map,
4345							$max_token_index,
4346							);
4347	2					4	$i = $i_return;
4348
4349	2	50				6	if ($found_target) {
		0
4350
4351	2	50				14	if ( length($here_doc_target) > 80 ) {
		50
4352	0					0	my $truncated = substr( $here_doc_target, 0, 80 );
4353	0					0	$self->complain("Long here-target: '$truncated' ...\n");
4354							}
4355							elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
4356	0					0	$self->complain(
4357							"Unconventional here-target: '$here_doc_target'\n");
4358							}
4359							else {
4360							# nothing to complain about
4361							}
4362
4363							# Note that we put a leading space on the here quote
4364							# character indicate that it may be preceded by spaces
4365	2					5	$here_quote_character = SPACE . $here_quote_character;
4366	2					4	push @{$rhere_target_list},
	2					7
4367							[ $here_doc_target, $here_quote_character ];
4368	2					4	$type = 'h';
4369							}
4370
4371							# target not found ..
4372							elsif ( $expecting == TERM ) {
4373	0	0				0	if ( !$saw_error ) {
4374
4375							# shouldn't happen..arriving here implies an error in
4376							# the logic in sub 'find_here_doc'
4377	0					0	if (DEVEL_MODE) {
4378							Fault(<<EOM);
4379							Program bug; didn't find here doc target
4380							EOM
4381							}
4382							$self->warning(
4383	0					0	"Possible program error: didn't find here doc target\n"
4384							);
4385	0					0	$self->report_definite_bug();
4386							}
4387							}
4388
4389							# Target not found, expecting==UNKNOWN
4390							else {
4391	0					0	$self->warning("didn't find here doc target after '<<~'\n");
4392							}
4393							}
4394							else {
4395	0					0	$self->error_if_expecting_OPERATOR();
4396							}
4397	2					22	return;
4398							} ## end sub do_NEW_HERE_DOC
4399
4400							sub do_POINTER {
4401
4402							# '->'
4403	1173			1173	0	1577	return;
4404							}
4405
4406							sub do_PLUS_PLUS {
4407
4408	50			50	0	102	my $self = shift;
4409
4410							# '++'
4411							# type = 'pp' for pre-increment, '++' for post-increment
4412	50	100				170	if ( $expecting == OPERATOR ) { $type = '++' }
	41	100				83
4413	7					20	elsif ( $expecting == TERM ) { $type = 'pp' }
4414
4415							# handle ( $expecting == UNKNOWN )
4416							else {
4417
4418							# look ahead ..
4419	2					5	my ( $next_nonblank_token, $i_next ) =
4420							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
4421
4422							# Fix for c042: look past a side comment
4423	2	50				7	if ( $next_nonblank_token eq '#' ) {
4424	0					0	( $next_nonblank_token, $i_next ) =
4425							$self->find_next_nonblank_token( $max_token_index,
4426							$rtokens, $max_token_index );
4427							}
4428
4429	2	50				5	if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
	0					0
4430							}
4431	50					116	return;
4432							} ## end sub do_PLUS_PLUS
4433
4434							sub do_FAT_COMMA {
4435
4436	1103			1103	0	1521	my $self = shift;
4437
4438							# '=>'
4439	1103	50				2141	if ( $last_nonblank_type eq $tok ) {
4440	0					0	$self->complain("Repeated '=>'s \n");
4441							}
4442
4443							# patch for operator_expected: note if we are in the list (use.t)
4444							# TODO: make version numbers a new token type
4445	1103	100				2135	if ( $statement_type eq 'use' ) { $statement_type = '_use' }
	18					32
4446	1103					1488	return;
4447							} ## end sub do_FAT_COMMA
4448
4449							sub do_MINUS_MINUS {
4450
4451	2			2	0	4	my $self = shift;
4452
4453							# '--'
4454							# type = 'mm' for pre-decrement, '--' for post-decrement
4455
4456	2	50				10	if ( $expecting == OPERATOR ) { $type = '--' }
	0	50				0
4457	2					6	elsif ( $expecting == TERM ) { $type = 'mm' }
4458
4459							# handle ( $expecting == UNKNOWN )
4460							else {
4461
4462							# look ahead ..
4463	0					0	my ( $next_nonblank_token, $i_next ) =
4464							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
4465
4466							# Fix for c042: look past a side comment
4467	0	0				0	if ( $next_nonblank_token eq '#' ) {
4468	0					0	( $next_nonblank_token, $i_next ) =
4469							$self->find_next_nonblank_token( $max_token_index,
4470							$rtokens, $max_token_index );
4471							}
4472
4473	0	0				0	if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
	0					0
4474							}
4475
4476	2					4	return;
4477							} ## end sub do_MINUS_MINUS
4478
4479							sub do_DIGITS {
4480
4481	2508			2508	0	3258	my $self = shift;
4482
4483							# 'd' = string of digits
4484	2508	50				4390	$self->error_if_expecting_OPERATOR("Number")
4485							if ( $expecting == OPERATOR );
4486
4487	2508					5564	my $number = $self->scan_number_fast();
4488	2508	50				4868	if ( !defined($number) ) {
4489
4490							# shouldn't happen - we should always get a number
4491	0					0	if (DEVEL_MODE) {
4492							Fault(<<EOM);
4493							non-number beginning with digit--program bug
4494							EOM
4495							}
4496							$self->warning(
4497	0					0	"Unexpected error condition: non-number beginning with digit\n"
4498							);
4499	0					0	$self->report_definite_bug();
4500							}
4501	2508					3327	return;
4502							} ## end sub do_DIGITS
4503
4504							sub do_ATTRIBUTE_LIST {
4505
4506	45			45	0	105	my ( $self, $next_nonblank_token ) = @_;
4507
4508							# Called at a bareword encountered while in an attribute list
4509							# returns 'is_attribute':
4510							# true if attribute found
4511							# false if an attribute (continue parsing bareword)
4512
4513							# treat bare word followed by open paren like qw(
4514	45	100				112	if ( $next_nonblank_token eq '(' ) {
4515
4516							# For something like:
4517							# : prototype($$)
4518							# we should let do_scan_sub see it so that it can see
4519							# the prototype. All other attributes get parsed as a
4520							# quoted string.
4521	20	100				328	if ( $tok eq 'prototype' ) {
4522	2					4	$id_scan_state = 'prototype';
4523
4524							# start just after the word 'prototype'
4525	2					4	my $i_beg = $i + 1;
4526	2					18	( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub(
4527							{
4528							input_line => $input_line,
4529							i => $i,
4530							i_beg => $i_beg,
4531							tok => $tok,
4532							type => $type,
4533							rtokens => $rtokens,
4534							rtoken_map => $rtoken_map,
4535							id_scan_state => $id_scan_state,
4536							max_token_index => $max_token_index,
4537							}
4538							);
4539
4540							# If successful, mark as type 'q' to be consistent
4541							# with other attributes. Type 'w' would also work.
4542	2	50				12	if ( $i > $i_beg ) {
4543	2					4	$type = 'q';
4544	2					5	return 1;
4545							}
4546
4547							# If not successful, continue and parse as a quote.
4548							}
4549
4550							# All other attribute lists must be parsed as quotes
4551							# (see 'signatures.t' for good examples)
4552	18					39	$in_quote = $quote_items{'q'};
4553	18					34	$allowed_quote_modifiers = $quote_modifiers{'q'};
4554	18					26	$quote_starting_tok = 'q';
4555	18					29	$type = 'q';
4556	18					27	$quote_type = 'q';
4557	18					27	$quote_here_target_2 = undef;
4558	18					32	return 1;
4559							}
4560
4561							# handle bareword not followed by open paren
4562							else {
4563	25					39	$type = 'w';
4564	25					50	return 1;
4565							}
4566
4567							# attribute not found
4568	0					0	return;
4569							} ## end sub do_ATTRIBUTE_LIST
4570
4571							sub do_X_OPERATOR {
4572
4573	17			17	0	37	my $self = shift;
4574
4575							# We are at a pretoken starting with 'x' where an operator is expected
4576
4577	17	100				70	if ( $tok eq 'x' ) {
4578	15	50				55	if ( $rtokens->[ $i + 1 ] eq '=' ) { # x=
4579	0					0	$tok = 'x=';
4580	0					0	$type = $tok;
4581	0					0	$i++;
4582							}
4583							else {
4584	15					31	$type = 'x';
4585							}
4586							}
4587							else {
4588
4589							# Split a pretoken like 'x10' into 'x' and '10'.
4590							# Note: In previous versions of perltidy it was marked
4591							# as a number, $type = 'n', and fixed downstream by the
4592							# Formatter.
4593	2					3	$type = 'n';
4594	2	50				6	if ( $self->split_pretoken(1) ) {
4595	2					3	$type = 'x';
4596	2					3	$tok = 'x';
4597							}
4598							}
4599	17					39	return;
4600							} ## end sub do_X_OPERATOR
4601
4602							sub do_USE_CONSTANT {
4603
4604	16			16	0	29	my $self = shift;
4605
4606							# We just saw 'use constant' and must look ahead
4607
4608	16					59	$self->scan_bare_identifier();
4609	16					63	my ( $next_nonblank_tok2, $i_next2_uu ) =
4610							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
4611
4612	16	50				47	if ($next_nonblank_tok2) {
4613
4614	16	100				53	if ( $is_keyword{$next_nonblank_tok2} ) {
4615
4616							# Assume qw is used as a quote and okay, as in:
4617							# use constant qw{ DEBUG 0 };
4618							# Not worth trying to parse for just a warning
4619
4620							# NOTE: This warning is deactivated because recent
4621							# versions of perl do not complain here, but
4622							# the coding is retained for reference.
4623	1					4	if ( 0 && $next_nonblank_tok2 ne 'qw' ) {
4624							$self->warning(
4625							"Attempting to define constant '$next_nonblank_tok2' which is a perl keyword\n"
4626							);
4627							}
4628							}
4629
4630							else {
4631	15					42	$ris_constant->{$current_package}->{$next_nonblank_tok2} = 1;
4632							}
4633							}
4634	16					30	return;
4635							} ## end sub do_USE_CONSTANT
4636
4637							sub do_KEYWORD {
4638
4639	3060			3060	0	4080	my $self = shift;
4640
4641							# found a keyword - set any associated flags
4642	3060					4318	$type = 'k';
4643
4644							# Since for and foreach may not be followed immediately
4645							# by an opening paren, we have to remember which keyword
4646							# is associated with the next '('
4647							# Previously, before update c230 : if ( $is_for_foreach{$tok} ) {
4648							##(if elsif unless while until for foreach switch case given when catch)
4649	3060	100				6569	if ( $is_blocktype_with_paren{$tok} ) {
4650	495	100				1385	if ( new_statement_ok() ) {
4651	356					664	$want_paren = $tok;
4652							}
4653							}
4654
4655							# Catch some unexpected keyword errors; c517.
4656							# Note that we only check keywords for OPERATOR expected, not TERM.
4657							# This is because a large number of keywords which normally expect
4658							# a TERM will also take an OPERATOR.
4659	3060	50	66			6947	if ( $expecting == OPERATOR && $is_TERM_keyword{$tok} ) {
4660	0					0	$self->error_if_expecting_OPERATOR();
4661							}
4662
4663							# recognize 'use' statements, which are special
4664	3060	100	100			17768	if ( $is_use_require{$tok} ) {
		100	100
		100
		100
		100
		100
4665	178					298	$statement_type = $tok;
4666							}
4667
4668							# remember my and our to check for trailing ": shared"
4669							elsif ( $is_my_our_state{$tok} ) {
4670	747					1267	$statement_type = $tok;
4671							}
4672
4673							# Check for unexpected 'elsif'
4674							elsif ( $tok eq 'elsif' ) {
4675	33	0	0			422	if (
			33
4676
4677							!$is_if_elsif_unless{$last_nonblank_block_type}
4678
4679							# Allow isolated blocks of any kind during editing
4680							# by checking for a last noblank token of ';' and no
4681							# sequence numbers having been issued (c272). The check
4682							# on sequence number is not perfect but good enough.
4683							&& !(
4684							$last_nonblank_token eq ';'
4685							&& $next_sequence_number == SEQ_ROOT + 1
4686							)
4687
4688							)
4689							{
4690							## prevent formatting and avoid instability (b1553)
4691	0					0	$self->warning_do_not_format(
4692							"expecting '$tok' to follow one of 'if\|elsif\|unless'\n");
4693							}
4694							}
4695
4696							# Check for unexpected 'else'
4697							elsif ( $tok eq 'else' ) {
4698
4699							# patched for SWITCH/CASE
4700	49	0	66			231	if (
			0
			33
4701
4702							!$is_if_elsif_unless_case_when{$last_nonblank_block_type}
4703
4704							# patch to avoid an unwanted error message for
4705							# the case of a parenless 'case' (RT 105484):
4706							# switch ( 1 ) { case x { 2 } else { } }
4707							&& !$is_if_elsif_unless_case_when{$statement_type}
4708
4709							# Allow isolated blocks of any kind during editing (c272)
4710							&& !(
4711							$last_nonblank_token eq ';'
4712							&& $next_sequence_number == SEQ_ROOT + 1
4713							)
4714
4715							)
4716							{
4717							## prevent formatting and avoid instability (b1553)
4718	0					0	$self->warning_do_not_format(
4719							"expecting '$tok' to follow one of 'if\|elsif\|unless\|case\|when'\n"
4720							);
4721							}
4722							}
4723
4724							# patch for SWITCH/CASE if 'case' and 'when are
4725							# treated as keywords. Also 'default' for Switch::Plain
4726							elsif ($tok eq 'when'
4727							\|\| $tok eq 'case'
4728							\|\| $tok eq 'default' )
4729							{
4730	70					109	$statement_type = $tok; # next '{' is block
4731							}
4732
4733							# feature 'err' was removed in Perl 5.10. So mark this as
4734							# a bareword unless an operator is expected (see c158).
4735							elsif ( $tok eq 'err' ) {
4736	1	50				3	if ( $expecting != OPERATOR ) { $type = 'w' }
	1					2
4737							}
4738							else {
4739							## no special treatment needed
4740							}
4741
4742	3060					5052	return;
4743							} ## end sub do_KEYWORD
4744
4745							sub do_QUOTE_OPERATOR {
4746
4747	232			232	0	365	my $self = shift;
4748
4749							# We have arrived at a quote operator: q, qq, qw, qx, qr, s, y, tr, m
4750
4751	232	50				596	if ( $expecting == OPERATOR ) {
4752
4753							# Be careful not to call an error for a qw quote
4754							# where a parenthesized list is allowed. For example,
4755							# it could also be a for/foreach construct such as
4756							#
4757							# foreach my $key qw\Uno Due Tres Quadro\ {
4758							# print "Set $key\n";
4759							# }
4760							#
4761
4762							# Or it could be a function call.
4763							# NOTE: Braces in something like &{ xxx } are not
4764							# marked as a block, we might have a method call.
4765							# &method(...), $method->(..), &{method}(...),
4766							# $ref[2](list) is ok & short for $ref[2]->(list)
4767							#
4768							# See notes in 'sub code_block_type' and
4769							# 'sub is_non_structural_brace'
4770
4771							my $paren_list_possible = $tok eq 'qw'
4772							&& ( $last_nonblank_token =~ /^([\]\}\&]\|\-\>)/
4773	0		0			0	\|\| $is_for_foreach{$want_paren} );
4774
4775	0	0				0	if ( !$paren_list_possible ) {
4776	0					0	$self->error_if_expecting_OPERATOR();
4777							}
4778							}
4779	232					478	$in_quote = $quote_items{$tok};
4780	232					466	$allowed_quote_modifiers = $quote_modifiers{$tok};
4781	232					349	$quote_starting_tok = $tok;
4782	232					368	$quote_here_target_2 = undef;
4783
4784							# All quote types are 'Q' except possibly qw quotes.
4785							# qw quotes are special in that they may generally be trimmed
4786							# of leading and trailing whitespace. So they are given a
4787							# separate type, 'q', unless requested otherwise.
4788	232	100	66			966	$type =
4789							( $tok eq 'qw' && $rOpts_trim_qw )
4790							? 'q'
4791							: 'Q';
4792	232					431	$quote_type = $type;
4793	232					385	return;
4794							} ## end sub do_QUOTE_OPERATOR
4795
4796							sub do_UNKNOWN_BAREWORD {
4797
4798	1030			1030	0	2085	my ( $self, $next_nonblank_token ) = @_;
4799
4800							# We have encountered a bareword which needs more work to classify
4801
4802	1030					3072	$self->scan_bare_identifier();
4803
4804	1030	100	100			3060	if ( $statement_type eq 'use'
4805							&& $last_nonblank_token eq 'use' )
4806							{
4807	111					356	$rsaw_use_module->{$current_package}->{$tok} = 1;
4808							}
4809
4810	1030	100				2159	if ( $type eq 'w' ) {
4811
4812	1005	100				2149	if ( $expecting == OPERATOR ) {
4813
4814							# Patch to avoid error message for RPerl overloaded
4815							# operator functions: use overload
4816							# '+' => \&sse_add,
4817							# '-' => \&sse_sub,
4818							# '*' => \&sse_mul,
4819							# '/' => \&sse_div;
4820							# TODO: this could eventually be generalized
4821	2	50	33			18	if ( $rsaw_use_module->{$current_package}->{'RPerl'}
		50	33
		0
		0
4822							&& $tok =~ /^sse_(mul\|div\|add\|sub)$/ )
4823							{
4824
4825							}
4826
4827							# patch for Syntax::Operator::In, git #162
4828							elsif ( $tok eq 'in' && $next_nonblank_token eq ':' ) {
4829
4830							}
4831
4832							# Fix part 1 for git #63 in which a comment falls
4833							# between an -> and the following word. An
4834							# alternate fix would be to change operator_expected
4835							# to return an UNKNOWN for this type.
4836							elsif ( $last_nonblank_type eq '->' ) {
4837
4838							}
4839
4840							# don't complain about possible indirect object
4841							# notation.
4842							# For example:
4843							# package main;
4844							# sub new($) { ... }
4845							# $b = new A::; # calls A::new
4846							# $c = new A; # same thing but suspicious
4847							# This will call A::new but we have a 'new' in
4848							# main:: which looks like a constant.
4849							#
4850							elsif ( $last_nonblank_type eq 'C' ) {
4851	0	0				0	if ( $tok !~ /::$/ ) {
4852	0					0	$self->complain(<<EOM);
4853							Expecting operator after '$last_nonblank_token' but found bare word '$tok'
4854							Maybe indirect object notation?
4855							EOM
4856							}
4857							}
4858							else {
4859	0					0	$self->error_if_expecting_OPERATOR("bareword");
4860							}
4861							}
4862
4863							# mark bare words immediately followed by a paren as
4864							# functions
4865	1005					1793	$next_tok = $rtokens->[ $i + 1 ];
4866	1005	100				1987	if ( $next_tok eq '(' ) {
4867
4868							# Patch for issue c151, where we are processing a snippet and
4869							# have not seen that SPACE is a constant. In this case 'x' is
4870							# probably an operator. The only disadvantage with an incorrect
4871							# guess is that the space after it may be incorrect. For example
4872							# $str .= SPACE x ( 16 - length($str) ); See also b1410.
4873	294	50	33			1185	if ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { $type = 'x' }
	0	50				0
4874
4875							# Fix part 2 for git #63. Leave type as 'w' to keep
4876							# the type the same as if the -> were not separated
4877	294					615	elsif ( $last_nonblank_type ne '->' ) { $type = 'U' }
4878
4879							# not a special case
4880							else { }
4881
4882							}
4883
4884							# underscore after file test operator is file handle
4885	1005	50	66			2445	if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
4886	0					0	$type = 'Z';
4887							}
4888
4889							# patch for SWITCH/CASE if 'case' and 'when are
4890							# not treated as keywords:
4891	1005	50	33			4397	if (
			33
			33
4892							( $tok eq 'case' && $rbrace_type->[$brace_depth] eq 'switch' )
4893							\|\| ( $tok eq 'when'
4894							&& $rbrace_type->[$brace_depth] eq 'given' )
4895							)
4896							{
4897	0					0	$statement_type = $tok; # next '{' is block
4898	0					0	$type = 'k'; # for keyword syntax coloring
4899							}
4900	1005	100				2096	if ( $next_nonblank_token eq '(' ) {
4901
4902							# patch for SWITCH/CASE if switch and given not keywords
4903							# Switch is not a perl 5 keyword, but we will gamble
4904							# and mark switch followed by paren as a keyword. This
4905							# is only necessary to get html syntax coloring nice,
4906							# and does not commit this as being a switch/case.
4907	259	50	33			1617	if ( $tok eq 'switch' \|\| $tok eq 'given' ) {
		50	33
4908	0					0	$type = 'k'; # for keyword syntax coloring
4909							}
4910
4911							# mark 'x' as operator for something like this (see b1410)
4912							# my $line = join( LD_X, map { LD_H x ( $_ + 2 ) } @$widths );
4913							elsif ( $tok eq 'x' && $last_nonblank_type eq 'w' ) {
4914	0					0	$type = 'x';
4915							}
4916							else {
4917							## not a special case
4918							}
4919							}
4920							}
4921	1030					1654	return;
4922							} ## end sub do_UNKNOWN_BAREWORD
4923
4924							sub sub_attribute_ok_here {
4925
4926	37			37	0	108	my ( $self, $tok_kw, $next_nonblank_token, $i_next ) = @_;
4927
4928							# Decide if a ':' can introduce an attribute. For example,
4929							# something like 'sub :'
4930
4931							# Given:
4932							# $tok_kw = a bareword token
4933							# $next_nonblank_token = a following ':' being examined
4934							# $i_next = the index of the following ':'
4935
4936							# We will decide based on if the colon is followed by a bareword
4937							# which is not a keyword. Changed inext+1 to inext to fixed case
4938							# b1190.
4939	37					62	my $sub_attribute_ok_here;
4940	37	50	66			148	if ( $is_sub{$tok_kw}
			66
4941							&& $expecting != OPERATOR
4942							&& $next_nonblank_token eq ':' )
4943							{
4944	3					15	my ( $nn_nonblank_token, $i_nn_uu ) =
4945							$self->find_next_nonblank_token( $i_next, $rtokens,
4946							$max_token_index );
4947							$sub_attribute_ok_here =
4948							$nn_nonblank_token =~ /^\w/
4949							&& $nn_nonblank_token !~ /^\d/
4950	3		66			46	&& !$is_keyword{$nn_nonblank_token};
4951							}
4952	37					226	return $sub_attribute_ok_here;
4953							} ## end sub sub_attribute_ok_here
4954
4955	44			44		400	use constant DEBUG_BAREWORD => 0;
	44					77
	44					13070
4956
4957							sub saw_bareword_function {
4958	957			957	0	1787	my ( $self, $bareword ) = @_;
4959							$self->[_rbareword_info_]->{$current_package}->{$bareword}
4960	957					3723	->{function_count}++;
4961	957					1702	return;
4962							} ## end sub saw_bareword_function
4963
4964							sub saw_bareword_constant {
4965	180			180	0	345	my ( $self, $bareword ) = @_;
4966							$self->[_rbareword_info_]->{$current_package}->{$bareword}
4967	180					589	->{constant_count}++;
4968	180					319	return;
4969							} ## end sub saw_bareword_constant
4970
4971							sub get_bareword_counts {
4972	0			0	0	0	my ( $self, $bareword ) = @_;
4973
4974							# Given:
4975							# $bareword = a bareword
4976							# Return:
4977							# $function_count = number of times seen as function taking >0 args
4978							# $constant_count = number of times seen as function taking 0 args
4979							# Note:
4980							# $function_count > 0 implies that a TERM should come next
4981							# $constant_count > 0 implies that an OPERATOR may come next,
4982							# but this can be incorrect if $bareword can take 0 or more args.
4983							# This is used to help guess tokenization around unknown barewords.
4984	0					0	my $function_count;
4985							my $constant_count;
4986	0					0	my $rbareword_info_tok = $self->[_rbareword_info_]->{$current_package};
4987	0	0				0	if ($rbareword_info_tok) {
4988	0					0	$rbareword_info_tok = $rbareword_info_tok->{$bareword};
4989	0	0				0	if ($rbareword_info_tok) {
4990	0					0	$function_count = $rbareword_info_tok->{function_count};
4991	0					0	$constant_count = $rbareword_info_tok->{constant_count};
4992
4993							# a positive function count overrides a constant count
4994	0	0				0	if ($function_count) { $constant_count = 0 }
	0					0
4995							}
4996							}
4997	0	0				0	if ( !defined($function_count) ) { $function_count = 0 }
	0					0
4998	0	0				0	if ( !defined($constant_count) ) { $constant_count = 0 }
	0					0
4999	0					0	return ( $function_count, $constant_count );
5000							} ## end sub get_bareword_counts
5001
5002							# hashes used to help determine a bareword type
5003							my %is_wiUC;
5004							my %is_function_follower;
5005							my %is_constant_follower;
5006							my %is_use_require_no;
5007
5008							BEGIN {
5009	44			44		221	my @qz = qw( w i U C );
5010	44					286	$is_wiUC{$_} = 1 for @qz;
5011
5012	44					110	@qz = qw( use require no );
5013	44					172	$is_use_require_no{$_} = 1 for @qz;
5014
5015							# These pre-token types after a bareword imply that it
5016							# is not a constant, except when '(' is followed by ')'.
5017	44					139	@qz = qw# ( [ { $ @ " ' m #;
5018	44					301	$is_function_follower{$_} = 1 for @qz;
5019
5020							# These pre-token types after a bareword imply that it
5021							# MIGHT be a constant, but it also might be a function taking
5022							# 0 or more call args.
5023	44					110	@qz = qw# ; ) ] } if unless #;
5024	44					115	push @qz, COMMA;
5025	44					112595	$is_constant_follower{$_} = 1 for @qz;
5026							}
5027
5028							sub do_BAREWORD {
5029
5030	6616			6616	0	10033	my ($self) = @_;
5031
5032							# handle a bareword token:
5033							# returns
5034							# true if this token ends the current line
5035							# false otherwise
5036
5037	6616					7704	my $next_nonblank_token;
5038	6616					8340	my $i_next = $i + 1;
5039	6616	100	100			19849	if ( $i_next <= $max_token_index && $rtoken_type->[$i_next] eq 'b' ) {
5040	4001					5505	$i_next++;
5041							}
5042	6616	100				10515	if ( $i_next <= $max_token_index ) {
5043	6532					9768	$next_nonblank_token = $rtokens->[$i_next];
5044							}
5045							else {
5046	84					374	( $next_nonblank_token, $i_next ) =
5047							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
5048							}
5049
5050							# Fix for git #182: If --use-feature=class is set, then
5051							# a colon between words 'ADJUST' and 'params', and on the same line,
5052							# does not form the label 'ADJUST:'. It will get marked as type 'A'.
5053	6616					7742	my $is_not_label;
5054	6616	100	66			11515	if ( $tok eq 'ADJUST'
			66
			66
5055							&& $is_code_block_token{$tok}
5056							&& $rtokens->[$i_next] eq ':'
5057							&& $i_next < $max_token_index )
5058							{
5059	2	50				9	my $i_next2 =
5060							$rtoken_type->[ $i_next + 1 ] eq 'b' ? $i_next + 2 : $i_next + 1;
5061	2		33			18	$is_not_label =
5062							( $i_next2 <= $max_token_index
5063							&& $rtoken_type->[$i_next2] eq 'w'
5064							&& $rtokens->[$i_next2] eq 'params' );
5065							}
5066
5067							# a bare word immediately followed by :: is not a keyword;
5068							# use $tok_kw when testing for keywords to avoid a mistake
5069	6616					8602	my $tok_kw = $tok;
5070	6616	100	100			14446	if ( $rtokens->[ $i + 1 ] eq ':'
5071							&& $rtokens->[ $i + 2 ] eq ':' )
5072							{
5073	272					486	$tok_kw .= '::';
5074							}
5075
5076	6616	100				12161	if ( $self->[_in_attribute_list_] ) {
5077	45					149	my $is_attribute = $self->do_ATTRIBUTE_LIST($next_nonblank_token);
5078	45	50				127	return if ($is_attribute);
5079							}
5080
5081							#-----------------------------------------
5082							# Preliminary check for a lexical sub name
5083							#-----------------------------------------
5084	6571					7706	my $is_lexical_sub_type;
5085
5086							# Has this name been seen as a lexical sub?
5087	6571	50				13171	if ( my $rseqno_hash = $ris_lexical_sub->{$tok_kw} ) {
5088
5089							# Look back up the stack to see if it is still in scope.
5090							# Use the deepest we find if there are multiple versions.
5091	0					0	my @seqno_tested;
5092	0					0	my $cd_aa = $rcurrent_depth->[BRACE];
5093	0					0	foreach my $cd ( reverse( 0 .. $cd_aa ) ) {
5094	0	0				0	my $p_seqno =
5095							$cd
5096							? $rcurrent_sequence_number->[BRACE]->[$cd]
5097							: SEQ_ROOT;
5098
5099	0					0	push @seqno_tested, $p_seqno;
5100
5101							# Lexical subs use their containing sequence number as package
5102	0	0				0	if ( my $seqno_brace = $rseqno_hash->{$p_seqno} ) {
5103
5104							# This sub is in scope .. lookup its type
5105							$is_lexical_sub_type =
5106							$ris_constant->{$p_seqno}->{$tok_kw} ? 'C'
5107							: $ris_block_function->{$p_seqno}->{$tok_kw} ? 'G'
5108							: $ris_block_list_function->{$p_seqno}->{$tok_kw} ? 'G'
5109	0	0				0	: $ris_user_function->{$p_seqno}->{$tok_kw} ? 'U'
		0
		0
		0
5110							: 'U';
5111
5112							# But lexical subs do not apply within their defining code
5113	0					0	foreach (@seqno_tested) {
5114	0	0				0	next if ( $_ != $seqno_brace );
5115	0					0	$is_lexical_sub_type = undef;
5116	0					0	last;
5117							}
5118
5119	0					0	last;
5120							}
5121							}
5122							}
5123
5124							#----------------------------------------
5125							# Starting final if-elsif- chain of tests
5126							#----------------------------------------
5127
5128							# This is the return flag:
5129							# true => this is the last token on the line
5130							# false => keep tokenizing the line
5131	6571					7701	my $is_last;
5132
5133							# The following blocks of code must update these vars:
5134							# $type - the final token type, must always be set
5135
5136							# In addition, if additional pretokens are added:
5137							# $tok - the final token
5138							# $i - the index of the last pretoken
5139
5140							# They may also need to check and set various flags
5141
5142							# Scan a bare word following a -> as an identifier; it could
5143							# have a long package name. Fixes c037, c041.
5144	6571	100	100			86220	if ( $last_nonblank_token eq '->' ) {
		100	66
		100	100
		100	66
		100	66
		100	100
		100	66
		50	66
		50	100
		100	100
		50	33
		100	0
		100	0
		100	33
		100	0
		100	0
		100	66
		100	100
		100	100
			100
			100
			100
			66
			66
5145	786					2124	$self->scan_bare_identifier();
5146
5147							# a bareward after '->' gets type 'i'
5148	786					1127	$type = 'i';
5149							}
5150
5151							# Quote a word followed by => operator
5152							elsif (
5153							( $next_nonblank_token eq '=' && $rtokens->[ $i_next + 1 ] eq '>' )
5154
5155							# unless the word is __END__ or __DATA__ and is the only word on
5156							# the line.
5157							&& ( !defined( $is_END_DATA{$tok_kw} )
5158							\|\| $input_line !~ /^\s__(?:END\|DATA)__\s$/ )
5159							)
5160							{
5161							# Bareword followed by a fat comma - see 'git18.in'
5162							# This code was previously sub do_QUOTED_BAREWORD: see c316, c317
5163
5164							# Older perl:
5165							# 'v25=>1' is a v-string key!
5166							# '-v25=>1' is also a v-string key!
5167							# Deactivated: this is no longer true; see git #165
5168	812	100				2456	if ( 0 && $tok =~ /^v\d+$/ ) {
5169							$type = 'v';
5170							$self->complain("v-string used as hash key\n");
5171							$self->report_v_string($tok);
5172							}
5173
5174							# If tok is something like 'x17' then it could
5175							# actually be operator x followed by number 17.
5176							# For example, here:
5177							# 123x17 => [ 792, 1224 ],
5178							# (a key of 123 repeated 17 times, perhaps not
5179							# what was intended). We will mark x17 as type
5180							# 'n' and it will be split. If the previous token
5181							# was also a bareword then it is not very clear is
5182							# going on. In this case we will not be sure that
5183							# an operator is expected, so we just mark it as a
5184							# bareword. Perl is a little murky in what it does
5185							# with stuff like this, and its behavior can change
5186							# over time. Something like
5187							# a x18 => [792, 1224], will compile as
5188							# a key with 18 a's. But something like
5189							# push @array, a x18;
5190							# is a syntax error.
5191	0	100	33			0	elsif (
			66
5192							$expecting == OPERATOR
5193							&& substr( $tok, 0, 1 ) eq 'x'
5194							&& ( length($tok) == 1
5195							\|\| substr( $tok, 1, 1 ) =~ /^\d/ )
5196							)
5197							{
5198	3					9	$type = 'n';
5199	3	50				11	if ( $self->split_pretoken(1) ) {
5200	3					5	$type = 'x';
5201	3					6	$tok = 'x';
5202							}
5203	3					12	$self->complain("x operator in hash key\n");
5204							}
5205							else {
5206
5207							# git #18
5208	809					1157	$type = 'w';
5209	809					1916	$self->error_if_expecting_OPERATOR();
5210							}
5211							}
5212
5213							# quote a bare word within braces..like xxx->{s}; note that we
5214							# must be sure this is not a structural brace, to avoid
5215							# mistaking {s} in the following for a quoted bare word:
5216							# for(@[){s}bla}BLA}
5217							# Also treat q in something like var{-q} as a bare word, not
5218							# a quote operator
5219							elsif (
5220							$next_nonblank_token eq '}'
5221							&& (
5222							$last_nonblank_type eq 'L'
5223							\|\| ( $last_nonblank_type eq 'm'
5224							&& $last_last_nonblank_type eq 'L' )
5225							)
5226							)
5227							{
5228	138					253	$type = 'w';
5229							}
5230
5231							# handle operator x (now we know it isn't $x=)
5232							elsif (
5233							$expecting == OPERATOR
5234							&& substr( $tok, 0, 1 ) eq 'x'
5235							&& ( length($tok) == 1
5236							\|\| substr( $tok, 1, 1 ) =~ /^\d/ )
5237							)
5238							{
5239	17					79	$self->do_X_OPERATOR();
5240							}
5241							elsif ( $tok_kw eq 'CORE::' ) {
5242	3					5	$type = $tok = $tok_kw;
5243	3					5	$i += 2;
5244							}
5245							elsif ( ( $tok eq 'strict' )
5246							and ( $last_nonblank_token eq 'use' ) )
5247							{
5248	14					40	$self->[_saw_use_strict_] = 1;
5249	14					56	$self->scan_bare_identifier();
5250							}
5251
5252							elsif ( ( $tok eq 'warnings' )
5253							and ( $last_nonblank_token eq 'use' ) )
5254							{
5255	7					17	$self->[_saw_perl_dash_w_] = 1;
5256
5257							# scan as identifier, so that we pick up something like:
5258							# use warnings::register
5259	7					20	$self->scan_bare_identifier();
5260							}
5261
5262							elsif (
5263							$tok eq 'AutoLoader'
5264							&& $self->[_look_for_autoloader_]
5265							&& (
5266							$last_nonblank_token eq 'use'
5267
5268							# these regexes are from AutoSplit.pm, which we want
5269							# to mimic
5270							\|\| $input_line =~ /^\s*(use\|require)\s+AutoLoader\b/
5271							\|\| $input_line =~ /\bISA\s=.\bAutoLoader\b/
5272							)
5273							)
5274							{
5275	0					0	$self->write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
5276	0					0	$self->[_saw_autoloader_] = 1;
5277	0					0	$self->[_look_for_autoloader_] = 0;
5278	0					0	$self->scan_bare_identifier();
5279							}
5280
5281							elsif (
5282							$tok eq 'SelfLoader'
5283							&& $self->[_look_for_selfloader_]
5284							&& ( $last_nonblank_token eq 'use'
5285							\|\| $input_line =~ /^\s*(use\|require)\s+SelfLoader\b/
5286							\|\| $input_line =~ /\bISA\s=.\bSelfLoader\b/ )
5287							)
5288							{
5289	0					0	$self->write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
5290	0					0	$self->[_saw_selfloader_] = 1;
5291	0					0	$self->[_look_for_selfloader_] = 0;
5292	0					0	$self->scan_bare_identifier();
5293							}
5294
5295							elsif ( ( $tok eq 'constant' )
5296							and ( $last_nonblank_token eq 'use' ) )
5297							{
5298	16					54	$self->do_USE_CONSTANT();
5299							}
5300
5301							# Lexical sub names override keywords, labels. Based on testing,
5302							# this seems to be the correct location for this check.
5303							elsif ($is_lexical_sub_type) {
5304	0					0	$type = $is_lexical_sub_type;
5305							}
5306
5307							# various quote operators
5308							elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
5309	232					757	$self->do_QUOTE_OPERATOR();
5310							}
5311
5312							# check for a statement label
5313							elsif (
5314							( $next_nonblank_token eq ':' )
5315							&& !$is_not_label
5316							&& ( $rtokens->[ $i_next + 1 ] ne ':' )
5317							&& ( $i_next <= $max_token_index ) # colon on same line
5318
5319							# like 'sub : lvalue' ?
5320							&& !$self->sub_attribute_ok_here( $tok_kw, $next_nonblank_token,
5321							$i_next )
5322							&& new_statement_ok()
5323							)
5324							{
5325	33	100				179	if ( $tok !~ /[A-Z]/ ) {
5326	15					26	push @{ $self->[_rlower_case_labels_at_] }, $input_line_number;
	15					45
5327							}
5328	33					56	$type = 'J';
5329	33					78	$tok .= ':';
5330	33					53	$i = $i_next;
5331							}
5332
5333							# 'sub' or other sub alias
5334							elsif ( $is_sub{$tok_kw} ) {
5335
5336							# Guess what to do for unknown word 'method':
5337							# Updated for --use-feature=class (rt145706):
5338	352	100	100			1955	if ( $tok_kw eq 'method'
			100
5339							&& $guess_if_method
5340							&& !$self->method_ok_here($next_nonblank_token) )
5341							{
5342	7					23	$self->do_UNKNOWN_BAREWORD($next_nonblank_token);
5343							}
5344							else {
5345	345	50				829	$self->error_if_expecting_OPERATOR()
5346							if ( $expecting == OPERATOR );
5347	345					2349	initialize_subname();
5348	345					1092	$self->scan_id();
5349							}
5350							}
5351
5352							# 'package'
5353							elsif ( $is_package{$tok_kw} ) {
5354
5355							# Update for --use-feature=class (rt145706):
5356							# We have to be extra careful because 'class' may be used for other
5357							# purposes on older code; i.e.
5358							# class($x) - valid sub call
5359							# package($x) - error
5360	54	100				175	if ( $tok_kw eq 'class' ) {
5361	14	100	66			119	if ( $expecting == OPERATOR
			100
5362							\|\| $next_nonblank_token !~ /^[\w\:]/
5363							\|\| !$self->class_ok_here() )
5364							{
5365	4					12	$self->do_UNKNOWN_BAREWORD($next_nonblank_token);
5366							}
5367	10					24	else { $self->scan_id() }
5368							}
5369							else {
5370	40	50				106	$self->error_if_expecting_OPERATOR()
5371							if ( $expecting == OPERATOR );
5372	40					136	$self->scan_id();
5373							}
5374							}
5375
5376							# Fix for c035: split 'format' from 'is_format_END_DATA' to be
5377							# more restrictive. Require a new statement to be ok here.
5378							elsif ( $tok_kw eq 'format' && new_statement_ok() ) {
5379	1					2	$type = ';'; # make tokenizer look for TERM next
5380	1					3	$self->[_in_format_] = 1;
5381	1					2	$is_last = 1; ## is last token on this line
5382							}
5383
5384							# Note on token types for format, __DATA__, __END__:
5385							# It simplifies things to give these type ';', so that when we
5386							# start rescanning we will be expecting a token of type TERM.
5387							# We will switch to type 'k' before outputting the tokens.
5388							elsif ( defined( $is_END_DATA{$tok_kw} ) ) {
5389
5390							# Warn if this follows an operator expecting a term (c565)
5391	8	50				57	$self->error_if_expecting_TERM()
5392							if ( $expecting == TERM );
5393
5394	8					17	$type = ';'; # make tokenizer look for TERM next
5395
5396							# Remember that we are in one of these three sections
5397	8					25	$self->[ $is_END_DATA{$tok_kw} ] = 1;
5398	8					16	$is_last = 1; ## is last token on this line
5399							}
5400							elsif ( $is_keyword{$tok_kw} ) {
5401	3060					8635	$self->do_KEYWORD();
5402							}
5403
5404							# check for inline label following
5405							# /^(redo\|last\|next\|goto)$/
5406							elsif (( $last_nonblank_type eq 'k' )
5407							&& ( $is_redo_last_next_goto{$last_nonblank_token} ) )
5408							{
5409	19					42	$type = 'j';
5410							}
5411
5412							# something else --
5413							else {
5414	1019					2980	$self->do_UNKNOWN_BAREWORD($next_nonblank_token);
5415							}
5416
5417							#----------------------------------------------------------------
5418							# Save info for use in later guessing. Even for types 'i' and 'U'
5419							# because those may be marked as type 'w' (barewords) elsewhere.
5420							#----------------------------------------------------------------
5421	6571	100	100			22055	if ( $is_wiUC{$type}
			100
5422							&& $statement_type ne 'use'
5423							&& $statement_type ne '_use' )
5424							{
5425	2626					3678	my $result = "unknown";
5426
5427							# Words are marked 'function' if they appear in a role which
5428							# is not consistent with a constant value. Typically they are
5429							# function calls.
5430	2626	100	100			11941	if ( $type eq 'U'
		100	66
5431							\|\| $is_function_follower{$next_nonblank_token} )
5432							{
5433
5434	1015					1556	my $empty_parens = 0;
5435	1015	100	100			3403	if ( $next_nonblank_token eq '(' && $i_next < $max_token_index )
5436							{
5437	592					1173	my $tok_next_p1 = $rtokens->[ $i_next + 1 ];
5438	592	100	100			2329	if ( substr( $tok_next_p1, 0, 1 ) eq SPACE
5439							&& $i_next + 2 <= $max_token_index )
5440							{
5441	282					576	$tok_next_p1 = $rtokens->[ $i_next + 2 ];
5442							}
5443	592					1055	$empty_parens = $tok_next_p1 eq ')';
5444							}
5445
5446	1015	100				2158	if ( !$empty_parens ) {
5447
5448							# not a constant term - probably a function
5449	957					1418	$result = "function";
5450	957					2420	$self->saw_bareword_function($tok);
5451							}
5452							}
5453
5454							# Words are marked 'constant' if they appear in a role
5455							# consistent with a constant value. However, they may simply
5456							# be functions which optionally take zero args. So if a word
5457							# appears as both constant and function, it is not a constant.
5458							elsif ($type eq 'C'
5459							\|\| $is_constant_follower{$next_nonblank_token} )
5460							{
5461
5462	443		100			1780	my $is_hash_key = $next_nonblank_token eq '}'
5463							&& (
5464							$last_nonblank_type eq 'L'
5465							\|\| ( $last_nonblank_type eq 'm'
5466							&& $last_last_nonblank_type eq 'L' )
5467							);
5468
5469	443	100	100			2440	if (
			100
			100
5470
5471							# not a hash key like {bareword} or {-bareword}
5472							!$is_hash_key
5473
5474							# not a package name, etc
5475							&& ( $last_nonblank_type ne 'k'
5476							\|\| !$is_use_require_no{$last_nonblank_token} )
5477
5478							# skip arrow calls, which can go either way
5479							&& $last_nonblank_token ne '->'
5480							)
5481							{
5482							# possibly a constant or constant function
5483	180					260	$result = "constant";
5484	180					525	$self->saw_bareword_constant($tok);
5485							}
5486							else {
5487	263					455	$result = "other bareword";
5488							}
5489							}
5490							else {
5491							}
5492
5493	2626					3412	if ( DEBUG_BAREWORD && $result ne 'other bareword' ) {
5494							print
5495							"$input_line_number: $result: $tok: type=$type : last_tok=$last_nonblank_token : next_tok='$next_nonblank_token'\n";
5496							}
5497							}
5498	6571					11808	return $is_last;
5499
5500							} ## end sub do_BAREWORD
5501
5502							# Table of quote types checked for interpolated here targets.
5503							# Issue 310 has extensive test cases.
5504							my %is_interpolated_quote = (
5505							q{'} => 0,
5506							q{`} => 1,
5507							q{"} => 1,
5508							qq => 1,
5509							qx => 1,
5510							m => 1,
5511							qr => 1,
5512							q => 0,
5513							qw => 0,
5514							s => 1,
5515							y => 0,
5516							tr => 0,
5517							);
5518
5519							sub push_here_targets {
5520	2			2	0	4	my ($rht) = @_;
5521
5522							# Push here targets found in a quote onto the here target list
5523	2					2	push @{$rhere_target_list}, @{$rht};
	2					4
	2					4
5524
5525							# Change type from 'Q' to 'h' for quotes with here-doc targets so that
5526							# the formatter (see sub process_line_of_CODE) will not make any line
5527							# breaks after this point.
5528	2					3	$type = 'h';
5529	2	100				6	if ( $i_tok < 0 ) {
5530	1					2	my $ilast = $routput_token_list->[-1];
5531	1					2	$routput_token_type->[$ilast] = $type;
5532							}
5533	2					4	return;
5534							} ## end sub push_here_targets
5535
5536							sub do_FOLLOW_QUOTE {
5537
5538	3170			3170	0	3883	my $self = shift;
5539
5540							# Continue following a quote on a new line
5541	3170					3945	$type = $quote_type;
5542
5543							# initialize if continuation line
5544	3170	100				3514	if ( !@{$routput_token_list} ) {
	3170					5938
5545	245					337	push( @{$routput_token_list}, $i );
	245					381
5546	245					425	$routput_token_type->[$i] = $type;
5547							}
5548
5549							# Save starting lengths for here target search
5550	3170					4360	my $len_qs1 = length($quoted_string_1);
5551	3170					3768	my $len_qs2 = length($quoted_string_2);
5552	3170					3917	my $in_quote_start = $in_quote;
5553
5554							# scan for the end of the quote or pattern
5555							(
5556	3170					8459	$i,
5557							$in_quote,
5558							$quote_character,
5559							$quote_pos,
5560							$quote_depth,
5561							$quoted_string_1,
5562							$quoted_string_2,
5563
5564							) = $self->do_quote(
5565
5566							$i,
5567							$in_quote,
5568							$quote_character,
5569							$quote_pos,
5570							$quote_depth,
5571							$quoted_string_1,
5572							$quoted_string_2,
5573							$rtokens,
5574							$rtoken_type,
5575							$rtoken_map,
5576							$max_token_index,
5577
5578							);
5579
5580							# Save pattern and replacement text for rescanning for /e
5581	3170					5391	my $qs1_for_e_scan = $quoted_string_1;
5582
5583							# Check for possible here targets in an interpolated quote
5584	3170	100	100			9302	if ( $is_interpolated_quote{$quote_starting_tok}
5585							&& $in_quote < $in_quote_start )
5586							{
5587
5588							# post any saved target of a 2-part quote if the end is reached
5589	1431	100	100			4553	if ( !$in_quote && defined($quote_here_target_2) ) {
5590
5591							# Safety check
5592	1	50				4	if ( $quote_items{$quote_starting_tok} == 2 ) {
5593	1					3	push_here_targets($quote_here_target_2);
5594							}
5595							else {
5596	0					0	DEVEL_MODE
5597							&& Fault(
5598							"unexpected saved here target near line $input_line_number\n"
5599							);
5600							}
5601	1					2	$quote_here_target_2 = undef;
5602							}
5603
5604							# Single part quotes: use $quoted_string_1, and
5605							# $in_quote drops from 1 to 0 when the end is found
5606							# Dual part quotes ('s'): first part is in $quoted_string_2, and
5607							# $in_quote:
5608							# drops from 2 to 1 when the the first part is found
5609							# drops 1 to 0 when the the second part is found
5610							# drops from 2 to 0 if both parts are found in this call
5611							# The tricky part is that we must search for here targets whenever
5612							# $in_quote drops, but we can only post here targets after the end
5613							# of the last part is found (in_quote==0). See test 'here4.in'.
5614							# Update c310 added interpolated here docs and has many test cases.
5615
5616							# Initialize for the normal case of a single quote
5617	1431					2052	my $qs = $quoted_string_1;
5618	1431					1834	my $len_qs = $len_qs1;
5619	1431					2119	my $num_quotes = $in_quote_start - $in_quote;
5620
5621							# Dual part quotes (type 's') have first part in $quoted_string_2
5622	1431	100				2574	if ( $in_quote_start == 2 ) {
5623	31					49	$qs = $quoted_string_2;
5624	31					61	$len_qs = $len_qs2;
5625							}
5626
5627							# Loop to search 1 or 2 quotes for here targets
5628	1431					3301	foreach ( 1 .. $num_quotes ) {
5629
5630							# Perform quick tests to avoid a sub call:
5631	1460					2870	my $pos_shift = rindex( $qs, '<<' );
5632	1460	50	100			3329	if (
			33
			66
5633
5634							# '<<' in the last line
5635							$pos_shift >= $len_qs
5636
5637							# followed by a '}'
5638							&& rindex( $qs, '}' ) > $pos_shift
5639
5640							# preceded by '$' or '@'
5641							&& ( rindex( $qs, '$', $pos_shift ) >= 0
5642							\|\| rindex( $qs, '@', $pos_shift ) >= 0 )
5643							)
5644							{
5645
5646							# scan the quote for here targets
5647	2					9	my ( $rht, $qs_mod ) =
5648							$self->find_interpolated_here_targets( $qs, $len_qs );
5649	2	50				8	if ($rht) {
5650
5651							# only post here targets when end of quote is found
5652	2	100				5	if ($in_quote) {
5653	1					2	$quote_here_target_2 = $rht;
5654							}
5655							else {
5656	1					5	push_here_targets($rht);
5657
5658							# Replace the string with the modified version
5659							# in case it is re-scanned due to a /e modifier
5660	1					2	$qs1_for_e_scan = $qs_mod;
5661							}
5662							}
5663							}
5664
5665							# re-initialize for next pass
5666	1460					1922	$qs = $quoted_string_1;
5667	1460					2637	$len_qs = $len_qs1;
5668							} ## end while ( $num_quotes-- > 0)
5669							}
5670
5671	3170	100				5352	if ($in_quote) { return }
	244					427
5672
5673							# All done with this quote...
5674
5675							# re-initialize for next search
5676	2926					3677	$quote_character = EMPTY_STRING;
5677	2926					3482	$quote_pos = 0;
5678	2926					3526	$quote_type = 'Q';
5679	2926					3502	$quoted_string_1 = EMPTY_STRING;
5680	2926					3457	$quoted_string_2 = EMPTY_STRING;
5681	2926	100				5050	if ( ++$i > $max_token_index ) { return }
	126					256
5682
5683							# look for any modifiers
5684	2800	100				4789	if ($allowed_quote_modifiers) {
5685
5686							# check for exact quote modifiers
5687	162	100				679	if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) {
5688	31					53	my $str = $rtokens->[$i];
5689	31					47	my $saw_modifier_e;
5690	31					731	while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
5691	49					82	my $pos = pos($str);
5692	49					94	my $char = substr( $str, $pos - 1, 1 );
5693	49		100			255	$saw_modifier_e \|\|= ( $char eq 'e' );
5694							}
5695
5696							# For an 'e' quote modifier we must scan the replacement
5697							# text for here-doc targets...
5698							# but if the modifier starts a new line we must skip
5699							# this because either the here doc will be fully
5700							# contained in the replacement text (so we can
5701							# ignore it) or Perl will not find it. The modifier will have a
5702							# pretoken index $i=1 if it starts a new line, so we only look
5703							# for a here doc if $i>1. See test 'here2.in'.
5704	31	50	66			93	if ( $saw_modifier_e && $i > 1 ) {
5705	0					0	my $rht = $self->scan_replacement_text($qs1_for_e_scan);
5706	0	0				0	if ($rht) {
5707	0					0	push_here_targets($rht);
5708							}
5709							}
5710
5711	31	50				70	if ( defined( pos($str) ) ) {
5712
5713							# matched
5714	31	50				78	if ( pos($str) == length($str) ) {
5715	31	50				94	if ( ++$i > $max_token_index ) { return }
	0					0
5716							}
5717
5718							# Looks like a joined quote modifier
5719							# and keyword, maybe something like
5720							# s/xxx/yyy/gefor @k=...
5721							# Example is "galgen.pl". Would have to split
5722							# the word and insert a new token in the
5723							# pre-token list. This is so rare that I haven't
5724							# done it. Will just issue a warning citation.
5725
5726							# This error might also be triggered if my quote
5727							# modifier characters are incomplete
5728							else {
5729	0					0	$self->warning(<<EOM);
5730
5731							Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
5732							Please put a space between quote modifiers and trailing keywords.
5733							EOM
5734
5735							# print "token $rtokens->[$i]\n";
5736							# my $num = length($str) - pos($str);
5737							# $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num);
5738							# print "continuing with new token $rtokens->[$i]\n";
5739
5740							# skipping past this token does least damage
5741	0	0				0	if ( ++$i > $max_token_index ) { return }
	0					0
5742							}
5743							}
5744							else {
5745
5746							# example file: rokicki4.pl
5747							# This error might also be triggered if my quote
5748							# modifier characters are incomplete
5749	0					0	$self->write_logfile_entry(
5750							"Note: found word $str at quote modifier location\n");
5751							}
5752							}
5753
5754							# re-initialize
5755	162					261	$allowed_quote_modifiers = EMPTY_STRING;
5756							}
5757	2800					4041	return;
5758							} ## end sub do_FOLLOW_QUOTE
5759
5760							# ------------------------------------------------------------
5761							# begin hash of code for handling most token types
5762							# ------------------------------------------------------------
5763							my $tokenization_code = {
5764
5765							'$' => \&do_DOLLAR_SIGN,
5766							'(' => \&do_LEFT_PARENTHESIS,
5767							')' => \&do_RIGHT_PARENTHESIS,
5768							',' => \&do_COMMA,
5769							';' => \&do_SEMICOLON,
5770							'"' => \&do_QUOTATION_MARK,
5771							"'" => \&do_APOSTROPHE,
5772							'`' => \&do_BACKTICK,
5773							'/' => \&do_SLASH,
5774							'{' => \&do_LEFT_CURLY_BRACKET,
5775							'}' => \&do_RIGHT_CURLY_BRACKET,
5776							'&' => \&do_AMPERSAND,
5777							'<' => \&do_LESS_THAN_SIGN,
5778							'?' => \&do_QUESTION_MARK,
5779							'*' => \&do_STAR,
5780							'.' => \&do_DOT,
5781							':' => \&do_COLON,
5782							'+' => \&do_PLUS_SIGN,
5783							'@' => \&do_AT_SIGN,
5784							'%' => \&do_PERCENT_SIGN,
5785							'[' => \&do_LEFT_SQUARE_BRACKET,
5786							']' => \&do_RIGHT_SQUARE_BRACKET,
5787							'-' => \&do_MINUS_SIGN,
5788							'^' => \&do_CARAT_SIGN,
5789							'::' => \&do_DOUBLE_COLON,
5790							'<<' => \&do_LEFT_SHIFT,
5791							'<<~' => \&do_NEW_HERE_DOC,
5792							'->' => \&do_POINTER,
5793							'++' => \&do_PLUS_PLUS,
5794							'=>' => \&do_FAT_COMMA,
5795							'--' => \&do_MINUS_MINUS,
5796
5797							# No special code for these types yet, but syntax checks
5798							# could be added.
5799							## '&&' => \&do_LOGICAL_AND,
5800							## '\|\|' => \&do_LOGICAL_OR,
5801							## '>' => \&do_GREATER_THAN_SIGN,
5802							## '\|' => \&do_VERTICAL_LINE,
5803							## '//' => \&do_SLASH_SLASH,
5804							## '!' => undef,
5805							## '!=' => undef,
5806							## '!~' => undef,
5807							## '%=' => undef,
5808							## '&&=' => undef,
5809							## '&=' => undef,
5810							## '+=' => undef,
5811							## '-=' => undef,
5812							## '..' => undef,
5813							## '..' => undef,
5814							## '...' => undef,
5815							## '.=' => undef,
5816							## '<<=' => undef,
5817							## '<=' => undef,
5818							## '<=>' => undef,
5819							## '<>' => undef,
5820							## '=' => undef,
5821							## '==' => undef,
5822							## '=~' => undef,
5823							## '>=' => undef,
5824							## '>>' => undef,
5825							## '>>=' => undef,
5826							## '\\' => undef,
5827							## '^=' => undef,
5828							## '\|=' => undef,
5829							## '\|\|=' => undef,
5830							## '//=' => undef,
5831							## '~' => undef,
5832							## '~~' => undef,
5833							## '!~~' => undef,
5834
5835							};
5836
5837							# ------------------------------------------------------------
5838							# end hash of code for handling individual token types
5839							# ------------------------------------------------------------
5840
5841	44			44		388	use constant DEBUG_TOKENIZE => 0;
	44					79
	44					4639
5842
5843							my %is_arrow_or_Z;
5844
5845							BEGIN {
5846	44			44		244	my @qZ = qw( -> Z );
5847	44					161157	$is_arrow_or_Z{$_} = 1 for @qZ;
5848							}
5849
5850							sub tokenize_this_line {
5851
5852	8799			8799	0	14822	my ( $self, $line_of_tokens, $trimmed_input_line ) = @_;
5853
5854							# This routine tokenizes one line. The results are stored in
5855							# the hash ref '$line_of_tokens'.
5856
5857							# Given:
5858							# $line_of_tokens = ref to hash of values being filled for this line
5859							# $trimmed_input_line
5860							# = the input line without leading whitespace, OR
5861							# = undef if not available
5862							# Returns:
5863							# nothing
5864
5865	8799					13325	my $untrimmed_input_line = $line_of_tokens->{_line_text};
5866
5867							# Extract line number for use in error messages
5868	8799					12184	$input_line_number = $line_of_tokens->{_line_number};
5869
5870							#-------------------------------------
5871							# Check for start of pod documentation
5872							#-------------------------------------
5873	8799	100	100			27581	if ( !$in_quote
			66
5874							&& substr( $untrimmed_input_line, 0, 1 ) eq '='
5875							&& $untrimmed_input_line =~ /^=[A-Za-z_]/ )
5876							{
5877
5878							# Must not be in an equation where an '=' could be expected.
5879							# Perl has additional restrictions which are not checked here.
5880	15					29	my $blank_after_Z = 1;
5881	15					56	$expecting = $self->operator_expected( '=', 'b', $blank_after_Z );
5882	15	50				63	if ( $expecting == TERM ) {
5883	15					28	$self->[_in_pod_] = 1;
5884	15					35	return;
5885							}
5886							}
5887
5888							#--------------------------
5889							# Trim leading whitespace ?
5890							#--------------------------
5891							# Use untrimmed line if we are continuing in a type 'Q' quote
5892	8784	100	100			17727	if ( $in_quote && $quote_type eq 'Q' ) {
5893	58					102	$line_of_tokens->{_starting_in_quote} = 1;
5894	58					82	$input_line = $untrimmed_input_line;
5895	58					98	chomp $input_line;
5896							}
5897
5898							# Trim start of this line if we are not continuing a quoted line.
5899							# Do not trim end because we might end in a quote (test: deken4.pl)
5900							# Perl::Tidy::Formatter will delete needless trailing blanks
5901							else {
5902	8726					13600	$line_of_tokens->{_starting_in_quote} = 0;
5903
5904							# Use the pre-computed trimmed line if defined (most efficient)
5905	8726					11401	$input_line = $trimmed_input_line;
5906
5907							# otherwise trim the raw input line (much less efficient)
5908	8726	50				19499	if ( !defined($input_line) ) {
5909	0					0	$input_line = $untrimmed_input_line;
5910	0					0	$input_line =~ s/^\s+//;
5911							}
5912
5913	8726					11869	chomp $input_line;
5914
5915							# define 'guessed_indentation_level' if logfile will be saved
5916	8726	100	100			17970	if ( $self->[_save_logfile_] && length($input_line) ) {
5917	3					5	my $guess =
5918							$self->guess_old_indentation_level($untrimmed_input_line);
5919	3					5	$line_of_tokens->{_guessed_indentation_level} = $guess;
5920							}
5921							}
5922
5923							#------------
5924							# Blank lines
5925							#------------
5926	8784	100				15192	if ( !length($input_line) ) {
5927	1040					1856	$line_of_tokens->{_line_type} = 'CODE';
5928	1040					1933	$line_of_tokens->{_rtokens} = [];
5929	1040					1817	$line_of_tokens->{_rtoken_type} = [];
5930	1040					2199	$line_of_tokens->{_rlevels} = [];
5931	1040					1886	$line_of_tokens->{_rblock_type} = [];
5932	1040					1983	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
5933	1040					1852	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
5934	1040					1795	return;
5935							}
5936
5937							#---------
5938							# Comments
5939							#---------
5940	7744	100	100			21184	if ( !$in_quote && substr( $input_line, 0, 1 ) eq '#' ) {
5941
5942							# and check for skipped section
5943	884	50	66			4398	if (
			66
			66
5944							(
5945							substr( $input_line, 0, 4 ) eq '#<<V'
5946							\|\| $rOpts_code_skipping_begin
5947							)
5948							&& $rOpts_code_skipping
5949
5950							# note that the code_skipping_patterns require a newline
5951							&& ( $input_line . SPACE ) =~ /$code_skipping_pattern_begin/
5952							)
5953							{
5954	2					7	$self->[_in_code_skipping_] = $self->[_last_line_number_];
5955	2					4	return;
5956							}
5957
5958							# Look for format skipping tags, but just normal mode.
5959							# It will be used for these purposes:
5960							# - to inform the formatter of an end token with no begin token
5961							# - for making a hint when a brace error is detected
5962	882	100	100			8960	if (
		100	100
			100
			33
			66
			100
			100
			100
5963							(
5964							substr( $input_line, 0, 4 ) eq '#<<<'
5965							\|\| $rOpts_format_skipping_begin
5966							)
5967							&& $rOpts_format_skipping
5968
5969							# note that the format_skipping_patterns require a space
5970							&& ( $input_line . SPACE ) =~ /$format_skipping_pattern_begin/
5971
5972							# allow same token for begin and end
5973							&& (
5974							!$self->[_in_format_skipping_]
5975							\|\| ( $format_skipping_pattern_begin ne
5976							$format_skipping_pattern_end )
5977							)
5978							)
5979							{
5980	17					76	my $on_off = 1;
5981	17					33	my $lno = $self->[_last_line_number_];
5982	17					34	my $rformat_skipping_list = $self->[_rformat_skipping_list_];
5983
5984							# format markers must alternate between on and off
5985	17	50	66			23	if ( @{$rformat_skipping_list}
	17					91
5986							&& $rformat_skipping_list->[-1]->[0] == $on_off )
5987							{
5988	0					0	my $lno_last = $rformat_skipping_list->[-1]->[1];
5989	0					0	$self->warning_do_not_format(
5990							"consecutive format-skipping start markers - see line $lno_last\n"
5991							);
5992							}
5993	17					36	push @{$rformat_skipping_list}, [ $on_off, $lno, $input_line ];
	17					55
5994	17					38	$self->[_in_format_skipping_] = $lno;
5995							}
5996							elsif (
5997							(
5998							substr( $input_line, 0, 4 ) eq '#>>>'
5999							\|\| $rOpts_format_skipping_end
6000							)
6001							&& $rOpts_format_skipping
6002
6003							# note that the format_skipping_patterns require a newline
6004							&& ( $input_line . SPACE ) =~ /$format_skipping_pattern_end/
6005							)
6006							{
6007	20					54	my $lno = $self->[_last_line_number_];
6008	20					40	my $rformat_skipping_list = $self->[_rformat_skipping_list_];
6009	20					31	my $on_off = -1;
6010
6011							# markers must alternate between on and off
6012	20	50	66			37	if ( @{$rformat_skipping_list}
	20					136
6013							&& $rformat_skipping_list->[-1]->[0] == $on_off )
6014							{
6015	0					0	my $lno_last = $rformat_skipping_list->[-1]->[1];
6016	0					0	$self->warning_do_not_format(
6017							"consecutive format-skipping end markers - see line $lno_last\n"
6018							);
6019							}
6020
6021	20					38	push @{$rformat_skipping_list}, [ $on_off, $lno, $input_line ];
	20					76
6022	20					51	$self->[_in_format_skipping_] = 0;
6023							}
6024							else {
6025							# not a format skipping comment
6026							}
6027
6028							# Optional fast processing of a block comment
6029	882					1617	$line_of_tokens->{_line_type} = 'CODE';
6030	882					2165	$line_of_tokens->{_rtokens} = [$input_line];
6031	882					2026	$line_of_tokens->{_rtoken_type} = ['#'];
6032	882					2534	$line_of_tokens->{_rlevels} = [$level_in_tokenizer];
6033	882					2064	$line_of_tokens->{_rblock_type} = [EMPTY_STRING];
6034	882					1855	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
6035	882					1765	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
6036	882					1756	return;
6037							}
6038
6039							#-------------------------------------
6040							# Loop to find all tokens on this line
6041							#-------------------------------------
6042
6043							# Update the copy of the line for use in error messages
6044							# This must be exactly what we give the pre_tokenizer
6045	6860					9740	$self->[_line_of_text_] = $input_line;
6046
6047							# re-initialize for the main loop
6048	6860					10029	$routput_token_list = []; # stack of output token indexes
6049	6860					12173	$routput_token_type = []; # token types
6050	6860					17675	$routput_block_type = []; # types of code block
6051	6860					16106	$routput_type_sequence = []; # nesting sequential number
6052
6053	6860					13960	$rhere_target_list = [];
6054
6055	6860					8993	$tok = $last_nonblank_token;
6056	6860					8738	$type = $last_nonblank_type;
6057	6860					8267	$prototype = $last_nonblank_prototype;
6058	6860					8205	$last_nonblank_i = -1;
6059	6860					8442	$block_type = $last_nonblank_block_type;
6060	6860					8002	$container_type = $last_nonblank_container_type;
6061	6860					7922	$type_sequence = $last_nonblank_type_sequence;
6062	6860					7547	$indent_flag = 0;
6063	6860					7613	$peeked_ahead = 0;
6064
6065	6860					15579	$self->tokenizer_main_loop();
6066
6067							#-------------------------------------------------
6068							# Done tokenizing this line ... package the result
6069							#-------------------------------------------------
6070	6860					18189	$self->tokenizer_wrapup_line($line_of_tokens);
6071
6072	6860					10690	return;
6073							} ## end sub tokenize_this_line
6074
6075							sub tokenizer_main_loop {
6076
6077	6860			6860	0	9953	my ($self) = @_;
6078
6079							# Break one input line into tokens
6080							# We are working on closure variables.
6081
6082							# Start by breaking the line into pre-tokens
6083	6860					15521	( $rtokens, $rtoken_map, $rtoken_type ) = pre_tokenize($input_line);
6084
6085							# Verify that all leading whitespace has been trimmed
6086							# except for quotes of type 'Q' (c273).
6087	6860	50	66			25658	if ( @{$rtokens}
	6860		33			26494
			66
6088							&& $rtoken_type->[0] eq 'b'
6089							&& !( $in_quote && $quote_type eq 'Q' ) )
6090							{
6091
6092							# Shouldn't happen if calling sub did trim operation correctly.
6093	0					0	DEVEL_MODE && Fault(<<EOM);
6094							leading blank at line
6095							$input_line
6096							EOM
6097
6098							# Fix by removing the leading blank token. This fix has been
6099							# tested and works correctly even if no whitespaces was trimmed.
6100							# But it is an inefficient way to do things because, for example,
6101							# it forces all comments to be processed by sub pre_tokenize.
6102							# And it may cause indented code-skipping comments to be missed.
6103	0					0	shift @{$rtokens};
	0					0
6104	0					0	shift @{$rtoken_map};
	0					0
6105	0					0	shift @{$rtoken_type};
	0					0
6106							}
6107
6108	6860					8330	$max_token_index = scalar( @{$rtokens} ) - 1;
	6860					9551
6109	6860					8062	push( @{$rtokens}, SPACE, SPACE, SPACE )
	6860					14237
6110							; # extra whitespace simplifies logic
6111	6860					7961	push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced
	6860					11613
6112	6860					7967	push( @{$rtoken_type}, 'b', 'b', 'b' );
	6860					12334
6113
6114							# initialize for main loop
6115	6860					7993	if (0) { #<<< this is not necessary
6116							foreach my $ii ( 0 .. $max_token_index + 3 ) {
6117							$routput_token_type->[$ii] = EMPTY_STRING;
6118							$routput_block_type->[$ii] = EMPTY_STRING;
6119							$routput_type_sequence->[$ii] = EMPTY_STRING;
6120							$routput_indent_flag->[$ii] = 0;
6121							}
6122							}
6123
6124	6860					8018	$i = -1;
6125	6860					8340	$i_tok = -1;
6126
6127							#-----------------------
6128							# main tokenization loop
6129							#-----------------------
6130
6131							# we are looking at each pre-token of one line and combining them
6132							# into tokens
6133	6860					12701	while ( ++$i <= $max_token_index ) {
6134
6135							# continue looking for the end of a quote
6136	59584	100				82393	if ($in_quote) {
6137	3170					7822	$self->do_FOLLOW_QUOTE();
6138	3170	100	100			8844	last if ( $in_quote \|\| $i > $max_token_index );
6139							}
6140
6141	59214	100	100			124275	if ( $type ne 'b' && $type ne 'CORE::' ) {
6142
6143							# try to catch some common errors
6144	41499	100	100			69369	if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
6145
6146	2065	100				4637	if ( $last_nonblank_token eq 'eq' ) {
		50
6147	9					38	$self->complain("Should 'eq' be '==' here ?\n");
6148							}
6149							elsif ( $last_nonblank_token eq 'ne' ) {
6150	0					0	$self->complain("Should 'ne' be '!=' here ?\n");
6151							}
6152							else {
6153							# that's all
6154							}
6155							}
6156
6157							# fix c090, only rotate vars if a new token will be stored
6158	41499	100				59952	if ( $i_tok >= 0 ) {
6159
6160	34828					39549	$last_last_nonblank_token = $last_nonblank_token;
6161	34828					37251	$last_last_nonblank_type = $last_nonblank_type;
6162
6163	34828					38039	$last_nonblank_prototype = $prototype;
6164	34828					38098	$last_nonblank_block_type = $block_type;
6165	34828					37220	$last_nonblank_container_type = $container_type;
6166	34828					38563	$last_nonblank_type_sequence = $type_sequence;
6167	34828					35590	$last_nonblank_i = $i_tok;
6168	34828					35317	$last_nonblank_token = $tok;
6169	34828					36504	$last_nonblank_type = $type;
6170							}
6171
6172							# Check for patches
6173	41499	100				68796	if ( $is_arrow_or_Z{$last_last_nonblank_type} ) {
6174
6175							# Patch for c030: Fix things in case a '->' got separated
6176							# from the subsequent identifier by a side comment. We
6177							# need the last_nonblank_token to have a leading -> to
6178							# avoid triggering an operator expected error message at
6179							# the next '('. See also fix for git #63.
6180	1218	100				2511	if ( $last_last_nonblank_type eq '->' ) {
		50
6181	1172	100	66			4001	if ( $last_nonblank_type eq 'w'
6182							\|\| $last_nonblank_type eq 'i' )
6183							{
6184	793					1351	$last_nonblank_token = '->' . $last_nonblank_token;
6185	793					1186	$last_nonblank_type = 'i';
6186							}
6187							}
6188
6189							# Fix part #3 for git82: propagate type 'Z' though L-R pair
6190							elsif ( $last_last_nonblank_type eq 'Z' ) {
6191	46	100				138	if ( $last_nonblank_type eq 'R' ) {
6192	1					3	$last_nonblank_type = $last_last_nonblank_type;
6193	1					1	$last_nonblank_token = $last_last_nonblank_token;
6194							}
6195							}
6196							else {
6197							# No other patches
6198							}
6199							}
6200							}
6201
6202							# store previous token type
6203	59214	100				80553	if ( $i_tok >= 0 ) {
6204	52543					83840	$routput_token_type->[$i_tok] = $type;
6205	52543					74068	$routput_block_type->[$i_tok] = $block_type;
6206	52543					69132	$routput_type_sequence->[$i_tok] = $type_sequence;
6207	52543					62772	$routput_indent_flag->[$i_tok] = $indent_flag;
6208							}
6209
6210							# get the next pre-token and type
6211							# $tok and $type will be modified to make the output token
6212	59214					77133	my $pre_tok = $tok = $rtokens->[$i]; # get the next pre-token
6213	59214					73829	my $pre_type = $type = $rtoken_type->[$i]; # and type
6214
6215							# re-initialize various flags for the next output token
6216							(
6217
6218							# remember the starting index of this token; we will update $i
6219	59214					91052	$i_tok,
6220							$block_type,
6221							$container_type,
6222							$type_sequence,
6223							$indent_flag,
6224							$prototype,
6225							)
6226							= (
6227
6228							$i,
6229							EMPTY_STRING,
6230							EMPTY_STRING,
6231							EMPTY_STRING,
6232							0,
6233							EMPTY_STRING,
6234							);
6235
6236							# this pre-token will start an output token
6237	59214					59789	push( @{$routput_token_list}, $i_tok );
	59214					79284
6238
6239							#---------------------------------------------------
6240							# The token search leads to one of 5 main END NODES:
6241							#---------------------------------------------------
6242
6243							#-----------------------
6244							# END NODE 1: whitespace
6245							#-----------------------
6246	59214	100				95378	next if ( $pre_type eq 'b' );
6247
6248							#----------------------
6249							# END NODE 2: a comment
6250							#----------------------
6251	41354	100				60099	if ( $pre_type eq '#' ) {
6252
6253							# push non-indenting brace stack Look for a possible
6254							# non-indenting brace. This is only used to give a hint in
6255							# case the file is unbalanced.
6256							# Hardwired to '#<<<' for efficiency. We will not use the
6257							# result later if the pattern has been changed (very unusual).
6258	363	100	66			1653	if ( $last_nonblank_token eq '{'
			66
			33
			66
6259							&& $last_nonblank_block_type
6260							&& $last_nonblank_type_sequence
6261							&& !$self->[_in_format_skipping_]
6262							&& $rOpts_non_indenting_braces )
6263							{
6264	46					90	my $offset = $rtoken_map->[$i_tok];
6265	46					122	my $text = substr( $input_line, $offset, 5 );
6266	46					77	my $len = length($text);
6267	46	100	66			358	if ( $len == 4 && $text eq '#<<<'
			66
			66
6268							\|\| $len > 4 && $text eq '#<<< ' )
6269							{
6270	6					9	push @{ $self->[_rnon_indenting_brace_stack_] },
	6					16
6271							$last_nonblank_type_sequence;
6272							}
6273							}
6274	363					651	last;
6275							}
6276
6277							# continue gathering identifier if necessary
6278	40991	100				59659	if ($id_scan_state) {
6279
6280	17	100	66			76	if ( $is_sub{$id_scan_state} \|\| $is_package{$id_scan_state} ) {
6281	10					32	$self->scan_id();
6282							}
6283							else {
6284	7					25	$self->scan_identifier();
6285							}
6286
6287	17	100				49	if ($id_scan_state) {
6288
6289							# Still scanning ...
6290							# Check for side comment between sub and prototype (c061)
6291
6292							# done if nothing left to scan on this line
6293	1	50				5	last if ( $i > $max_token_index );
6294
6295	1					4	my ( $next_nonblank_token_uu, $i_next ) =
6296							find_next_nonblank_token_on_this_line( $i, $rtokens,
6297							$max_token_index );
6298
6299							# done if it was just some trailing space
6300	1	50				4	last if ( $i_next > $max_token_index );
6301
6302							# something remains on the line ... must be a side comment
6303	1					6	next;
6304							}
6305
6306	16	100	100			87	next if ( ( $i > 0 ) \|\| $type );
6307
6308							# didn't find any token; start over
6309	7					16	$type = $pre_type;
6310	7					12	$tok = $pre_tok;
6311							}
6312
6313							#-----------------------------------------------------------
6314							# Combine pre-tokens into digraphs and trigraphs if possible
6315							#-----------------------------------------------------------
6316
6317							# See if we can make a digraph...
6318							# The following tokens are excluded and handled specially:
6319							# '/=' is excluded because the / might start a pattern.
6320							# 'x=' is excluded since it might be $x=, with $ on previous line
6321							# '*' and = might be typeglobs of punctuation variables
6322							# I have allowed tokens starting with <, such as <=,
6323							# because I don't think these could be valid angle operators.
6324							# test file: storrs4.pl
6325	40981	100	100			97132	if ( $can_start_digraph{$tok}
			100
6326							&& $i < $max_token_index
6327							&& $is_digraph{ $tok . $rtokens->[ $i + 1 ] } )
6328							{
6329
6330	3047					4085	my $combine_ok = 1;
6331	3047					5237	my $test_tok = $tok . $rtokens->[ $i + 1 ];
6332
6333							# check for special cases which cannot be combined
6334
6335							# Smartmatch is being deprecated, but may exist in older
6336							# scripts.
6337	3047	100				9021	if ( $test_tok eq '~~' ) {
		100
		100
6338
6339							# Do not combine if a TERM is required
6340	111	100				224	if ( $self->operator_expected( $tok, '~', undef ) == TERM )
6341							{
6342
6343							# block types ';' may actually be hash refs, c567
6344	1	50				3	if ( $last_nonblank_type eq '}' ) {
6345	1					3	my $blk = $rbrace_type->[ $brace_depth + 1 ];
6346	1	50	33			6	if ( !$blk \|\| $blk ne ';' ) { $combine_ok = 0 }
	0					0
6347							}
6348							else {
6349	0					0	$combine_ok = 0;
6350							}
6351							}
6352							}
6353
6354							# '//' must be defined_or operator if an operator is expected.
6355							# TODO: Code for other ambiguous digraphs (/=, x=, *, =)
6356							# could be migrated here for clarity
6357
6358							# Patch for RT#102371, misparsing a // in the following snippet:
6359							# state $b //= ccc();
6360							# The solution is to always accept the digraph (or trigraph)
6361							# after type 'Z' (possible file handle). The reason is that
6362							# sub operator_expected gives TERM expected here, which is
6363							# wrong in this case.
6364							elsif ( $test_tok eq '//' ) {
6365	16	50				47	if ( $last_nonblank_type ne 'Z' ) {
6366
6367							# note that here $tok = '/' and the next tok and type
6368							# is '/'
6369	16					29	my $blank_after_Z;
6370	16					53	$expecting =
6371							$self->operator_expected( $tok, '/', $blank_after_Z );
6372
6373							# Patched for RT#101547, was
6374							# 'unless ($expecting==OPERATOR)'
6375	16	100				43	$combine_ok = 0 if ( $expecting == TERM );
6376							}
6377							}
6378
6379							# Patch for RT #114359: mis-parsing of "print $x ** 0.5;
6380							# Accept the digraphs '**' only after type 'Z'
6381							# Otherwise postpone the decision.
6382							elsif ( $test_tok eq '**' ) {
6383	45	100				147	if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 }
	43					75
6384							}
6385							else {
6386							## no other special cases
6387							}
6388
6389	3047	100	100			15477	if (
			66
			100
6390
6391							# still ok to combine?
6392							$combine_ok
6393
6394							&& ( $test_tok ne '/=' ) # might be pattern
6395							&& ( $test_tok ne 'x=' ) # might be $x
6396							&& ( $test_tok ne '*=' ) # typeglob?
6397
6398							# Moved above as part of fix for
6399							# RT #114359: Missparsing of "print $x ** 0.5;
6400							# && ( $test_tok ne '**' ) # typeglob?
6401							)
6402							{
6403	2994					3762	$tok = $test_tok;
6404	2994					3487	$i++;
6405
6406							# Now try to assemble trigraphs. Note that all possible
6407							# perl trigraphs can be constructed by appending a character
6408							# to a digraph.
6409	2994					4407	$test_tok = $tok . $rtokens->[ $i + 1 ];
6410
6411	2994	100				5586	if ( $is_trigraph{$test_tok} ) {
6412	87					141	$tok = $test_tok;
6413	87					147	$i++;
6414							}
6415
6416							# The only current tetragraph is the double diamond operator
6417							# and its first three characters are NOT a trigraph, so
6418							# we do can do a special test for it
6419							else {
6420	2907	100				5775	if ( $test_tok eq '<<>' ) {
6421	1					3	$test_tok .= $rtokens->[ $i + 2 ];
6422	1	50				4	if ( $is_tetragraph{$test_tok} ) {
6423	1					2	$tok = $test_tok;
6424	1					2	$i += 2;
6425							}
6426							}
6427							}
6428							}
6429							}
6430
6431	40981					44810	$type = $tok;
6432	40981					53410	$next_tok = $rtokens->[ $i + 1 ];
6433	40981					50179	$next_type = $rtoken_type->[ $i + 1 ];
6434
6435							# expecting an operator here? first try table lookup, then function
6436	40981					55433	$expecting = $op_expected_table{$last_nonblank_type};
6437	40981	100				59396	if ( !defined($expecting) ) {
6438	11705		100			20925	my $blank_after_Z = $last_nonblank_type eq 'Z'
6439							&& ( $i == 0 \|\| $rtoken_type->[ $i - 1 ] eq 'b' );
6440	11705					24970	$expecting =
6441							$self->operator_expected( $tok, $next_type, $blank_after_Z );
6442							}
6443
6444	40981					42289	DEBUG_TOKENIZE && do {
6445							local $LIST_SEPARATOR = ')(';
6446							my @debug_list = (
6447							$last_nonblank_token, $tok,
6448							$next_tok, $brace_depth,
6449							$rbrace_type->[$brace_depth], $paren_depth,
6450							$rparen_type->[$paren_depth],
6451							);
6452							print {*STDOUT} "TOKENIZE:(@debug_list)\n";
6453							};
6454
6455							# The next token is '$tok'.
6456							# Now we have to define its '$type'
6457
6458							#------------------------
6459							# END NODE 3: a bare word
6460							#------------------------
6461	40981	100				58274	if ( $pre_type eq 'w' ) {
6462	6616					16038	my $is_last = $self->do_BAREWORD();
6463	6616	100				11425	last if ($is_last);
6464	6607					14421	next;
6465							}
6466
6467							# Turn off attribute list on first non-blank, non-bareword,
6468							# and non-comment (added to fix c038)
6469	34365					41814	$self->[_in_attribute_list_] = 0;
6470
6471							#-------------------------------
6472							# END NODE 4: a string of digits
6473							#-------------------------------
6474	34365	100				48537	if ( $pre_type eq 'd' ) {
6475	2508					6975	$self->do_DIGITS();
6476	2508					4790	next;
6477							}
6478
6479							#------------------------------------------
6480							# END NODE 5: everything else (punctuation)
6481							#------------------------------------------
6482	31857					47517	my $code = $tokenization_code->{$tok};
6483	31857	100				45635	if ($code) {
6484	29616					63461	$code->($self);
6485	29616	100				44869	redo if ($in_quote);
6486							}
6487
6488							# Check for a non-TERM where a TERM is expected. Note that this
6489							# checks all symbols, even those without a $code (update c566)
6490	29181	100				54302	if ( $expecting == TERM ) {
6491							my $is_not_term =
6492							$type eq ';'
6493							\|\| $type eq ','
6494	11678		100			40806	\|\| $is_binary_operator_type{$type};
6495	11678	100				26410	if ($is_not_term) {
6496	277					879	$self->error_if_expecting_TERM();
6497							}
6498							}
6499							} ## End main tokenizer loop
6500
6501							# Store the final token
6502	6860	100				11522	if ( $i_tok >= 0 ) {
6503	6671					12048	$routput_token_type->[$i_tok] = $type;
6504	6671					10184	$routput_block_type->[$i_tok] = $block_type;
6505	6671					10031	$routput_type_sequence->[$i_tok] = $type_sequence;
6506	6671					9167	$routput_indent_flag->[$i_tok] = $indent_flag;
6507							}
6508
6509							# Remember last nonblank values
6510	6860	100	100			18870	if ( $type ne 'b' && $type ne '#' ) {
6511
6512	6349					7510	$last_last_nonblank_token = $last_nonblank_token;
6513	6349					7419	$last_last_nonblank_type = $last_nonblank_type;
6514
6515	6349					7726	$last_nonblank_prototype = $prototype;
6516	6349					7265	$last_nonblank_block_type = $block_type;
6517	6349					7336	$last_nonblank_container_type = $container_type;
6518	6349					7440	$last_nonblank_type_sequence = $type_sequence;
6519	6349					6990	$last_nonblank_token = $tok;
6520	6349					7406	$last_nonblank_type = $type;
6521							}
6522
6523							# reset indentation level if necessary at a sub or package
6524							# in an attempt to recover from a nesting error
6525	6860	50				11466	if ( $level_in_tokenizer < 0 ) {
6526	0	0				0	if ( $input_line =~ /^\s*(sub\|package)\s+(\w+)/ ) {
6527	0					0	reset_indentation_level(0);
6528	0					0	$self->brace_warning("resetting level to 0 at $1 $2\n");
6529							}
6530							}
6531
6532	6860					9063	$self->[_in_quote_] = $in_quote;
6533							$self->[_quote_target_] =
6534							$in_quote
6535							? (
6536							$matching_end_token{$quote_character}
6537	6860	100				12334	? $matching_end_token{$quote_character}
		100
6538							: $quote_character
6539							)
6540							: EMPTY_STRING;
6541	6860					10782	$self->[_rhere_target_list_] = $rhere_target_list;
6542
6543	6860					9698	return;
6544							} ## end sub tokenizer_main_loop
6545
6546							sub tokenizer_wrapup_line {
6547	6860			6860	0	10675	my ( $self, $line_of_tokens ) = @_;
6548
6549							#---------------------------------------------------------
6550							# Package a line of tokens for shipping back to the caller
6551							#---------------------------------------------------------
6552
6553							# Arrays to hold token values for this line:
6554							my (
6555	6860					9899	@output_levels, @output_block_type, @output_type_sequence,
6556							@output_token_type, @output_tokens,
6557							);
6558
6559	6860					14666	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
6560
6561							# Remember starting nesting block string
6562	6860					9195	my $nesting_block_string_0 = $nesting_block_string;
6563
6564							#-----------------
6565							# Loop over tokens
6566							#-----------------
6567							# $i is the index of the pretoken which starts this full token
6568	6860					8009	foreach my $ii ( @{$routput_token_list} ) {
	6860					11601
6569
6570	59459					68915	my $type_i = $routput_token_type->[$ii];
6571
6572							#----------------------------------------
6573							# Section 1. Handle a non-sequenced token
6574							#----------------------------------------
6575	59459	100				75378	if ( !$routput_type_sequence->[$ii] ) {
6576
6577							#-------------------------------
6578							# Section 1.1. types ';' and 't'
6579							#-------------------------------
6580							# - output anonymous 'sub' as keyword (type 'k')
6581							# - output __END__, __DATA__, and format as type 'k' instead
6582							# of ';' to make html colors correct, etc.
6583	48567	100				82490	if ( $is_semicolon_or_t{$type_i} ) {
		50
6584	3153					4593	my $tok_i = $rtokens->[$ii];
6585	3153	100				6759	if ( $is_END_DATA_format_sub{$tok_i} ) {
6586	187					337	$type_i = 'k';
6587							}
6588							}
6589
6590							#----------------------------------------------
6591							# Section 1.2. Check for an invalid token type.
6592							#----------------------------------------------
6593							# This can happen by running perltidy on non-scripts although
6594							# it could also be bug introduced by programming change. Perl
6595							# silently accepts a 032 (^Z) and takes it as the end
6596							elsif ( !$is_valid_token_type{$type_i} ) {
6597	0	0				0	if ( !$self->[_in_error_] ) {
6598	0					0	my $val = ord($type_i);
6599	0					0	$self->warning(
6600							"unexpected character decimal $val ($type_i) in script\n"
6601							);
6602	0					0	$self->[_in_error_] = 1;
6603							}
6604							}
6605							else {
6606							# valid token type other than ; and t
6607							}
6608
6609							#----------------------------------------------------
6610							# Section 1.3. Store values for a non-sequenced token
6611							#----------------------------------------------------
6612	48567					63421	push( @output_levels, $level_in_tokenizer );
6613	48567					59820	push( @output_block_type, EMPTY_STRING );
6614	48567					58213	push( @output_type_sequence, EMPTY_STRING );
6615	48567					74844	push( @output_token_type, $type_i );
6616
6617							}
6618
6619							#------------------------------------
6620							# Section 2. Handle a sequenced token
6621							# One of { [ ( ? : ) ] }
6622							#------------------------------------
6623							else {
6624
6625							# $level_i is the level we will store. Levels of braces are
6626							# set so that the leading braces have a HIGHER level than their
6627							# CONTENTS, which is convenient for indentation.
6628	10892					12269	my $level_i = $level_in_tokenizer;
6629
6630							# $tok_i is the PRE-token. It only equals the token for symbols
6631	10892					13367	my $tok_i = $rtokens->[$ii];
6632
6633							# $routput_indent_flag->[$ii] indicates that we need a change
6634							# in level at a nested ternary, as follows
6635							# 1 => at a nested ternary ?
6636							# -1 => at a nested ternary :
6637							# 0 => otherwise
6638
6639							#--------------------------------------------
6640							# Section 2.1 Handle a level-increasing token
6641							#--------------------------------------------
6642	10892	100				21627	if ( $is_opening_or_ternary_type{$type_i} ) {
		50
6643
6644	5446	100				8571	if ( $type_i eq '?' ) {
6645
6646	193	100				570	if ( $routput_indent_flag->[$ii] > 0 ) {
6647	8					15	$level_in_tokenizer++;
6648
6649							# break BEFORE '?' in a nested ternary
6650	8					11	$level_i = $level_in_tokenizer;
6651	8					18	$nesting_block_string .= "$nesting_block_flag";
6652
6653							}
6654							}
6655							else {
6656
6657	5253					6773	$nesting_token_string .= $tok_i;
6658
6659	5253	100	100			11342	if ( $type_i eq '{' \|\| $type_i eq 'L' ) {
6660
6661	4813					5801	$level_in_tokenizer++;
6662
6663	4813	100				7709	if ( $routput_block_type->[$ii] ) {
6664	1115					1569	$nesting_block_flag = 1;
6665	1115					1668	$nesting_block_string .= '1';
6666							}
6667							else {
6668	3698					4774	$nesting_block_flag = 0;
6669	3698					5321	$nesting_block_string .= '0';
6670							}
6671							}
6672							}
6673							}
6674
6675							#---------------------------------------------
6676							# Section 2.2. Handle a level-decreasing token
6677							#---------------------------------------------
6678							elsif ( $is_closing_or_ternary_type{$type_i} ) {
6679
6680	5446	100				9582	if ( $type_i ne ':' ) {
6681	5253					8346	my $char = chop $nesting_token_string;
6682	5253	50				11465	if ( $char ne $matching_start_token{$tok_i} ) {
6683	0					0	$nesting_token_string .= $char . $tok_i;
6684							}
6685							}
6686
6687	5446	100	100			14059	if (
			100
			100
6688							$type_i eq '}'
6689							\|\| $type_i eq 'R'
6690
6691							# only the second and higher ? : have levels
6692							\|\| $type_i eq ':' && $routput_indent_flag->[$ii] < 0
6693							)
6694							{
6695
6696	4821					5970	$level_i = --$level_in_tokenizer;
6697
6698	4821	50				7827	if ( $level_in_tokenizer < 0 ) {
6699	0	0				0	if ( !$self->[_saw_negative_indentation_] ) {
6700	0					0	$self->[_saw_negative_indentation_] = 1;
6701	0					0	$self->warning(
6702							"Starting negative indentation\n");
6703							}
6704							}
6705
6706							# restore previous level values
6707	4821	50				8149	if ( length($nesting_block_string) > 1 )
6708							{ # true for valid script
6709	4821					5791	chop $nesting_block_string;
6710	4821					8489	$nesting_block_flag =
6711							substr( $nesting_block_string, -1 ) eq '1';
6712							}
6713
6714							}
6715							}
6716
6717							#-----------------------------------------------------
6718							# Section 2.3. Unexpected sequenced token type - error
6719							#-----------------------------------------------------
6720							else {
6721
6722							# The tokenizer should only be assigning sequence numbers
6723							# to types { [ ( ? ) ] } :
6724	0					0	DEVEL_MODE && Fault(<<EOM);
6725							unexpected sequence number on token type $type_i with pre-tok=$tok_i
6726							EOM
6727							}
6728
6729							#------------------------------------------------
6730							# Section 2.4. Store values for a sequenced token
6731							#------------------------------------------------
6732
6733							# The starting nesting block string, which is used in any .LOG
6734							# output, should include the first token of the line
6735	10892	100				16503	if ( !@output_levels ) {
6736	1776					2674	$nesting_block_string_0 = $nesting_block_string;
6737							}
6738
6739							# Store values for a sequenced token
6740	10892					15435	push( @output_levels, $level_i );
6741	10892					17567	push( @output_block_type, $routput_block_type->[$ii] );
6742	10892					15407	push( @output_type_sequence, $routput_type_sequence->[$ii] );
6743	10892					19292	push( @output_token_type, $type_i );
6744
6745							}
6746							} ## End loop to over tokens
6747
6748							#---------------------
6749							# Post-loop operations
6750							#---------------------
6751
6752	6860					14186	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string_0;
6753
6754							# Form and store the tokens
6755	6860	50				11565	if (@output_levels) {
6756
6757	6860					7467	my $im = shift @{$routput_token_list};
	6860					10777
6758	6860					10229	my $offset = $rtoken_map->[$im];
6759	6860					7674	foreach my $ii ( @{$routput_token_list} ) {
	6860					9672
6760	52599					55642	my $numc = $rtoken_map->[$ii] - $offset;
6761	52599					77440	push( @output_tokens, substr( $input_line, $offset, $numc ) );
6762	52599					52524	$offset += $numc;
6763
6764							# programming note: it seems most efficient to 'next' out of
6765							# a critical loop like this as early as possible. So instead
6766							# of 'if ( DEVEL_MODE && $numc < 0 )' we write:
6767	52599					55888	next unless (DEVEL_MODE);
6768	0	0				0	next if ( $numc > 0 );
6769
6770							# Should not happen unless @{$rtoken_map} is corrupted
6771	0					0	Fault("number of characters is '$numc' but should be >0\n");
6772							}
6773
6774							# Form and store the final token of this line
6775	6860					9653	my $numc = length($input_line) - $offset;
6776	6860					12043	push( @output_tokens, substr( $input_line, $offset, $numc ) );
6777
6778	6860					8315	if (DEVEL_MODE) {
6779							if ( $numc <= 0 ) {
6780
6781							# check '$rtoken_map' and '$routput_token_list'
6782							Fault("Number of Characters is '$numc' but should be >0\n");
6783							}
6784
6785							# Make sure we didn't gain or lose any characters
6786							my $test_line = join EMPTY_STRING, @output_tokens;
6787							if ( $test_line ne $input_line ) {
6788							my $len_input = length($input_line);
6789							my $len_test = length($test_line);
6790
6791							# check '$rtoken_map' and '$routput_token_list'
6792							Fault(<<EOM);
6793							Reconstructed line difers from input; input_length=$len_input test_length=$len_test
6794							input:'$input_line'
6795							test :'$test_line'
6796							EOM
6797							}
6798							}
6799							}
6800
6801							# Wrap up this line of tokens for shipping to the Formatter
6802	6860					17468	$line_of_tokens->{_rtoken_type} = \@output_token_type;
6803	6860					14048	$line_of_tokens->{_rtokens} = \@output_tokens;
6804	6860					13291	$line_of_tokens->{_rblock_type} = \@output_block_type;
6805	6860					11445	$line_of_tokens->{_rtype_sequence} = \@output_type_sequence;
6806	6860					11259	$line_of_tokens->{_rlevels} = \@output_levels;
6807
6808							#-----------------------------------------------------------------
6809							# Compare input indentation with computed levels at closing braces
6810							#-----------------------------------------------------------------
6811							# This may provide a useful hint for error location if the file
6812							# is not balanced in braces. Closing braces are used because they
6813							# have a well-defined indentation and can be processed efficiently.
6814	6860	100				12596	if ( $output_tokens[0] eq '}' ) {
6815
6816	737					1218	my $blk = $output_block_type[0];
6817	737	100	100			4171	if (
			66
6818							(
6819							# builtin block types without continuation indentation
6820							$is_zero_continuation_block_type{$blk}
6821
6822							# or a named sub, but skip sub aliases for efficiency,
6823							# since this is just for diagnostic info
6824							\|\| substr( $blk, 0, 4 ) eq 'sub '
6825							)
6826
6827							# and we are not in format skipping
6828							&& !$self->[_in_format_skipping_]
6829							)
6830							{
6831
6832							# subtract 1 space for newline in untrimmed line
6833	395					811	my $untrimmed_input_line = $line_of_tokens->{_line_text};
6834	395					886	my $space_count =
6835							length($untrimmed_input_line) - length($input_line) - 1;
6836
6837							# check for tabs
6838	395	100	100			1540	if ( $space_count
6839							&& ord( substr( $untrimmed_input_line, 0, 1 ) ) == ORD_TAB )
6840							{
6841	15	50				125	if ( $untrimmed_input_line =~ /^(\t+)?(\s+)?/ ) {
6842	15	50				65	if ($1) { $space_count += length($1) * $tabsize }
	15					38
6843	15	100				49	if ($2) { $space_count += length($2) }
	1					4
6844							}
6845							}
6846
6847							# '$guess' = the level according to indentation
6848	395					968	my $guess = int( $space_count / $rOpts_indent_columns );
6849
6850							# subtract 1 level from guess for --indent-closing-brace
6851	395	100				1001	$guess -= 1 if ($rOpts_indent_closing_brace);
6852
6853							# subtract 1 from $level for each non-indenting brace level
6854	395					561	my $adjust = @{ $self->[_rnon_indenting_brace_stack_] };
	395					692
6855
6856	395					655	my $level = $output_levels[0];
6857
6858							# find the difference between expected and indentation guess
6859	395					665	my $level_diff = $level - $adjust - $guess;
6860
6861	395					601	my $rhash = $self->[_rclosing_brace_indentation_hash_];
6862
6863							# results are only valid if we guess correctly at the
6864							# first spaced brace
6865	395	100	100			1503	if ( $space_count && !defined( $rhash->{valid} ) ) {
6866	79					199	$rhash->{valid} = !$level_diff;
6867							}
6868
6869							# save the result
6870	395					771	my $rhistory_line_number = $rhash->{rhistory_line_number};
6871	395					707	my $rhistory_level_diff = $rhash->{rhistory_level_diff};
6872	395					718	my $rhistory_anchor_point = $rhash->{rhistory_anchor_point};
6873
6874	395	100				960	if ( $rhistory_level_diff->[-1] != $level_diff ) {
6875
6876							# Patch for non-indenting-braces: if we guess zero and
6877							# match before all non-indenting braces have been found,
6878							# it means that we would need negative indentation to
6879							# match if/when the brace is found. So we have a problem
6880							# from here on. We indicate this with a value 2 instead
6881							# of 1 as a signal to stop outputting the table here.
6882	55					95	my $anchor = 1;
6883	55	50	66			277	if ( $guess == 0 && $adjust > 0 ) { $anchor = 2 }
	0					0
6884
6885							# add an anchor point
6886	55					89	push @{$rhistory_level_diff}, $level_diff;
	55					119
6887	55					95	push @{$rhistory_line_number}, $input_line_number;
	55					120
6888	55					94	push @{$rhistory_anchor_point}, $anchor;
	55					132
6889							}
6890							else {
6891
6892							# add a movable point following an anchor point
6893	340	100				766	if ( $rhistory_anchor_point->[-1] ) {
6894	152					291	push @{$rhistory_level_diff}, $level_diff;
	152					316
6895	152					276	push @{$rhistory_line_number}, $input_line_number;
	152					288
6896	152					244	push @{$rhistory_anchor_point}, 0;
	152					367
6897							}
6898
6899							# extend a movable point
6900							else {
6901	188					443	$rhistory_line_number->[-1] = $input_line_number;
6902							}
6903							}
6904							}
6905							}
6906
6907	6860					13713	return;
6908							} ## end sub tokenizer_wrapup_line
6909
6910							} ## end tokenize_this_line
6911
6912							#######################################################################
6913							# Tokenizer routines which assist in identifying token types
6914							#######################################################################
6915
6916							# Define Global '%op_expected_table'
6917							# = hash table of operator expected values based on last nonblank token
6918
6919							# exceptions to perl's weird parsing rules after type 'Z'
6920							my %is_weird_parsing_rule_exception;
6921
6922							my %is_paren_dollar;
6923
6924							my %is_n_v;
6925
6926							BEGIN {
6927
6928							# Always expecting TERM following these types:
6929							# note: this is identical to '@value_requestor_type' defined later.
6930							# Fix for c250: add new type 'P' for package (expecting VERSION or {}
6931							# after package NAMESPACE, so expecting TERM)
6932							# Fix for c250: add new type 'S' for sub (not expecting operator)
6933	44			44		867	my @q = qw#
6934							; ! + x & ? F J - p / Y : % f U ~ A G j L P S * . \| ^ < = [ m { > t
6935							\|\| >= != mm = => .. !~ == && \|= .= pp -= =~ += <= %= ^= x= ~~ * << /=
6936							&= // >> ~. &. \|. ^.
6937							... **= <<= >>= &&= \|\|= //= <=> !~~ &.= \|.= ^.= <<~
6938							#;
6939	44					174	push @q, BACKSLASH;
6940	44					96	push @q, COMMA;
6941	44					83	push @q, '('; # for completeness, not currently a token type
6942	44					94	push @q, '->'; # was previously in UNKNOWN
6943	44					2435	$op_expected_table{$_} = TERM for @q;
6944
6945							# No UNKNOWN table types:
6946							# removed '->' for c030, now always TERM
6947							# removed 'w' for c392 to allow use of 'function_count' info in the sub
6948
6949							# Always expecting OPERATOR ...
6950							# 'n' and 'v' are currently excluded because they might be VERSION numbers
6951							# 'i' is currently excluded because it might be a package
6952							# 'q' is currently excluded because it might be a prototype
6953							# Fix for c030: removed '->' from this list:
6954							# Fix for c250: added 'i' because new type 'P' was added
6955	44					258	@q = qw( -- C h R ++ ] Q <> i );
6956	44					80	push @q, ')';
6957	44					585	$op_expected_table{$_} = OPERATOR for @q;
6958
6959							# Fix for git #62: added '*' and '%'
6960	44					122	@q = qw( < ? * % );
6961	44					115	$is_weird_parsing_rule_exception{$_} = 1 for @q;
6962
6963	44					85	@q = qw<) $>;
6964	44					100	$is_paren_dollar{$_} = 1 for @q;
6965
6966	44					83	@q = qw( n v );
6967	44					1377	$is_n_v{$_} = 1 for @q;
6968
6969							} ## end BEGIN
6970
6971	44			44		288	use constant DEBUG_OPERATOR_EXPECTED => 0;
	44					67
	44					97167
6972
6973							sub operator_expected {
6974
6975	11847			11847	0	21236	my ( $self, $tok, $next_type, $blank_after_Z ) = @_;
6976
6977							# Returns a parameter indicating what types of tokens can occur next
6978
6979							# Call format:
6980							# $op_expected =
6981							# $self->operator_expected( $tok, $next_type, $blank_after_Z );
6982							# where
6983							# $tok is the current token
6984							# $next_type is the type of the next token (blank or not)
6985							# $blank_after_Z = flag for guessing after a type 'Z':
6986							# true if $tok follows type 'Z' with intermediate blank
6987							# false if $tok follows type 'Z' with no intermediate blank
6988							# ignored if $tok does not follow type 'Z'
6989
6990							# Many perl symbols have two or more meanings. For example, '<<'
6991							# can be a shift operator or a here-doc operator. The
6992							# interpretation of these symbols depends on the current state of
6993							# the tokenizer, which may either be expecting a term or an
6994							# operator. For this example, a << would be a shift if an OPERATOR
6995							# is expected, and a here-doc if a TERM is expected. This routine
6996							# is called to make this decision for any current token. It returns
6997							# one of three possible values:
6998							#
6999							# OPERATOR - operator expected (or at least, not a term)
7000							# UNKNOWN - can't tell
7001							# TERM - a term is expected (or at least, not an operator)
7002							#
7003							# The decision is based on what has been seen so far. This
7004							# information is stored in the "$last_nonblank_type" and
7005							# "$last_nonblank_token" variables. For example, if the
7006							# $last_nonblank_type is '=~', then we are expecting a TERM, whereas
7007							# if $last_nonblank_type is 'n' (numeric), we are expecting an
7008							# OPERATOR.
7009							#
7010							# If a UNKNOWN is returned, the calling routine must guess. A major
7011							# goal of this tokenizer is to minimize the possibility of returning
7012							# UNKNOWN, because a wrong guess can spoil the formatting of a
7013							# script.
7014							#
7015							# Adding NEW_TOKENS: it is critically important that this routine be
7016							# updated to allow it to determine if an operator or term is to be
7017							# expected after the new token. Doing this simply involves adding
7018							# the new token character to one of the regexes in this routine or
7019							# to one of the hash lists
7020							# that it uses, which are initialized in the BEGIN section.
7021							# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
7022							# $statement_type
7023
7024							# When possible, token types should be selected such that we can determine
7025							# the 'operator_expected' value by a simple hash lookup. If there are
7026							# exceptions, that is an indication that a new type is needed.
7027
7028							#--------------------------------------------
7029							# Section 1: Table lookup will get most cases
7030							#--------------------------------------------
7031
7032							# Many types are can be obtained by a table lookup. This typically handles
7033							# more than half of the calls. For speed, the caller may try table lookup
7034							# first before calling this sub.
7035	11847					15396	my $op_expected = $op_expected_table{$last_nonblank_type};
7036	11847	100				17770	if ( defined($op_expected) ) {
7037							DEBUG_OPERATOR_EXPECTED
7038	78					111	&& print {*STDOUT}
7039							"OPERATOR_EXPECTED: Table Lookup; returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
7040	78					193	return $op_expected;
7041							}
7042
7043							DEBUG_OPERATOR_EXPECTED
7044	11769					12596	&& print {*STDOUT}
7045							"OPERATOR_EXPECTED: in hardwired table for last type $last_nonblank_type token $last_nonblank_token\n";
7046
7047							#---------------------------------------------
7048							# Section 2: Handle special cases if necessary
7049							#---------------------------------------------
7050
7051							# Types 'k', '}' and 'Z' depend on context
7052							# Types 'n', 'v', 'q' also depend on context.
7053
7054							# identifier...
7055							# Fix for c250: removed coding for type 'i' because 'i' and new type 'P'
7056							# are now done by hash table lookup
7057
7058							#--------------------
7059							# Section 2A: keyword
7060							#--------------------
7061	11769	100				18699	if ( $last_nonblank_type eq 'k' ) {
7062
7063							# keywords expecting TERM:
7064	3063	100				7700	if ( $expecting_term_token{$last_nonblank_token} ) {
7065
7066							# Exceptions from TERM:
7067
7068							# // may follow perl functions which may be unary operators
7069							# see test file dor.t (defined or);
7070	2943	100	100			6011	if (
			100
7071							$tok eq '/'
7072							&& $next_type eq '/'
7073							&& $is_keyword_rejecting_slash_as_pattern_delimiter{
7074							$last_nonblank_token}
7075							)
7076							{
7077	1					3	return OPERATOR;
7078							}
7079
7080							# Patch to allow a ? following 'split' to be a deprecated pattern
7081							# delimiter. This patch is coordinated with the omission of split
7082							# from the list
7083							# %is_keyword_rejecting_question_as_pattern_delimiter. This patch
7084							# will force perltidy to guess.
7085	2942	50	66			5916	if ( $tok eq '?'
7086							&& $last_nonblank_token eq 'split' )
7087							{
7088	0					0	return UNKNOWN;
7089							}
7090
7091	2942					5593	return TERM;
7092							}
7093
7094							# keywords expecting OPERATOR:
7095	120	100				362	if ( $expecting_operator_token{$last_nonblank_token} ) {
7096	7					19	return OPERATOR;
7097							}
7098
7099	113					299	return TERM;
7100
7101							} ## end type 'k'
7102
7103							#------------------------------------
7104							# Section 2B: Closing container token
7105							#------------------------------------
7106
7107							# Note that the actual token for type '}' may also be a ')'.
7108
7109							# Also note that $last_nonblank_token is not the token corresponding to
7110							# $last_nonblank_type when the type is a closing container. In that
7111							# case it is the token before the corresponding opening container token.
7112							# So for example, for this snippet
7113							# $a = do { BLOCK } / 2;
7114							# the $last_nonblank_token is 'do' when $last_nonblank_type eq '}'.
7115
7116	8706	100				15110	if ( $last_nonblank_type eq '}' ) {
7117
7118							#-------------------------------------------
7119							# Section 2B1: Closing structural ')' or ']'
7120							#-------------------------------------------
7121	4140	100	100			10520	if ( $last_nonblank_token eq ')' \|\| $last_nonblank_token eq ']' ) {
7122	2794					5600	return OPERATOR;
7123							}
7124
7125							#-------------------------------------
7126							# Section 2B2: Closing block brace '}'
7127							#-------------------------------------
7128	1346					2381	my $blk = $rbrace_type->[ $brace_depth + 1 ];
7129
7130							# Non-blocks
7131	1346	100				2576	if ( !defined($blk) ) {
7132	2					4	return OPERATOR;
7133							}
7134
7135							# Unidentified block type
7136	1344	100				2921	if ( !$blk ) {
7137	367					776	return UNKNOWN;
7138							}
7139
7140							# Blocks followed by a TERM
7141	977	100	100			9842	if ( $is_zero_continuation_block_type{$blk}
			100
			100
			100
			100
			100
			66
			100
7142							\|\| $is_sort_map_grep{$blk}
7143							\|\| $is_grep_alias{$blk}
7144							\|\| substr( $blk, -1, 1 ) eq ':' && $blk =~ /^\w+:$/
7145							\|\| substr( $blk, 0, 3 ) eq 'sub' && $blk =~ /^sub\s/
7146							\|\| substr( $blk, 0, 7 ) eq 'package' && $blk =~ /^package\s/ )
7147							{
7148	635					1563	return TERM;
7149							}
7150
7151							# Blocks followed by an OPERATOR
7152							# do eval sub
7153	342	100	100			1360	if ( $is_block_operator{$blk}
7154							\|\| $is_sub{$blk} )
7155							{
7156	274					667	return OPERATOR;
7157							}
7158
7159							# Any other block type is marked UNKNOWN to be safe (c566).
7160							# For example, a block type marked ';' could be a hash ref:
7161							# { map { $_ => 'x' } keys %main:: } ~~ \%main::;
7162							# The tokenizer would have to analyze the contents to distinguish
7163							# between a block closure and a hash ref brace in this case. So mark
7164							# this as UNKNOWN and let the lower level routines figure it out.
7165	68					196	return UNKNOWN;
7166							}
7167
7168							#-------------------------------
7169							# Section 2C: number or v-string
7170							#-------------------------------
7171							# An exception is for VERSION numbers a 'use' statement. It has the format
7172							# use Module VERSION LIST
7173							# We could avoid this exception by writing a special sub to parse 'use'
7174							# statements and perhaps mark these numbers with a new type V (for VERSION)
7175	4566	100				9462	if ( $is_n_v{$last_nonblank_type} ) {
7176	2621	100				4533	if ( $statement_type eq 'use' ) {
7177	11					32	return UNKNOWN;
7178							}
7179	2610					4756	return OPERATOR;
7180							}
7181
7182							#---------------------
7183							# Section 2D: qw quote
7184							#---------------------
7185							# TODO: labeled prototype words would better be given type 'A' or maybe
7186							# 'J'; not 'q'; or maybe mark as type 'Y'?
7187	1945	100				3962	if ( $last_nonblank_type eq 'q' ) {
7188	158	50				400	if ( $last_nonblank_token eq 'prototype' ) {
7189	0					0	return TERM;
7190							}
7191
7192							# update for --use-feature=class (rt145706):
7193							# Look for class VERSION after possible attribute, as in
7194							# class Example::Subclass : isa(Example::Base) 1.345 { ... }
7195	158	100				396	if ( $statement_type =~ /^package\b/ ) {
7196	3					5	return TERM;
7197							}
7198
7199							# everything else
7200	155					323	return OPERATOR;
7201							}
7202
7203							#---------------------
7204							# Section 2E: bareword
7205							#---------------------
7206	1787	100				3607	if ( $last_nonblank_type eq 'w' ) {
7207
7208							# It is safest to return UNKNOWN if a possible ? pattern delimiter may
7209							# follow (git #32, c469) and let the guess algorithm handle it.
7210	1738	100				3193	if ( $tok eq '?' ) { return UNKNOWN }
	7					18
7211
7212							# see if this has been seen in the role of a function taking args
7213	1731					3232	my $rinfo = $self->[_rbareword_info_]->{$current_package};
7214	1731	100				3140	if ($rinfo) {
7215	1228					1974	$rinfo = $rinfo->{$last_nonblank_token};
7216	1228	100				2440	if ($rinfo) {
7217	317					537	my $function_count = $rinfo->{function_count};
7218	317	100	66			1108	if ( $function_count && $function_count > 0 ) { return TERM }
	135					363
7219							}
7220							}
7221	1596					3251	return UNKNOWN;
7222							}
7223
7224							#-----------------------------------
7225							# Section 2F: file handle or similar
7226							#-----------------------------------
7227	49	100				134	if ( $last_nonblank_type eq 'Z' ) {
7228
7229							# angle.t
7230	45	100				193	if ( $last_nonblank_token =~ /^\w/ ) {
7231	2					5	return UNKNOWN;
7232							}
7233
7234							# Exception to weird parsing rules for 'x(' ... see case b1205:
7235							# In something like 'print $vv x(...' the x is an operator;
7236							# Likewise in 'print $vv x$ww' the x is an operator (case b1207)
7237							# otherwise x follows the weird parsing rules.
7238	43	50	33			154	if ( $tok eq 'x' && $next_type =~ /^[\(\$\@\%]$/ ) {
7239	0					0	return OPERATOR;
7240							}
7241
7242							# The 'weird parsing rules' of next section do not work for '<' and '?'
7243							# It is best to mark them as unknown. Test case:
7244							# print $fh <DATA>;
7245	43	100				132	if ( $is_weird_parsing_rule_exception{$tok} ) {
7246	4					8	return UNKNOWN;
7247							}
7248
7249							# For possible file handle like "$a", Perl uses weird parsing rules.
7250							# For example:
7251							# print $a/2,"/hi"; - division
7252							# print $a / 2,"/hi"; - division
7253							# print $a/ 2,"/hi"; - division
7254							# print $a /2,"/hi"; - pattern (and error)!
7255							# Some examples where this logic works okay, for '&','*','+':
7256							# print $fh &xsi_protos(@mods);
7257							# my $x = new $CompressClass *FH;
7258							# print $OUT +( $count % 15 ? ", " : "\n\t" );
7259	39	50	66			142	if ( $blank_after_Z
7260							&& $next_type ne 'b' )
7261							{
7262	0					0	return TERM;
7263							}
7264
7265							# Note that '?' and '<' have been moved above
7266							# ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]\|\>\>)$/ ) {
7267	39	100				206	if ( $tok =~ /^([x\/\+\-\*\%\&\.]\|\>\>)$/ ) {
7268
7269							# Do not complain in 'use' statements, which have special syntax.
7270							# For example, from RT#130344:
7271							# use lib $FindBin::Bin . '/lib';
7272	9	50				20	if ( $statement_type ne 'use' ) {
7273	9					29	$self->complain(
7274							"operator in possible indirect object location not recommended\n"
7275							);
7276							}
7277	9					19	return OPERATOR;
7278							}
7279
7280							# all other cases
7281
7282	30					81	return UNKNOWN;
7283							}
7284
7285							#--------------------------
7286							# Section 2F: anything else
7287							#--------------------------
7288	4					9	return UNKNOWN;
7289
7290							} ## end sub operator_expected
7291
7292							sub new_statement_ok {
7293
7294							# Returns:
7295							# true if a new statement can begin here
7296							# false otherwise
7297
7298							# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
7299							# $brace_depth, $rbrace_type
7300
7301							# Uses:
7302							# - See if a 'class' statement can occur here
7303							# - See if a keyword begins at a new statement; i.e. is an 'if' a
7304							# block if or a trailing if? Also see if 'format' starts a statement.
7305							# - Decide if a ':' is part of a statement label (not a ternary)
7306
7307							# Curly braces are tricky because some small blocks do not get marked as
7308							# blocks..
7309
7310							# if it follows an opening curly brace..
7311	545	100	100	545	0	2305	if ( $last_nonblank_token eq '{' ) {
		100
7312
7313							# The safe thing is to return true in all cases because:
7314							# - a ternary ':' cannot occur here
7315							# - an 'if' here, for example, cannot be a trailing if
7316							# See test case c231 for an example.
7317							# This works but could be improved, if necessary, by returning
7318							# 'false' at obvious non-blocks.
7319	66					239	return 1;
7320							}
7321
7322							# if it follows a closing code block curly brace..
7323							elsif ($last_nonblank_token eq '}'
7324							&& $last_nonblank_type eq $last_nonblank_token )
7325							{
7326
7327							# A new statement can follow certain closing block braces ...
7328							# Previously, a true was always returned, and this worked ok.
7329							# Update c443: now we return false for certain blocks which must be
7330							# followed by a ';'. See comments elsewhere on
7331							# '%is_zero_continuation_block_type'. The value of $brace_depth has
7332							# also been corrected, it was off by 1.
7333	115					244	my $block_type = $rbrace_type->[ $brace_depth + 1 ];
7334							return $block_type
7335	115		66			685	&& !$is_sort_map_grep_eval_do_sub{$block_type};
7336							}
7337
7338							# otherwise, it is a label if and only if it follows a ';' (real or fake)
7339							# or another label
7340							else {
7341	364		100			1789	return ( $last_nonblank_type eq ';' \|\| $last_nonblank_type eq 'J' );
7342							}
7343							} ## end sub new_statement_ok
7344
7345							sub code_block_type {
7346
7347	1444			1444	0	3236	my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
7348
7349							# Decide if this is a block of code, and its type.
7350							# Must be called only when $type = $token = '{'
7351							# The problem is to distinguish between the start of a block of code
7352							# and the start of an anonymous hash reference
7353							# Returns "" if not code block, otherwise returns 'last_nonblank_token'
7354							# to indicate the type of code block. (For example, 'last_nonblank_token'
7355							# might be 'if' for an if block, 'else' for an else block, etc).
7356							# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
7357							# $last_nonblank_block_type, $brace_depth, $rbrace_type
7358
7359							# handle case of multiple '{'s
7360
7361							#print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
7362
7363	1444	100	66			17736	if ( $last_nonblank_token eq '{'
		100	66
		100	66
		100	100
		100	66
		100	66
		50
		50
		100
		100
		100
7364							&& $last_nonblank_type eq $last_nonblank_token )
7365							{
7366
7367							# opening brace where a statement may appear is probably
7368							# a code block but might be and anonymous hash reference
7369	98	50				274	if ( $rbrace_type->[$brace_depth] ) {
7370	98					281	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
7371							$max_token_index );
7372							}
7373
7374							# cannot start a code block within an anonymous hash
7375							else {
7376	0					0	return EMPTY_STRING;
7377							}
7378							}
7379
7380							elsif ( $last_nonblank_token eq ';' ) {
7381
7382							# an opening brace where a statement may appear is probably
7383							# a code block but might be and anonymous hash reference
7384	51					180	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
7385							$max_token_index );
7386							}
7387
7388							# handle case of '}{'
7389							elsif ($last_nonblank_token eq '}'
7390							&& $last_nonblank_type eq $last_nonblank_token )
7391							{
7392
7393							# a } { situation ...
7394							# could be hash reference after code block..(blktype1.t)
7395	10	50				34	if ($last_nonblank_block_type) {
7396	10					37	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
7397							$max_token_index );
7398							}
7399
7400							# must be a block if it follows a closing hash reference
7401							else {
7402	0					0	return $last_nonblank_token;
7403							}
7404							}
7405
7406							#--------------------------------------------------------------
7407							# NOTE: braces after type characters start code blocks, but for
7408							# simplicity these are not identified as such. See also
7409							# sub is_non_structural_brace.
7410							#--------------------------------------------------------------
7411
7412							## elsif ( $last_nonblank_type eq 't' ) {
7413							## return $last_nonblank_token;
7414							## }
7415
7416							# brace after label:
7417							elsif ( $last_nonblank_type eq 'J' ) {
7418	34					95	return $last_nonblank_token;
7419							}
7420
7421							# otherwise, see if a block must follow the previous token (such as 'if'):
7422							elsif ($is_code_block_token{$last_nonblank_token}
7423							\|\| $is_grep_alias{$last_nonblank_token} )
7424							{
7425
7426							# Bug Patch: Note that the opening brace after the 'if' in the following
7427							# snippet is an anonymous hash ref and not a code block!
7428							# print 'hi' if { x => 1, }->{x};
7429							# We can identify this situation because the last nonblank type
7430							# will be a keyword (instead of a closing paren)
7431	547	50	33			2321	if (
			66
7432							$last_nonblank_type eq 'k'
7433							&& ( $last_nonblank_token eq 'if'
7434							\|\| $last_nonblank_token eq 'unless' )
7435							)
7436							{
7437	0					0	return EMPTY_STRING;
7438							}
7439							else {
7440	547					1531	return $last_nonblank_token;
7441							}
7442							}
7443
7444							# or a sub or package BLOCK
7445							# Fixed for c250 to include new package type 'P', and change 'i' to 'S'
7446							elsif (
7447							$last_nonblank_type eq 'P'
7448							\|\| $last_nonblank_type eq 'S'
7449							\|\| ( $last_nonblank_type eq 't'
7450							&& substr( $last_nonblank_token, 0, 3 ) eq 'sub' )
7451							)
7452							{
7453	346					999	return $last_nonblank_token;
7454							}
7455
7456							elsif ( $statement_type =~ /^(sub\|package)\b/ ) {
7457	0					0	return $statement_type;
7458							}
7459
7460							# user-defined subs with block parameters (like grep/map/eval)
7461							elsif ( $last_nonblank_type eq 'G' ) {
7462	0					0	return $last_nonblank_token;
7463							}
7464
7465							# check bareword
7466							elsif ( $last_nonblank_type eq 'w' ) {
7467
7468							# check for syntax 'use MODULE LIST'
7469							# This fixes b1022 b1025 b1027 b1028 b1029 b1030 b1031
7470	24	100				92	return EMPTY_STRING if ( $statement_type eq 'use' );
7471
7472	23					94	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
7473							$max_token_index );
7474							}
7475
7476							# Patch for bug # RT #94338 reported by Daniel Trizen
7477							# for-loop in a parenthesized block-map triggering an error message:
7478							# map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
7479							# Check for a code block within a parenthesized function call
7480							elsif ( $last_nonblank_token eq '(' ) {
7481	87					173	my $paren_type = $rparen_type->[$paren_depth];
7482
7483							# /^(map\|grep\|sort)$/
7484	87	100	66			395	if ( $paren_type && $is_sort_map_grep{$paren_type} ) {
7485
7486							# We will mark this as a code block but use type 't' instead
7487							# of the name of the containing function. This will allow for
7488							# correct parsing but will usually produce better formatting.
7489							# Braces with block type 't' are not broken open automatically
7490							# in the formatter as are other code block types, and this usually
7491							# works best.
7492	1					3	return 't'; # (Not $paren_type)
7493							}
7494							else {
7495	86					213	return EMPTY_STRING;
7496							}
7497							}
7498
7499							# handle unknown syntax ') {'
7500							# we previously appended a '()' to mark this case
7501							elsif ( $last_nonblank_token =~ /$/ ) {
7502	16					92	return $last_nonblank_token;
7503							}
7504
7505							# anything else must be anonymous hash reference
7506							else {
7507	231					555	return EMPTY_STRING;
7508							}
7509							} ## end sub code_block_type
7510
7511							sub decide_if_code_block {
7512
7513							# USES GLOBAL VARIABLES: $last_nonblank_token
7514	182			182	0	416	my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
7515
7516	182					607	my ( $next_nonblank_token, $i_next_uu ) =
7517							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
7518
7519							# we are at a '{' where a statement may appear.
7520							# We must decide if this brace starts an anonymous hash or a code
7521							# block.
7522							# return "" if anonymous hash, and $last_nonblank_token otherwise
7523
7524							# initialize to be code BLOCK
7525	182					341	my $code_block_type = $last_nonblank_token;
7526
7527							# Check for the common case of an empty anonymous hash reference:
7528							# Maybe something like sub { { } }
7529	182	100				448	if ( $next_nonblank_token eq '}' ) {
7530	5					8	$code_block_type = EMPTY_STRING;
7531							}
7532
7533							else {
7534
7535							# To guess if this '{' is an anonymous hash reference, look ahead
7536							# and test as follows:
7537							#
7538							# it is a hash reference if next come:
7539							# - a string or digit followed by a comma or =>
7540							# - bareword followed by =>
7541							# otherwise it is a code block
7542							#
7543							# Examples of anonymous hash ref:
7544							# {'aa',};
7545							# {1,2}
7546							#
7547							# Examples of code blocks:
7548							# {1; print "hello\n", 1;}
7549							# {$a,1};
7550
7551							# We are only going to look ahead one more (nonblank/comment) line.
7552							# Strange formatting could cause a bad guess, but that's unlikely.
7553	177					279	my @pre_types;
7554							my @pre_tokens;
7555
7556							# Ignore the rest of this line if it is a side comment
7557	177	100				450	if ( $next_nonblank_token ne '#' ) {
7558	152					471	@pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ];
	152					829
7559	152					346	@pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ];
	152					923
7560							}
7561
7562							# Here 20 is arbitrary but generous, and prevents wasting lots of time
7563							# in mangled files
7564	177					648	my ( $rpre_tokens, $rpre_types ) =
7565							$self->peek_ahead_for_n_nonblank_pre_tokens(20);
7566	177	100	66			539	if ( defined($rpre_types) && @{$rpre_types} ) {
	169					510
7567	169					251	push @pre_types, @{$rpre_types};
	169					642
7568	169					297	push @pre_tokens, @{$rpre_tokens};
	169					780
7569							}
7570
7571							# put a sentinel token to simplify stopping the search
7572	177					366	push @pre_types, '}';
7573	177					301	push @pre_types, '}';
7574
7575	177					247	my $jbeg = 0;
7576	177	100				469	$jbeg = 1 if ( $pre_types[0] eq 'b' );
7577
7578							# first look for one of these
7579							# - bareword
7580							# - bareword with leading -
7581							# - digit
7582							# - quoted string
7583	177					294	my $j = $jbeg;
7584	177	100	33			1045	if ( $pre_types[$j] =~ /^[\'\"]/ ) {
		100
		100
		50
7585
7586							# find the closing quote; don't worry about escapes
7587	1					3	my $quote_mark = $pre_types[$j];
7588	1					4	foreach my $k ( $j + 1 .. @pre_types - 2 ) {
7589	1	50				3	if ( $pre_types[$k] eq $quote_mark ) {
7590	1					3	$j = $k + 1;
7591	1					2	last;
7592							}
7593							}
7594							}
7595							elsif ( $pre_types[$j] eq 'd' ) {
7596	8					11	$j++;
7597							}
7598							elsif ( $pre_types[$j] eq 'w' ) {
7599	74					155	$j++;
7600							}
7601							elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
7602	0					0	$j++;
7603							}
7604							else {
7605							# none of the above
7606							}
7607	177	100				451	if ( $j > $jbeg ) {
7608
7609	83	100				256	$j++ if ( $pre_types[$j] eq 'b' );
7610
7611							# Patched for RT #95708
7612	83	100	33			576	if (
			66
			66
7613
7614							# it is a comma which is not a pattern delimiter except for qw
7615							(
7616							$pre_types[$j] eq COMMA
7617							&& !$is_q_qq_qx_qr_s_y_tr_m{ $pre_tokens[$jbeg] }
7618							)
7619
7620							# or a =>
7621							\|\| ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
7622							)
7623							{
7624	18					29	$code_block_type = EMPTY_STRING;
7625							}
7626							}
7627
7628	177	100				487	if ($code_block_type) {
7629
7630							# Patch for cases b1085 b1128: It is uncertain if this is a block.
7631							# If this brace follows a bareword, then append a space as a signal
7632							# to the formatter that this may not be a block brace. To find the
7633							# corresponding code in Formatter.pm search for 'b1085'.
7634							# But not for the word 'method': fixes c534; this will cause the
7635							# formatter to mark an asub block instead of a sub block.
7636	159	100	66			1324	if ( $code_block_type =~ /^\w/ && $code_block_type ne 'method' ) {
7637	20					123	$code_block_type .= SPACE;
7638							}
7639							}
7640							}
7641
7642	182					550	return $code_block_type;
7643							} ## end sub decide_if_code_block
7644
7645							sub report_unexpected {
7646
7647							# report unexpected token type and show where it is
7648							# USES GLOBAL VARIABLES: (none)
7649	0			0	0	0	my ( $self, $rcall_hash ) = @_;
7650
7651	0					0	my $found = $rcall_hash->{found};
7652	0					0	my $expecting = $rcall_hash->{expecting};
7653	0					0	my $i_tok = $rcall_hash->{i_tok};
7654	0					0	my $last_nonblank_i = $rcall_hash->{last_nonblank_i};
7655	0					0	my $rpretoken_map = $rcall_hash->{rpretoken_map};
7656	0					0	my $rpretoken_type = $rcall_hash->{rpretoken_type};
7657	0					0	my $input_line = $rcall_hash->{input_line};
7658
7659	0	0				0	if ( ++$self->[_unexpected_error_count_] <= MAX_NAG_MESSAGES ) {
7660	0					0	my $msg = "found $found where $expecting expected";
7661	0					0	my $pos = $rpretoken_map->[$i_tok];
7662	0					0	$self->interrupt_logfile();
7663	0					0	my $input_line_number = $self->[_last_line_number_];
7664	0					0	my ( $offset, $numbered_line, $underline ) =
7665							make_numbered_line( $input_line_number, $input_line, $pos );
7666	0					0	$underline = write_on_underline( $underline, $pos - $offset, '^' );
7667
7668	0					0	my $trailer = EMPTY_STRING;
7669	0	0	0			0	if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
7670	0					0	my $pos_prev = $rpretoken_map->[$last_nonblank_i];
7671	0					0	my $num;
7672	0	0				0	if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) {
7673	0					0	$num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev;
7674							}
7675							else {
7676	0					0	$num = $pos - $pos_prev;
7677							}
7678	0	0				0	if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
	0					0
	0					0
7679
7680							$underline =
7681	0					0	write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
7682	0					0	$trailer = " (previous token underlined)";
7683							}
7684	0					0	$underline =~ s/\s+$//;
7685	0					0	$self->warning( $numbered_line . "\n" );
7686	0					0	$self->warning( $underline . "\n" );
7687	0					0	$self->warning( $msg . $trailer . "\n" );
7688	0					0	$self->resume_logfile();
7689							}
7690	0					0	return;
7691							} ## end sub report_unexpected
7692
7693							my %is_sigil_or_paren;
7694							my %is_R_closing_sb;
7695
7696							BEGIN {
7697
7698	44			44		278	my @q = qw< $ & % * @ ) >;
7699	44					298	$is_sigil_or_paren{$_} = 1 for @q;
7700
7701	44					114	@q = qw( R ] );
7702	44					71695	$is_R_closing_sb{$_} = 1 for @q;
7703							} ## end BEGIN
7704
7705							sub is_non_structural_brace {
7706
7707							# Decide if a brace or bracket is structural or non-structural
7708							# by looking at the previous token and type
7709							# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
7710
7711							# EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
7712							# Tentatively deactivated because it caused the wrong operator expectation
7713							# for this code:
7714							# $user = @vars[1] / 100;
7715							# Must update sub operator_expected before re-implementing.
7716							# if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
7717							# return 0;
7718							# }
7719
7720							#--------------------------------------------------------------
7721							# NOTE: braces after type characters start code blocks, but for
7722							# simplicity these are not identified as such. See also
7723							# sub code_block_type
7724							#--------------------------------------------------------------
7725
7726							##if ($last_nonblank_type eq 't') {return 0}
7727
7728							# otherwise, it is non-structural if it is decorated
7729							# by type information.
7730							# For example, the '{' here is non-structural: ${xxx}
7731							# Removed '::' to fix c074
7732							## $last_nonblank_token =~ /^([\$\@\*\&\%\)]\|->\|::)/
7733							return (
7734							## $last_nonblank_token =~ /^([\$\@\*\&\%\)]\|->)/
7735							$is_sigil_or_paren{ substr( $last_nonblank_token, 0, 1 ) }
7736							\|\| substr( $last_nonblank_token, 0, 2 ) eq '->'
7737
7738							# or if we follow a hash or array closing curly brace or bracket
7739							# For example, the second '{' in this is non-structural: $a{'x'}{'y'}
7740							# because the first '}' would have been given type 'R'
7741							##\|\| $last_nonblank_type =~ /^([R\]])$/
7742	2822		66	2822	0	14523	\|\| $is_R_closing_sb{$last_nonblank_type}
7743							);
7744							} ## end sub is_non_structural_brace
7745
7746							#######################################################################
7747							# Tokenizer routines for tracking container nesting depths
7748							#######################################################################
7749
7750							# The following routines keep track of nesting depths of the nesting
7751							# types, ( [ { and ?. This is necessary for determining the indentation
7752							# level, and also for debugging programs. Not only do they keep track of
7753							# nesting depths of the individual brace types, but they check that each
7754							# of the other brace types is balanced within matching pairs. For
7755							# example, if the program sees this sequence:
7756							#
7757							# { ( ( ) }
7758							#
7759							# then it can determine that there is an extra left paren somewhere
7760							# between the { and the }. And so on with every other possible
7761							# combination of outer and inner brace types. For another
7762							# example:
7763							#
7764							# ( [ ..... ] ] )
7765							#
7766							# which has an extra ] within the parens.
7767							#
7768							# The brace types have indexes 0 .. 3 which are indexes into
7769							# the matrices.
7770							#
7771							# The pair ? : are treated as just another nesting type, with ? acting
7772							# as the opening brace and : acting as the closing brace.
7773							#
7774							# The matrix
7775							#
7776							# $rdepth_array->[$a][$b][ $rcurrent_depth->[$a] ] = $rcurrent_depth->[$b];
7777							#
7778							# saves the nesting depth of brace type $b (where $b is either of the other
7779							# nesting types) when brace type $a enters a new depth. When this depth
7780							# decreases, a check is made that the current depth of brace types $b is
7781							# unchanged, or otherwise there must have been an error. This can
7782							# be very useful for localizing errors, particularly when perl runs to
7783							# the end of a large file (such as this one) and announces that there
7784							# is a problem somewhere.
7785							#
7786							# A numerical sequence number is maintained for every nesting type,
7787							# so that each matching pair can be uniquely identified in a simple
7788							# way.
7789
7790							sub increase_nesting_depth {
7791	5446			5446	0	9085	my ( $self, $aa, $pos ) = @_;
7792
7793							# Given:
7794							# $aa = integer code of container type, 0-3
7795							# $pos = position of character, for error message
7796
7797							# USES GLOBAL VARIABLES: $rcurrent_depth,
7798							# $rcurrent_sequence_number, $rdepth_array,
7799							# $rstarting_line_of_current_depth, $statement_type
7800	5446					7951	my $cd_aa = ++$rcurrent_depth->[$aa];
7801	5446					6305	$total_depth++;
7802	5446					8998	$rtotal_depth->[$aa]->[$cd_aa] = $total_depth;
7803	5446					7428	my $input_line_number = $self->[_last_line_number_];
7804	5446					7376	my $input_line = $self->[_line_of_text_];
7805
7806							# Sequence numbers increment by number of items. This keeps
7807							# a unique set of numbers but still allows the relative location
7808							# of any type to be determined.
7809
7810							# make a new unique sequence number
7811	5446					7540	my $seqno = $next_sequence_number++;
7812
7813	5446					8693	$rcurrent_sequence_number->[$aa]->[$cd_aa] = $seqno;
7814
7815	5446					15655	$rstarting_line_of_current_depth->[$aa]->[$cd_aa] =
7816							[ $input_line_number, $input_line, $pos ];
7817
7818	5446					13554	for my $bb ( 0 .. @closing_brace_names - 1 ) {
7819	21784	100				30670	next if ( $bb == $aa );
7820	16338					27812	$rdepth_array->[$aa]->[$bb]->[$cd_aa] = $rcurrent_depth->[$bb];
7821							}
7822
7823							# set a flag for indenting a nested ternary statement
7824	5446					7390	my $indent = 0;
7825	5446	100				8998	if ( $aa == QUESTION_COLON ) {
7826	193					406	$rnested_ternary_flag->[$cd_aa] = 0;
7827	193	100				497	if ( $cd_aa > 1 ) {
7828	17	100				58	if ( $rnested_ternary_flag->[ $cd_aa - 1 ] == 0 ) {
7829	16					37	my $pdepth = $rtotal_depth->[$aa]->[ $cd_aa - 1 ];
7830	16	100				56	if ( $pdepth == $total_depth - 1 ) {
7831	8					13	$indent = 1;
7832	8					20	$rnested_ternary_flag->[ $cd_aa - 1 ] = -1;
7833							}
7834							}
7835							}
7836							}
7837
7838							# Fix part #1 for git82: save last token type for propagation of type 'Z'
7839	5446					17219	$rnested_statement_type->[$aa]->[$cd_aa] =
7840							[ $statement_type, $last_nonblank_type, $last_nonblank_token ];
7841	5446					7333	$statement_type = EMPTY_STRING;
7842	5446					11617	return ( $seqno, $indent );
7843							} ## end sub increase_nesting_depth
7844
7845							sub is_balanced_closing_container {
7846
7847	47			47	0	93	my ($aa) = @_;
7848
7849							# Return true if a closing container can go here without error
7850							# Return false if not
7851							# Given:
7852							# $aa = integer code of container type, 0-3
7853
7854							# cannot close if there was no opening
7855	47					65	my $cd_aa = $rcurrent_depth->[$aa];
7856	47	100				150	return if ( $cd_aa <= 0 );
7857
7858							# check that any other brace types $bb contained within would be balanced
7859	8					51	for my $bb ( 0 .. @closing_brace_names - 1 ) {
7860	8	50				20	next if ( $bb == $aa );
7861							return
7862							if (
7863	8	50				39	$rdepth_array->[$aa]->[$bb]->[$cd_aa] != $rcurrent_depth->[$bb] );
7864							}
7865
7866							# OK, everything will be balanced
7867	0					0	return 1;
7868							} ## end sub is_balanced_closing_container
7869
7870							sub decrease_nesting_depth {
7871
7872	5446			5446	0	8923	my ( $self, $aa, $pos ) = @_;
7873
7874							# Given:
7875							# $aa = integer code of container type, 0-3
7876							# $pos = position of character, for error message
7877
7878							# USES GLOBAL VARIABLES: $rcurrent_depth,
7879							# $rcurrent_sequence_number, $rdepth_array, $rstarting_line_of_current_depth
7880							# $statement_type
7881	5446					6319	my $seqno = 0;
7882	5446					11775	my $input_line_number = $self->[_last_line_number_];
7883	5446					7527	my $input_line = $self->[_line_of_text_];
7884
7885	5446					6191	my $outdent = 0;
7886	5446					6360	$total_depth--;
7887	5446					7821	my $cd_aa = $rcurrent_depth->[$aa];
7888	5446	50				8957	if ( $cd_aa > 0 ) {
7889
7890							# set a flag for un-indenting after seeing a nested ternary statement
7891	5446					8249	$seqno = $rcurrent_sequence_number->[$aa]->[$cd_aa];
7892	5446	100				9281	if ( $aa == QUESTION_COLON ) {
7893	193					376	$outdent = $rnested_ternary_flag->[$cd_aa];
7894							}
7895
7896							# Fix part #2 for git82: use saved type for propagation of type 'Z'
7897							# through type L-R braces. Perl seems to allow ${bareword}
7898							# as an indirect object, but nothing much more complex than that.
7899							( $statement_type, my $saved_type, my $saved_token_uu ) =
7900	5446					6181	@{ $rnested_statement_type->[$aa]->[ $rcurrent_depth->[$aa] ] };
	5446					13257
7901	5446	50	100			14477	if ( $aa == BRACE
			66
			66
7902							&& $saved_type eq 'Z'
7903							&& $last_nonblank_type eq 'w'
7904							&& $rbrace_structural_type->[$brace_depth] eq 'L' )
7905							{
7906	1					2	$last_nonblank_type = $saved_type;
7907							}
7908
7909							# check that any brace types $bb contained within are balanced
7910	5446					12043	for my $bb ( 0 .. @closing_brace_names - 1 ) {
7911	21784	100				30593	next if ( $bb == $aa );
7912
7913	16338	50				31569	if ( $rdepth_array->[$aa]->[$bb]->[$cd_aa] !=
7914							$rcurrent_depth->[$bb] )
7915							{
7916	0					0	my $diff =
7917							$rcurrent_depth->[$bb] -
7918							$rdepth_array->[$aa]->[$bb]->[$cd_aa];
7919
7920							# don't whine too many times
7921	0					0	my $saw_brace_error = $self->get_saw_brace_error();
7922	0	0	0			0	if (
			0
7923							$saw_brace_error <= MAX_NAG_MESSAGES
7924
7925							# if too many closing types have occurred, we probably
7926							# already caught this error
7927							&& ( ( $diff > 0 ) \|\| ( $saw_brace_error <= 0 ) )
7928							)
7929							{
7930	0					0	$self->interrupt_logfile();
7931	0					0	my $rsl = $rstarting_line_of_current_depth->[$aa]->[$cd_aa];
7932	0					0	my $sl = $rsl->[0];
7933	0					0	my $rel = [ $input_line_number, $input_line, $pos ];
7934	0					0	my $el = $rel->[0];
7935	0					0	my ($ess);
7936
7937	0	0	0			0	if ( $diff == 1 \|\| $diff == -1 ) {
7938	0					0	$ess = EMPTY_STRING;
7939							}
7940							else {
7941	0					0	$ess = 's';
7942							}
7943	0	0				0	my $bname =
7944							( $diff > 0 )
7945							? $opening_brace_names[$bb]
7946							: $closing_brace_names[$bb];
7947	0					0	$self->write_error_indicator_pair( @{$rsl}, '^' );
	0					0
7948	0					0	my $msg = <<"EOM";
7949							Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
7950							EOM
7951
7952	0	0				0	if ( $diff > 0 ) {
7953	0					0	my $rml =
7954							$rstarting_line_of_current_depth->[$bb]
7955							->[ $rcurrent_depth->[$bb] ];
7956	0					0	my $ml = $rml->[0];
7957	0					0	$msg .=
7958							" The most recent un-matched $bname is on line $ml\n";
7959	0					0	$self->write_error_indicator_pair( @{$rml}, '^' );
	0					0
7960							}
7961	0					0	$self->write_error_indicator_pair( @{$rel}, '^' );
	0					0
7962	0					0	$self->warning($msg);
7963	0					0	$self->resume_logfile();
7964							}
7965	0					0	$self->increment_brace_error();
7966	0	0				0	if ( $bb eq BRACE ) { $self->[_show_indentation_table_] = 1 }
	0					0
7967							}
7968							}
7969	5446					8153	$rcurrent_depth->[$aa]--;
7970							}
7971							else {
7972
7973	0					0	my $saw_brace_error = $self->get_saw_brace_error();
7974	0	0				0	if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
7975	0					0	my $msg = <<"EOM";
7976							There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
7977							EOM
7978	0					0	$self->indicate_error( $msg, $input_line_number, $input_line, $pos,
7979							'^' );
7980							}
7981	0					0	$self->increment_brace_error();
7982	0	0				0	if ( $aa eq BRACE ) { $self->[_show_indentation_table_] = 1 }
	0					0
7983
7984							# keep track of errors in braces alone (ignoring ternary nesting errors)
7985	0	0				0	$self->[_true_brace_error_count_]++
7986							if ( $closing_brace_names[$aa] ne "':'" );
7987							}
7988	5446					11244	return ( $seqno, $outdent );
7989							} ## end sub decrease_nesting_depth
7990
7991							sub check_final_nesting_depths {
7992
7993							# USES GLOBAL VARIABLES: $rcurrent_depth, $rstarting_line_of_current_depth
7994	649			649	0	1110	my $self = shift;
7995
7996	649					2239	for my $aa ( 0 .. @closing_brace_names - 1 ) {
7997
7998	2596					3415	my $cd_aa = $rcurrent_depth->[$aa];
7999	2596	50				4519	if ($cd_aa) {
8000	0					0	my $rsl = $rstarting_line_of_current_depth->[$aa]->[$cd_aa];
8001	0					0	my $sl = $rsl->[0];
8002
8003							# Add hint for something like a missing terminal ':' of a ternary
8004	0					0	my $hint = EMPTY_STRING;
8005	0	0				0	if ( $cd_aa == 1 ) {
8006	0					0	$hint =
8007							" .. did not find its closing $closing_brace_names[$aa]";
8008							}
8009	0					0	my $msg = <<"EOM";
8010							Final nesting depth of $opening_brace_names[$aa]s is $cd_aa
8011							The most recent un-matched $opening_brace_names[$aa] is on line $sl$hint
8012							EOM
8013	0					0	$self->indicate_error( $msg, @{$rsl}, '^' );
	0					0
8014	0					0	$self->increment_brace_error();
8015	0	0				0	if ( $aa eq BRACE ) { $self->[_show_indentation_table_] = 1 }
	0					0
8016							}
8017							}
8018	649					1063	return;
8019							} ## end sub check_final_nesting_depths
8020
8021							#######################################################################
8022							# Tokenizer routines for looking ahead in input stream
8023							#######################################################################
8024
8025							sub peek_ahead_for_n_nonblank_pre_tokens {
8026
8027	184			184	0	389	my ( $self, $max_pretokens ) = @_;
8028
8029							# Given:
8030							# $max_pretokens = number of pretokens wanted
8031							# Return:
8032							# next $max_pretokens pretokens if they exist
8033							# undef's if hits eof without seeing any pretokens
8034
8035							# USES GLOBAL VARIABLES: (none)
8036	184					281	my $line;
8037	184					287	my $i = 0;
8038	184					331	my ( $rpre_tokens, $rmap, $rpre_types );
8039
8040	184					562	while ( defined( $line = $self->peek_ahead( $i++ ) ) ) {
8041	202					1337	$line =~ s/^\s+//; # trim leading blanks
8042	202	100				512	next if ( length($line) <= 0 ); # skip blank
8043	196	100				607	next if ( $line =~ /^#/ ); # skip comment
8044	176					404	( $rpre_tokens, $rmap, $rpre_types ) =
8045							pre_tokenize( $line, $max_pretokens );
8046	176					321	last;
8047							} ## end while ( defined( $line = ...))
8048	184					511	return ( $rpre_tokens, $rpre_types );
8049							} ## end sub peek_ahead_for_n_nonblank_pre_tokens
8050
8051							# look ahead for next non-blank, non-comment line of code
8052							sub peek_ahead_for_nonblank_token {
8053
8054	139			139	0	342	my ( $self, $rtokens, $max_token_index ) = @_;
8055
8056							# Given:
8057							# $rtokens = ref to token array
8058							# $max_token_index = index of last token in $rtokens
8059							# Task:
8060							# Update $rtokens with next nonblank token
8061
8062							# USES GLOBAL VARIABLES: (none)
8063	139					240	my $line;
8064	139					227	my $i = 0;
8065
8066	139					537	while ( defined( $line = $self->peek_ahead( $i++ ) ) ) {
8067	190					843	$line =~ s/^\s+//; # trim leading blanks
8068	190	100				554	next if ( length($line) <= 0 ); # skip blank
8069	161	100				526	next if ( $line =~ /^#/ ); # skip comment
8070
8071							# Updated from 2 to 3 to get trigraphs, added for case b1175
8072	137					400	my ( $rtok, $rmap_uu, $rtype_uu ) = pre_tokenize( $line, 3 );
8073	137					286	my $j = $max_token_index + 1;
8074
8075	137					231	foreach my $tok ( @{$rtok} ) {
	137					315
8076	394	100				815	last if ( $tok =~ "\n" );
8077	351					717	$rtokens->[ ++$j ] = $tok;
8078							}
8079	137					468	last;
8080							} ## end while ( defined( $line = ...))
8081	139					303	return;
8082							} ## end sub peek_ahead_for_nonblank_token
8083
8084							#######################################################################
8085							# Tokenizer guessing routines for ambiguous situations
8086							#######################################################################
8087
8088							my %is_non_ternary_pretok;
8089
8090							BEGIN {
8091
8092							# Some pre-tokens which cannot immediately follow a ternary '?'
8093	44			44		295	my @q = qw# ; ? : ) } ] = > #;
8094	44					152	push @q, COMMA;
8095	44					181	%is_non_ternary_pretok = map { $_ => 1 } @q;
	396					21911
8096							}
8097
8098							sub guess_if_pattern_or_conditional {
8099
8100	12			12	0	37	my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map_uu, $max_token_index )
8101							= @_;
8102
8103							# This routine is called when we have encountered a ? following an
8104							# unknown bareword, and we must decide if it starts a pattern or not
8105							# Given:
8106							# $i - token index of the ? starting possible pattern
8107							# $rtokens ... = the token arrays
8108							# Return:
8109							# $is_pattern = 0 if probably not pattern, =1 if probably a pattern
8110							# msg = a warning or diagnostic message
8111
8112							# USES GLOBAL VARIABLES: $last_nonblank_token
8113
8114	12					22	my $is_pattern = 0;
8115	12					31	my $msg =
8116							"guessing that ? after type='$last_nonblank_type' token='$last_nonblank_token' starts a ";
8117
8118	12	50				65	if ( $i >= $max_token_index ) {
8119	0					0	$msg .= "conditional (no end to pattern found on the line)\n";
8120	0					0	$is_pattern = 0;
8121	0					0	return ( $is_pattern, $msg );
8122							}
8123
8124							# See if we can rule out a ternary operator here before proceeding. c547.
8125	12					43	my ( $next_nonblank_token, $i_next_uu ) =
8126							find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
8127	12	50	33			103	if (
			33
8128							!(
8129							$is_non_ternary_pretok{$next_nonblank_token}
8130							\|\| ( $last_nonblank_type eq 'k' && $last_nonblank_token eq 'split' )
8131							)
8132							)
8133							{
8134							# A ternary cannot be ruled out here. We will assume this is a ternary
8135							# operator since '?' as pattern delimiter is deprecated.
8136	12					21	$is_pattern = 0;
8137	12					48	$msg .= "conditional (cannot rule it out)";
8138	12					43	return ( $is_pattern, $msg );
8139							}
8140
8141							# If we get here, then we either have a ternary with a syntax error or some
8142							# ancient code which uses ? as a pattern delimiter. We will only select the
8143							# pattern delimiter if we can find its matching closing delimiter.
8144
8145	0					0	my $ibeg = $i;
8146	0					0	$i = $ibeg + 1;
8147							##my $next_token = $rtokens->[$i]; # first token after ?
8148
8149							# look for a possible ending ? on this line..
8150	0					0	my $in_quote = 1;
8151	0					0	my $quote_depth = 0;
8152	0					0	my $quote_character = EMPTY_STRING;
8153	0					0	my $quote_pos = 0;
8154	0					0	my $quoted_string;
8155							(
8156
8157	0					0	$i,
8158							$in_quote,
8159							$quote_character,
8160							$quote_pos,
8161							$quote_depth,
8162							$quoted_string,
8163
8164							) = $self->follow_quoted_string(
8165
8166							$ibeg,
8167							$in_quote,
8168							$rtokens,
8169							$rtoken_type,
8170							$quote_character,
8171							$quote_pos,
8172							$quote_depth,
8173							$max_token_index,
8174
8175							);
8176
8177	0	0				0	if ($in_quote) {
8178
8179							# we didn't find an ending ? on this line,
8180							# so we bias towards conditional
8181	0					0	$is_pattern = 0;
8182	0					0	$msg .= "conditional (no ending ? on this line)\n";
8183	0					0	return ( $is_pattern, $msg );
8184							}
8185
8186							# we found an ending ?, so we bias towards a pattern
8187
8188							# Watch out for an ending ? in quotes, like this
8189							# my $case_flag = File::Spec->case_tolerant ? '(?i)' : '';
8190	0					0	my $s_quote = 0;
8191	0					0	my $d_quote = 0;
8192	0					0	my $colons = 0;
8193	0					0	foreach my $ii ( $ibeg + 1 .. $i - 1 ) {
8194	0					0	my $tok = $rtokens->[$ii];
8195	0	0				0	if ( $tok eq ":" ) { $colons++ }
	0					0
8196	0	0				0	if ( $tok eq "'" ) { $s_quote++ }
	0					0
8197	0	0				0	if ( $tok eq '"' ) { $d_quote++ }
	0					0
8198							}
8199	0	0	0			0	if ( $s_quote % 2 \|\| $d_quote % 2 \|\| $colons ) {
			0
8200	0					0	$is_pattern = 0;
8201	0					0	$msg .= "conditional: found ending ? but unbalanced quote chars\n";
8202	0					0	return ( $is_pattern, $msg );
8203							}
8204	0	0				0	if ( $self->pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
8205	0					0	$is_pattern = 1;
8206	0					0	$msg .= "pattern (found ending ? and pattern expected)\n";
8207	0					0	return ( $is_pattern, $msg );
8208							}
8209
8210							# NOTE: An ultimate decision could be made on version, since ? is a ternary
8211							# after version 5.22. But we may be formatting an ancient script with a
8212							# newer perl, and it might run on an older perl, so we cannot be certain.
8213							# if ($] >=5.022) {$is_pattern=0} else { ... not sure
8214
8215	0					0	$msg .= "conditional (but uncertain)\n";
8216	0					0	return ( $is_pattern, $msg );
8217							} ## end sub guess_if_pattern_or_conditional
8218
8219							my %is_known_constant;
8220							my %is_known_function;
8221
8222							BEGIN {
8223
8224							# Constants like 'pi' in Trig.pm are common
8225	44			44		244	my @q = qw( pi pi2 pi4 pip2 pip4 );
8226	44					353	$is_known_constant{$_} = 1 for @q;
8227
8228							# parenless calls of 'ok' are common
8229	44					108	@q = qw( ok );
8230	44					76040	$is_known_function{$_} = 1 for @q;
8231							} ## end BEGIN
8232
8233							sub guess_if_pattern_or_division {
8234
8235	0			0	0	0	my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map, $max_token_index ) =
8236							@_;
8237
8238							# This routine is called when we have encountered a / following an
8239							# unknown bareword, and we must decide if it starts a pattern or is a
8240							# division.
8241							# Given:
8242							# $i - token index of the / starting possible pattern
8243							# $rtokens ... = the token arrays
8244							# Return:
8245							# $is_pattern = 0 if probably division, =1 if probably a pattern
8246							# msg = a warning or diagnostic message
8247							# USES GLOBAL VARIABLES: $last_nonblank_token
8248	0					0	my $msg = "guessing that / after '$last_nonblank_token' starts a ";
8249	0					0	my $ibeg = $i;
8250	0					0	my $is_pattern = 0;
8251
8252	0					0	my $divide_possible =
8253							$self->is_possible_numerator( $i, $rtokens, $max_token_index );
8254
8255	0	0				0	if ( $divide_possible < 0 ) {
8256	0					0	$msg .= "pattern (division not possible here)\n";
8257	0					0	$is_pattern = 1;
8258	0					0	$self->saw_bareword_function($last_nonblank_token);
8259	0					0	return ( $is_pattern, $msg );
8260							}
8261	0	0				0	if ( $divide_possible == 4 ) {
8262	0					0	$msg .= "division (pattern not possible here)\n";
8263	0					0	$is_pattern = 0;
8264	0					0	return ( $is_pattern, $msg );
8265							}
8266
8267							# anything left on line?
8268	0	0				0	if ( $i >= $max_token_index ) {
8269	0					0	$msg .= "division (line ends with this /)\n";
8270	0					0	$is_pattern = 0;
8271	0					0	return ( $is_pattern, $msg );
8272							}
8273
8274							# quick check for no pattern-ending slash on this line
8275	0					0	my $pos_beg = $rtoken_map->[$ibeg];
8276	0					0	my $input_line = $self->[_line_of_text_];
8277	0	0				0	if ( index( $input_line, '/', $pos_beg + 1 ) < 0 ) {
8278	0					0	$msg .= "division (no ending / on this line)\n";
8279	0					0	$is_pattern = 0;
8280	0					0	return ( $is_pattern, $msg );
8281							}
8282
8283							# Setup spacing rule before we change $i below..
8284	0					0	$i = $ibeg + 1;
8285	0					0	my $next_token = $rtokens->[$i]; # first token after slash
8286
8287							# There are four possible spacings around the first slash:
8288							#
8289							# return pi/two;#/; -/-
8290							# return pi/ two;#/; -/+
8291							# return pi / two;#/; +/+
8292							# return pi /two;#/; +/- <-- possible pattern
8293							#
8294							# Spacing rule: a space before the slash but not after the slash
8295							# usually indicates a pattern. We can use this to break ties.
8296							# Note: perl seems to take a newline as a space in this rule (c243)
8297	0		0			0	my $space_before = $i < 2 \|\| $rtokens->[ $i - 2 ] =~ m/^\s/;
8298	0					0	my $space_after = $next_token =~ m/^\s/;
8299	0		0			0	my $is_pattern_by_spacing = $space_before && !$space_after;
8300
8301							# Make an accurate search for a possible terminating / on this line..
8302	0					0	my $in_quote = 1;
8303	0					0	my $quote_depth = 0;
8304	0					0	my $quote_character = EMPTY_STRING;
8305	0					0	my $quote_pos = 0;
8306	0					0	my $quoted_string;
8307							(
8308
8309	0					0	$i,
8310							$in_quote,
8311							$quote_character,
8312							$quote_pos,
8313							$quote_depth,
8314							$quoted_string,
8315							)
8316							= $self->follow_quoted_string(
8317
8318							$ibeg,
8319							$in_quote,
8320							$rtokens,
8321							$rtoken_type,
8322							$quote_character,
8323							$quote_pos,
8324							$quote_depth,
8325							$max_token_index,
8326							);
8327
8328							# if we didn't find an ending / on this line ..
8329	0	0				0	if ($in_quote) {
8330	0					0	$is_pattern = 0;
8331	0					0	$msg .= "division (no ending / on this line)\n";
8332	0					0	return ( $is_pattern, $msg );
8333							}
8334
8335							# we found an ending /, see if it might terminate a pattern
8336	0					0	my $pattern_expected =
8337							$self->pattern_expected( $i, $rtokens, $max_token_index );
8338
8339	0	0				0	if ( $pattern_expected < 0 ) {
8340	0					0	$is_pattern = 0;
8341	0					0	$msg .= "division (pattern not possible)\n";
8342	0					0	return ( $is_pattern, $msg );
8343							}
8344
8345							# Both pattern and divide can work here...
8346							# Check for known constants in the numerator, like 'pi'
8347	0	0				0	if ( $is_known_constant{$last_nonblank_token} ) {
8348	0					0	$msg .=
8349							"division (pattern works too but saw known constant '$last_nonblank_token')\n";
8350	0					0	$is_pattern = 0;
8351	0					0	return ( $is_pattern, $msg );
8352							}
8353
8354							# Check for known functions like 'ok'
8355	0	0				0	if ( $is_known_function{$last_nonblank_token} ) {
8356	0					0	$msg .= "pattern (division works too but saw '$last_nonblank_token')\n";
8357	0					0	$is_pattern = 1;
8358	0					0	return ( $is_pattern, $msg );
8359							}
8360
8361							# If one rule is more probable, use it
8362	0	0				0	if ( $divide_possible > $pattern_expected ) {
8363	0					0	$msg .= "division (more likely based on following tokens)\n";
8364	0					0	$is_pattern = 0;
8365	0					0	return ( $is_pattern, $msg );
8366							}
8367
8368							# finally, we have to use the spacing rule
8369	0	0				0	if ($is_pattern_by_spacing) {
8370	0					0	$msg .= "pattern (guess on spacing, but division possible too)\n";
8371	0					0	$is_pattern = 1;
8372							}
8373							else {
8374	0					0	$msg .= "division (guess on spacing, but pattern is possible too)\n";
8375	0					0	$is_pattern = 0;
8376							}
8377
8378	0					0	return ( $is_pattern, $msg );
8379							} ## end sub guess_if_pattern_or_division
8380
8381							sub guess_if_here_doc {
8382
8383	0			0	0	0	my ( $self, $next_token ) = @_;
8384
8385							# Try to resolve here-doc vs. shift by looking ahead for
8386							# non-code or the end token (currently only looks for end token)
8387
8388							# Given:
8389							# $next_token = the next token after '<<'
8390
8391							# Return:
8392							# 1 if it is probably a here doc
8393							# 0 if not
8394
8395							# USES GLOBAL VARIABLES: $current_package $ris_constant,
8396
8397							# This is how many lines we will search for a target as part of the
8398							# guessing strategy. There is probably little reason to change it.
8399	0					0	my $HERE_DOC_WINDOW = 40;
8400
8401	0					0	my $here_doc_expected = 0;
8402	0					0	my $line;
8403	0					0	my $k = 0;
8404	0					0	my $msg = "checking <<";
8405
8406	0					0	while ( defined( $line = $self->peek_ahead( $k++ ) ) ) {
8407	0					0	chomp $line;
8408	0	0				0	if ( $line eq $next_token ) {
8409	0					0	$msg .= " -- found target $next_token ahead $k lines\n";
8410	0					0	$here_doc_expected = 1; # got it
8411	0					0	last;
8412							}
8413	0	0				0	last if ( $k >= $HERE_DOC_WINDOW );
8414							} ## end while ( defined( $line = ...))
8415
8416	0	0				0	if ( !$here_doc_expected ) {
8417
8418	0	0				0	if ( !defined($line) ) {
8419	0					0	$here_doc_expected = -1; # hit eof without seeing target
8420	0					0	$msg .= " -- must be shift; target $next_token not in file\n";
8421							}
8422							else { # still unsure..taking a wild guess
8423
8424	0	0				0	if ( !$ris_constant->{$current_package}->{$next_token} ) {
8425	0					0	$here_doc_expected = 1;
8426	0					0	$msg .=
8427							" -- guessing it's a here-doc ($next_token not a constant)\n";
8428							}
8429							else {
8430	0					0	$msg .=
8431							" -- guessing it's a shift ($next_token is a constant)\n";
8432							}
8433	0					0	if (DEBUG_GUESS_MODE) {
8434							$self->warning("DEBUG_GUESS_MODE message:\n$msg\n");
8435							}
8436							}
8437							}
8438	0					0	$self->write_logfile_entry($msg);
8439	0					0	return $here_doc_expected;
8440							} ## end sub guess_if_here_doc
8441
8442							#######################################################################
8443							# Tokenizer Routines for scanning identifiers and related items
8444							#######################################################################
8445
8446							sub scan_bare_identifier_do {
8447
8448							my (
8449
8450	1862			1862	0	4844	$self,
8451
8452							$input_line,
8453							$i,
8454							$tok,
8455							$type,
8456							$prototype,
8457							$rtoken_map,
8458							$max_token_index,
8459
8460							) = @_;
8461
8462							# This routine is called to scan a token starting with an alphanumeric
8463							# variable or package separator, :: or '.
8464
8465							# Given:
8466							# current scan state variables
8467
8468							# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
8469							# $last_nonblank_type, $rparen_type, $paren_depth
8470
8471	1862					2713	my $package = undef;
8472
8473	1862					2475	my $i_beg = $i;
8474
8475							# we have to back up one pretoken at a :: since each : is one pretoken
8476	1862	100				3909	if ( $tok eq '::' ) { $i_beg-- }
	9					10
8477	1862					2769	my $pos_beg = $rtoken_map->[$i_beg];
8478	1862					5602	pos($input_line) = $pos_beg;
8479
8480							# Examples:
8481							# A::B::C
8482							# A::
8483							# ::A
8484							# A'B
8485	1862	50				11714	if (
8486							$input_line =~ m{
8487							\G\s* # start at pos
8488							( (?:\w(?:'\|::)) ) # $1 = maybe package name like A:: A::B:: or A'
8489							(\w+)? # $2 = maybe followed by sub name
8490							}gcx
8491							)
8492							{
8493	1862					2974	my $pos = pos($input_line);
8494	1862					2743	my $numc = $pos - $pos_beg;
8495	1862					3602	$tok = substr( $input_line, $pos_beg, $numc );
8496
8497							# type 'w' includes anything without leading type info
8498							# ($,%,@,*) including something like abc::def::ghi
8499	1862					2627	$type = 'w';
8500
8501	1862					2617	my $sub_name = EMPTY_STRING;
8502	1862	100				4470	if ( defined($2) ) { $sub_name = $2; }
	1857					3191
8503	1862	100	66			6790	if ( defined($1) && length($1) ) {
8504	280					513	$package = $1;
8505
8506							# patch: check for package call A::B::C->
8507							# in this case, C is part of the package name
8508	280	100				681	if ($sub_name) {
8509	275	100				1075	if ( $input_line =~ m{ \G\s*(?:->) }gcx ) {
8510	117					235	$package .= $sub_name;
8511	117					222	$sub_name = EMPTY_STRING;
8512							}
8513	275					615	pos($input_line) = $pos;
8514							}
8515
8516							# patch: don't allow isolated package name which just ends
8517							# in the old style package separator (single quote). Example:
8518							# use CGI':all';
8519	280	50	66			1122	if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
8520	0					0	$pos--;
8521							}
8522
8523	280					595	$package =~ s/\'/::/g;
8524	280	100				678	if ( $package =~ /^\:/ ) { $package = 'main' . $package }
	9					15
8525	280					903	$package =~ s/::$//;
8526							}
8527							else {
8528	1582					2358	$package = $current_package;
8529
8530							# patched for c043, part 1: keyword does not follow '->'
8531	1582	50	66			4708	if ( $is_keyword{$tok} && $last_nonblank_type ne '->' ) {
8532	0					0	$type = 'k';
8533							}
8534							}
8535
8536							# if it is a bareword.. patched for c043, part 2: not following '->'
8537	1862	100	66			6342	if ( $type eq 'w' && $last_nonblank_type ne '->' ) {
8538
8539							# check for v-string with leading 'v' type character
8540							# (This seems to have precedence over filehandle, type 'Y')
8541	1076	100	100			14320	if ( substr( $tok, 0, 1 ) eq 'v' && $tok =~ /^v\d[_\d]*$/ ) {
		100	66
		100	66
		50	66
		50
		100
		100
8542
8543							# we only have the first part - something like 'v101' -
8544							# look for more
8545	2	50				10	if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
8546	2					4	$pos = pos($input_line);
8547	2					6	$numc = $pos - $pos_beg;
8548	2					5	$tok = substr( $input_line, $pos_beg, $numc );
8549							}
8550	2					6	$type = 'v';
8551	2					14	$self->report_v_string($tok);
8552							}
8553
8554							# bareword after sort has implied empty prototype; for example:
8555							# @sorted = sort numerically ( 53, 29, 11, 32, 7 );
8556							# This has priority over whatever the user has specified.
8557							elsif ($last_nonblank_token eq 'sort'
8558							&& $last_nonblank_type eq 'k' )
8559							{
8560	1					2	$type = 'Z';
8561							}
8562
8563							# issue c382: this elsif statement moved from above because
8564							# previous check for type 'Z' after sort has priority.
8565							elsif ( $ris_constant->{$package}->{$sub_name} ) {
8566	12					25	$type = 'C';
8567							}
8568
8569							# Note: strangely, perl does not seem to really let you create
8570							# functions which act like eval and do, in the sense that eval
8571							# and do may have operators following the final }, but any operators
8572							# that you create with prototype (&) apparently do not allow
8573							# trailing operators, only terms. This seems strange.
8574							# If this ever changes, here is the update
8575							# to make perltidy behave accordingly:
8576
8577							# elsif ( $ris_block_function->{$package}{$tok} ) {
8578							# $tok='eval'; # patch to do braces like eval - doesn't work
8579							# $type = 'k';
8580							#}
8581							# TODO: This could become a separate type to allow for different
8582							# future behavior:
8583							elsif ( $ris_block_function->{$package}->{$sub_name} ) {
8584	0					0	$type = 'G';
8585							}
8586							elsif ( $ris_block_list_function->{$package}->{$sub_name} ) {
8587	0					0	$type = 'G';
8588							}
8589							elsif ( $ris_user_function->{$package}->{$sub_name} ) {
8590	6					16	$type = 'U';
8591	6					17	$prototype = $ruser_function_prototype->{$package}->{$sub_name};
8592							}
8593
8594							# check for indirect object
8595							elsif (
8596
8597							# added 2001-03-27: must not be followed immediately by '('
8598							# see fhandle.t
8599							( $input_line !~ m/\G\(/gc )
8600
8601							# and
8602							&& (
8603
8604							# preceded by keyword like 'print', 'printf' and friends
8605							$is_indirect_object_taker{$last_nonblank_token}
8606
8607							# or preceded by something like 'print(' or 'printf('
8608							\|\| (
8609							( $last_nonblank_token eq '(' )
8610							&& $is_indirect_object_taker{
8611							$rparen_type->[$paren_depth]
8612							}
8613
8614							)
8615							)
8616							)
8617							{
8618
8619							# may not be indirect object unless followed by a space;
8620							# updated 2021-01-16 to consider newline to be a space.
8621							# updated for case b990 to look for either ';' or space
8622	4	50	33			39	if ( pos($input_line) == length($input_line)
8623							\|\| $input_line =~ m/\G[;\s]/gc )
8624							{
8625	4					8	$type = 'Y';
8626
8627							# Abandon Hope ...
8628							# Perl's indirect object notation is a very bad
8629							# thing and can cause subtle bugs, especially for
8630							# beginning programmers. And I haven't even been
8631							# able to figure out a sane warning scheme which
8632							# doesn't get in the way of good scripts.
8633
8634							# Complain if a filehandle has any lower case
8635							# letters. This is suggested good practice.
8636							# Use 'sub_name' because something like
8637							# main::MYHANDLE is ok for filehandle
8638	4	100				16	if ( $sub_name =~ /[a-z]/ ) {
8639
8640							# could be bug caused by older perltidy if
8641							# followed by '('
8642	1	50				20	if ( $input_line =~ m/\G\s*\(/gc ) {
8643	1					6	$self->complain(
8644							"Caution: unknown word '$tok' in indirect object slot\n"
8645							);
8646							}
8647							}
8648							}
8649
8650							# bareword not followed by a space -- may not be filehandle
8651							# (may be function call defined in a 'use' statement)
8652							else {
8653	0					0	$type = 'Z';
8654							}
8655							}
8656
8657							# none of the above special types
8658							else {
8659							}
8660							}
8661
8662							# Now we must convert back from character position
8663							# to pre_token index.
8664							# I don't think an error flag can occur here ..but who knows
8665	1862					2537	my $error;
8666	1862					4679	( $i, $error ) =
8667							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
8668	1862	50				4058	if ($error) {
8669	0					0	$self->warning(
8670							"scan_bare_identifier: Possibly invalid tokenization\n");
8671							}
8672							}
8673
8674							# no match but line not blank - could be syntax error
8675							# perl will take '::' alone without complaint
8676							else {
8677	0					0	$type = 'w';
8678
8679							# change this warning to log message if it becomes annoying
8680	0					0	$self->warning("didn't find identifier after leading ::\n");
8681							}
8682	1862					7171	return ( $i, $tok, $type, $prototype );
8683							} ## end sub scan_bare_identifier_do
8684
8685							sub scan_id_do {
8686
8687							my (
8688
8689	405			405	0	1245	$self,
8690
8691							$input_line,
8692							$i,
8693							$tok,
8694							$rtokens,
8695							$rtoken_map,
8696							$id_scan_state,
8697							$max_token_index,
8698
8699							) = @_;
8700
8701							# Scan identifier following a type token.
8702							# Given:
8703							# current scan state variables
8704
8705							# This is the new scanner and may eventually replace scan_identifier.
8706							# Only type 'sub' and 'package' are implemented.
8707							# Token types $ * % @ & -> are not yet implemented.
8708							#
8709							# The type of call depends on $id_scan_state: $id_scan_state = ''
8710							# for starting call, in which case $tok must be the token defining
8711							# the type.
8712							#
8713							# If the type token is the last nonblank token on the line, a value
8714							# of $id_scan_state = $tok is returned, indicating that further
8715							# calls must be made to get the identifier. If the type token is
8716							# not the last nonblank token on the line, the identifier is
8717							# scanned and handled and a value of '' is returned.
8718
8719	44			44		380	use constant DEBUG_NSCAN => 0;
	44					99
	44					57837
8720	405					658	my $type = EMPTY_STRING;
8721	405					560	my $i_beg;
8722
8723							#print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
8724							#my ($a,$b,$c) = caller;
8725							#print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
8726
8727							# on re-entry, start scanning at first token on the line
8728	405	100				801	if ($id_scan_state) {
8729	10					19	$i_beg = $i;
8730	10					19	$type = EMPTY_STRING;
8731							}
8732
8733							# on initial entry, start scanning just after type token
8734							else {
8735	395					588	$i_beg = $i + 1;
8736	395					528	$id_scan_state = $tok;
8737	395					668	$type = 't';
8738							}
8739
8740							# find $i_beg = index of next nonblank token,
8741							# and handle empty lines
8742	405					600	my $blank_line = 0;
8743	405					586	my $is_lexical_method = 0;
8744	405					741	my $next_nonblank_token = $rtokens->[$i_beg];
8745	405	100				937	if ( $i_beg > $max_token_index ) {
8746	2					4	$blank_line = 1;
8747							}
8748							else {
8749
8750							# only a '#' immediately after a '$' is not a comment
8751	403	50				1024	if ( $next_nonblank_token eq '#' ) {
8752	0	0				0	if ( $tok ne '$' ) {
8753	0					0	$blank_line = 1;
8754							}
8755							}
8756
8757	403	100				1576	if ( $next_nonblank_token =~ /^\s/ ) {
8758	383					1136	( $next_nonblank_token, $i_beg ) =
8759							find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
8760							$max_token_index );
8761	383	100				1897	if ( $next_nonblank_token =~ /(^#\|^\s*$)/ ) {
8762	4					13	$blank_line = 1;
8763							}
8764							}
8765
8766							# Patch for Object::Pad lexical method like 'method $var {':
8767							# Skip past a '$'
8768	403	100	100			1991	if ( !$blank_line
			66
8769							&& $next_nonblank_token eq '$'
8770							&& $id_scan_state eq 'method' )
8771							{
8772	2					6	( $next_nonblank_token, $i_beg ) =
8773							find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
8774							$max_token_index );
8775	2					4	$is_lexical_method = 1;
8776							}
8777							}
8778
8779							# handle non-blank line; identifier, if any, must follow
8780	405	100				1028	if ( !$blank_line ) {
8781
8782	399	100				970	if ( $is_sub{$id_scan_state} ) {
		50
8783	349					3770	( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub(
8784							{
8785							input_line => $input_line,
8786							i => $i,
8787							i_beg => $i_beg,
8788							tok => $tok,
8789							type => $type,
8790							rtokens => $rtokens,
8791							rtoken_map => $rtoken_map,
8792							id_scan_state => $id_scan_state,
8793							max_token_index => $max_token_index,
8794							is_lexical_method => $is_lexical_method,
8795							}
8796							);
8797							}
8798
8799							elsif ( $is_package{$id_scan_state} ) {
8800	50					447	( $i, $tok, $type ) = $self->do_scan_package(
8801							{
8802							input_line => $input_line,
8803							i => $i,
8804							i_beg => $i_beg,
8805							tok => $tok,
8806							type => $type,
8807							rtokens => $rtokens,
8808							rtoken_map => $rtoken_map,
8809							max_token_index => $max_token_index,
8810							}
8811							);
8812	50					193	$id_scan_state = EMPTY_STRING;
8813							}
8814
8815							else {
8816	0					0	$self->warning("invalid token in scan_id: $tok\n");
8817	0					0	$id_scan_state = EMPTY_STRING;
8818							}
8819							}
8820
8821	405	50	33			2111	if ( $id_scan_state && ( !defined($type) \|\| !$type ) ) {
			66
8822
8823							# shouldn't happen:
8824	0					0	if (DEVEL_MODE) {
8825							Fault(<<EOM);
8826							Program bug in scan_id: undefined type but scan_state=$id_scan_state
8827							EOM
8828							}
8829							$self->warning(
8830	0					0	"Possible program bug in sub scan_id: undefined type but scan_state=$id_scan_state\n"
8831							);
8832	0					0	$self->report_definite_bug();
8833							}
8834
8835	405					530	DEBUG_NSCAN && do {
8836							print {*STDOUT}
8837							"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
8838							};
8839	405					1513	return ( $i, $tok, $type, $id_scan_state );
8840							} ## end sub scan_id_do
8841
8842							sub check_prototype {
8843	173			173	0	386	my ( $proto, $package, $subname ) = @_;
8844
8845							# Classify a sub based on its prototype
8846	173	50				402	return if ( !defined($package) );
8847	173	50				424	return if ( !defined($subname) );
8848	173	100				533	if ( defined($proto) ) {
8849	34					127	$proto =~ s/^\s\(\s//;
8850	34					111	$proto =~ s/\s*\)$//;
8851	34	100				90	if ($proto) {
8852	5					18	$ris_user_function->{$package}->{$subname} = 1;
8853	5					17	$ruser_function_prototype->{$package}->{$subname} = "($proto)";
8854
8855							# prototypes containing '&' must be treated specially..
8856	5	100				18	if ( $proto =~ /\&/ ) {
8857
8858							# right curly braces of prototypes ending in
8859							# '&' may be followed by an operator
8860	1	50				2	if ( $proto =~ /\&$/ ) {
8861	0					0	$ris_block_function->{$package}->{$subname} = 1;
8862							}
8863
8864							# right curly braces of prototypes NOT ending in
8865							# '&' may NOT be followed by an operator
8866							else {
8867	1					3	$ris_block_list_function->{$package}->{$subname} = 1;
8868							}
8869							}
8870							}
8871							else {
8872	29					69	$ris_constant->{$package}->{$subname} = 1;
8873							}
8874							}
8875							else {
8876	139					369	$ris_user_function->{$package}->{$subname} = 1;
8877							}
8878	173					334	return;
8879							} ## end sub check_prototype
8880
8881							sub do_scan_package {
8882
8883	50			50	0	110	my ( $self, $rcall_hash ) = @_;
8884
8885							# Parse a package name.
8886
8887	50					100	my $input_line = $rcall_hash->{input_line};
8888	50					82	my $i = $rcall_hash->{i};
8889	50					75	my $i_beg = $rcall_hash->{i_beg};
8890	50					90	my $tok = $rcall_hash->{tok};
8891	50					79	my $type = $rcall_hash->{type};
8892	50					70	my $rtokens = $rcall_hash->{rtokens};
8893	50					84	my $rtoken_map = $rcall_hash->{rtoken_map};
8894	50					62	my $max_token_index = $rcall_hash->{max_token_index};
8895
8896							# This is called with $i_beg equal to the index of the first nonblank
8897							# token following a 'package' token.
8898							# USES GLOBAL VARIABLES: $current_package,
8899
8900							# package NAMESPACE
8901							# package NAMESPACE VERSION
8902							# package NAMESPACE BLOCK
8903							# package NAMESPACE VERSION BLOCK
8904							#
8905							# If VERSION is provided, package sets the $VERSION variable in the given
8906							# namespace to a version object with the VERSION provided. VERSION must be
8907							# a "strict" style version number as defined by the version module: a
8908							# positive decimal number (integer or decimal-fraction) without
8909							# exponentiation or else a dotted-decimal v-string with a leading 'v'
8910							# character and at least three components.
8911							# reference http://perldoc.perl.org/functions/package.html
8912
8913	50					80	my $package = undef;
8914	50					92	my $pos_beg = $rtoken_map->[$i_beg];
8915	50					163	pos($input_line) = $pos_beg;
8916
8917							# handle non-blank line; package name, if any, must follow
8918	50	50				278	if ( $input_line =~ m/\G\s((?:\w(?:'\|::))\w)/gc ) {
8919	50					110	$package = $1;
8920	50	50	33			243	$package = ( defined($1) && $1 ) ? $1 : 'main';
8921	50					114	$package =~ s/\'/::/g;
8922	50	50				134	if ( $package =~ /^\:/ ) { $package = 'main' . $package }
	0					0
8923	50					86	$package =~ s/::$//;
8924	50					78	my $pos = pos($input_line);
8925	50					83	my $numc = $pos - $pos_beg;
8926	50					122	$tok = 'package ' . substr( $input_line, $pos_beg, $numc );
8927	50					70	$type = 'P'; # Fix for c250, previously 'i'
8928
8929							# Now we must convert back from character position
8930							# to pre_token index.
8931							# I don't think an error flag can occur here ..but ?
8932	50					67	my $error;
8933	50					182	( $i, $error ) =
8934							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
8935	50	50				175	if ($error) { $self->warning("Possibly invalid package\n") }
	0					0
8936	50					79	$current_package = $package;
8937
8938							# we should now have package NAMESPACE
8939							# now expecting VERSION, BLOCK, or ; to follow ...
8940							# package NAMESPACE VERSION
8941							# package NAMESPACE BLOCK
8942							# package NAMESPACE VERSION BLOCK
8943	50					167	my ( $next_nonblank_token, $i_next_uu ) =
8944							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
8945
8946							# check that something recognizable follows, but do not parse.
8947							# A VERSION number will be parsed later as a number or v-string in the
8948							# normal way. What is important is to set the statement type if
8949							# everything looks okay so that the operator_expected() routine
8950							# knows that the number is in a package statement.
8951							# Examples of valid primitive tokens that might follow are:
8952							# 1235 . ; { } v3 v
8953							# FIX: added a '#' since a side comment may also follow
8954							# Added ':' for class attributes (for --use-feature=class, rt145706)
8955	50	50				244	if ( $next_nonblank_token =~ /^([v\.\d;\{\}\#\:])\|v\d\|\d+$/ ) {
8956	50					118	$statement_type = $tok;
8957							}
8958							else {
8959	0					0	$self->warning(
8960							"Unexpected '$next_nonblank_token' after package name '$tok'\n"
8961							);
8962							}
8963							}
8964
8965							# no match but line not blank --
8966							# could be a label with name package, like package: , for example.
8967							else {
8968	0					0	$type = 'k';
8969							}
8970
8971	50					188	return ( $i, $tok, $type );
8972							} ## end sub do_scan_package
8973
8974							{ ## begin closure for sub scan_complex_identifier
8975
8976	44			44		358	use constant DEBUG_SCAN_ID => 0;
	44					80
	44					5339
8977
8978							# Constant hash:
8979							my %is_special_variable_char;
8980
8981							BEGIN {
8982
8983							# These are the only characters which can (currently) form special
8984							# variables, like $^W: (issue c066).
8985	44			44		297	my @q = qw{
8986							? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ ] ^ _
8987							};
8988	44					110	push @q, BACKSLASH;
8989	44					149469	$is_special_variable_char{$_} = 1 for @q;
8990							} ## end BEGIN
8991
8992							# These are the possible states for this scanner:
8993							my $scan_state_SIGIL = '$';
8994							my $scan_state_ALPHA = 'A';
8995							my $scan_state_COLON = ':';
8996							my $scan_state_LPAREN = '(';
8997							my $scan_state_RPAREN = ')';
8998							my $scan_state_AMPERSAND = '&';
8999							my $scan_state_SPLIT = '^';
9000
9001							# Only these non-blank states may be returned to caller:
9002							my %is_returnable_scan_state = (
9003							$scan_state_SIGIL => 1,
9004							$scan_state_AMPERSAND => 1,
9005							);
9006
9007							# USES GLOBAL VARIABLES:
9008							# $context, $last_nonblank_token, $last_nonblank_type
9009
9010							#-----------
9011							# call args:
9012							#-----------
9013							my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
9014							$expecting, $container_type );
9015
9016							#-------------------------------------------
9017							# my variables, re-initialized on each call:
9018							#-------------------------------------------
9019							my $i_begin; # starting index $i
9020							my $type; # returned identifier type
9021							my $tok_begin; # starting token
9022							my $tok; # returned token
9023							my $id_scan_state_begin; # starting scan state
9024							my $identifier_begin; # starting identifier
9025							my $i_save; # a last good index, in case of error
9026							my $message; # hold error message for log file
9027							my $tok_is_blank;
9028							my $last_tok_is_blank;
9029							my $in_prototype_or_signature;
9030							my $saw_alpha;
9031							my $saw_type;
9032							my $allow_tick;
9033
9034							sub initialize_my_scan_id_vars {
9035
9036							# Initialize all 'my' vars on entry
9037	551			551	0	892	$i_begin = $i;
9038	551					834	$type = EMPTY_STRING;
9039	551					908	$tok_begin = $rtokens->[$i_begin];
9040	551					754	$tok = $tok_begin;
9041	551	50				1352	if ( $tok_begin eq ':' ) { $tok_begin = '::' }
	0					0
9042	551					777	$id_scan_state_begin = $id_scan_state;
9043	551					761	$identifier_begin = $identifier;
9044	551					746	$i_save = undef;
9045
9046	551					784	$message = EMPTY_STRING;
9047	551					706	$tok_is_blank = undef; # a flag to speed things up
9048	551					660	$last_tok_is_blank = undef;
9049
9050	551		100			1570	$in_prototype_or_signature =
9051							$container_type && $container_type =~ /^sub\b/;
9052
9053							# these flags will be used to help figure out the type:
9054	551					711	$saw_alpha = undef;
9055	551					763	$saw_type = undef;
9056
9057							# allow old package separator (') except in 'use' statement
9058	551					890	$allow_tick = ( $last_nonblank_token ne 'use' );
9059	551					810	return;
9060							} ## end sub initialize_my_scan_id_vars
9061
9062							#----------------------------------
9063							# Routines for handling scan states
9064							#----------------------------------
9065							sub do_id_scan_state_dollar {
9066
9067	609			609	0	824	my $self = shift;
9068
9069							# We saw a sigil, now looking to start a variable name
9070	609	100	66			3855	if ( $tok eq '$' ) {
		100	33
		100
		50
		50
		100
		100
		100
		100
9071
9072	59					126	$identifier .= $tok;
9073
9074							# we've got a punctuation variable if end of line (punct.t)
9075	59	50				170	if ( $i == $max_token_index ) {
9076	0					0	$type = 'i';
9077	0					0	$id_scan_state = EMPTY_STRING;
9078							}
9079							}
9080							elsif ( $tok =~ /^\w/ ) { # alphanumeric ..
9081	310					432	$saw_alpha = 1;
9082	310					451	$identifier .= $tok;
9083
9084							# now need :: except for special digit vars like '$1' (c208)
9085	310	100				890	$id_scan_state = $tok =~ /^\d/ ? EMPTY_STRING : $scan_state_COLON;
9086							}
9087							elsif ( $tok eq '::' ) {
9088	16					42	$id_scan_state = $scan_state_ALPHA;
9089	16					37	$identifier .= $tok;
9090							}
9091
9092							# POSTDEFREF ->@ ->% ->& ->*
9093							elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
9094	0					0	$identifier .= $tok;
9095							}
9096							elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
9097	0					0	$saw_alpha = 1;
9098	0					0	$id_scan_state = $scan_state_COLON; # now need ::
9099	0					0	$identifier .= $tok;
9100
9101							# Perl will accept leading digits in identifiers,
9102							# although they may not always produce useful results.
9103							# Something like $main::0 is ok. But this also works:
9104							#
9105							# sub howdy::123::bubba{ print "bubba $54321!\n" }
9106							# howdy::123::bubba();
9107							#
9108							}
9109							elsif ( $tok eq '#' ) {
9110
9111	100					177	my $is_punct_var = $identifier eq '$$';
9112
9113							# side comment or identifier?
9114	100	100	66			946	if (
			66
			66
			33
9115
9116							# A '#' starts a comment if it follows a space. For example,
9117							# the following is equivalent to $ans=40.
9118							# my $ #
9119							# ans = 40;
9120							!$last_tok_is_blank
9121
9122							# a # inside a prototype or signature can only start a
9123							# comment
9124							&& !$in_prototype_or_signature
9125
9126							# these are valid punctuation vars: *# %# @# $#
9127							# May also be '$#array' or POSTDEFREF ->$#
9128							&& ( $identifier =~ /^[\%\@\$\*]$/
9129							\|\| $identifier =~ /\$$/ )
9130
9131							# but a '#' after '$$' is a side comment; see c147
9132							&& !$is_punct_var
9133
9134							)
9135							{
9136	96					228	$identifier .= $tok; # keep same state, a $ could follow
9137							}
9138							else {
9139
9140							# otherwise it is a side comment
9141	4	50				16	if ( $identifier eq '->' ) { }
		50
		50
9142	0					0	elsif ($is_punct_var) { $type = 'i' }
9143	4					7	elsif ( $id_scan_state eq $scan_state_SIGIL ) { $type = 't' }
9144	0					0	else { $type = 'i' }
9145	4					6	$i = $i_save;
9146	4					6	$id_scan_state = EMPTY_STRING;
9147							}
9148							}
9149
9150							elsif ( $tok eq '{' ) {
9151
9152							# check for something like ${#} or ${?}, where ? is a special char
9153	47	100	100			520	if (
			66
			100
			100
9154							(
9155							$identifier eq '$'
9156							\|\| $identifier eq '@'
9157							\|\| $identifier eq '$#'
9158							)
9159							&& $i + 2 <= $max_token_index
9160							&& $rtokens->[ $i + 2 ] eq '}'
9161							&& $rtokens->[ $i + 1 ] !~ /[\s\w]/
9162							)
9163							{
9164	1					3	my $next2 = $rtokens->[ $i + 2 ];
9165	1					2	my $next1 = $rtokens->[ $i + 1 ];
9166	1					4	$identifier .= $tok . $next1 . $next2;
9167	1					1	$i += 2;
9168	1					2	$id_scan_state = EMPTY_STRING;
9169							}
9170							else {
9171
9172							# skip something like ${xxx} or ->{
9173	46					74	$id_scan_state = EMPTY_STRING;
9174
9175							# if this is the first token of a line, any tokens for this
9176							# identifier have already been accumulated
9177	46	100	66			165	if ( $identifier eq '$' \|\| $i == 0 ) {
9178	31					85	$identifier = EMPTY_STRING;
9179							}
9180	46					77	$i = $i_save;
9181							}
9182							}
9183
9184							# space ok after leading $ % * & @
9185							elsif ( $tok =~ /^\s*$/ ) {
9186
9187	20					40	$tok_is_blank = 1;
9188
9189							# note: an id with a leading '&' does not actually come this way
9190	20	50				72	if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
		0
9191
9192	20	100				53	if ( length($identifier) > 1 ) {
9193	8					17	$id_scan_state = EMPTY_STRING;
9194	8					13	$i = $i_save;
9195	8					19	$type = 'i'; # probably punctuation variable
9196							}
9197							else {
9198
9199							# fix c139: trim line-ending type 't'
9200	12	100				53	if ( $i == $max_token_index ) {
		100
9201	1					2	$i = $i_save;
9202	1					2	$type = 't';
9203							}
9204
9205							# spaces after $'s are common, and space after @
9206							# is harmless, so only complain about space
9207							# after other type characters. Space after $ and
9208							# @ will be removed in formatting. Report space
9209							# after % and * because they might indicate a
9210							# parsing error. In other words '% ' might be a
9211							# modulo operator. Delete this warning if it
9212							# gets annoying.
9213							elsif ( $identifier !~ /^[\@\$]$/ ) {
9214	1					2	$message =
9215							"Space in identifier, following $identifier\n";
9216							}
9217							else {
9218							# silently accept space after '$' and '@' sigils
9219							}
9220							}
9221							}
9222
9223							elsif ( $identifier eq '->' ) {
9224
9225							# space after '->' is ok except at line end ..
9226							# so trim line-ending in type '->' (fixes c139)
9227	0	0				0	if ( $i == $max_token_index ) {
9228	0					0	$i = $i_save;
9229	0					0	$type = '->';
9230							}
9231							}
9232
9233							# stop at space after something other than -> or sigil
9234							# Example of what can arrive here:
9235							# eval { $MyClass->$$ };
9236							else {
9237	0					0	$id_scan_state = EMPTY_STRING;
9238	0					0	$i = $i_save;
9239	0					0	$type = 'i';
9240							}
9241							}
9242							elsif ( $tok eq '^' ) {
9243
9244							# check for some special variables like $^ $^W
9245	11	50				36	if ( $identifier =~ /^[\$\*\@\%]$/ ) {
9246	11					26	$identifier .= $tok;
9247	11					16	$type = 'i';
9248
9249							# There may be one more character, not a space, after the ^
9250	11					17	my $next1 = $rtokens->[ $i + 1 ];
9251	11					22	my $chr = substr( $next1, 0, 1 );
9252	11	100				31	if ( $is_special_variable_char{$chr} ) {
9253
9254							# It is something like $^W
9255							# Test case (c066) : $^Oeq'linux'
9256	9					13	$i++;
9257	9					15	$identifier .= $next1;
9258
9259							# If pretoken $next1 is more than one character long,
9260							# set a flag indicating that it needs to be split.
9261	9	100				24	$id_scan_state =
9262							( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING;
9263							}
9264							else {
9265
9266							# it is just $^
9267							# Simple test case (c065): '$aa=$^if($bb)';
9268	2					3	$id_scan_state = EMPTY_STRING;
9269							}
9270							}
9271							else {
9272	0					0	$id_scan_state = EMPTY_STRING;
9273	0					0	$i = $i_save;
9274							}
9275							}
9276							else { # something else
9277
9278	46	100	66			376	if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) {
		100	66
		100
		50
		0
		0
9279
9280							# We might be in an extrusion of
9281							# sub foo2 ( $first, $, $third ) {
9282							# looking at a line starting with a comma, like
9283							# $
9284							# ,
9285							# in this case the comma ends the signature variable
9286							# '$' which will have been previously marked type 't'
9287							# rather than 'i'.
9288	3	100				8	if ( $i == $i_begin ) {
9289	1					3	$identifier = EMPTY_STRING;
9290	1					1	$type = EMPTY_STRING;
9291							}
9292
9293							# at a # we have to mark as type 't' because more may
9294							# follow, otherwise, in a signature we can let '$' be an
9295							# identifier here for better formatting.
9296							# See 'mangle4.in' for a test case.
9297							else {
9298	2					3	$type = 'i';
9299	2	50	33			10	if ( $id_scan_state eq $scan_state_SIGIL && $tok eq '#' ) {
9300	0					0	$type = 't';
9301							}
9302	2					2	$i = $i_save;
9303							}
9304	3					6	$id_scan_state = EMPTY_STRING;
9305							}
9306
9307							# check for various punctuation variables
9308							elsif ( $identifier =~ /^[\$\*\@\%]$/ ) {
9309	35					79	$identifier .= $tok;
9310							}
9311
9312							# POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
9313							elsif ($tok eq '*'
9314							&& $identifier =~ /\-\>([\@\%\$\*\&]\|\$\#)$/ )
9315							{
9316	6					9	$identifier .= $tok;
9317							}
9318
9319							elsif ( $identifier eq '$#' ) {
9320
9321	2	50				9	if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
	0	50				0
	0					0
9322
9323							# perl seems to allow just these: $#: $#- $#+
9324							elsif ( $tok =~ /^[\:\-\+]$/ ) {
9325	0					0	$type = 'i';
9326	0					0	$identifier .= $tok;
9327							}
9328							else {
9329	2					4	$i = $i_save;
9330	2					6	$self->write_logfile_entry(
9331							'Use of $# is deprecated' . "\n" );
9332							}
9333							}
9334							elsif ( $identifier eq '$$' ) {
9335
9336							# perl does not allow references to punctuation
9337							# variables without braces. For example, this
9338							# won't work:
9339							# $:=\4;
9340							# $a = $$:;
9341							# You would have to use
9342							# $a = ${$:};
9343
9344							# '$$' alone is punctuation variable for PID
9345	0					0	$i = $i_save;
9346	0	0				0	if ( $tok eq '{' ) { $type = 't' }
	0					0
9347	0					0	else { $type = 'i' }
9348							}
9349							elsif ( $identifier eq '->' ) {
9350	0					0	$i = $i_save;
9351							}
9352							else {
9353	0					0	$i = $i_save;
9354	0	0				0	if ( length($identifier) == 1 ) {
9355	0					0	$identifier = EMPTY_STRING;
9356							}
9357							}
9358	46					76	$id_scan_state = EMPTY_STRING;
9359							}
9360	609					897	return;
9361							} ## end sub do_id_scan_state_dollar
9362
9363							sub do_id_scan_state_alpha {
9364
9365	119			119	0	182	my $self = shift;
9366
9367							# looking for alphanumeric after ::
9368	119					293	$tok_is_blank = $tok =~ /^\s*$/;
9369
9370	119	100	33			369	if ( $tok =~ /^\w/ ) { # found it
		50	66
		50	33
		50
9371	106					161	$identifier .= $tok;
9372	106					173	$id_scan_state = $scan_state_COLON; # now need ::
9373	106					177	$saw_alpha = 1;
9374							}
9375							elsif ( $tok eq "'" && $allow_tick ) {
9376	0					0	$identifier .= $tok;
9377	0					0	$id_scan_state = $scan_state_COLON; # now need ::
9378	0					0	$saw_alpha = 1;
9379							}
9380							elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
9381	0					0	$id_scan_state = $scan_state_LPAREN;
9382	0					0	$identifier .= $tok;
9383							}
9384							elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
9385	0					0	$id_scan_state = $scan_state_RPAREN;
9386	0					0	$identifier .= $tok;
9387							}
9388							else {
9389	13					21	$id_scan_state = EMPTY_STRING;
9390	13					14	$i = $i_save;
9391							}
9392	119					176	return;
9393							} ## end sub do_id_scan_state_alpha
9394
9395							sub do_id_scan_state_colon {
9396
9397	470			470	0	659	my $self = shift;
9398
9399							# looking for possible :: after alphanumeric
9400
9401	470					1402	$tok_is_blank = $tok =~ /^\s*$/;
9402
9403	470	100	66			3078	if ( $tok eq '::' ) { # got it
		100	66
		100	66
		50
		50
9404	103					167	$identifier .= $tok;
9405	103					142	$id_scan_state = $scan_state_ALPHA; # now require alpha
9406							}
9407							elsif ( $tok =~ /^\w/ ) { # more alphanumeric is ok here
9408	20					39	$identifier .= $tok;
9409	20					33	$id_scan_state = $scan_state_COLON; # now need ::
9410	20					39	$saw_alpha = 1;
9411							}
9412							elsif ( $tok eq "'" && $allow_tick ) { # tick
9413
9414	12	50				35	if ( $is_keyword{$identifier} ) {
9415	0					0	$id_scan_state = EMPTY_STRING; # that's all
9416	0					0	$i = $i_save;
9417							}
9418							else {
9419	12					47	$identifier .= $tok;
9420							}
9421							}
9422							elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
9423	0					0	$id_scan_state = $scan_state_LPAREN;
9424	0					0	$identifier .= $tok;
9425							}
9426							elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
9427	0					0	$id_scan_state = $scan_state_RPAREN;
9428	0					0	$identifier .= $tok;
9429							}
9430							else {
9431	335					488	$id_scan_state = EMPTY_STRING; # that's all
9432	335					469	$i = $i_save;
9433							}
9434	470					659	return;
9435							} ## end sub do_id_scan_state_colon
9436
9437							sub do_id_scan_state_left_paren {
9438
9439	0			0	0	0	my $self = shift;
9440
9441							# looking for possible '(' of a prototype
9442
9443	0	0				0	if ( $tok eq '(' ) { # got it
		0
9444	0					0	$identifier .= $tok;
9445	0					0	$id_scan_state = $scan_state_RPAREN; # now find the end of it
9446							}
9447							elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
9448	0					0	$identifier .= $tok;
9449	0					0	$tok_is_blank = 1;
9450							}
9451							else {
9452	0					0	$id_scan_state = EMPTY_STRING; # that's all - no prototype
9453	0					0	$i = $i_save;
9454							}
9455	0					0	return;
9456							} ## end sub do_id_scan_state_left_paren
9457
9458							sub do_id_scan_state_right_paren {
9459
9460	0			0	0	0	my $self = shift;
9461
9462							# looking for a ')' of prototype to close a '('
9463
9464	0					0	$tok_is_blank = $tok =~ /^\s*$/;
9465
9466	0	0				0	if ( $tok eq ')' ) { # got it
		0
9467	0					0	$identifier .= $tok;
9468	0					0	$id_scan_state = EMPTY_STRING; # all done
9469							}
9470							elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
9471	0					0	$identifier .= $tok;
9472							}
9473							else { # probable error in script, but keep going
9474	0					0	$self->warning(
9475							"Unexpected '$tok' while seeking end of prototype\n");
9476	0					0	$identifier .= $tok;
9477							}
9478	0					0	return;
9479							} ## end sub do_id_scan_state_right_paren
9480
9481							sub do_id_scan_state_ampersand {
9482
9483	104			104	0	142	my $self = shift;
9484
9485							# Starting sub call after seeing an '&'
9486	104	100	33			620	if ( $tok =~ /^[\$\w]/ ) { # alphanumeric ..
		50
		100
		50
		50
		0
9487	87					152	$id_scan_state = $scan_state_COLON; # now need ::
9488	87					119	$saw_alpha = 1;
9489	87					131	$identifier .= $tok;
9490							}
9491							elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
9492	0					0	$id_scan_state = $scan_state_COLON; # now need ::
9493	0					0	$saw_alpha = 1;
9494	0					0	$identifier .= $tok;
9495							}
9496							elsif ( $tok =~ /^\s*$/ ) { # allow space
9497	2					3	$tok_is_blank = 1;
9498
9499							# fix c139: trim line-ending type 't'
9500	2	50	33			9	if ( length($identifier) == 1 && $i == $max_token_index ) {
9501	2					4	$i = $i_save;
9502	2					3	$type = 't';
9503							}
9504							}
9505							elsif ( $tok eq '::' ) { # leading ::
9506	0					0	$id_scan_state = $scan_state_ALPHA; # accept alpha next
9507	0					0	$identifier .= $tok;
9508							}
9509							elsif ( $tok eq '{' ) {
9510	15	50	33			72	if ( $identifier eq '&' \|\| $i == 0 ) {
9511	15					27	$identifier = EMPTY_STRING;
9512							}
9513	15					25	$i = $i_save;
9514	15					25	$id_scan_state = EMPTY_STRING;
9515							}
9516							elsif ( $tok eq '^' ) {
9517	0	0				0	if ( $identifier eq '&' ) {
9518
9519							# Special variable (c066)
9520	0					0	$identifier .= $tok;
9521	0					0	$type = 'i';
9522
9523							# To be a special $^ variable, there may be one more character,
9524							# not a space, after the ^
9525	0					0	my $next1 = $rtokens->[ $i + 1 ];
9526	0					0	my $chr = substr( $next1, 0, 1 );
9527	0	0				0	if ( $is_special_variable_char{$chr} ) {
9528
9529							# It is something like &^O
9530	0					0	$i++;
9531	0					0	$identifier .= $next1;
9532
9533							# If pretoken $next1 is more than one character long,
9534							# set a flag indicating that it needs to be split.
9535	0	0				0	$id_scan_state =
9536							( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING;
9537							}
9538							else {
9539
9540							# It is &^. This is parsed by perl as a call to sub '^',
9541							# even though it would be difficult to create a sub '^'.
9542							# So we mark it as an identifier (c068).
9543	0					0	$id_scan_state = EMPTY_STRING;
9544							}
9545							}
9546							else {
9547	0					0	$identifier = EMPTY_STRING;
9548	0					0	$i = $i_save;
9549							}
9550							}
9551							else {
9552
9553							# punctuation variable?
9554							# testfile: cunningham4.pl
9555							#
9556							# We have to be careful here. If we are in an unknown state,
9557							# we will reject the punctuation variable. In the following
9558							# example the '&' is a binary operator but we are in an unknown
9559							# state because there is no sigil on 'Prima', so we don't
9560							# know what it is. But it is a bad guess that
9561							# '&~' is a function variable.
9562							# $self->{text}->{colorMap}->[
9563							# Prima::PodView::COLOR_CODE_FOREGROUND
9564							# & ~tb::COLOR_INDEX ] =
9565							# $sec->{ColorCode}
9566
9567							# Fix for case c033: a '#' here starts a side comment
9568	0	0	0			0	if ( $identifier eq '&' && $expecting && $tok ne '#' ) {
			0
9569	0					0	$identifier .= $tok;
9570							}
9571							else {
9572	0					0	$identifier = EMPTY_STRING;
9573	0					0	$i = $i_save;
9574	0					0	$type = '&';
9575							}
9576	0					0	$id_scan_state = EMPTY_STRING;
9577							}
9578	104					150	return;
9579							} ## end sub do_id_scan_state_ampersand
9580
9581							#-------------------
9582							# hash of scanner subs
9583							#-------------------
9584							my $scan_identifier_code = {
9585							$scan_state_SIGIL => \&do_id_scan_state_dollar,
9586							$scan_state_ALPHA => \&do_id_scan_state_alpha,
9587							$scan_state_COLON => \&do_id_scan_state_colon,
9588							$scan_state_LPAREN => \&do_id_scan_state_left_paren,
9589							$scan_state_RPAREN => \&do_id_scan_state_right_paren,
9590							$scan_state_AMPERSAND => \&do_id_scan_state_ampersand,
9591							};
9592
9593							sub scan_complex_identifier {
9594
9595							(
9596	551			551	0	1673	my $self,
9597
9598							$i,
9599							$id_scan_state,
9600							$identifier,
9601							$rtokens,
9602							$max_token_index,
9603							$expecting,
9604							$container_type,
9605
9606							) = @_;
9607
9608							# This routine assembles tokens into identifiers. It maintains a
9609							# scan state, id_scan_state. It updates id_scan_state based upon
9610							# current id_scan_state and token, and returns an updated
9611							# id_scan_state and the next index after the identifier.
9612
9613							# This routine now serves a backup for sub scan_simple_identifier
9614							# which handles most identifiers.
9615
9616							# Note that $self must be a 'my' variable and not be a closure
9617							# variables like the other args. Otherwise it will not get
9618							# deleted by a DESTROY call at the end of a file. Then an
9619							# attempt to create multiple tokenizers can occur when multiple
9620							# files are processed, causing an error.
9621
9622							# return flag telling caller to split the pretoken
9623	551					2570	my $split_pretoken_flag;
9624
9625							#-------------------
9626							# Initialize my vars
9627							#-------------------
9628
9629	551					1601	initialize_my_scan_id_vars();
9630
9631							#--------------------------------------------------------
9632							# get started by defining a type and a state if necessary
9633							#--------------------------------------------------------
9634
9635	551	100				1156	if ( !$id_scan_state ) {
9636	544					722	$context = UNKNOWN_CONTEXT;
9637
9638							# fixup for digraph
9639	544	50				1112	if ( $tok eq '>' ) {
9640	0					0	$tok = '->';
9641	0					0	$tok_begin = $tok;
9642							}
9643	544					796	$identifier = $tok;
9644
9645	544	100	100			2580	if ( $last_nonblank_token eq '->' ) {
		100	100
		100	0
		50
		0
		0
		0
		0
9646	8					13	$identifier = '->' . $identifier;
9647	8					13	$id_scan_state = $scan_state_SIGIL;
9648							}
9649							elsif ( $tok eq '$' \|\| $tok eq '*' ) {
9650	355					645	$id_scan_state = $scan_state_SIGIL;
9651	355					541	$context = SCALAR_CONTEXT;
9652							}
9653							elsif ( $tok eq '%' \|\| $tok eq '@' ) {
9654	79					124	$id_scan_state = $scan_state_SIGIL;
9655	79					129	$context = LIST_CONTEXT;
9656							}
9657							elsif ( $tok eq '&' ) {
9658	102					166	$id_scan_state = $scan_state_AMPERSAND;
9659							}
9660							elsif ( $tok eq 'sub' or $tok eq 'package' ) {
9661	0					0	$saw_alpha = 0; # 'sub' is considered type info here
9662	0					0	$id_scan_state = $scan_state_SIGIL;
9663	0					0	$identifier .=
9664							SPACE; # need a space to separate sub from sub name
9665							}
9666							elsif ( $tok eq '::' ) {
9667	0					0	$id_scan_state = $scan_state_ALPHA;
9668							}
9669							elsif ( $tok =~ /^\w/ ) {
9670	0					0	$id_scan_state = $scan_state_COLON;
9671	0					0	$saw_alpha = 1;
9672							}
9673							elsif ( $tok eq '->' ) {
9674	0					0	$id_scan_state = $scan_state_SIGIL;
9675							}
9676							else {
9677
9678							# shouldn't happen: bad call parameter
9679	0					0	my $msg =
9680							"Program bug detected: scan_complex_identifier received bad starting token = '$tok'\n";
9681	0					0	if (DEVEL_MODE) { Fault($msg) }
9682	0	0				0	if ( !$self->[_in_error_] ) {
9683	0					0	$self->warning($msg);
9684	0					0	$self->[_in_error_] = 1;
9685							}
9686	0					0	$id_scan_state = EMPTY_STRING;
9687
9688							# emergency return
9689	0					0	goto RETURN;
9690							}
9691	544					818	$saw_type = !$saw_alpha;
9692							}
9693							else {
9694	7					18	$i--;
9695	7					19	$saw_alpha = ( $tok =~ /^\w/ );
9696	7					19	$saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
9697
9698							# check for a valid starting state
9699	7					10	if ( DEVEL_MODE && !$is_returnable_scan_state{$id_scan_state} ) {
9700							Fault(<<EOM);
9701							Unexpected starting scan state in sub scan_complex_identifier: '$id_scan_state'
9702							EOM
9703							}
9704							}
9705
9706							#------------------------------
9707							# loop to gather the identifier
9708							#------------------------------
9709
9710	551					794	$i_save = $i;
9711
9712	551		100			2043	while ( $i < $max_token_index && $id_scan_state ) {
9713
9714							# Be sure we have code to handle this state before we proceed
9715	1305					2182	my $code = $scan_identifier_code->{$id_scan_state};
9716	1305	100				2232	if ( !$code ) {
9717
9718	3	50				7	if ( $id_scan_state eq $scan_state_SPLIT ) {
9719							## OK: this is the signal to exit and split the pretoken
9720							}
9721
9722							# unknown state - should not happen
9723							else {
9724	0					0	if (DEVEL_MODE) {
9725							Fault(<<EOM);
9726							Unknown scan state in sub scan_complex_identifier: '$id_scan_state'
9727							Scan state at sub entry was '$id_scan_state_begin'
9728							EOM
9729							}
9730	0					0	$id_scan_state = EMPTY_STRING;
9731	0					0	$i = $i_save;
9732							}
9733	3					5	last;
9734							}
9735
9736							# Remember the starting index for progress check below
9737	1302					1452	my $i_start_loop = $i;
9738
9739	1302					1573	$last_tok_is_blank = $tok_is_blank;
9740	1302	100				1905	if ($tok_is_blank) { $tok_is_blank = undef }
	11					20
9741	1291					1467	else { $i_save = $i }
9742
9743	1302					1829	$tok = $rtokens->[ ++$i ];
9744
9745							# patch to make digraph :: if necessary
9746	1302	100	100			2670	if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
9747	119					161	$tok = '::';
9748	119					177	$i++;
9749							}
9750
9751	1302					2678	$code->($self);
9752
9753							# check for forward progress: a decrease in the index $i
9754							# implies that scanning has finished
9755	1302	100				3467	last if ( $i <= $i_start_loop );
9756
9757							} ## end while ( $i < $max_token_index...)
9758
9759							#-------------
9760							# Check result
9761							#-------------
9762
9763							# Be sure a valid state is returned
9764	551	100				1102	if ($id_scan_state) {
9765
9766	24	100				112	if ( !$is_returnable_scan_state{$id_scan_state} ) {
9767
9768	17	100				54	if ( $id_scan_state eq $scan_state_SPLIT ) {
9769	3					5	$split_pretoken_flag = 1;
9770							}
9771
9772	17	50				53	if ( $id_scan_state eq $scan_state_RPAREN ) {
9773	0					0	$self->warning(
9774							"Hit end of line while seeking ) to end prototype\n");
9775							}
9776
9777	17					33	$id_scan_state = EMPTY_STRING;
9778							}
9779
9780							# Patch: the deprecated variable $# does not combine with anything
9781							# on the next line.
9782	24	50				65	if ( $identifier eq '$#' ) { $id_scan_state = EMPTY_STRING }
	0					0
9783							}
9784
9785							# Be sure the token index is valid
9786	551	50				1206	if ( $i < 0 ) { $i = 0 }
	0					0
9787
9788							# Be sure a token type is defined
9789	551	100				1182	if ( !$type ) {
9790
9791	523	100				1015	if ($saw_type) {
		100
9792
9793	517	100	66			2035	if ($saw_alpha) {
		50	100
		100	66
			66
9794
9795							# The type without the -> should be the same as with the -> so
9796							# that if they get separated we get the same bond strengths,
9797							# etc. See b1234
9798	404	50	66			1372	if ( $identifier =~ /^->/
			33
9799							&& $last_nonblank_type eq 'w'
9800							&& substr( $identifier, 2, 1 ) =~ /^\w/ )
9801							{
9802	0					0	$type = 'w';
9803							}
9804	404					776	else { $type = 'i' }
9805							}
9806							elsif ( $identifier eq '->' ) {
9807	0					0	$type = '->';
9808							}
9809							elsif (
9810							( length($identifier) > 1 )
9811
9812							# In something like '@$=' we have an identifier '@$'
9813							# In something like '$${' we have type '$$' (and only
9814							# part of an identifier)
9815							&& !( $identifier =~ /\$$/ && $tok eq '{' )
9816							&& $identifier ne 'sub '
9817							&& $identifier ne 'package '
9818							)
9819							{
9820	53					105	$type = 'i';
9821							}
9822	60					114	else { $type = 't' }
9823							}
9824							elsif ($saw_alpha) {
9825
9826							# type 'w' includes anything without leading type info
9827							# ($,%,@,*) including something like abc::def::ghi
9828	5					8	$type = 'w';
9829
9830							# Fix for b1337, if restarting scan after line break between
9831							# '->' or sigil and identifier name, use type 'i'
9832	5	50	33			25	if ( $id_scan_state_begin
9833							&& $identifier =~ /^([\$\%\@\*\&]\|->)/ )
9834							{
9835	5					6	$type = 'i';
9836							}
9837							}
9838							else {
9839	1					2	$type = EMPTY_STRING;
9840							} # this can happen on a restart
9841							}
9842
9843							# See if we formed an identifier...
9844	551	100				1089	if ($identifier) {
9845	504					744	$tok = $identifier;
9846	504	100				1101	if ($message) { $self->write_logfile_entry($message) }
	1					6
9847							}
9848
9849							# did not find an identifier, back up
9850							else {
9851	47					78	$tok = $tok_begin;
9852	47					76	$i = $i_begin;
9853							}
9854
9855							RETURN:
9856
9857	551					665	DEBUG_SCAN_ID && do {
9858							my ( $a, $b, $c ) = caller();
9859							print {*STDOUT}
9860							"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
9861							print {*STDOUT}
9862							"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
9863							};
9864
9865							return (
9866
9867	551					2324	$i,
9868							$tok,
9869							$type,
9870							$id_scan_state,
9871							$identifier,
9872							$split_pretoken_flag,
9873							);
9874							} ## end sub scan_complex_identifier
9875							} ## end closure for sub scan_complex_identifier
9876
9877							{ ## closure for sub do_scan_sub
9878
9879							# saved package and subnames in case prototype is on separate line
9880							my ( $package_saved, $subname_saved );
9881
9882							# initialize subname each time a new 'sub' keyword is encountered
9883							sub initialize_subname {
9884	345			345	0	613	$package_saved = EMPTY_STRING;
9885	345					529	$subname_saved = EMPTY_STRING;
9886	345					520	return;
9887							}
9888
9889							use constant {
9890	44					98728	SUB_CALL => 1,
9891							PAREN_CALL => 2,
9892							PROTOTYPE_CALL => 3,
9893	44			44		349	};
	44					75
9894
9895							sub do_scan_sub {
9896
9897	351			351	0	723	my ( $self, $rcall_hash ) = @_;
9898
9899							# Parse a sub name and prototype.
9900
9901	351					744	my $input_line = $rcall_hash->{input_line};
9902	351					615	my $i = $rcall_hash->{i};
9903	351					589	my $i_beg = $rcall_hash->{i_beg};
9904	351					615	my $tok = $rcall_hash->{tok};
9905	351					587	my $type = $rcall_hash->{type};
9906	351					622	my $rtokens = $rcall_hash->{rtokens};
9907	351					561	my $rtoken_map = $rcall_hash->{rtoken_map};
9908	351					567	my $id_scan_state = $rcall_hash->{id_scan_state};
9909	351					537	my $max_token_index = $rcall_hash->{max_token_index};
9910
9911	351	100				772	my $id_prefix = $rcall_hash->{is_lexical_method} ? '$' : EMPTY_STRING;
9912
9913							# At present there are three basic CALL TYPES which are
9914							# distinguished by the starting value of '$tok':
9915							# 1. $tok='sub', id_scan_state='sub'
9916							# it is called with $i_beg equal to the index of the first nonblank
9917							# token following a 'sub' token.
9918							# 2. $tok='(', id_scan_state='sub',
9919							# it is called with $i_beg equal to the index of a '(' which may
9920							# start a prototype.
9921							# 3. $tok='prototype', id_scan_state='prototype'
9922							# it is called with $i_beg equal to the index of a '(' which is
9923							# preceded by ': prototype' and has $id_scan_state eq 'prototype'
9924
9925							# Examples:
9926
9927							# A single type 1 call will get both the sub and prototype
9928							# sub foo1 ( $$ ) { }
9929							# ^
9930
9931							# The subname will be obtained with a 'sub' call
9932							# The prototype on line 2 will be obtained with a '(' call
9933							# sub foo1
9934							# ^ <---call type 1
9935							# ( $$ ) { }
9936							# ^ <---call type 2
9937
9938							# The subname will be obtained with a 'sub' call
9939							# The prototype will be obtained with a 'prototype' call
9940							# sub foo1 ( $x, $y ) : prototype ( $$ ) { }
9941							# ^ <---type 1 ^ <---type 3
9942
9943							# TODO: add future error checks to be sure we have a valid
9944							# sub name. For example, 'sub &doit' is wrong. Also, be sure
9945							# a name is given if and only if a non-anonymous sub is
9946							# appropriate.
9947							# USES GLOBAL VARS: $current_package, $last_nonblank_token,
9948							# $rsaw_function_definition,
9949							# $statement_type
9950
9951	351					500	my $i_entry = $i;
9952
9953							# Determine the CALL TYPE
9954							# 1=sub
9955							# 2=(
9956							# 3=prototype
9957	351	100				1065	my $call_type =
		100
9958							$tok eq 'prototype' ? PROTOTYPE_CALL
9959							: $tok eq '(' ? PAREN_CALL
9960							: SUB_CALL;
9961
9962	351					576	$id_scan_state = EMPTY_STRING; # normally we get everything in one call
9963	351					520	my $subname = $subname_saved;
9964	351					562	my $package = $package_saved;
9965	351					556	my $proto = undef;
9966	351					486	my $attrs = undef;
9967	351					463	my $match;
9968
9969	351					560	my $pos_beg = $rtoken_map->[$i_beg];
9970	351					1125	pos($input_line) = $pos_beg;
9971
9972							# Look for the sub NAME if this is a SUB call
9973	351	100	100			2695	if (
9974							$call_type == SUB_CALL
9975							&& $input_line =~ m{\G\s*
9976							((?:\w(?:'\|::))) # package - something that ends in :: or '
9977							(\w+) # NAME - required
9978							}gcx
9979							)
9980							{
9981	158					283	$match = 1;
9982	158					339	$subname = $2;
9983							my $is_lexical_sub = $last_nonblank_type eq 'k'
9984	158		33			495	&& $is_my_our_state{$last_nonblank_token};
9985	158	0	33			1924	if ( $is_lexical_sub && $1 ) {
9986	0					0	$self->warning(
9987							"'$last_nonblank_token' sub $subname cannot be in package '$1'\n"
9988							);
9989	0					0	$is_lexical_sub = 0;
9990							}
9991
9992	158	50				383	if ($is_lexical_sub) {
9993
9994							# Lexical subs use the containing block sequence number as a
9995							# package name.
9996	0					0	my $seqno =
9997							$rcurrent_sequence_number->[BRACE]
9998							->[ $rcurrent_depth->[BRACE] ];
9999	0	0				0	$seqno = SEQ_ROOT if ( !defined($seqno) );
10000	0					0	$package = $seqno;
10001
10002							# The value will eventually be the sequence number of the
10003							# opening curly brace of the definition (if any). We use -1
10004							# until we find it.
10005	0					0	$ris_lexical_sub->{$subname}->{$package} = -1;
10006
10007							# Set a special signal to tell sub do_LEFT_CURLY_BRACKET to
10008							# update this value if the next opening sub block brace is for
10009							# this sub. The reason we need this value is to avoid applying
10010							# this new sub in its own definition block. Note that '911' is
10011							# not a possible sub name. Search for '911' for related code.
10012	0					0	$ris_lexical_sub->{911} = [ $subname, $package ];
10013
10014							# Complain if lexical sub name hides a quote operator
10015	0	0				0	if ( $is_q_qq_qw_qx_qr_s_y_tr_m{$subname} ) {
10016	0					0	$self->complain(
10017							"'my' sub '$subname' matches a builtin quote operator\n"
10018							);
10019							## OLD CODING, before improved handling of lexical subs:
10020							## This may end badly, it is safest to avoid formatting.
10021							## For an example, see perl527/lexsub.t (issue c203)
10022							## $self->[_do_not_format_] = 1;
10023							}
10024							}
10025							else {
10026	158	100	66			827	$package = ( defined($1) && $1 ) ? $1 : $current_package;
10027	158					390	$package =~ s/\'/::/g;
10028	158	50				440	if ( $package =~ /^\:/ ) { $package = 'main' . $package }
	0					0
10029	158					349	$package =~ s/::$//;
10030							}
10031
10032	158					280	my $pos = pos($input_line);
10033	158					271	my $numc = $pos - $pos_beg;
10034	158					393	$tok = 'sub ' . $id_prefix . substr( $input_line, $pos_beg, $numc );
10035	158					248	$type = 'S'; ## Fix for c250, was 'i';
10036
10037							# remember the sub name in case another call is needed to
10038							# get the prototype
10039	158					244	$package_saved = $package;
10040	158					296	$subname_saved = $subname;
10041							}
10042
10043							# Now look for PROTO ATTRS for all call types
10044							# Look for prototype/attributes which are usually on the same
10045							# line as the sub name but which might be on a separate line.
10046							# For example, we might have an anonymous sub with attributes,
10047							# or a prototype on a separate line from its sub name
10048
10049							# NOTE: We only want to parse PROTOTYPES here. If we see anything that
10050							# does not look like a prototype, we assume it is a SIGNATURE and we
10051							# will stop and let the standard tokenizer handle it. In
10052							# particular, we stop if we see any nested parens, braces, or commas.
10053							# Also note, a valid prototype cannot contain any alphabetic character
10054							# -- see https://perldoc.perl.org/perlsub
10055							# But it appears that an underscore is valid in a prototype, so the
10056							# regex below uses [A-Za-z] rather than \w
10057							# This is the old regex which has been replaced:
10058							# $input_line =~ m/\G(\s$[^$$\}\{\,#]$)? # PROTO
10059							# Added '=' for issue c362
10060	351					1020	my $saw_opening_paren = $input_line =~ /\G\s*\(/;
10061	351	100	100			3222	if (
			66
10062							$input_line =~ m{\G(\s$[^$$\}\{\,#A-Za-z=]$)? # PROTO
10063							(\s*:)? # ATTRS leading ':'
10064							}gcx
10065							&& ( $1 \|\| $2 )
10066							)
10067							{
10068	45					84	$proto = $1;
10069	45					77	$attrs = $2;
10070
10071							# Append the prototype to the starting token if it is 'sub' or
10072							# 'prototype'. This is not necessary but for compatibility with
10073							# previous versions when the -csc flag is used:
10074	45	100	100			245	if ( $proto && ( $match \|\| $call_type == PROTOTYPE_CALL ) ) {
		100	100
10075	24					39	$tok .= $proto;
10076							}
10077
10078							# If we just entered the sub at an opening paren on this call, not
10079							# a following :prototype, label it with the previous token. This is
10080							# necessary to propagate the sub name to its opening block.
10081							elsif ( $call_type == PAREN_CALL ) {
10082	2					4	$tok = $last_nonblank_token;
10083							}
10084							else {
10085							}
10086
10087	45		100			139	$match \|\|= 1;
10088
10089							# Patch part #1 to fixes cases b994 and b1053:
10090							# Mark an anonymous sub keyword without prototype as type 'k', i.e.
10091							# 'sub : lvalue { ...'
10092	45					67	$type = 'S'; ## C250, was 'i';
10093	45	100	100			229	if ( $tok eq 'sub' && !$proto ) { $type = 'k' }
	2					5
10094							}
10095
10096	351	100				770	if ($match) {
10097
10098							# ATTRS: if there are attributes, back up and let the ':' be
10099							# found later by the scanner.
10100	173					285	my $pos = pos($input_line);
10101	173	100				432	if ($attrs) {
10102	15					27	$pos -= length($attrs);
10103							}
10104
10105	173					335	my $next_nonblank_token = $tok;
10106
10107							# catch case of line with leading ATTR ':' after anonymous sub
10108	173	100	100			548	if ( $pos == $pos_beg && $tok eq ':' ) {
10109	1					2	$type = 'A';
10110	1					3	$self->[_in_attribute_list_] = 1;
10111							}
10112
10113							# Otherwise, if we found a match we must convert back from
10114							# string position to the pre_token index for continued parsing.
10115							else {
10116
10117							# I don't think an error flag can occur here ..but ?
10118	172					266	my $error;
10119	172					569	( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
10120							$max_token_index );
10121	172	50				438	if ($error) { $self->warning("Possibly invalid sub\n") }
	0					0
10122
10123							# Patch part #2 to fixes cases b994 and b1053:
10124							# Do not let spaces be part of the token of an anonymous sub
10125							# keyword which we marked as type 'k' above...i.e. for
10126							# something like:
10127							# 'sub : lvalue { ...'
10128							# Back up and let it be parsed as a blank
10129	172	50	66			600	if ( $type eq 'k'
			66
			33
10130							&& $attrs
10131							&& $i > $i_entry
10132							&& substr( $rtokens->[$i], 0, 1 ) =~ m/\s/ )
10133							{
10134	2					6	$i--;
10135							}
10136
10137							# check for multiple definitions of a sub
10138	172					370	( $next_nonblank_token, my $i_next_uu ) =
10139							find_next_nonblank_token_on_this_line( $i, $rtokens,
10140							$max_token_index );
10141							}
10142
10143	173	100				724	if ( $next_nonblank_token =~ /^(\s*\|#)$/ )
10144							{ # skip blank or side comment
10145	7					38	my ( $rpre_tokens, $rpre_types_uu ) =
10146							$self->peek_ahead_for_n_nonblank_pre_tokens(1);
10147	7	50	33			29	if ( defined($rpre_tokens) && @{$rpre_tokens} ) {
	7					27
10148	7					19	$next_nonblank_token = $rpre_tokens->[0];
10149							}
10150							else {
10151	0					0	$next_nonblank_token = '}';
10152							}
10153							}
10154
10155							# See what's next...
10156	173	100				700	if ( $next_nonblank_token eq '{' ) {
		100
		50
		100
		50
		0
10157	135	100				346	if ($subname) {
10158
10159							# Check for multiple definitions of a sub, but
10160							# it is ok to have multiple sub BEGIN, etc,
10161							# so we do not complain if name is all caps
10162	125	50	33			635	if ( $rsaw_function_definition->{$subname}->{$package}
10163							&& $subname !~ /^[A-Z]+$/ )
10164							{
10165							my $lno =
10166	0					0	$rsaw_function_definition->{$subname}->{$package};
10167	0	0				0	if ( $package =~ /^\d/ ) {
10168	0					0	$self->warning(
10169							"already saw definition of lexical 'sub $subname' at line $lno\n"
10170							);
10171
10172							}
10173							else {
10174	0					0	if ( !DEVEL_MODE ) {
10175	0					0	$self->warning(
10176							"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
10177							);
10178							}
10179							}
10180							}
10181	125					383	$rsaw_function_definition->{$subname}->{$package} =
10182							$self->[_last_line_number_];
10183							}
10184							}
10185							elsif ( $next_nonblank_token eq ';' ) {
10186							}
10187							elsif ( $next_nonblank_token eq '}' ) {
10188							}
10189
10190							# ATTRS - if an attribute list follows, remember the name
10191							# of the sub so the next opening brace can be labeled.
10192							# Setting 'statement_type' causes any ':'s to introduce
10193							# attributes.
10194							elsif ( $next_nonblank_token eq ':' ) {
10195	16	100				48	if ( $call_type == SUB_CALL ) {
10196	14	100				66	$statement_type =
10197							substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
10198							}
10199							}
10200
10201							# if we stopped before an open paren ...
10202							elsif ( $next_nonblank_token eq '(' ) {
10203
10204							# If we DID NOT see this paren above then it must be on the
10205							# next line so we will set a flag to come back here and see if
10206							# it is a PROTOTYPE
10207
10208							# Otherwise, we assume it is a SIGNATURE rather than a
10209							# PROTOTYPE and let the normal tokenizer handle it as a list
10210	21	100				91	if ( !$saw_opening_paren ) {
10211	4					8	$id_scan_state = 'sub'; # we must come back to get proto
10212							}
10213	21	50				64	if ( $call_type == SUB_CALL ) {
10214	21	50				73	$statement_type =
10215							substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
10216							}
10217							}
10218
10219							# something else..
10220							elsif ($next_nonblank_token) {
10221
10222	0	0	0			0	if ( $rcall_hash->{tok} eq 'method' && $call_type == SUB_CALL )
10223							{
10224							# For a method call, silently ignore this error (rt145706)
10225							# to avoid needless warnings. Example which can produce it:
10226							# test(method Pack (), "method");
10227
10228							# TODO: scan for use feature 'class' and:
10229							# - if we saw 'use feature 'class' then issue the warning.
10230							# - if we did not see use feature 'class' then issue the
10231							# warning and suggest turning off --use-feature=class
10232							}
10233							else {
10234	0	0				0	$subname = EMPTY_STRING unless ( defined($subname) );
10235	0					0	$self->warning(
10236							"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
10237							);
10238							}
10239							}
10240
10241							# EOF technically ok
10242							else {
10243							}
10244
10245	173					585	check_prototype( $proto, $package, $subname );
10246							}
10247
10248							# no match to either sub name or prototype, but line not blank
10249							else {
10250
10251							}
10252	351					1792	return ( $i, $tok, $type, $id_scan_state );
10253							} ## end sub do_scan_sub
10254							}
10255
10256							#########################################################################
10257							# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
10258							#########################################################################
10259
10260							sub find_next_nonblank_token {
10261	458			458	0	1183	my ( $self, $i, $rtokens, $max_token_index ) = @_;
10262
10263							# Returns the next nonblank token after the token at index $i
10264							# To skip past a side comment, and any subsequent block comments
10265							# and blank lines, call with i=$max_token_index
10266
10267							# Skip any ending blank (fix c258). It would be cleaner if caller passed
10268							# $rtoken_map, so we could check for type 'b', and avoid a regex test, but
10269							# benchmarking shows that this test does not take significant time. So
10270							# that would be a nice update but not essential. Also note that ending
10271							# blanks will not occur for text previously processed by perltidy.
10272	458	100	100			1832	if ( $i == $max_token_index - 1
10273							&& $rtokens->[$max_token_index] =~ /^\s+$/ )
10274							{
10275	9					22	$i++;
10276							}
10277
10278	458	100				1095	if ( $i >= $max_token_index ) {
10279	141	100				459	if ( !peeked_ahead() ) {
10280	139					380	peeked_ahead(1);
10281	139					540	$self->peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
10282							}
10283							}
10284
10285	458					910	my $next_nonblank_token = $rtokens->[ ++$i ];
10286
10287							# Any more tokens?
10288	458	50	33			1852	return ( SPACE, $i )
10289							if ( !defined($next_nonblank_token) \|\| !length($next_nonblank_token) );
10290
10291							# Skip over whitespace
10292	458					936	my $ord = ord( substr( $next_nonblank_token, 0, 1 ) );
10293	458	0	66			2523	if (
			33
			66
10294
10295							( $ord <= ORD_PRINTABLE_MIN \|\| $ord >= ORD_PRINTABLE_MAX )
10296
10297							# Quick test for ascii space or tab
10298							&& (
10299							( $ord == ORD_SPACE \|\| $ord == ORD_TAB )
10300
10301							# Slow test to for something else identified as whitespace
10302							\|\| $next_nonblank_token =~ /^\s+$/
10303							)
10304							)
10305							{
10306	301					563	$next_nonblank_token = $rtokens->[ ++$i ];
10307	301	50				747	return ( SPACE, $i ) unless ( defined($next_nonblank_token) );
10308							}
10309
10310							# We should be at a nonblank now
10311	458					1422	return ( $next_nonblank_token, $i );
10312
10313							} ## end sub find_next_nonblank_token
10314
10315							sub find_next_noncomment_token {
10316	104			104	0	258	my ( $self, $i, $rtokens, $max_token_index ) = @_;
10317
10318							# Given the current character position, look ahead past any comments
10319							# and blank lines and return the next token, including digraphs and
10320							# trigraphs.
10321
10322	104					379	my ( $next_nonblank_token, $i_next ) =
10323							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
10324
10325							# skip past any side comment
10326	104	50				318	if ( $next_nonblank_token eq '#' ) {
10327	0					0	( $next_nonblank_token, $i_next ) =
10328							$self->find_next_nonblank_token( $i_next, $rtokens,
10329							$max_token_index );
10330							}
10331
10332							# check for a digraph
10333	104	50	33			701	if ( $next_nonblank_token
			33
10334							&& $next_nonblank_token ne SPACE
10335							&& defined( $rtokens->[ $i_next + 1 ] ) )
10336							{
10337	104					237	my $test2 = $next_nonblank_token . $rtokens->[ $i_next + 1 ];
10338	104	100				320	if ( $is_digraph{$test2} ) {
10339	15					24	$next_nonblank_token = $test2;
10340	15					22	$i_next = $i_next + 1;
10341
10342							# check for a trigraph
10343	15	50				48	if ( defined( $rtokens->[ $i_next + 1 ] ) ) {
10344	15					32	my $test3 = $next_nonblank_token . $rtokens->[ $i_next + 1 ];
10345	15	50				46	if ( $is_trigraph{$test3} ) {
10346	0					0	$next_nonblank_token = $test3;
10347	0					0	$i_next = $i_next + 1;
10348							}
10349							}
10350							}
10351							}
10352
10353	104					257	return ( $next_nonblank_token, $i_next );
10354							} ## end sub find_next_noncomment_token
10355
10356							sub is_possible_numerator {
10357
10358	0			0	0	0	my ( $self, $i, $rtokens, $max_token_index ) = @_;
10359
10360							# Look at the next non-comment character and decide if it could be a
10361							# numerator. Returns the following code:
10362							# -1 - division not possible
10363							# 0 - can't tell if division possible
10364							# 1 - division possible
10365							# 2 - division probable: number follows
10366							# 3 - division very probable: number and one of ; ] } follow
10367							# 4 - is division, not pattern: number and ) follow
10368
10369	0					0	my $divide_possible_code = 0;
10370
10371	0					0	my $next_token = $rtokens->[ $i + 1 ];
10372	0	0				0	if ( $next_token eq '=' ) { $i++; } # handle /=
	0					0
10373	0					0	my ( $next_nonblank_token, $i_next ) =
10374							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
10375
10376	0	0				0	if ( $next_nonblank_token eq '#' ) {
10377	0					0	( $next_nonblank_token, $i_next ) =
10378							$self->find_next_nonblank_token( $max_token_index, $rtokens,
10379							$max_token_index );
10380							}
10381
10382	0	0				0	if ( $next_nonblank_token =~ / [ \( \$ \w \. \@ ] /x ) {
		0
10383	0					0	$divide_possible_code = 1;
10384
10385							# look ahead one more token for some common patterns, such as
10386							# pi/2) pi/2; pi/2}
10387	0	0				0	if ( $next_nonblank_token =~ /^\d/ ) {
10388	0					0	my ( $next_next_nonblank_token, $i_next_next_uu ) =
10389							$self->find_next_nonblank_token( $i_next, $rtokens,
10390							$max_token_index );
10391	0	0	0			0	if ( $next_next_nonblank_token eq ')' ) {
		0	0
10392	0					0	$divide_possible_code = 4;
10393							}
10394							elsif ($next_next_nonblank_token eq ';'
10395							\|\| $next_next_nonblank_token eq ']'
10396							\|\| $next_next_nonblank_token eq '}' )
10397							{
10398	0					0	$divide_possible_code = 3;
10399							}
10400							else {
10401	0					0	$divide_possible_code = 2;
10402							}
10403							}
10404							}
10405							elsif ( $next_nonblank_token =~ /^\s*$/ ) {
10406	0					0	$divide_possible_code = 0;
10407							}
10408							else {
10409	0					0	$divide_possible_code = -1;
10410							}
10411
10412	0					0	return $divide_possible_code;
10413							} ## end sub is_possible_numerator
10414
10415							{ ## closure for sub pattern_expected
10416							my %pattern_test;
10417
10418							BEGIN {
10419
10420							# List of tokens which may follow a pattern. Note that we will not
10421							# have formed digraphs at this point, so we will see '&' instead of
10422							# '&&' and '\|' instead of '\|\|'
10423
10424							# /(\)\|\}\|\;\|\&\&\|\\|\\|\|and\|or\|while\|if\|unless)/
10425	44			44		285	my @q = qw( & && \| \|\| ? : + - * and or while if unless );
10426	44					207	push @q, ')', '}', ']', '>', COMMA, ';';
10427	44					118924	$pattern_test{$_} = 1 for @q;
10428							} ## end BEGIN
10429
10430							sub pattern_expected {
10431
10432	0			0	0	0	my ( $self, $i, $rtokens, $max_token_index ) = @_;
10433
10434							# This a filter for a possible pattern.
10435							# It looks at the token after a possible pattern and tries to
10436							# determine if that token could end a pattern.
10437							# returns -
10438							# 1 - yes
10439							# 0 - can't tell
10440							# -1 - no
10441	0					0	my $is_pattern = 0;
10442
10443	0					0	my $next_token = $rtokens->[ $i + 1 ];
10444
10445							# skip a possible quote modifier
10446	0					0	my $possible_modifiers = $quote_modifiers{'m'};
10447	0	0				0	if ( $next_token =~ /^$possible_modifiers/ ) {
10448	0					0	$i++;
10449							}
10450
10451	0					0	my ( $next_nonblank_token, $i_next_uu ) =
10452							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
10453
10454	0	0				0	if ( $pattern_test{$next_nonblank_token} ) {
10455	0					0	$is_pattern = 1;
10456							}
10457							else {
10458
10459							# Added '#' to fix issue c044
10460	0	0	0			0	if ( $next_nonblank_token =~ /^\s*$/
10461							\|\| $next_nonblank_token eq '#' )
10462							{
10463	0					0	$is_pattern = 0;
10464							}
10465							else {
10466	0					0	$is_pattern = -1;
10467							}
10468							}
10469	0					0	return $is_pattern;
10470							} ## end sub pattern_expected
10471							}
10472
10473							sub find_next_nonblank_token_on_this_line {
10474	579			579	0	1060	my ( $i, $rtokens, $max_token_index ) = @_;
10475	579					820	my $next_nonblank_token;
10476
10477	579	100				1118	if ( $i < $max_token_index ) {
10478	571					948	$next_nonblank_token = $rtokens->[ ++$i ];
10479
10480	571	100				2275	if ( $next_nonblank_token =~ /^\s*$/ ) {
10481
10482	164	100				415	if ( $i < $max_token_index ) {
10483	162					358	$next_nonblank_token = $rtokens->[ ++$i ];
10484							}
10485							}
10486							}
10487							else {
10488	8					19	$next_nonblank_token = EMPTY_STRING;
10489							}
10490	579					1538	return ( $next_nonblank_token, $i );
10491							} ## end sub find_next_nonblank_token_on_this_line
10492
10493							sub find_angle_operator_termination {
10494
10495	8			8	0	35	my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index )
10496							= @_;
10497
10498							# We are looking at a '<' and want to know if it is an angle operator.
10499							# Return:
10500							# $i = pretoken index of ending '>' if found, current $i otherwise
10501							# $type = 'Q' if found, '>' otherwise
10502
10503	8					14	my $i = $i_beg;
10504	8					16	my $type = '<';
10505	8					39	pos($input_line) = 1 + $rtoken_map->[$i];
10506
10507							# The token sequence '><' implies a markup language
10508	8	50				27	if ( $last_nonblank_token eq '>' ) {
10509	0					0	$self->[_html_tag_count_]++;
10510							}
10511
10512	8					14	my $filter;
10513
10514	8					16	my $expecting_TERM = $expecting == TERM;
10515
10516							# we just have to find the next '>' if a term is expected
10517	8	100				23	if ($expecting_TERM) { $filter = '[\>]' }
	6	50				13
10518
10519							# we have to guess if we don't know what is expected
10520	2					3	elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\\|\<]' }
10521
10522							# shouldn't happen - we shouldn't be here if operator is expected
10523							else {
10524	0					0	if (DEVEL_MODE) {
10525							Fault(<<EOM);
10526							Bad call to find_angle_operator_termination
10527							EOM
10528							}
10529	0					0	return ( $i, $type );
10530							}
10531
10532							# To illustrate what we might be looking at, in case we are
10533							# guessing, here are some examples of valid angle operators
10534							# (or file globs):
10535							# <tmp_imp/*>
10536							# <FH>
10537							# <$fh>
10538							# <.c .h>
10539							# <_>
10540							# <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
10541							# <${PREFIX}img.$IMAGE_TYPE>
10542							# <img*.$IMAGE_TYPE>
10543							# <Timg*.$IMAGE_TYPE>
10544							# <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
10545							#
10546							# Here are some examples of lines which do not have angle operators:
10547							# return unless $self->[2]++ < $#{$self->[1]};
10548							# < 2 \|\| @$t >
10549							#
10550							# the following line from dlister.pl caused trouble:
10551							# print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
10552							#
10553							# If the '<' starts an angle operator, it must end on this line and
10554							# it must not have certain characters like ';' and '=' in it. I use
10555							# this to limit the testing. This filter should be improved if
10556							# possible.
10557
10558	8	50				190	if ( $input_line =~ /($filter)/g ) {
10559
10560	8	50				35	if ( $1 eq '>' ) {
10561
10562							# We MAY have found an angle operator termination if we get
10563							# here, but we need to do more to be sure we haven't been
10564							# fooled.
10565	8					15	my $pos = pos($input_line);
10566
10567	8					16	my $pos_beg = $rtoken_map->[$i];
10568	8					25	my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
10569
10570							# Test for '<' after possible filehandle, issue c103
10571							# print $fh <>; # syntax error
10572							# print $fh <DATA>; # ok
10573							# print $fh < DATA>; # syntax error at '>'
10574							# print STDERR < DATA>; # ok, prints word 'DATA'
10575							# print BLABLA <DATA>; # ok; does nothing unless BLABLA is defined
10576	8	100				21	if ( $last_nonblank_type eq 'Z' ) {
10577
10578							# $str includes brackets; something like '<DATA>'
10579	1	0	33			10	if ( substr( $last_nonblank_token, 0, 1 ) !~ /[A-Za-z_]/
10580							&& substr( $str, 1, 1 ) !~ /[A-Za-z_]/ )
10581							{
10582	0					0	return ( $i, $type );
10583							}
10584							}
10585
10586							# Reject if the closing '>' follows a '-' as in:
10587							# if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
10588	8	100				25	if ( $expecting eq UNKNOWN ) {
10589	2					3	my $check = substr( $input_line, $pos - 2, 1 );
10590	2	100				4	if ( $check eq '-' ) {
10591	1					4	return ( $i, $type );
10592							}
10593							}
10594
10595							######################################debug#####
10596							#$self->write_diagnostics( "ANGLE? :$str\n");
10597							#print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
10598							######################################debug#####
10599	7					14	$type = 'Q';
10600	7					12	my $error;
10601	7					22	( $i, $error ) =
10602							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
10603
10604							# It may be possible that a quote ends midway in a pretoken.
10605							# If this happens, it may be necessary to split the pretoken.
10606	7	50				24	if ($error) {
10607	0					0	if (DEVEL_MODE) {
10608							Fault(<<EOM);
10609							unexpected error condition returned by inverse_pretoken_map
10610							EOM
10611							}
10612							$self->warning(
10613	0					0	"Possible tokenization error..please check this line\n");
10614							}
10615
10616							# Check for accidental formatting of a markup language doc...
10617							# Formatting will be skipped if we set _html_tag_count_ and
10618							# also set a warning of any kind.
10619	7					11	my $is_html_tag;
10620	7		33			23	my $is_first_string =
10621							$i_beg == 0 && $self->[_last_line_number_] == 1;
10622
10623							# html comment '<!...' of any type
10624	7	50	33			158	if ( $str =~ /^<\s*!/ ) {
		50
		50
10625	0					0	$is_html_tag = 1;
10626	0	0				0	if ($is_first_string) {
10627	0					0	$self->warning(
10628							"looks like a markup language, continuing error checks\n"
10629							);
10630							}
10631							}
10632
10633							# html end tag, something like </h1>
10634							elsif ( $str =~ /^<\s\/\w+\s>$/ ) {
10635	0					0	$is_html_tag = 1;
10636							}
10637
10638							# xml prolog?
10639							elsif ( $str =~ /^<\?xml\s.*\?>$/i && $is_first_string ) {
10640	0					0	$is_html_tag = 1;
10641	0					0	$self->warning(
10642							"looks like a markup language, continuing error checks\n");
10643							}
10644							else {
10645							## doesn't look like a markup tag
10646							}
10647
10648	7	50				22	if ($is_html_tag) {
10649	0					0	$self->[_html_tag_count_]++;
10650							}
10651
10652							# count blanks on inside of brackets
10653	7					15	my $blank_count = 0;
10654	7	100				31	$blank_count++ if ( $str =~ /<\s+/ );
10655	7	100				30	$blank_count++ if ( $str =~ /\s+>/ );
10656
10657							# Now let's see where we stand....
10658							# OK if math op not possible
10659	7	100				26	if ($expecting_TERM) {
		50
		50
		0
10660							}
10661
10662							elsif ($is_html_tag) {
10663							}
10664
10665							# OK if there are no more than 2 non-blank pre-tokens inside
10666							# (not possible to write 2 token math between < and >)
10667							# This catches most common cases
10668							elsif ( $i <= $i_beg + 3 + $blank_count ) {
10669
10670							# No longer any need to document this common case
10671							## $self->write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
10672							}
10673
10674							# OK if there is some kind of identifier inside
10675							# print $fh <tvg::INPUT>;
10676							elsif ( $str =~ /^<\s\$?(\w\|::\|\s)+\s>$/ ) {
10677	0					0	$self->write_diagnostics("ANGLE (contains identifier): $str\n");
10678							}
10679
10680							# Not sure..
10681							else {
10682
10683							# Let's try a Brace Test: any braces inside must balance
10684	0					0	my $br = $str =~ tr/\{/{/ - $str =~ tr/\}/}/;
10685	0					0	my $sb = $str =~ tr/\[/[/ - $str =~ tr/\]/]/;
10686	0					0	my $pr = $str =~ tr/$/(/ - $str =~ tr/$/)/;
10687
10688							# if braces do not balance - not angle operator
10689	0	0	0			0	if ( $br \|\| $sb \|\| $pr ) {
			0
10690	0					0	$i = $i_beg;
10691	0					0	$type = '<';
10692	0					0	$self->write_diagnostics(
10693							"NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
10694							}
10695
10696							# we should keep doing more checks here...to be continued
10697							# Tentatively accepting this as a valid angle operator.
10698							# There are lots more things that can be checked.
10699							else {
10700	0					0	$self->write_diagnostics(
10701							"ANGLE-Guessing yes: $str expecting=$expecting\n");
10702	0					0	$self->write_logfile_entry(
10703							"Guessing angle operator here: $str\n");
10704							}
10705							}
10706							}
10707
10708							# didn't find ending >
10709							else {
10710	0	0				0	if ($expecting_TERM) {
10711	0					0	$self->warning("No ending > for angle operator\n");
10712							}
10713							}
10714							}
10715	7					27	return ( $i, $type );
10716							} ## end sub find_angle_operator_termination
10717
10718							sub scan_number_do {
10719
10720	683			683	0	1488	my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) =
10721							@_;
10722
10723							# Scan a number in any of the formats that Perl accepts
10724							# Underbars (_) are allowed in decimal numbers.
10725							# Given:
10726							# $input_line - the string to scan
10727							# $i - pre_token index to start scanning
10728							# $rtoken_map - reference to the pre_token map giving starting
10729							# character position in $input_line of token $i
10730							# Return:
10731							# $i - last pre_token index of the number just scanned
10732							# $type - the token type ('v' or 'n')
10733							# number - the number (characters); or undef if not a number
10734
10735	683					1087	my $pos_beg = $rtoken_map->[$i];
10736	683					848	my $pos;
10737							##my $i_begin = $i;
10738	683					999	my $number = undef;
10739	683					864	my $type = $input_type;
10740
10741	683					1363	my $first_char = substr( $input_line, $pos_beg, 1 );
10742
10743							# Look for bad starting characters; Shouldn't happen..
10744	683	50				2841	if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
10745	0					0	if (DEVEL_MODE) {
10746							Fault(<<EOM);
10747							Program bug - scan_number given bad first character = '$first_char'
10748							EOM
10749							}
10750	0					0	return ( $i, $type, $number );
10751							}
10752
10753							# handle v-string without leading 'v' character ('Two Dot' rule)
10754							# (vstring.t)
10755							# Here is the format prior to including underscores:
10756							## if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
10757	683					1828	pos($input_line) = $pos_beg;
10758	683	50				2996	if ( $input_line =~ /\G((\d[_\d]*)?\.[\d_]+(\.[\d_]+)+)/g ) {
10759	0					0	$pos = pos($input_line);
10760	0					0	my $numc = $pos - $pos_beg;
10761	0					0	$number = substr( $input_line, $pos_beg, $numc );
10762	0					0	$type = 'v';
10763							}
10764
10765							# handle octal, hex, binary
10766	683	50				1408	if ( !defined($number) ) {
10767	683					1129	pos($input_line) = $pos_beg;
10768
10769							# Perl 5.22 added floating point literals, like '0x0.b17217f7d1cf78p0'
10770							# For reference, the format prior to hex floating point is:
10771							# /\G[+-]?0(([xX][0-9a-fA-F_]+)\|([0-7_]+)\|([bB][01_]+))/g )
10772							# (hex) (octal) (binary)
10773	683	100				2157	if (
10774							$input_line =~ m{
10775
10776							\G[+-]?0( # leading [signed] 0
10777
10778							# a hex float, i.e. '0x0.b17217f7d1cf78p0'
10779							([xX][0-9a-fA-F_]* # X and optional leading digits
10780							(\.([0-9a-fA-F][0-9a-fA-F_]*)?)? # optional decimal and fraction
10781							[Pp][+-]?[0-9a-fA-F] # REQUIRED exponent with digit
10782							[0-9a-fA-F_]*) # optional Additional exponent digits
10783
10784							# or hex integer
10785							\|([xX][0-9a-fA-F_]+)
10786
10787							# or octal fraction
10788							\|([oO]?[0-7_]+ # string of octal digits
10789							(\.([0-7][0-7_]*)?)? # optional decimal and fraction
10790							[Pp][+-]?[0-7] # REQUIRED exponent, no underscore
10791							[0-7_]*) # Additional exponent digits with underscores
10792
10793							# or octal integer
10794							\|([oO]?[0-7_]+) # string of octal digits
10795
10796							# or a binary float
10797							\|([bB][01_]* # 'b' with string of binary digits
10798							(\.([01][01_]*)?)? # optional decimal and fraction
10799							[Pp][+-]?[01] # Required exponent indicator, no underscore
10800							[01_]*) # additional exponent bits
10801
10802							# or binary integer
10803							\|([bB][01_]+) # 'b' with string of binary digits
10804
10805							)}gx
10806							)
10807							{
10808	72					103	$pos = pos($input_line);
10809	72					133	my $numc = $pos - $pos_beg;
10810	72					114	$number = substr( $input_line, $pos_beg, $numc );
10811	72					115	$type = 'n';
10812							}
10813							}
10814
10815							# handle decimal
10816	683	100				1301	if ( !defined($number) ) {
10817	611					944	pos($input_line) = $pos_beg;
10818
10819	611	50				2570	if ( $input_line =~ /\G([+-]?[\d_](\.[\d_])?([Ee][+-]?(\d+))?)/g ) {
10820	611					876	$pos = pos($input_line);
10821
10822							# watch out for things like 0..40 which would give 0. by this;
10823	611	100	100			1869	if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
10824							&& ( substr( $input_line, $pos, 1 ) eq '.' ) )
10825							{
10826	38					50	$pos--;
10827							}
10828	611					798	my $numc = $pos - $pos_beg;
10829	611					931	$number = substr( $input_line, $pos_beg, $numc );
10830	611					914	$type = 'n';
10831							}
10832							}
10833
10834							# filter out non-numbers like e + - . e2 .e3 +e6
10835							# the rule: at least one digit, and any 'e' must be preceded by a digit
10836	683	100	66			3027	if (
			66
10837							$number !~ /\d/ # no digits
10838							\|\| ( $number =~ /^(.*)[eE]/
10839							&& $1 !~ /\d/ ) # or no digits before the 'e'
10840							)
10841							{
10842	304					467	$number = undef;
10843	304					399	$type = $input_type;
10844	304					1081	return ( $i, $type, $number );
10845							}
10846
10847							# Found a number; now we must convert back from character position
10848							# to pre_token index. An error here implies user syntax error.
10849							# An example would be an invalid octal number like '009'.
10850	379					519	my $error;
10851	379					754	( $i, $error ) =
10852							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
10853	379	50				709	if ($error) { $self->warning("Possibly invalid number\n") }
	0					0
10854
10855	379					1327	return ( $i, $type, $number );
10856							} ## end sub scan_number_do
10857
10858							sub inverse_pretoken_map {
10859
10860							# Starting with the current pre_token index $i, scan forward until
10861							# finding the index of the next pre_token whose position is $pos.
10862	2470			2470	0	4736	my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
10863	2470					3437	my $error = 0;
10864
10865	2470					5200	while ( ++$i <= $max_token_index ) {
10866
10867	4631	100				8686	if ( $pos <= $rtoken_map->[$i] ) {
10868
10869							# Let the calling routine handle errors in which we do not
10870							# land on a pre-token boundary. It can happen by running
10871							# perltidy on some non-perl scripts, for example.
10872	2426	50				4695	if ( $pos < $rtoken_map->[$i] ) { $error = 1 }
	0					0
10873	2426					3097	$i--;
10874	2426					3651	last;
10875							}
10876							} ## end while ( ++$i <= $max_token_index)
10877	2470					5173	return ( $i, $error );
10878							} ## end sub inverse_pretoken_map
10879
10880							sub find_here_doc {
10881
10882							my (
10883
10884	9			9	0	25	$self,
10885
10886							$expecting,
10887							$i,
10888							$rtokens,
10889							$rtoken_type,
10890							$rtoken_map_uu,
10891							$max_token_index,
10892
10893							) = @_;
10894
10895							# Find the target of a here document, if any
10896							# Given:
10897							# $i - token index of the second < of <<
10898							# ($i must be less than the last token index if this is called)
10899							# Return:
10900							# $found_target = 0 didn't find target; =1 found target
10901							# HERE_TARGET - the target string (may be empty string)
10902							# $i - unchanged if not here doc,
10903							# or index of the last token of the here target
10904							# $saw_error - flag noting unbalanced quote on here target
10905	9					18	my $ibeg = $i;
10906	9					18	my $found_target = 0;
10907	9					19	my $here_doc_target = EMPTY_STRING;
10908	9					18	my $here_quote_character = EMPTY_STRING;
10909	9					14	my $saw_error = 0;
10910	9					20	my ( $next_nonblank_token, $i_next_nonblank, $next_token );
10911	9					21	$next_token = $rtokens->[ $i + 1 ];
10912
10913							# perl allows a backslash before the target string (heredoc.t)
10914	9					18	my $backslash = 0;
10915	9	50				33	if ( $next_token eq BACKSLASH ) {
10916	0					0	$backslash = 1;
10917	0					0	$next_token = $rtokens->[ $i + 2 ];
10918							}
10919
10920	9					38	( $next_nonblank_token, $i_next_nonblank ) =
10921							find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
10922
10923	9	100	33			57	if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
		50
		50
10924
10925	6					12	my $in_quote = 1;
10926	6					10	my $quote_depth = 0;
10927	6					12	my $quote_pos = 0;
10928	6					7	my $quoted_string;
10929
10930							(
10931
10932	6					26	$i,
10933							$in_quote,
10934							$here_quote_character,
10935							$quote_pos,
10936							$quote_depth,
10937							$quoted_string,
10938							)
10939							= $self->follow_quoted_string(
10940
10941							$i_next_nonblank,
10942							$in_quote,
10943							$rtokens,
10944							$rtoken_type,
10945							$here_quote_character,
10946							$quote_pos,
10947							$quote_depth,
10948							$max_token_index,
10949							);
10950
10951	6	50				17	if ($in_quote) { # didn't find end of quote, so no target found
10952	0					0	$i = $ibeg;
10953	0	0				0	if ( $expecting == TERM ) {
10954	0					0	$self->warning(
10955							"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
10956							);
10957	0					0	$saw_error = 1;
10958							}
10959							}
10960							else { # found ending quote
10961	6					11	$found_target = 1;
10962
10963	6					11	my $tokj;
10964	6					22	foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) {
10965	6					13	$tokj = $rtokens->[$j];
10966
10967							# we have to remove any backslash before the quote character
10968							# so that the here-doc-target exactly matches this string
10969							next
10970	6	0	33			24	if ( $tokj eq BACKSLASH
			33
10971							&& $j < $i - 1
10972							&& $rtokens->[ $j + 1 ] eq $here_quote_character );
10973	6					16	$here_doc_target .= $tokj;
10974							}
10975							}
10976							}
10977
10978							elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
10979	0					0	$found_target = 1;
10980	0					0	$self->write_logfile_entry(
10981							"found blank here-target after <<; suggest using \"\"\n");
10982	0					0	$i = $ibeg;
10983							}
10984							elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
10985
10986	3					7	my $here_doc_expected;
10987	3	50				25	if ( $expecting == UNKNOWN ) {
10988	0					0	$here_doc_expected = $self->guess_if_here_doc($next_token);
10989							}
10990							else {
10991	3					8	$here_doc_expected = 1;
10992							}
10993
10994	3	50				10	if ($here_doc_expected) {
10995	3					6	$found_target = 1;
10996	3					5	$here_doc_target = $next_token;
10997	3					6	$i = $ibeg + 1;
10998							}
10999
11000							}
11001							else {
11002
11003	0	0				0	if ( $expecting == TERM ) {
11004	0					0	$found_target = 1;
11005	0					0	$self->write_logfile_entry("Note: bare here-doc operator <<\n");
11006							}
11007							else {
11008	0					0	$i = $ibeg;
11009							}
11010							}
11011
11012							# patch to neglect any prepended backslash
11013	9	50	33			50	if ( $found_target && $backslash ) { $i++ }
	0					0
11014
11015	9					48	return ( $found_target, $here_doc_target, $here_quote_character, $i,
11016							$saw_error );
11017							} ## end sub find_here_doc
11018
11019							sub do_quote {
11020
11021							my (
11022
11023	3170			3170	0	8218	$self,
11024
11025							$i,
11026							$in_quote,
11027							$quote_character,
11028							$quote_pos,
11029							$quote_depth,
11030							$quoted_string_1,
11031							$quoted_string_2,
11032							$rtokens,
11033							$rtoken_type,
11034							$rtoken_map_uu,
11035							$max_token_index,
11036
11037							) = @_;
11038
11039							# Follow (or continue following) quoted string(s)
11040							# $in_quote = return code:
11041							# 0 - ok, found end
11042							# 1 - still must find end of quote whose target is $quote_character
11043							# 2 - still looking for end of first of two quotes
11044							#
11045							# Returns updated strings:
11046							# $quoted_string_1 = quoted string seen while in_quote=1
11047							# $quoted_string_2 = quoted string seen while in_quote=2
11048
11049	3170					4037	my $quoted_string;
11050	3170	100				5638	if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
11051	31					67	my $ibeg = $i;
11052							(
11053
11054	31					109	$i,
11055							$in_quote,
11056							$quote_character,
11057							$quote_pos,
11058							$quote_depth,
11059							$quoted_string,
11060							)
11061							= $self->follow_quoted_string(
11062
11063							$ibeg,
11064							$in_quote,
11065							$rtokens,
11066							$rtoken_type,
11067							$quote_character,
11068							$quote_pos,
11069							$quote_depth,
11070							$max_token_index,
11071							);
11072	31					76	$quoted_string_2 .= $quoted_string;
11073	31	50				76	if ( $in_quote == 1 ) {
11074	31	100				132	if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
	1					1
11075	31					65	$quote_character = EMPTY_STRING;
11076							}
11077							else {
11078	0					0	$quoted_string_2 .= "\n";
11079							}
11080							}
11081
11082	3170	50				5541	if ( $in_quote == 1 ) { # one (more) quote to follow
11083	3170					3833	my $ibeg = $i;
11084							(
11085
11086	3170					7816	$i,
11087							$in_quote,
11088							$quote_character,
11089							$quote_pos,
11090							$quote_depth,
11091							$quoted_string,
11092							)
11093							= $self->follow_quoted_string(
11094
11095							$ibeg,
11096							$in_quote,
11097							$rtokens,
11098							$rtoken_type,
11099							$quote_character,
11100							$quote_pos,
11101							$quote_depth,
11102							$max_token_index,
11103							);
11104	3170					5680	$quoted_string_1 .= $quoted_string;
11105	3170	100				5609	if ( $in_quote == 1 ) {
11106	244					388	$quoted_string_1 .= "\n";
11107							}
11108							}
11109							return (
11110
11111	3170					9611	$i,
11112							$in_quote,
11113							$quote_character,
11114							$quote_pos,
11115							$quote_depth,
11116							$quoted_string_1,
11117							$quoted_string_2,
11118
11119							);
11120							} ## end sub do_quote
11121
11122	44			44		367	use constant DEBUG_FIND_INTERPOLATED_HERE_TARGETS => 0;
	44					87
	44					27873
11123
11124							sub find_interpolated_here_targets {
11125	2			2	0	4	my ( $self, $quoted_string, $len_starting_lines ) = @_;
11126
11127							# Search for here targets in a quoted string
11128							# Given:
11129							# $quoted_string = the complete string of an interpolated quote
11130							# $len_starting_lines = number of characters of the first n-1 lines
11131							# (=0 if this is a single-line quote)
11132							# Task:
11133							# Find and return a list of all here targets on the last line;
11134							# i.e., if here target is index ii, we only return the
11135							# target if $rmap->[$ii]>=$len_starting_lines
11136
11137							# The items returned are the format needed for @{$rhere_target_list};
11138							# [ $here_doc_target, $here_quote_character ]
11139							# there can be multiple here targets.
11140
11141	2					3	my $rht;
11142
11143							# Break the entire quote into pre-tokens, even if multi-line, because we
11144							# have to determine which parts are in single quotes
11145	2					5	my ( $rtokens, $rmap, $rtoken_type ) = pre_tokenize($quoted_string);
11146	2					4	my $max_ii = @{$rtokens} - 1;
	2					4
11147
11148							# Depth of braces controlled by a sigil
11149	2					3	my $code_depth = 0;
11150
11151							# Loop over pre-tokens
11152	2					4	my $ii = -1;
11153	2					7	while ( ++$ii <= $max_ii ) {
11154	21					24	my $token = $rtokens->[$ii];
11155	21					19	if (DEBUG_FIND_INTERPOLATED_HERE_TARGETS) {
11156							print "i=$ii tok=$token block=$code_depth\n";
11157							}
11158
11159	21	100				28	if ( $token eq BACKSLASH ) {
11160	4	50				9	if ( !$code_depth ) { $ii++ }
	0					0
11161	4					5	next;
11162							}
11163
11164							# Look for start of interpolation code block, '${', '@{', '$var{', etc
11165	17	100				22	if ( !$code_depth ) {
11166	4	50	33			10	if ( $token eq '$' \|\| $token eq '@' ) {
11167
11168	4	100	66			19	$ii++
11169							if ( $ii < $max_ii && $rtoken_type->[ $ii + 1 ] eq 'b' );
11170
11171	4		33			23	while (
			33
11172							$ii < $max_ii
11173							&& ( $rtoken_type->[ $ii + 1 ] eq 'w'
11174							\|\| $rtoken_type->[ $ii + 1 ] eq '::' )
11175							)
11176							{
11177	0					0	$ii++;
11178							} ## end while ( $ii < $max_ii && ...)
11179
11180	4	50	33			13	$ii++
11181							if ( $ii < $max_ii && $rtoken_type->[ $ii + 1 ] eq 'b' );
11182
11183	4	50	33			18	if ( $ii < $max_ii && $rtokens->[ $ii + 1 ] eq '{' ) {
11184	4					5	$ii++;
11185	4					6	$code_depth++;
11186							}
11187							}
11188	4					6	next;
11189							}
11190
11191							# Continue interpolating while $code_depth > 0..
11192	13	50				20	if ( $token eq '{' ) {
11193	0					0	$code_depth++;
11194	0					0	next;
11195							}
11196	13	100				18	if ( $token eq '}' ) {
11197	4					4	$code_depth--;
11198	4					7	next;
11199							}
11200
11201							# Look for '<<'
11202	9	50	66			29	if ( $token ne '<'
			66
11203							\|\| $ii >= $max_ii - 1
11204							\|\| $rtokens->[ $ii + 1 ] ne '<' )
11205							{
11206	5					7	next;
11207							}
11208
11209							# Remember the location of the first '<' so it can be modified
11210	4					5	my $ii_left_shift = $ii;
11211
11212	4					5	$ii++;
11213
11214							# or '<<~'
11215	4	50	33			14	if ( $rtoken_type->[ $ii + 1 ] eq '~' && $ii < $max_ii - 2 ) {
11216	0					0	$ii++;
11217							}
11218
11219							# blanks ok before targets in quotes
11220	4					6	my $saw_blank;
11221	4	100	66			13	if ( $rtoken_type->[ $ii + 1 ] eq 'b' && $ii < $max_ii - 2 ) {
11222	2					3	$saw_blank = 1;
11223	2					3	$ii++;
11224							}
11225
11226	4					5	my $next_type = $rtoken_type->[ $ii + 1 ];
11227
11228							# Look for unquoted targets like "${ \<<END1 }"
11229	4	100	66			14	if ( $next_type eq 'w' ) {
		50	33
11230	2	50				5	if ($saw_blank) {
11231							## error: blank target is deprecated
11232							}
11233							else {
11234	2					4	$ii++;
11235	2	50				7	if ( $rmap->[$ii] >= $len_starting_lines ) {
11236	2					3	push @{$rht}, [ $rtokens->[$ii], EMPTY_STRING ];
	2					7
11237
11238							# Modify the string so the target is not found again
11239	2					4	my $pos_left_shift = $rmap->[$ii_left_shift];
11240	2					6	substr( $quoted_string, $pos_left_shift, 1, SPACE );
11241	2					5	substr( $quoted_string, $pos_left_shift + 1, 1, SPACE );
11242							}
11243							}
11244							}
11245
11246							# Look for quoted targets like "${ \<< 'END1' }${ \<<\"END2\" }";
11247							elsif ( $next_type eq "'" \|\| $next_type eq '"' \|\| $next_type eq '`' ) {
11248	2					3	my $quote_char = $next_type;
11249	2					4	$ii++;
11250	2					2	my $here_target = EMPTY_STRING;
11251	2		66			8	while ( ++$ii <= $max_ii && $rtokens->[$ii] ne $quote_char ) {
11252							next
11253	2	50	66			8	if ( $quote_char ne "'" && $rtokens->[$ii] eq BACKSLASH );
11254	2					48	$here_target .= $rtokens->[$ii];
11255							}
11256	2	50				11	if ( $rmap->[$ii] >= $len_starting_lines ) {
11257	2					3	push @{$rht}, [ $here_target, $quote_char ];
	2					10
11258							}
11259							}
11260							else {
11261							## no here target found
11262							}
11263	4					10	next;
11264							} ## end while ( ++$ii <= $max_ii )
11265	2					14	return ( $rht, $quoted_string );
11266							} ## end sub find_interpolated_here_targets
11267
11268							# Some possible non-word quote delimiters, for preliminary checking
11269							my %is_punct_char;
11270
11271							BEGIN {
11272
11273	44			44		339	my @q = qw# / " ' { } ( ) [ ] < > ; + - * \| % ! x ~ = ? : . ^ & #;
11274	44					138	push @q, '#';
11275	44					114	push @q, COMMA;
11276	44					131088	$is_punct_char{$_} = 1 for @q;
11277							}
11278
11279							sub follow_quoted_string {
11280
11281							my (
11282
11283	3207			3207	0	6516	$self,
11284
11285							$i_beg,
11286							$in_quote,
11287							$rtokens,
11288							$rtoken_type,
11289							$beginning_tok,
11290							$quote_pos,
11291							$quote_depth,
11292							$max_token_index,
11293
11294							) = @_;
11295
11296							# Scan for a specific token, skipping escaped characters.
11297							# If the quote character is blank, use the first non-blank character.
11298							# Given:
11299							# $rtokens = reference to the array of tokens
11300							# $i = the token index of the first character to search
11301							# $in_quote = number of quoted strings being followed
11302							# $beginning_tok = the starting quote character
11303							# $quote_pos = index to check next for alphanumeric delimiter
11304							# Return:
11305							# $i = the token index of the ending quote character
11306							# $in_quote = decremented if found end, unchanged if not
11307							# $beginning_tok = the starting quote character
11308							# $quote_pos = index to check next for alphanumeric delimiter
11309							# $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
11310							# $quoted_string = the text of the quote (without quotation tokens)
11311	3207					4126	my ( $tok, $end_tok );
11312	3207					4104	my $i = $i_beg - 1;
11313	3207					3997	my $quoted_string = EMPTY_STRING;
11314
11315	3207					3789	0 && do {
11316							print {*STDOUT}
11317							"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
11318							};
11319
11320							# for a non-blank token, get the corresponding end token
11321	3207	100	33			11897	if (
			66
11322							$is_punct_char{$beginning_tok}
11323							\|\| ( length($beginning_tok)
11324							&& $beginning_tok !~ /^\s+$/ )
11325							)
11326							{
11327							$end_tok =
11328							$matching_end_token{$beginning_tok}
11329	244	100				637	? $matching_end_token{$beginning_tok}
11330							: $beginning_tok;
11331							}
11332
11333							# for a blank token, find and use the first non-blank one
11334							else {
11335	2963	100				5130	my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
11336
11337	2963					5439	while ( $i < $max_token_index ) {
11338	2965					4585	$tok = $rtokens->[ ++$i ];
11339
11340	2965	100				5371	if ( $rtoken_type->[$i] ne 'b' ) {
11341
11342	2963	50	66			6324	if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
11343	0					0	$i = $max_token_index;
11344							}
11345							else {
11346
11347	2963	100				5032	if ( length($tok) > 1 ) {
11348	1	50				4	if ( $quote_pos <= 0 ) { $quote_pos = 1 }
	1					1
11349	1					3	$beginning_tok = substr( $tok, $quote_pos - 1, 1 );
11350							}
11351							else {
11352	2962					3531	$beginning_tok = $tok;
11353	2962					3642	$quote_pos = 0;
11354							}
11355							$end_tok =
11356							$matching_end_token{$beginning_tok}
11357	2963	100				5670	? $matching_end_token{$beginning_tok}
11358							: $beginning_tok;
11359	2963					3382	$quote_depth = 1;
11360	2963					4290	last;
11361							}
11362							}
11363							else {
11364	2					4	$allow_quote_comments = 1;
11365							}
11366							} ## end while ( $i < $max_token_index)
11367							}
11368
11369							# There are two different loops which search for the ending quote
11370							# character. In the rare case of an alphanumeric quote delimiter, we
11371							# have to look through alphanumeric tokens character-by-character, since
11372							# the pre-tokenization process combines multiple alphanumeric
11373							# characters, whereas for a non-alphanumeric delimiter, only tokens of
11374							# length 1 can match.
11375
11376							#----------------------------------------------------------------
11377							# Case 1 (rare): loop for case of alphanumeric quote delimiter..
11378							# "quote_pos" is the position the current word to begin searching
11379							#----------------------------------------------------------------
11380	3207	100	100			7275	if ( !$is_punct_char{$beginning_tok} && $beginning_tok =~ /\w/ ) {
11381
11382							# Note this because it is not recommended practice except
11383							# for obfuscated perl contests
11384	1	50				3	if ( $in_quote == 1 ) {
11385	1					4	$self->write_logfile_entry(
11386							"Note: alphanumeric quote delimiter ($beginning_tok) \n");
11387							}
11388
11389							# Note: changed < to <= here to fix c109. Relying on extra end blanks.
11390	1					3	while ( $i <= $max_token_index ) {
11391
11392	4	100	66			11	if ( $quote_pos == 0 \|\| ( $i < 0 ) ) {
11393	3					4	$tok = $rtokens->[ ++$i ];
11394
11395	3	100				7	if ( $tok eq BACKSLASH ) {
11396
11397							# retain backslash unless it hides the end token
11398	1	50				4	$quoted_string .= $tok
11399							unless ( $rtokens->[ $i + 1 ] eq $end_tok );
11400	1					3	$quote_pos++;
11401	1	50				4	last if ( $i >= $max_token_index );
11402	1					2	$tok = $rtokens->[ ++$i ];
11403							}
11404							}
11405	4					6	my $old_pos = $quote_pos;
11406
11407	4					6	$quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
11408
11409	4	100				6	if ( $quote_pos > 0 ) {
11410
11411	1					2	$quoted_string .=
11412							substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
11413
11414							# NOTE: any quote modifiers will be at the end of '$tok'. If we
11415							# wanted to check them, this is the place to get them. But
11416							# this quote form is rarely used in practice, so it isn't
11417							# worthwhile.
11418
11419	1					2	$quote_depth--;
11420
11421	1	50				2	if ( $quote_depth == 0 ) {
11422	1					2	$in_quote--;
11423	1					2	last;
11424							}
11425							}
11426							else {
11427	3	50				6	if ( $old_pos <= length($tok) ) {
11428	3					6	$quoted_string .= substr( $tok, $old_pos );
11429							}
11430							}
11431							} ## end while ( $i <= $max_token_index)
11432							}
11433
11434							#-----------------------------------------------------------------------
11435							# Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
11436							#-----------------------------------------------------------------------
11437							else {
11438
11439	3206					5856	while ( $i < $max_token_index ) {
11440	12368					14515	$tok = $rtokens->[ ++$i ];
11441
11442	12368	100				21230	if ( $tok eq $end_tok ) {
		100
		100
11443	2963					3414	$quote_depth--;
11444
11445	2963	100				4819	if ( $quote_depth == 0 ) {
11446	2962					3338	$in_quote--;
11447	2962					3715	last;
11448							}
11449							}
11450							elsif ( $tok eq $beginning_tok ) {
11451	1					2	$quote_depth++;
11452							}
11453							elsif ( $tok eq BACKSLASH ) {
11454
11455							# retain backslash unless it hides the beginning or end token
11456	451					684	$tok = $rtokens->[ ++$i ];
11457	451	100	100			1808	$quoted_string .= BACKSLASH
11458							if ( $tok ne $end_tok && $tok ne $beginning_tok );
11459							}
11460							else {
11461							## nothing special
11462							}
11463	9406					14101	$quoted_string .= $tok;
11464							} ## end while ( $i < $max_token_index)
11465							}
11466	3207	100				5431	if ( $i > $max_token_index ) { $i = $max_token_index }
	10					12
11467							return (
11468
11469	3207					11335	$i,
11470							$in_quote,
11471							$beginning_tok,
11472							$quote_pos,
11473							$quote_depth,
11474							$quoted_string,
11475
11476							);
11477							} ## end sub follow_quoted_string
11478
11479							sub indicate_error {
11480	0			0	0	0	my ( $self, $msg, $line_number, $input_line, $pos, $caret ) = @_;
11481
11482							# write input line and line with carat's showing where error was detected
11483	0					0	$self->interrupt_logfile();
11484	0					0	$self->warning($msg);
11485	0					0	$self->write_error_indicator_pair( $line_number, $input_line, $pos,
11486							$caret );
11487	0					0	$self->resume_logfile();
11488	0					0	return;
11489							} ## end sub indicate_error
11490
11491							sub write_error_indicator_pair {
11492	0			0	0	0	my ( $self, $line_number, $input_line, $pos, $caret ) = @_;
11493	0					0	my ( $offset, $numbered_line, $underline ) =
11494							make_numbered_line( $line_number, $input_line, $pos );
11495	0					0	$underline = write_on_underline( $underline, $pos - $offset, $caret );
11496	0					0	$self->warning( $numbered_line . "\n" );
11497	0					0	$underline =~ s/\s+$//;
11498	0					0	$self->warning( $underline . "\n" );
11499	0					0	return;
11500							} ## end sub write_error_indicator_pair
11501
11502							sub make_numbered_line {
11503
11504	0			0	0	0	my ( $lineno, $str, $pos ) = @_;
11505
11506							# Given:
11507							# $lineno=line number
11508							# $str = an input line
11509							# $pos = character position of interest
11510							# Create a string not longer than 80 characters of the form:
11511							# $lineno: sub_string
11512							# such that the sub_string of $str contains the position of interest
11513							#
11514							# Here is an example of what we want, in this case we add trailing
11515							# '...' because the line is long.
11516							#
11517							# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
11518							#
11519							# Here is another example, this time in which we used leading '...'
11520							# because of excessive length:
11521							#
11522							# 2: ... er of the World Wide Web Consortium's
11523							#
11524							# input parameters are:
11525							# $lineno = line number
11526							# $str = the text of the line
11527							# $pos = position of interest (the error) : 0 = first character
11528							#
11529							# We return :
11530							# - $offset = an offset which corrects the position in case we only
11531							# display part of a line, such that $pos-$offset is the effective
11532							# position from the start of the displayed line.
11533							# - $numbered_line = the numbered line as above,
11534							# - $underline = a blank 'underline' which is all spaces with the same
11535							# number of characters as the numbered line.
11536
11537	0	0				0	my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
11538	0					0	my $excess = length($str) - $offset - 68;
11539	0	0				0	my $numc = ( $excess > 0 ) ? 68 : undef;
11540
11541	0	0				0	if ( defined($numc) ) {
11542	0	0				0	if ( $offset == 0 ) {
11543	0					0	$str = substr( $str, $offset, $numc - 4 ) . " ...";
11544							}
11545							else {
11546	0					0	$str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
11547							}
11548							}
11549							else {
11550
11551	0	0				0	if ( $offset == 0 ) {
11552							}
11553							else {
11554	0					0	$str = "... " . substr( $str, $offset + 4 );
11555							}
11556							}
11557
11558	0					0	my $numbered_line = sprintf( "%d: ", $lineno );
11559	0					0	$offset -= length($numbered_line);
11560	0					0	$numbered_line .= $str;
11561	0					0	my $underline = SPACE x length($numbered_line);
11562	0					0	return ( $offset, $numbered_line, $underline );
11563							} ## end sub make_numbered_line
11564
11565							sub write_on_underline {
11566
11567	0			0	0	0	my ( $underline, $pos, $pos_chr ) = @_;
11568
11569							# The "underline" is a string that shows where an error is; it starts
11570							# out as a string of blanks with the same length as the numbered line of
11571							# code above it, and we have to add marking to show where an error is.
11572							# In the example below, we want to write the string '--^' just below
11573							# the line of bad code:
11574							#
11575							# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
11576							# ---^
11577							# We are given the current underline string, plus a position and a
11578							# string to write on it.
11579							#
11580							# In the above example, there will be 2 calls to do this:
11581							# First call: $pos=19, pos_chr=^
11582							# Second call: $pos=16, pos_chr=---
11583							#
11584							# This is a trivial thing to do with substr, but there is some
11585							# checking to do.
11586
11587							# check for error..shouldn't happen
11588	0	0	0			0	if ( $pos < 0 \|\| $pos > length($underline) ) {
11589	0					0	return $underline;
11590							}
11591	0					0	my $excess = length($pos_chr) + $pos - length($underline);
11592	0	0				0	if ( $excess > 0 ) {
11593	0					0	$pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
11594							}
11595	0					0	substr( $underline, $pos, length($pos_chr), $pos_chr );
11596	0					0	return ($underline);
11597							} ## end sub write_on_underline
11598
11599							sub pre_tokenize {
11600
11601	7175			7175	0	12383	my ( $str, ($max_tokens_wanted) ) = @_;
11602
11603							# Input parameters:
11604							# $str = string to be parsed
11605							# $max_tokens_wanted > 0 to stop on reaching this many tokens.
11606							# = undef or 0 means get all tokens
11607
11608							# Break a string, $str, into a sequence of preliminary tokens (pre-tokens).
11609							# We look for these types of tokens:
11610							# words (type='w'), example: 'max_tokens_wanted'
11611							# digits (type = 'd'), example: '0755'
11612							# whitespace (type = 'b'), example: ' '
11613							# single character punct (type = char) example: '='
11614
11615							# Later operations will combine one or more of these pre-tokens into final
11616							# tokens. We cannot do better than this yet because we might be in a
11617							# quoted string or pattern.
11618
11619							# An advantage of doing this pre-tokenization step is that it keeps almost
11620							# all of the regex parsing very simple and localized right here. A
11621							# disadvantage is that in some extremely rare instances we will have to go
11622							# back and split a pre-token.
11623
11624							# Return parameters:
11625	7175					10432	my @tokens = (); # array of the tokens themselves
11626	7175					12657	my @token_map = (0); # string position of start of each token
11627	7175					9194	my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
11628
11629	7175	100				11981	if ( !$max_tokens_wanted ) { $max_tokens_wanted = -1 }
	6862					8409
11630
11631	7175					12727	while ( $max_tokens_wanted-- ) {
11632
11633	96069	100				200751	if (
11634							$str =~ m{
11635							\G(
11636							(\s+) # type 'b' = whitespace - this must come before \W
11637							\| (\W) # or type=char = single-character, non-whitespace punct
11638							\| (\d+) # or type 'd' = sequence of digits - must come before \w
11639							\| (\w+) # or type 'w' = words not starting with a digit
11640							)
11641							}gcx
11642							)
11643							{
11644	89052					136248	push @tokens, $1;
11645	89052	100				177240	push @type,
		100
		100
11646							defined($2) ? 'b' : defined($3) ? $1 : defined($4) ? 'd' : 'w';
11647	89052					137013	push @token_map, pos($str);
11648							}
11649
11650							# that's all..
11651							else {
11652	7017					38716	return ( \@tokens, \@token_map, \@type );
11653							}
11654							} ## end while ( $max_tokens_wanted...)
11655
11656	158					617	return ( \@tokens, \@token_map, \@type );
11657							} ## end sub pre_tokenize
11658
11659							sub show_tokens {
11660
11661							# This is an uncalled debug routine, saved for reference
11662	0			0	0		my ( $rtokens, $rtoken_map ) = @_;
11663	0						my $num = scalar( @{$rtokens} );
	0
11664
11665	0						foreach my $i ( 0 .. $num - 1 ) {
11666	0						my $len = length( $rtokens->[$i] );
11667	0						print {*STDOUT} "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n";
	0
11668							}
11669	0						return;
11670							} ## end sub show_tokens
11671
11672							sub dump_token_types {
11673	0			0	0		my ( $class, $fh ) = @_;
11674
11675							# This should be the latest list of token types in use
11676							# adding NEW_TOKENS: add a comment here
11677	0						$fh->print(<<'END_OF_LIST');
11678
11679							Here is a list of the token types currently used for lines of type 'CODE'.
11680							For the following tokens, the "type" of a token is just the token itself.
11681
11682							.. :: << >> ** && .. \|\| // -> => += -= .= %= &= \|= ^= *= <>
11683							( ) <= >= == =~ !~ != ++ -- /= x=
11684							... **= <<= >>= &&= \|\|= //= <=>
11685							, + - / * \| % ! x ~ = \ ? : . < > ^ & ^^
11686
11687							The following additional token types are defined:
11688
11689							type meaning
11690							b blank (white space)
11691							{ indent: opening structural curly brace or square bracket or paren
11692							(code block, anonymous hash reference, or anonymous array reference)
11693							} outdent: right structural curly brace or square bracket or paren
11694							[ left non-structural square bracket (enclosing an array index)
11695							] right non-structural square bracket
11696							( left non-structural paren (all but a list right of an =)
11697							) right non-structural paren
11698							L left non-structural curly brace (enclosing a key)
11699							R right non-structural curly brace
11700							; terminal semicolon
11701							f indicates a semicolon in a "for" statement
11702							h here_doc operator <<
11703							# a comment
11704							Q indicates a quote or pattern
11705							q indicates a qw quote block
11706							k a perl keyword
11707							C user-defined constant or constant function (with void prototype = ())
11708							U user-defined function taking parameters
11709							G user-defined function taking block parameter (like grep/map/eval)
11710							S sub definition (reported as type 'i' in older versions)
11711							P package definition (reported as type 'i' in older versions)
11712							t type indicator such as %,$,@,*,&,sub
11713							w bare word (perhaps a subroutine call)
11714							i identifier of some type (with leading %, $, @, *, &, sub, -> )
11715							n a number
11716							v a v-string
11717							F a file test operator (like -e)
11718							Y File handle
11719							Z identifier in indirect object slot: may be file handle, object
11720							J LABEL: code block label
11721							j LABEL after next, last, redo, goto
11722							p unary +
11723							m unary -
11724							pp pre-increment operator ++
11725							mm pre-decrement operator --
11726							A : used as attribute separator
11727
11728							Here are the '_line_type' codes used internally:
11729							SYSTEM - system-specific code before hash-bang line
11730							CODE - line of perl code (including comments)
11731							POD_START - line starting pod, such as '=head'
11732							POD - pod documentation text
11733							POD_END - last line of pod section, '=cut'
11734							HERE - text of here-document
11735							HERE_END - last line of here-doc (target word)
11736							FORMAT - format section
11737							FORMAT_END - last line of format section, '.'
11738							SKIP - code skipping section
11739							SKIP_END - last line of code skipping section, '#>>V'
11740							DATA_START - __DATA__ line
11741							DATA - unidentified text following __DATA__
11742							END_START - __END__ line
11743							END - unidentified text following __END__
11744							ERROR - we are in big trouble, probably not a perl script
11745							END_OF_LIST
11746
11747	0						return;
11748							} ## end sub dump_token_types
11749
11750							#------------------
11751							# About Token Types
11752							#------------------
11753
11754							# The array "valid_token_types" in the BEGIN section has an up-to-date list
11755							# of token types. Sub 'dump_token_types' should be kept up to date with
11756							# token types.
11757
11758							# Ideally, tokens are the smallest pieces of text
11759							# such that a newline may be inserted between any pair of tokens without
11760							# changing or invalidating the program. This version comes close to this,
11761							# although there are necessarily a few exceptions which must be caught by
11762							# the formatter. Many of these involve the treatment of bare words.
11763							#
11764							# To simplify things, token types are either a single character, or they
11765							# are identical to the tokens themselves.
11766							#
11767							# As a debugging aid, the -D flag creates a file containing a side-by-side
11768							# comparison of the input string and its tokenization for each line of a file.
11769							# This is an invaluable debugging aid.
11770							#
11771							# In addition to tokens, and some associated quantities, the tokenizer
11772							# also returns flags indication any special line types. These include
11773							# quotes, here_docs, formats.
11774							#
11775							#------------------
11776							# Adding NEW_TOKENS
11777							#------------------
11778							#
11779							# Here are some notes on the minimal steps. I wrote these notes while
11780							# adding the 'v' token type for v-strings, which are things like version
11781							# numbers 5.6.0, and ip addresses, and will use that as an example. ( You
11782							# can use your editor to search for the string "NEW_TOKENS" to find the
11783							# appropriate sections to change):
11784
11785							# *. For another example, search for the smartmatch operator '~~'
11786							# with your editor to see where updates were made for it.
11787
11788							# *. For another example, search for the string 'c250', which shows
11789							# locations where changes for new types 'P' and 'S' were made.
11790
11791							# *. Think of a new, unused character for the token type, and add to
11792							# the array @valid_token_types in the BEGIN section of this package.
11793							# For example, I used 'v' for v-strings.
11794							#
11795							# *. Implement coding to recognize the $type of the token in this routine.
11796							# This is the hardest part, and is best done by imitating or modifying
11797							# some of the existing coding. For example, to recognize v-strings, I
11798							# patched 'sub scan_bare_identifier' to recognize v-strings beginning with
11799							# 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
11800							#
11801							# *. Update sub operator_expected. This update is critically important but
11802							# the coding is trivial. Look at the comments in that routine for help.
11803							# For v-strings, which should behave like numbers, I just added 'v' to the
11804							# regex used to handle numbers and strings (types 'n' and 'Q').
11805							#
11806							# *. Implement a 'bond strength' rule in sub set_bond_strengths in
11807							# Perl::Tidy::Formatter for breaking lines around this token type. You can
11808							# skip this step and take the default at first, then adjust later to get
11809							# desired results. For adding type 'v', I looked at sub bond_strength and
11810							# saw that number type 'n' was using default strengths, so I didn't do
11811							# anything. I may tune it up someday if I don't like the way line
11812							# breaks with v-strings look.
11813							#
11814							# *. Implement a 'whitespace' rule in sub set_whitespace_flags in
11815							# Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
11816							# and saw that type 'n' used spaces on both sides, so I just added 'v'
11817							# to the array @spaces_both_sides.
11818							#
11819							# *. Update HtmlWriter package so that users can colorize the token as
11820							# desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
11821							# that package. For v-strings, I initially chose to use a default color
11822							# equal to the default for numbers, but it might be nice to change that
11823							# eventually.
11824							#
11825							# *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
11826							#
11827							# *. Run lots and lots of debug tests. Start with special files designed
11828							# to test the new token type. Run with the -D flag to create a .DEBUG
11829							# file which shows the tokenization. When these work ok, test as many old
11830							# scripts as possible. Start with all of the '.t' files in the 'test'
11831							# directory of the distribution file. Compare .tdy output with previous
11832							# version and updated version to see the differences. Then include as
11833							# many more files as possible. My own technique has been to collect a huge
11834							# number of perl scripts (thousands!) into one directory and run perltidy
11835							# *, then run diff between the output of the previous version and the
11836							# current version.
11837
11838							BEGIN {
11839
11840							# These names are used in error messages
11841	44			44		358	@opening_brace_names = qw# '{' '[' '(' '?' #;
11842	44					159	@closing_brace_names = qw# '}' ']' ')' ':' #;
11843
11844	44					114	my @q;
11845
11846	44					278	my @digraphs = qw#
11847							.. :: << >> ** && \|\| // -> => += -= .= %= &= \|= ^= *= <>
11848							<= >= == =~ !~ != ++ -- /= x= ~~ ~. \|. &. ^. ^^
11849							#;
11850	44					1201	$is_digraph{$_} = 1 for @digraphs;
11851
11852	44					179	@q = qw(
11853							. : < > * & \| / - = + - % ^ ! x ~
11854							);
11855	44					409	$can_start_digraph{$_} = 1 for @q;
11856
11857	44					200	my @trigraphs =
11858							qw( ... **= <<= >>= &&= \|\|= //= <=> !~~ &.= \|.= ^.= <<~ ^^= );
11859	44					532	$is_trigraph{$_} = 1 for @trigraphs;
11860
11861	44					192	my @tetragraphs = qw( <<>> );
11862	44					161	$is_tetragraph{$_} = 1 for @tetragraphs;
11863
11864							# make a hash of all valid token types for self-checking the tokenizer
11865							# (adding NEW_TOKENS : select a new character and add to this list)
11866							# fix for c250: added new token type 'P' and 'S'
11867	44					424	my @valid_token_types = qw#
11868							A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P S
11869							{ } ( ) [ ] ; + - / * \| % ! x ~ = ? : . < > ^ &
11870							#;
11871	44					164	push @valid_token_types, BACKSLASH;
11872	44					331	push( @valid_token_types, @digraphs );
11873	44					180	push( @valid_token_types, @trigraphs );
11874	44					95	push( @valid_token_types, @tetragraphs );
11875	44					96	push( @valid_token_types, ( '#', COMMA, 'CORE::' ) );
11876	44					2363	$is_valid_token_type{$_} = 1 for @valid_token_types;
11877
11878							# a list of file test letters, as in -e (Table 3-4 of 'camel 3')
11879	44					323	my @file_test_operators =
11880							qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z );
11881	44					631	$is_file_test_operator{$_} = 1 for @file_test_operators;
11882
11883							# these functions have prototypes of the form (&), so when they are
11884							# followed by a block, that block MAY BE followed by an operator.
11885							# Smartmatch operator ~~ may be followed by anonymous hash or array ref
11886	44					149	@q = qw( do eval );
11887	44					158	$is_block_operator{$_} = 1 for @q;
11888
11889							# these functions allow an identifier in the indirect object slot
11890	44					135	@q = qw( print printf sort exec system say );
11891	44					281	$is_indirect_object_taker{$_} = 1 for @q;
11892
11893							# Keywords which definitely produce error if an OPERATOR is expected
11894	44					113	@q = qw( my our state local use require );
11895	44					282	$is_TERM_keyword{$_} = 1 for @q;
11896
11897							# Note: 'field' will be added by sub check_options if --use-feature=class
11898	44					100	@q = qw( my our state );
11899	44					140	$is_my_our_state{$_} = 1 for @q;
11900
11901							# These tokens may precede a code block
11902							# patched for SWITCH/CASE/CATCH. Actually these could be removed
11903							# now and we could let the extended-syntax coding handle them.
11904							# Added 'default' for Switch::Plain.
11905							# Note: 'ADJUST' will be added by sub check_options if --use-feature=class
11906	44					271	@q = qw(
11907							BEGIN END CHECK INIT AUTOLOAD DESTROY
11908							UNITCHECK continue if elsif else unless
11909							do while until eval for foreach
11910							map grep sort switch case given
11911							when default catch try finally
11912							);
11913	44					1050	$is_code_block_token{$_} = 1 for @q;
11914
11915							# These block types terminate statements and do not need a trailing
11916							# semicolon; patched for SWITCH/CASE/; This may be updated in sub
11917							# check_options.
11918	44					266	@q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
11919							if elsif else unless while until for foreach switch case given when );
11920	44					556	$is_zero_continuation_block_type{$_} = 1 for @q;
11921
11922							# Note: this hash was formerly named '%is_not_zero_continuation_block_type'
11923							# to contrast it with the block types in '%is_zero_continuation_block_type'
11924							# Note: added 'sub' for anonymous sub blocks (c443)
11925	44					144	@q = qw( sort map grep eval do sub );
11926	44					178	$is_sort_map_grep_eval_do_sub{$_} = 1 for @q;
11927
11928	44					140	@q = qw( sort map grep );
11929	44					112	$is_sort_map_grep{$_} = 1 for @q;
11930
11931	44					101	%is_grep_alias = ();
11932
11933							# I'll build the list of keywords incrementally
11934	44					67	my @Keywords = ();
11935
11936							# keywords and tokens after which a value or pattern is expected,
11937							# but not an operator. In other words, these should consume terms
11938							# to their right, or at least they are not expected to be followed
11939							# immediately by operators.
11940	44					1228	my @value_requestor = qw(
11941							AUTOLOAD BEGIN CHECK DESTROY
11942							END EQ GE GT
11943							INIT LE LT NE
11944							UNITCHECK abs accept alarm
11945							and atan2 bind binmode
11946							bless break caller chdir
11947							chmod chomp chop chown
11948							chr chroot close closedir
11949							cmp connect continue cos
11950							crypt dbmclose dbmopen defined
11951							delete die dump each
11952							else elsif eof eq
11953							evalbytes exec exists exit
11954							exp fc fcntl fileno
11955							flock for foreach formline
11956							ge getc getgrgid getgrnam
11957							gethostbyaddr gethostbyname getnetbyaddr getnetbyname
11958							getpeername getpgrp getpriority getprotobyname
11959							getprotobynumber getpwnam getpwuid getservbyname
11960							getservbyport getsockname getsockopt glob
11961							gmtime goto grep gt
11962							hex if index int
11963							ioctl join keys kill
11964							last lc lcfirst le
11965							length link listen local
11966							localtime lock log lstat
11967							lt map mkdir msgctl
11968							msgget msgrcv msgsnd my
11969							ne next no not
11970							oct open opendir or
11971							ord our pack pipe
11972							pop pos print printf
11973							prototype push quotemeta rand
11974							read readdir readlink readline
11975							readpipe recv redo ref
11976							rename require reset return
11977							reverse rewinddir rindex rmdir
11978							scalar seek seekdir select
11979							semctl semget semop send
11980							sethostent setnetent setpgrp setpriority
11981							setprotoent setservent setsockopt shift
11982							shmctl shmget shmread shmwrite
11983							shutdown sin sleep socket
11984							socketpair sort splice split
11985							sprintf sqrt srand stat
11986							state study substr symlink
11987							syscall sysopen sysread sysseek
11988							system syswrite tell telldir
11989							tie tied truncate uc
11990							ucfirst umask undef unless
11991							unlink unpack unshift untie
11992							until use utime values
11993							vec waitpid warn while
11994							write xor case catch
11995							default err given isa
11996							say switch when
11997							);
11998
11999							# Note: 'ADJUST', 'field' are added by sub check_options
12000							# if --use-feature=class
12001
12002							# patched above for SWITCH/CASE given/when err say
12003							# 'err' is a fairly safe addition.
12004							# Added 'default' for Switch::Plain. Note that we could also have
12005							# a separate set of keywords to include if we see 'use Switch::Plain'
12006	44					1429	push( @Keywords, @value_requestor );
12007
12008							# These are treated the same but are not keywords:
12009	44					128	my @extra_vr = qw( constant vars );
12010	44					215	push( @value_requestor, @extra_vr );
12011
12012	44					6594	$expecting_term_token{$_} = 1 for @value_requestor;
12013
12014							# this list contains keywords which do not look for arguments,
12015							# so that they might be followed by an operator, or at least
12016							# not a term.
12017	44					250	my @operator_requestor = qw(
12018							endgrent endhostent endnetent endprotoent
12019							endpwent endservent fork getgrent
12020							gethostent getlogin getnetent getppid
12021							getprotoent getpwent getservent setgrent
12022							setpwent time times wait
12023							wantarray
12024							);
12025
12026	44					135	push( @Keywords, @operator_requestor );
12027
12028							# These are treated the same but are not considered keywords:
12029	44					91	my @extra_or = qw( STDERR STDIN STDOUT );
12030
12031	44					92	push( @operator_requestor, @extra_or );
12032
12033	44					857	$expecting_operator_token{$_} = 1 for @operator_requestor;
12034
12035							# these token TYPES expect trailing operator but not a term
12036							# note: ++ and -- are post-increment and decrement, 'C' = constant
12037	44					105	my @operator_requestor_types = qw( ++ -- C <> q );
12038
12039							# NOTE: This hash is available but not currently used
12040	44					195	$expecting_operator_types{$_} = 1 for @operator_requestor_types;
12041
12042							# these token TYPES consume values (terms)
12043							# note: pp and mm are pre-increment and decrement
12044							# f=semicolon in for, F=file test operator
12045	44					904	my @value_requestor_type = qw#
12046							L { ( [ ~ !~ =~ ; . .. ... A : && ! \|\| // = + - x
12047							*= += -= .= /= = %= x= &= \|= ^= <<= >>= &&= \|\|= //=
12048							<= >= == != => > < % * / ? & \| ** <=> ~~ !~~ <<~
12049							f F pp mm Y p m U J G j >> << ^ t
12050							~. ^. \|. &. ^.= \|.= &.= ^^
12051							#;
12052	44					163	push @value_requestor_type, BACKSLASH;
12053	44					83	push @value_requestor_type, COMMA;
12054
12055							# NOTE: This hash is available but not currently used
12056	44					1410	$expecting_term_types{$_} = 1 for @value_requestor_type;
12057
12058							# Note: the following valid token types are not assigned here to
12059							# hashes requesting to be followed by values or terms, but are
12060							# instead currently hard-coded into sub operator_expected:
12061							# ) -> :: Q R Z ] b h i k n v w } #
12062
12063							# A syntax error will occur if following operators are not followed by a
12064							# TERM (with an exception made for tokens in sub signatures).
12065							# NOTE: this list does not include unary operator '!'
12066
12067							# Note the following omissions from the syntax checking operators below
12068							# 'U' = user sub, depends on prototype
12069							# 'F' = file test works on $_ if no following term
12070							# 'Y' = indirect object, too risky to check syntax
12071
12072	44					443	my @binary_ops = qw#
12073							!~ =~ . .. : && \|\| // = + - x
12074							*= += -= .= /= = %= x= &= \|= ^= <<= >>= &&= \|\|= //=
12075							<= >= == != > < % * / ? & \| ** <=> ~~ !~~ <<~
12076							>> << ^
12077							^. \|. &. ^.= \|.= &.= ^^
12078							#;
12079	44					772	$is_binary_operator_type{$_} = 1 for @binary_ops;
12080
12081							# Note: omitting unary file test type 'F' here because it assumes $_
12082	44					129	my @unary_ops = qw# ! ~ ~. m p mm pp #;
12083	44					477	$is_binary_operator_type{$_} = 1 for @binary_ops;
12084	44					89	push @unary_ops, BACKSLASH;
12085	44					1165	$is_binary_or_unary_operator_type{$_} = 1 for ( @binary_ops, @unary_ops );
12086
12087	44					207	my @binary_keywords = qw( and or err eq ne cmp );
12088	44					192	$is_binary_keyword{$_} = 1 for @binary_keywords;
12089
12090	44					189	$is_binary_or_unary_keyword{$_} = 1 for ( @binary_keywords, 'not' );
12091
12092							# A syntax error occurs if a binary operator follows any of these types:
12093							# NOTE: a ',' cannot be included because of parenless calls (c015).
12094							# For example this is valid: print "hello\n", && print "goodbye\n";
12095							# NOTE: the 'not' keyword could be added to a corresponding _keyword list
12096							# NOTE: label 'j' omitted, for example: -f $file ? redo BLOCK : last BLOCK;
12097							# NOTE: Removed 'A': fixes git162.t
12098	44					185	@q = qw< L { [ ( ; f J t >;
12099	44					348	$is_not_a_TERM_producer_type{$_} = 1 for ( @q, @unary_ops );
12100
12101	44					182	@q = qw( q qq qx qr s y tr m );
12102	44					268	$is_q_qq_qx_qr_s_y_tr_m{$_} = 1 for @q;
12103
12104							# Note added 'qw' here
12105	44					144	@q = qw( q qq qw qx qr s y tr m );
12106	44					233	$is_q_qq_qw_qx_qr_s_y_tr_m{$_} = 1 for @q;
12107
12108							# Quote modifiers:
12109							# original ref: camel 3 p 147,
12110							# but perl may accept undocumented flags
12111							# perl 5.10 adds 'p' (preserve)
12112							# Perl version 5.22 added 'n'
12113							# From http://perldoc.perl.org/perlop.html we have
12114							# /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
12115							# s/PATTERN/REPLACEMENT/msixpodualngcer
12116							# y/SEARCHLIST/REPLACEMENTLIST/cdsr
12117							# tr/SEARCHLIST/REPLACEMENTLIST/cdsr
12118							# qr/STRING/msixpodualn
12119	44					242	%quote_modifiers = (
12120							's' => '[msixpodualngcer]',
12121							'y' => '[cdsr]',
12122							'tr' => '[cdsr]',
12123							'm' => '[msixpodualngc]',
12124							'qr' => '[msixpodualn]',
12125							'q' => EMPTY_STRING,
12126							'qq' => EMPTY_STRING,
12127							'qw' => EMPTY_STRING,
12128							'qx' => EMPTY_STRING,
12129							);
12130
12131							# Note: 'class' will be added by sub check_options if -use-feature=class
12132	44					133	@q = qw( package );
12133	44					157	$is_package{$_} = 1 for @q;
12134
12135	44					100	@q = qw( if elsif unless );
12136	44					118	$is_if_elsif_unless{$_} = 1 for @q;
12137
12138	44					107	@q = qw( ; t );
12139	44					122	$is_semicolon_or_t{$_} = 1 for @q;
12140
12141	44					116	@q = qw( if elsif unless case when );
12142	44					147	$is_if_elsif_unless_case_when{$_} = 1 for @q;
12143
12144							# Hash of other possible line endings which may occur.
12145							# Keep these coordinated with the regex where this is used.
12146							# Note: chr(13) = chr(015)="\r".
12147	44					90	@q = ( chr(13), chr(29), chr(26) );
12148	44					167	$other_line_endings{$_} = 1 for @q;
12149
12150							# These keywords are handled specially in the tokenizer code:
12151	44					176	my @special_keywords =
12152							qw( do eval format m package q qq qr qw qx s sub tr y );
12153	44					365	push( @Keywords, @special_keywords );
12154
12155							# Keywords after which list formatting may be used
12156							# WARNING: do not include \|map\|grep\|eval or perl may die on
12157							# syntax errors (map1.t).
12158	44					527	my @keyword_taking_list = qw(
12159							and chmod chomp chop
12160							chown dbmopen die elsif
12161							exec fcntl for foreach
12162							formline getsockopt given if
12163							index ioctl join kill
12164							local msgctl msgrcv msgsnd
12165							my open or our
12166							pack print printf push
12167							read readpipe recv return
12168							reverse rindex seek select
12169							semctl semget send setpriority
12170							setsockopt shmctl shmget shmread
12171							shmwrite socket socketpair sort
12172							splice split sprintf state
12173							substr syscall sysopen sysread
12174							sysseek system syswrite tie
12175							unless unlink unpack unshift
12176							until vec warn when
12177							while
12178							);
12179
12180							# NOTE: This hash is available but not currently used
12181	44					1306	$is_keyword_taking_list{$_} = 1 for @keyword_taking_list;
12182
12183							# perl functions which may be unary operators.
12184
12185							# This list is used to decide if a pattern delimited by slashes, /pattern/,
12186							# can follow one of these keywords.
12187	44					140	@q = qw( chomp eof eval fc lc pop shift uc undef );
12188
12189	44					276	$is_keyword_rejecting_slash_as_pattern_delimiter{$_} = 1 for @q;
12190
12191							# These are keywords for which an arg may optionally be omitted. They are
12192							# currently only used to disambiguate a ? used as a ternary from one used
12193							# as a (deprecated) pattern delimiter. In the future, they might be used
12194							# to give a warning about ambiguous syntax before a /.
12195							# Note: split has been omitted (see note below).
12196	44					487	my @keywords_taking_optional_arg = qw(
12197							abs alarm caller chdir chomp chop
12198							chr chroot close cos defined die
12199							eof eval evalbytes exit exp fc
12200							getc glob gmtime hex int last
12201							lc lcfirst length localtime log lstat
12202							mkdir next oct ord pop pos
12203							print printf prototype quotemeta rand readline
12204							readlink readpipe redo ref require reset
12205							reverse rmdir say select shift sin
12206							sleep sqrt srand stat study tell
12207							uc ucfirst umask undef unlink warn
12208							write
12209							);
12210	44					1055	$is_keyword_taking_optional_arg{$_} = 1 for @keywords_taking_optional_arg;
12211
12212							# This list is used to decide if a pattern delimited by question marks,
12213							# ?pattern?, can follow one of these keywords. Note that from perl 5.22
12214							# on, a ?pattern? is not recognized, so we can be much more strict than
12215							# with a /pattern/. Note that 'split' is not in this list. In current
12216							# versions of perl a question following split must be a ternary, but
12217							# in older versions it could be a pattern. The guessing algorithm will
12218							# decide. We are combining two lists here to simplify the test.
12219	44					810	@q = ( @keywords_taking_optional_arg, @operator_requestor );
12220	44					1695	$is_keyword_rejecting_question_as_pattern_delimiter{$_} = 1 for @q;
12221
12222							# These are not used in any way yet
12223							# my @unused_keywords = qw(
12224							# __FILE__
12225							# __LINE__
12226							# __PACKAGE__
12227							# );
12228
12229							# The list of keywords was originally extracted from function 'keyword' in
12230							# perl file toke.c version 5.005.03, using this utility, plus a
12231							# little editing: (file getkwd.pl):
12232							# while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
12233							# Add 'get' prefix where necessary, then split into the above lists.
12234							# This list should be updated as necessary.
12235							# The list should not contain these special variables:
12236							# ARGV DATA ENV SIG STDERR STDIN STDOUT
12237							# __DATA__ __END__
12238
12239	44					4429	$is_keyword{$_} = 1 for @Keywords;
12240
12241	44					8067	%matching_end_token = (
12242							'{' => '}',
12243							'(' => ')',
12244							'[' => ']',
12245							'<' => '>',
12246							);
12247							} ## end BEGIN
12248
12249							} ## end package Perl::Tidy::Tokenizer
12250							1;