| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
##################################################################### |
|
2
|
|
|
|
|
|
|
# |
|
3
|
|
|
|
|
|
|
# Perl::Tidy::Tokenizer reads a source and breaks it into a stream of tokens |
|
4
|
|
|
|
|
|
|
# |
|
5
|
|
|
|
|
|
|
# Usage Outline: |
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# STEP 1: initialize or re-initialize Tokenizer with user options |
|
8
|
|
|
|
|
|
|
# Perl::Tidy::Tokenizer::check_options($rOpts); |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# STEP 2: create a tokenizer for a specific input source object |
|
11
|
|
|
|
|
|
|
# my $tokenizer = Perl::Tidy::Tokenizer->new( |
|
12
|
|
|
|
|
|
|
# source_object => $source, |
|
13
|
|
|
|
|
|
|
# ... |
|
14
|
|
|
|
|
|
|
# ); |
|
15
|
|
|
|
|
|
|
# |
|
16
|
|
|
|
|
|
|
# STEP 3: get and process each tokenized 'line' (a hash ref of token info) |
|
17
|
|
|
|
|
|
|
# while ( my $line = $tokenizer->get_line() ) { |
|
18
|
|
|
|
|
|
|
# $formatter->write_line($line); |
|
19
|
|
|
|
|
|
|
# } |
|
20
|
|
|
|
|
|
|
# |
|
21
|
|
|
|
|
|
|
# STEP 4: report errors |
|
22
|
|
|
|
|
|
|
# my $severe_error = $tokenizer->report_tokenization_errors(); |
|
23
|
|
|
|
|
|
|
# |
|
24
|
|
|
|
|
|
|
# The source object can be a STRING ref, an ARRAY ref, or an object with a |
|
25
|
|
|
|
|
|
|
# get_line() method which supplies one line (a character string) perl call. |
|
26
|
|
|
|
|
|
|
# |
|
27
|
|
|
|
|
|
|
# NOTE: This is not a real class. Only one tokenizer my be used. |
|
28
|
|
|
|
|
|
|
# |
|
29
|
|
|
|
|
|
|
######################################################################## |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
package Perl::Tidy::Tokenizer; |
|
32
|
44
|
|
|
44
|
|
262
|
use strict; |
|
|
44
|
|
|
|
|
90
|
|
|
|
44
|
|
|
|
|
1514
|
|
|
33
|
44
|
|
|
44
|
|
177
|
use warnings; |
|
|
44
|
|
|
|
|
72
|
|
|
|
44
|
|
|
|
|
1886
|
|
|
34
|
44
|
|
|
44
|
|
166
|
use English qw( -no_match_vars ); |
|
|
44
|
|
|
|
|
69
|
|
|
|
44
|
|
|
|
|
233
|
|
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
our $VERSION = '20260204'; |
|
37
|
|
|
|
|
|
|
|
|
38
|
44
|
|
|
44
|
|
13408
|
use Carp; |
|
|
44
|
|
|
|
|
83
|
|
|
|
44
|
|
|
|
|
2210
|
|
|
39
|
|
|
|
|
|
|
|
|
40
|
44
|
|
|
44
|
|
183
|
use constant DEVEL_MODE => 0; |
|
|
44
|
|
|
|
|
73
|
|
|
|
44
|
|
|
|
|
2208
|
|
|
41
|
44
|
|
|
44
|
|
171
|
use constant DEBUG_GUESS_MODE => 0; |
|
|
44
|
|
|
|
|
72
|
|
|
|
44
|
|
|
|
|
1818
|
|
|
42
|
44
|
|
|
44
|
|
218
|
use constant EMPTY_STRING => q{}; |
|
|
44
|
|
|
|
|
74
|
|
|
|
44
|
|
|
|
|
1600
|
|
|
43
|
44
|
|
|
44
|
|
172
|
use constant SPACE => q{ }; |
|
|
44
|
|
|
|
|
113
|
|
|
|
44
|
|
|
|
|
1662
|
|
|
44
|
44
|
|
|
44
|
|
168
|
use constant COMMA => q{,}; |
|
|
44
|
|
|
|
|
63
|
|
|
|
44
|
|
|
|
|
1662
|
|
|
45
|
44
|
|
|
44
|
|
177
|
use constant BACKSLASH => q{\\}; |
|
|
44
|
|
|
|
|
82
|
|
|
|
44
|
|
|
|
|
2557
|
|
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
{ #<<< A non-indenting brace to contain all lexical variables |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# List of hash keys to prevent -duk from listing them. |
|
50
|
|
|
|
|
|
|
# (note the backtick in this list) |
|
51
|
|
|
|
|
|
|
my @unique_hash_keys_uu = qw( ` RPerl _rtype_sequence _ending_in_quote ); |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# Parent sequence number of tree of containers; must be 1 |
|
54
|
44
|
|
|
44
|
|
203
|
use constant SEQ_ROOT => 1; |
|
|
44
|
|
|
|
|
94
|
|
|
|
44
|
|
|
|
|
1667
|
|
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# Defaults for guessing old indentation |
|
57
|
44
|
|
|
44
|
|
197
|
use constant INDENT_COLUMNS_DEFAULT => 4; |
|
|
44
|
|
|
|
|
86
|
|
|
|
44
|
|
|
|
|
1443
|
|
|
58
|
44
|
|
|
44
|
|
147
|
use constant TAB_SIZE_DEFAULT => 8; |
|
|
44
|
|
|
|
|
69
|
|
|
|
44
|
|
|
|
|
1409
|
|
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Decimal values of some ascii characters for quick checks |
|
61
|
44
|
|
|
44
|
|
147
|
use constant ORD_TAB => 9; |
|
|
44
|
|
|
|
|
81
|
|
|
|
44
|
|
|
|
|
1304
|
|
|
62
|
44
|
|
|
44
|
|
171
|
use constant ORD_SPACE => 32; |
|
|
44
|
|
|
|
|
63
|
|
|
|
44
|
|
|
|
|
1345
|
|
|
63
|
44
|
|
|
44
|
|
190
|
use constant ORD_PRINTABLE_MIN => 33; |
|
|
44
|
|
|
|
|
63
|
|
|
|
44
|
|
|
|
|
1830
|
|
|
64
|
44
|
|
|
44
|
|
198
|
use constant ORD_PRINTABLE_MAX => 126; |
|
|
44
|
|
|
|
|
64
|
|
|
|
44
|
|
|
|
|
7960
|
|
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# GLOBAL VARIABLES which change during tokenization: |
|
67
|
|
|
|
|
|
|
# These could also be stored in $self but it is more convenient and |
|
68
|
|
|
|
|
|
|
# efficient to make them global lexical variables. |
|
69
|
|
|
|
|
|
|
# INITIALIZER: sub prepare_for_a_new_file |
|
70
|
|
|
|
|
|
|
my ( |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
$brace_depth, |
|
73
|
|
|
|
|
|
|
$context, |
|
74
|
|
|
|
|
|
|
$current_package, |
|
75
|
|
|
|
|
|
|
$last_nonblank_block_type, |
|
76
|
|
|
|
|
|
|
$last_nonblank_token, |
|
77
|
|
|
|
|
|
|
$last_nonblank_type, |
|
78
|
|
|
|
|
|
|
$next_sequence_number, |
|
79
|
|
|
|
|
|
|
$paren_depth, |
|
80
|
|
|
|
|
|
|
$rbrace_context, |
|
81
|
|
|
|
|
|
|
$rbrace_package, |
|
82
|
|
|
|
|
|
|
$rbrace_structural_type, |
|
83
|
|
|
|
|
|
|
$rbrace_type, |
|
84
|
|
|
|
|
|
|
$rcurrent_depth, |
|
85
|
|
|
|
|
|
|
$rcurrent_sequence_number, |
|
86
|
|
|
|
|
|
|
$ris_lexical_sub, |
|
87
|
|
|
|
|
|
|
$rdepth_array, |
|
88
|
|
|
|
|
|
|
$ris_block_function, |
|
89
|
|
|
|
|
|
|
$ris_block_list_function, |
|
90
|
|
|
|
|
|
|
$ris_constant, |
|
91
|
|
|
|
|
|
|
$ris_user_function, |
|
92
|
|
|
|
|
|
|
$rnested_statement_type, |
|
93
|
|
|
|
|
|
|
$rnested_ternary_flag, |
|
94
|
|
|
|
|
|
|
$rparen_semicolon_count, |
|
95
|
|
|
|
|
|
|
$rparen_vars, |
|
96
|
|
|
|
|
|
|
$rparen_type, |
|
97
|
|
|
|
|
|
|
$rsaw_function_definition, |
|
98
|
|
|
|
|
|
|
$rsaw_use_module, |
|
99
|
|
|
|
|
|
|
$rsquare_bracket_structural_type, |
|
100
|
|
|
|
|
|
|
$rsquare_bracket_type, |
|
101
|
|
|
|
|
|
|
$rstarting_line_of_current_depth, |
|
102
|
|
|
|
|
|
|
$rtotal_depth, |
|
103
|
|
|
|
|
|
|
$ruser_function_prototype, |
|
104
|
|
|
|
|
|
|
$square_bracket_depth, |
|
105
|
|
|
|
|
|
|
$statement_type, |
|
106
|
|
|
|
|
|
|
$total_depth, |
|
107
|
|
|
|
|
|
|
); |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my ( |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# GLOBAL CONSTANTS for routines in this package, |
|
112
|
|
|
|
|
|
|
# INITIALIZER: BEGIN block. |
|
113
|
|
|
|
|
|
|
%can_start_digraph, |
|
114
|
|
|
|
|
|
|
%expecting_operator_token, |
|
115
|
|
|
|
|
|
|
%expecting_operator_types, |
|
116
|
|
|
|
|
|
|
%expecting_term_token, |
|
117
|
|
|
|
|
|
|
%expecting_term_types, |
|
118
|
|
|
|
|
|
|
%is_block_operator, |
|
119
|
|
|
|
|
|
|
%is_digraph, |
|
120
|
|
|
|
|
|
|
%is_file_test_operator, |
|
121
|
|
|
|
|
|
|
%is_if_elsif_unless, |
|
122
|
|
|
|
|
|
|
%is_if_elsif_unless_case_when, |
|
123
|
|
|
|
|
|
|
%is_indirect_object_taker, |
|
124
|
|
|
|
|
|
|
%is_keyword_rejecting_question_as_pattern_delimiter, |
|
125
|
|
|
|
|
|
|
%is_keyword_rejecting_slash_as_pattern_delimiter, |
|
126
|
|
|
|
|
|
|
%is_keyword_taking_list, |
|
127
|
|
|
|
|
|
|
%is_keyword_taking_optional_arg, |
|
128
|
|
|
|
|
|
|
%is_q_qq_qw_qx_qr_s_y_tr_m, |
|
129
|
|
|
|
|
|
|
%is_q_qq_qx_qr_s_y_tr_m, |
|
130
|
|
|
|
|
|
|
%quote_modifiers, |
|
131
|
|
|
|
|
|
|
%is_semicolon_or_t, |
|
132
|
|
|
|
|
|
|
%is_sort_map_grep, |
|
133
|
|
|
|
|
|
|
%is_sort_map_grep_eval_do_sub, |
|
134
|
|
|
|
|
|
|
%is_tetragraph, |
|
135
|
|
|
|
|
|
|
%is_trigraph, |
|
136
|
|
|
|
|
|
|
%is_valid_token_type, |
|
137
|
|
|
|
|
|
|
%other_line_endings, |
|
138
|
|
|
|
|
|
|
%is_binary_operator_type, |
|
139
|
|
|
|
|
|
|
%is_binary_keyword, |
|
140
|
|
|
|
|
|
|
%is_binary_or_unary_operator_type, |
|
141
|
|
|
|
|
|
|
%is_binary_or_unary_keyword, |
|
142
|
|
|
|
|
|
|
%is_not_a_TERM_producer_type, |
|
143
|
|
|
|
|
|
|
@closing_brace_names, |
|
144
|
|
|
|
|
|
|
@opening_brace_names, |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# GLOBAL CONSTANT hash lookup table of operator expected values |
|
147
|
|
|
|
|
|
|
# INITIALIZER: BEGIN block |
|
148
|
|
|
|
|
|
|
%op_expected_table, |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# GLOBAL VARIABLES which are constant after being configured. |
|
151
|
|
|
|
|
|
|
# INITIALIZER: BEGIN block and modified by sub check_options |
|
152
|
|
|
|
|
|
|
%is_code_block_token, |
|
153
|
|
|
|
|
|
|
%is_zero_continuation_block_type, |
|
154
|
|
|
|
|
|
|
%is_keyword, |
|
155
|
|
|
|
|
|
|
%is_TERM_keyword, |
|
156
|
|
|
|
|
|
|
%is_my_our_state, |
|
157
|
|
|
|
|
|
|
%is_package, |
|
158
|
|
|
|
|
|
|
%matching_end_token, |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# INITIALIZER: sub check_options |
|
161
|
|
|
|
|
|
|
$code_skipping_pattern_begin, |
|
162
|
|
|
|
|
|
|
$code_skipping_pattern_end, |
|
163
|
|
|
|
|
|
|
$format_skipping_pattern_begin, |
|
164
|
|
|
|
|
|
|
$format_skipping_pattern_end, |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
$rOpts_code_skipping, |
|
167
|
|
|
|
|
|
|
$rOpts_code_skipping_begin, |
|
168
|
|
|
|
|
|
|
$rOpts_format_skipping, |
|
169
|
|
|
|
|
|
|
$rOpts_format_skipping_begin, |
|
170
|
|
|
|
|
|
|
$rOpts_format_skipping_end, |
|
171
|
|
|
|
|
|
|
$rOpts_starting_indentation_level, |
|
172
|
|
|
|
|
|
|
$rOpts_indent_columns, |
|
173
|
|
|
|
|
|
|
$rOpts_look_for_hash_bang, |
|
174
|
|
|
|
|
|
|
$rOpts_look_for_autoloader, |
|
175
|
|
|
|
|
|
|
$rOpts_look_for_selfloader, |
|
176
|
|
|
|
|
|
|
$rOpts_trim_qw, |
|
177
|
|
|
|
|
|
|
$rOpts_extended_syntax, |
|
178
|
|
|
|
|
|
|
$rOpts_continuation_indentation, |
|
179
|
|
|
|
|
|
|
$rOpts_outdent_labels, |
|
180
|
|
|
|
|
|
|
$rOpts_maximum_level_errors, |
|
181
|
|
|
|
|
|
|
$rOpts_maximum_unexpected_errors, |
|
182
|
|
|
|
|
|
|
$rOpts_indent_closing_brace, |
|
183
|
|
|
|
|
|
|
$rOpts_non_indenting_braces, |
|
184
|
|
|
|
|
|
|
$rOpts_non_indenting_brace_prefix, |
|
185
|
|
|
|
|
|
|
$rOpts_whitespace_cycle, |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
$tabsize, |
|
188
|
|
|
|
|
|
|
%is_END_DATA_format_sub, |
|
189
|
|
|
|
|
|
|
%is_grep_alias, |
|
190
|
|
|
|
|
|
|
%is_sub, |
|
191
|
|
|
|
|
|
|
$guess_if_method, |
|
192
|
|
|
|
|
|
|
); |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# possible values of operator_expected() |
|
195
|
44
|
|
|
44
|
|
183
|
use constant TERM => -1; |
|
|
44
|
|
|
|
|
84
|
|
|
|
44
|
|
|
|
|
1656
|
|
|
196
|
44
|
|
|
44
|
|
155
|
use constant UNKNOWN => 0; |
|
|
44
|
|
|
|
|
93
|
|
|
|
44
|
|
|
|
|
1378
|
|
|
197
|
44
|
|
|
44
|
|
190
|
use constant OPERATOR => 1; |
|
|
44
|
|
|
|
|
78
|
|
|
|
44
|
|
|
|
|
1556
|
|
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# possible values of context |
|
200
|
44
|
|
|
44
|
|
165
|
use constant SCALAR_CONTEXT => -1; |
|
|
44
|
|
|
|
|
55
|
|
|
|
44
|
|
|
|
|
1420
|
|
|
201
|
44
|
|
|
44
|
|
182
|
use constant UNKNOWN_CONTEXT => 0; |
|
|
44
|
|
|
|
|
72
|
|
|
|
44
|
|
|
|
|
1420
|
|
|
202
|
44
|
|
|
44
|
|
169
|
use constant LIST_CONTEXT => 1; |
|
|
44
|
|
|
|
|
83
|
|
|
|
44
|
|
|
|
|
1505
|
|
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# Maximum number of little messages; probably need not be changed. |
|
205
|
44
|
|
|
44
|
|
159
|
use constant MAX_NAG_MESSAGES => 6; |
|
|
44
|
|
|
|
|
63
|
|
|
|
44
|
|
|
|
|
7876
|
|
|
206
|
|
|
|
|
|
|
|
|
207
|
0
|
|
|
|
|
0
|
BEGIN { |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# Array index names for $self. |
|
210
|
|
|
|
|
|
|
# Do not combine with other BEGIN blocks (c101). |
|
211
|
44
|
|
|
44
|
|
299169
|
my $i = 0; |
|
212
|
|
|
|
|
|
|
use constant { |
|
213
|
44
|
|
|
|
|
13664
|
_rhere_target_list_ => $i++, |
|
214
|
|
|
|
|
|
|
_in_here_doc_ => $i++, |
|
215
|
|
|
|
|
|
|
_here_doc_target_ => $i++, |
|
216
|
|
|
|
|
|
|
_here_quote_character_ => $i++, |
|
217
|
|
|
|
|
|
|
_in_data_ => $i++, |
|
218
|
|
|
|
|
|
|
_in_end_ => $i++, |
|
219
|
|
|
|
|
|
|
_in_format_ => $i++, |
|
220
|
|
|
|
|
|
|
_in_error_ => $i++, |
|
221
|
|
|
|
|
|
|
_do_not_format_ => $i++, |
|
222
|
|
|
|
|
|
|
_warning_count_ => $i++, |
|
223
|
|
|
|
|
|
|
_html_tag_count_ => $i++, |
|
224
|
|
|
|
|
|
|
_in_pod_ => $i++, |
|
225
|
|
|
|
|
|
|
_in_code_skipping_ => $i++, |
|
226
|
|
|
|
|
|
|
_in_format_skipping_ => $i++, |
|
227
|
|
|
|
|
|
|
_rformat_skipping_list_ => $i++, |
|
228
|
|
|
|
|
|
|
_in_attribute_list_ => $i++, |
|
229
|
|
|
|
|
|
|
_in_quote_ => $i++, |
|
230
|
|
|
|
|
|
|
_quote_target_ => $i++, |
|
231
|
|
|
|
|
|
|
_line_start_quote_ => $i++, |
|
232
|
|
|
|
|
|
|
_starting_level_ => $i++, |
|
233
|
|
|
|
|
|
|
_know_starting_level_ => $i++, |
|
234
|
|
|
|
|
|
|
_last_line_number_ => $i++, |
|
235
|
|
|
|
|
|
|
_saw_perl_dash_P_ => $i++, |
|
236
|
|
|
|
|
|
|
_saw_perl_dash_w_ => $i++, |
|
237
|
|
|
|
|
|
|
_saw_use_strict_ => $i++, |
|
238
|
|
|
|
|
|
|
_saw_brace_error_ => $i++, |
|
239
|
|
|
|
|
|
|
_hit_bug_ => $i++, |
|
240
|
|
|
|
|
|
|
_look_for_autoloader_ => $i++, |
|
241
|
|
|
|
|
|
|
_look_for_selfloader_ => $i++, |
|
242
|
|
|
|
|
|
|
_saw_autoloader_ => $i++, |
|
243
|
|
|
|
|
|
|
_saw_selfloader_ => $i++, |
|
244
|
|
|
|
|
|
|
_saw_hash_bang_ => $i++, |
|
245
|
|
|
|
|
|
|
_saw_end_ => $i++, |
|
246
|
|
|
|
|
|
|
_saw_data_ => $i++, |
|
247
|
|
|
|
|
|
|
_saw_negative_indentation_ => $i++, |
|
248
|
|
|
|
|
|
|
_started_tokenizing_ => $i++, |
|
249
|
|
|
|
|
|
|
_debugger_object_ => $i++, |
|
250
|
|
|
|
|
|
|
_diagnostics_object_ => $i++, |
|
251
|
|
|
|
|
|
|
_logger_object_ => $i++, |
|
252
|
|
|
|
|
|
|
_save_logfile_ => $i++, |
|
253
|
|
|
|
|
|
|
_unexpected_error_count_ => $i++, |
|
254
|
|
|
|
|
|
|
_started_looking_for_here_target_at_ => $i++, |
|
255
|
|
|
|
|
|
|
_nearly_matched_here_target_at_ => $i++, |
|
256
|
|
|
|
|
|
|
_line_of_text_ => $i++, |
|
257
|
|
|
|
|
|
|
_rlower_case_labels_at_ => $i++, |
|
258
|
|
|
|
|
|
|
_maximum_level_ => $i++, |
|
259
|
|
|
|
|
|
|
_true_brace_error_count_ => $i++, |
|
260
|
|
|
|
|
|
|
_rOpts_ => $i++, |
|
261
|
|
|
|
|
|
|
_rinput_lines_ => $i++, |
|
262
|
|
|
|
|
|
|
_input_line_index_next_ => $i++, |
|
263
|
|
|
|
|
|
|
_rtrimmed_input_lines_ => $i++, |
|
264
|
|
|
|
|
|
|
_rclosing_brace_indentation_hash_ => $i++, |
|
265
|
|
|
|
|
|
|
_show_indentation_table_ => $i++, |
|
266
|
|
|
|
|
|
|
_rnon_indenting_brace_stack_ => $i++, |
|
267
|
|
|
|
|
|
|
_rbareword_info_ => $i++, |
|
268
|
44
|
|
|
44
|
|
273
|
}; |
|
|
44
|
|
|
|
|
99
|
|
|
269
|
|
|
|
|
|
|
} ## end BEGIN |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
{ ## closure for subs to count instances |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
# methods to count instances |
|
274
|
|
|
|
|
|
|
my $_count = 0; |
|
275
|
0
|
|
|
0
|
0
|
0
|
sub get_count { return $_count; } |
|
276
|
649
|
|
|
649
|
|
1924
|
sub _increment_count { return ++$_count } |
|
277
|
649
|
|
|
649
|
|
1059
|
sub _decrement_count { return --$_count } |
|
278
|
|
|
|
|
|
|
} |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub DESTROY { |
|
281
|
649
|
|
|
649
|
|
1250
|
my $self = shift; |
|
282
|
649
|
|
|
|
|
1996
|
_decrement_count(); |
|
283
|
649
|
|
|
|
|
15288
|
return; |
|
284
|
|
|
|
|
|
|
} |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub AUTOLOAD { |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
# Catch any undefined sub calls so that we are sure to get |
|
289
|
|
|
|
|
|
|
# some diagnostic information. This sub should never be called |
|
290
|
|
|
|
|
|
|
# except for a programming error. |
|
291
|
0
|
|
|
0
|
|
0
|
our $AUTOLOAD; |
|
292
|
0
|
0
|
|
|
|
0
|
return if ( $AUTOLOAD =~ /\bDESTROY$/ ); |
|
293
|
0
|
|
|
|
|
0
|
my ( $pkg, $fname, $lno ) = caller(); |
|
294
|
0
|
|
|
|
|
0
|
my $my_package = __PACKAGE__; |
|
295
|
0
|
|
|
|
|
0
|
print {*STDERR} <<EOM; |
|
|
0
|
|
|
|
|
0
|
|
|
296
|
|
|
|
|
|
|
====================================================================== |
|
297
|
|
|
|
|
|
|
Error detected in package '$my_package', version $VERSION |
|
298
|
|
|
|
|
|
|
Received unexpected AUTOLOAD call for sub '$AUTOLOAD' |
|
299
|
|
|
|
|
|
|
Called from package: '$pkg' |
|
300
|
|
|
|
|
|
|
Called from File '$fname' at line '$lno' |
|
301
|
|
|
|
|
|
|
This error is probably due to a recent programming change |
|
302
|
|
|
|
|
|
|
====================================================================== |
|
303
|
|
|
|
|
|
|
EOM |
|
304
|
0
|
|
|
|
|
0
|
exit 1; |
|
305
|
|
|
|
|
|
|
} ## end sub AUTOLOAD |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
sub Die { |
|
308
|
0
|
|
|
0
|
0
|
0
|
my ($msg) = @_; |
|
309
|
0
|
|
|
|
|
0
|
Perl::Tidy::Die($msg); |
|
310
|
0
|
|
|
|
|
0
|
croak "unexpected return from Perl::Tidy::Die"; |
|
311
|
|
|
|
|
|
|
} |
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
sub Warn { |
|
314
|
0
|
|
|
0
|
0
|
0
|
my ($msg) = @_; |
|
315
|
0
|
|
|
|
|
0
|
Perl::Tidy::Warn($msg); |
|
316
|
0
|
|
|
|
|
0
|
return; |
|
317
|
|
|
|
|
|
|
} |
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
sub Fault { |
|
320
|
0
|
|
|
0
|
0
|
0
|
my ($msg) = @_; |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
# This routine is called for errors that really should not occur |
|
323
|
|
|
|
|
|
|
# except if there has been a bug introduced by a recent program change. |
|
324
|
|
|
|
|
|
|
# Please add comments at calls to Fault to explain why the call |
|
325
|
|
|
|
|
|
|
# should not occur, and where to look to fix it. |
|
326
|
0
|
|
|
|
|
0
|
my ( $package0_uu, $filename0_uu, $line0, $subroutine0_uu ) = caller(0); |
|
327
|
0
|
|
|
|
|
0
|
my ( $package1_uu, $filename1, $line1, $subroutine1 ) = caller(1); |
|
328
|
0
|
|
|
|
|
0
|
my ( $package2_uu, $filename2_uu, $line2_uu, $subroutine2 ) = caller(2); |
|
329
|
0
|
|
|
|
|
0
|
my $pkg = __PACKAGE__; |
|
330
|
0
|
|
|
|
|
0
|
my $input_stream_name = Perl::Tidy::get_input_stream_name(); |
|
331
|
|
|
|
|
|
|
|
|
332
|
0
|
|
|
|
|
0
|
Die(<<EOM); |
|
333
|
|
|
|
|
|
|
============================================================================== |
|
334
|
|
|
|
|
|
|
While operating on input stream with name: '$input_stream_name' |
|
335
|
|
|
|
|
|
|
A fault was detected at line $line0 of sub '$subroutine1' |
|
336
|
|
|
|
|
|
|
in file '$filename1' |
|
337
|
|
|
|
|
|
|
which was called from line $line1 of sub '$subroutine2' |
|
338
|
|
|
|
|
|
|
Message: '$msg' |
|
339
|
|
|
|
|
|
|
This is probably an error introduced by a recent programming change. |
|
340
|
|
|
|
|
|
|
$pkg reports VERSION='$VERSION'. |
|
341
|
|
|
|
|
|
|
============================================================================== |
|
342
|
|
|
|
|
|
|
EOM |
|
343
|
|
|
|
|
|
|
|
|
344
|
0
|
|
|
|
|
0
|
croak "unexpected return from sub Die"; |
|
345
|
|
|
|
|
|
|
} ## end sub Fault |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
sub bad_pattern { |
|
348
|
2588
|
|
|
2588
|
0
|
3758
|
my ($pattern) = @_; |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
# Return true if a regex pattern has an error |
|
351
|
|
|
|
|
|
|
# Note: Formatter.pm also has a copy of this |
|
352
|
2588
|
|
|
|
|
3217
|
my $regex_uu = eval { qr/$pattern/ }; |
|
|
2588
|
|
|
|
|
85460
|
|
|
353
|
2588
|
|
|
|
|
8153
|
return $EVAL_ERROR; |
|
354
|
|
|
|
|
|
|
} ## end sub bad_pattern |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub make_skipping_pattern { |
|
357
|
2588
|
|
|
2588
|
0
|
4501
|
my ( $rOpts, $opt_name, $default ) = @_; |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# Make regex patterns for the format-skipping and code-skipping options |
|
360
|
2588
|
|
|
|
|
3970
|
my $param = $rOpts->{$opt_name}; |
|
361
|
2588
|
100
|
|
|
|
4217
|
if ( !$param ) { $param = $default } |
|
|
2584
|
|
|
|
|
3375
|
|
|
362
|
2588
|
|
|
|
|
6117
|
$param =~ s/^\s+//; |
|
363
|
2588
|
50
|
|
|
|
5913
|
if ( $param !~ /^#/ ) { |
|
364
|
0
|
|
|
|
|
0
|
Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n"); |
|
365
|
|
|
|
|
|
|
} |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# Note that the ending \s will match a newline |
|
368
|
2588
|
|
|
|
|
3926
|
my $pattern = '^\s*' . $param . '\s'; |
|
369
|
2588
|
50
|
|
|
|
4586
|
if ( bad_pattern($pattern) ) { |
|
370
|
0
|
|
|
|
|
0
|
Die( |
|
371
|
|
|
|
|
|
|
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n" |
|
372
|
|
|
|
|
|
|
); |
|
373
|
|
|
|
|
|
|
} |
|
374
|
2588
|
|
|
|
|
4718
|
return $pattern; |
|
375
|
|
|
|
|
|
|
} ## end sub make_skipping_pattern |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
sub check_options { |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
# Check and pre-process tokenizer parameters |
|
380
|
647
|
|
|
647
|
0
|
1332
|
my $rOpts = shift; |
|
381
|
|
|
|
|
|
|
|
|
382
|
647
|
|
|
|
|
1768
|
%is_sub = (); |
|
383
|
647
|
|
|
|
|
1647
|
$is_sub{'sub'} = 1; |
|
384
|
|
|
|
|
|
|
|
|
385
|
647
|
|
|
|
|
3816
|
%is_END_DATA_format_sub = ( |
|
386
|
|
|
|
|
|
|
'__END__' => 1, |
|
387
|
|
|
|
|
|
|
'__DATA__' => 1, |
|
388
|
|
|
|
|
|
|
'format' => 1, |
|
389
|
|
|
|
|
|
|
'sub' => 1, |
|
390
|
|
|
|
|
|
|
); |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
# Install any aliases to 'sub' |
|
393
|
647
|
100
|
|
|
|
1928
|
if ( $rOpts->{'sub-alias-list'} ) { |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
# Note that any 'sub-alias-list' has been preprocessed to |
|
396
|
|
|
|
|
|
|
# be a trimmed, space-separated list which includes 'sub' |
|
397
|
|
|
|
|
|
|
# for example, it might be 'sub method fun' |
|
398
|
3
|
|
|
|
|
17
|
my @sub_alias_list = split /\s+/, $rOpts->{'sub-alias-list'}; |
|
399
|
3
|
|
|
|
|
8
|
foreach my $word (@sub_alias_list) { |
|
400
|
11
|
|
|
|
|
20
|
$is_sub{$word} = 1; |
|
401
|
11
|
|
|
|
|
23
|
$is_END_DATA_format_sub{$word} = 1; |
|
402
|
|
|
|
|
|
|
} |
|
403
|
|
|
|
|
|
|
} |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# Set global flag to say if we have to guess if bareword 'method' is |
|
406
|
|
|
|
|
|
|
# a sub when 'method' is in %is_sub. This will be true unless: |
|
407
|
|
|
|
|
|
|
# (1) the user entered 'method' as sub alias, or |
|
408
|
|
|
|
|
|
|
# (2) the user set --use-feature=class |
|
409
|
|
|
|
|
|
|
# In these two cases we can assume that 'method' is a sub alias. |
|
410
|
647
|
|
|
|
|
1128
|
$guess_if_method = 1; |
|
411
|
647
|
100
|
|
|
|
1840
|
if ( $is_sub{'method'} ) { $guess_if_method = 0 } |
|
|
2
|
|
|
|
|
4
|
|
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
#------------------------------------------------ |
|
414
|
|
|
|
|
|
|
# Update hash values for any -use-feature options |
|
415
|
|
|
|
|
|
|
#------------------------------------------------ |
|
416
|
|
|
|
|
|
|
|
|
417
|
647
|
|
|
|
|
1051
|
my $use_feature_class = 1; |
|
418
|
|
|
|
|
|
|
|
|
419
|
647
|
|
|
|
|
1292
|
my $str = $rOpts->{'use-feature'}; |
|
420
|
647
|
50
|
33
|
|
|
1943
|
if ( defined($str) && length($str) ) { |
|
421
|
0
|
|
|
|
|
0
|
$str =~ s/^\s+//; |
|
422
|
0
|
|
|
|
|
0
|
$str =~ s/\s+$//; |
|
423
|
0
|
0
|
|
|
|
0
|
if ( !length($str) ) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
## all spaces |
|
425
|
|
|
|
|
|
|
} |
|
426
|
|
|
|
|
|
|
elsif ( $str =~ /\bnoclass\b/ ) { |
|
427
|
0
|
|
|
|
|
0
|
$use_feature_class = 0; |
|
428
|
|
|
|
|
|
|
} |
|
429
|
|
|
|
|
|
|
elsif ( $str =~ /\bclass\b/ ) { |
|
430
|
0
|
|
|
|
|
0
|
$guess_if_method = 0; |
|
431
|
|
|
|
|
|
|
} |
|
432
|
|
|
|
|
|
|
else { |
|
433
|
|
|
|
|
|
|
# At present, only 'class' and 'noclass' are valid strings |
|
434
|
|
|
|
|
|
|
# This is just a Warn, for testing, but will eventually be Die |
|
435
|
0
|
|
|
|
|
0
|
Warn( |
|
436
|
|
|
|
|
|
|
"Unexpected text in --use-feature: expecting 'class' or 'noclass'\n" |
|
437
|
|
|
|
|
|
|
); |
|
438
|
|
|
|
|
|
|
} |
|
439
|
|
|
|
|
|
|
} |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
# These are the main updates for this option. There are additional |
|
442
|
|
|
|
|
|
|
# changes elsewhere, usually indicated with a comment 'rt145706' |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
# Update hash values for use_feature=class, added for rt145706 |
|
445
|
|
|
|
|
|
|
# see 'perlclass.pod' |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
# IMPORTANT: We are changing global hash values initially set in a BEGIN |
|
448
|
|
|
|
|
|
|
# block. Values must be defined (true or false) for each of these new |
|
449
|
|
|
|
|
|
|
# words whether true or false. Otherwise, programs using the module which |
|
450
|
|
|
|
|
|
|
# change options between runs (such as test code) will have |
|
451
|
|
|
|
|
|
|
# incorrect settings and fail. |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
# There are 4 new keywords: |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
# 'class' - treated specially as generalization of 'package' |
|
456
|
|
|
|
|
|
|
# Note: we must not set 'class' to be a keyword to avoid problems |
|
457
|
|
|
|
|
|
|
# with older uses. |
|
458
|
647
|
|
|
|
|
1707
|
$is_package{'class'} = $use_feature_class; |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# 'method' - treated like sub using the sub-alias-list option |
|
461
|
|
|
|
|
|
|
# Note: we must not set 'method' to be a keyword to avoid problems |
|
462
|
|
|
|
|
|
|
# with older uses. |
|
463
|
647
|
50
|
|
|
|
1558
|
if ($use_feature_class) { |
|
464
|
647
|
|
|
|
|
1322
|
$is_sub{'method'} = 1; |
|
465
|
647
|
|
|
|
|
1215
|
$is_END_DATA_format_sub{'method'} = 1; |
|
466
|
|
|
|
|
|
|
} |
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
# 'field' - added as a keyword, and works like 'my' |
|
469
|
|
|
|
|
|
|
# Setting zero_continuation_block_type allows inclusion in table of level |
|
470
|
|
|
|
|
|
|
# differences in case of a missing or extra brace (see sub wrapup). |
|
471
|
647
|
|
|
|
|
1586
|
$is_keyword{'field'} = $use_feature_class; |
|
472
|
647
|
|
|
|
|
1268
|
$is_my_our_state{'field'} = $use_feature_class; |
|
473
|
647
|
|
|
|
|
1358
|
$is_zero_continuation_block_type{'field'} = $use_feature_class; |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
# 'ADJUST' - added as a keyword and works like 'BEGIN' |
|
476
|
|
|
|
|
|
|
# See update git #182 for 'ADJUST :params' |
|
477
|
|
|
|
|
|
|
# Setting zero_continuation_block_type allows inclusion in table of level |
|
478
|
|
|
|
|
|
|
# differences in case of a missing or extra brace (see sub wrapup). |
|
479
|
647
|
|
|
|
|
1336
|
$is_keyword{'ADJUST'} = $use_feature_class; |
|
480
|
647
|
|
|
|
|
1280
|
$is_code_block_token{'ADJUST'} = $use_feature_class; |
|
481
|
647
|
|
|
|
|
1255
|
$is_zero_continuation_block_type{'ADJUST'} = $use_feature_class; |
|
482
|
|
|
|
|
|
|
|
|
483
|
647
|
|
|
|
|
1857
|
%is_grep_alias = (); |
|
484
|
647
|
50
|
|
|
|
1874
|
if ( $rOpts->{'grep-alias-list'} ) { |
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
# Note that 'grep-alias-list' has been preprocessed to be a trimmed, |
|
487
|
|
|
|
|
|
|
# space-separated list |
|
488
|
647
|
|
|
|
|
2766
|
my @q = split /\s+/, $rOpts->{'grep-alias-list'}; |
|
489
|
647
|
|
|
|
|
4116
|
$is_grep_alias{$_} = 1 for @q; |
|
490
|
|
|
|
|
|
|
} |
|
491
|
|
|
|
|
|
|
|
|
492
|
647
|
|
|
|
|
1334
|
$rOpts_starting_indentation_level = $rOpts->{'starting-indentation-level'}; |
|
493
|
647
|
|
|
|
|
1306
|
$rOpts_indent_columns = $rOpts->{'indent-columns'}; |
|
494
|
647
|
|
|
|
|
1141
|
$rOpts_look_for_hash_bang = $rOpts->{'look-for-hash-bang'}; |
|
495
|
647
|
|
|
|
|
1266
|
$rOpts_look_for_autoloader = $rOpts->{'look-for-autoloader'}; |
|
496
|
647
|
|
|
|
|
1134
|
$rOpts_look_for_selfloader = $rOpts->{'look-for-selfloader'}; |
|
497
|
647
|
|
|
|
|
1171
|
$rOpts_trim_qw = $rOpts->{'trim-qw'}; |
|
498
|
647
|
|
|
|
|
1072
|
$rOpts_extended_syntax = $rOpts->{'extended-syntax'}; |
|
499
|
647
|
|
|
|
|
1120
|
$rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; |
|
500
|
647
|
|
|
|
|
1066
|
$rOpts_outdent_labels = $rOpts->{'outdent-labels'}; |
|
501
|
647
|
|
|
|
|
1108
|
$rOpts_maximum_level_errors = $rOpts->{'maximum-level-errors'}; |
|
502
|
647
|
|
|
|
|
1157
|
$rOpts_maximum_unexpected_errors = $rOpts->{'maximum-unexpected-errors'}; |
|
503
|
647
|
|
|
|
|
1181
|
$rOpts_code_skipping = $rOpts->{'code-skipping'}; |
|
504
|
647
|
|
|
|
|
1018
|
$rOpts_code_skipping_begin = $rOpts->{'code-skipping-begin'}; |
|
505
|
647
|
|
|
|
|
1034
|
$rOpts_format_skipping = $rOpts->{'format-skipping'}; |
|
506
|
647
|
|
|
|
|
1123
|
$rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'}; |
|
507
|
647
|
|
|
|
|
1167
|
$rOpts_format_skipping_end = $rOpts->{'format-skipping-end'}; |
|
508
|
647
|
|
|
|
|
1004
|
$rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'}; |
|
509
|
647
|
|
|
|
|
1135
|
$rOpts_non_indenting_braces = $rOpts->{'non-indenting-braces'}; |
|
510
|
647
|
|
|
|
|
1152
|
$rOpts_non_indenting_brace_prefix = $rOpts->{'non-indenting-brace-prefix'}; |
|
511
|
647
|
|
|
|
|
1077
|
$rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'}; |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
# In the Tokenizer, --indent-columns is just used for guessing old |
|
514
|
|
|
|
|
|
|
# indentation, and must be positive. If -i=0 is used for this run (which |
|
515
|
|
|
|
|
|
|
# is possible) we'll just guess that the old run used 4 spaces per level. |
|
516
|
647
|
100
|
|
|
|
1565
|
if ( !$rOpts_indent_columns ) { |
|
517
|
12
|
|
|
|
|
23
|
$rOpts_indent_columns = INDENT_COLUMNS_DEFAULT; |
|
518
|
|
|
|
|
|
|
} |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
# Define $tabsize, the number of spaces per tab for use in |
|
521
|
|
|
|
|
|
|
# guessing the indentation of source lines with leading tabs. |
|
522
|
|
|
|
|
|
|
# Assume same as for this run if tabs are used, otherwise assume |
|
523
|
|
|
|
|
|
|
# a default value, typically 8 |
|
524
|
|
|
|
|
|
|
$tabsize = |
|
525
|
|
|
|
|
|
|
$rOpts->{'entab-leading-whitespace'} |
|
526
|
|
|
|
|
|
|
? $rOpts->{'entab-leading-whitespace'} |
|
527
|
|
|
|
|
|
|
: $rOpts->{'tabs'} ? $rOpts->{'indent-columns'} |
|
528
|
647
|
50
|
|
|
|
2564
|
: $rOpts->{'default-tabsize'}; |
|
|
|
100
|
|
|
|
|
|
|
529
|
647
|
50
|
|
|
|
1507
|
if ( !$tabsize ) { $tabsize = TAB_SIZE_DEFAULT } |
|
|
0
|
|
|
|
|
0
|
|
|
530
|
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
$code_skipping_pattern_begin = |
|
532
|
647
|
|
|
|
|
2184
|
make_skipping_pattern( $rOpts, 'code-skipping-begin', '#<<V' ); |
|
533
|
647
|
|
|
|
|
1747
|
$code_skipping_pattern_end = |
|
534
|
|
|
|
|
|
|
make_skipping_pattern( $rOpts, 'code-skipping-end', '#>>V' ); |
|
535
|
|
|
|
|
|
|
|
|
536
|
647
|
|
|
|
|
1899
|
$format_skipping_pattern_begin = |
|
537
|
|
|
|
|
|
|
make_skipping_pattern( $rOpts, 'format-skipping-begin', '#<<<' ); |
|
538
|
647
|
|
|
|
|
1663
|
$format_skipping_pattern_end = |
|
539
|
|
|
|
|
|
|
make_skipping_pattern( $rOpts, 'format-skipping-end', '#>>>' ); |
|
540
|
|
|
|
|
|
|
|
|
541
|
647
|
|
|
|
|
1825
|
return; |
|
542
|
|
|
|
|
|
|
} ## end sub check_options |
|
543
|
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
sub new { |
|
545
|
|
|
|
|
|
|
|
|
546
|
649
|
|
|
649
|
0
|
2539
|
my ( $class, @arglist ) = @_; |
|
547
|
649
|
50
|
|
|
|
1801
|
if ( @arglist % 2 ) { croak "Odd number of items in arg hash list\n" } |
|
|
0
|
|
|
|
|
0
|
|
|
548
|
|
|
|
|
|
|
|
|
549
|
649
|
|
|
|
|
5029
|
my %defaults = ( |
|
550
|
|
|
|
|
|
|
source_object => undef, |
|
551
|
|
|
|
|
|
|
debugger_object => undef, |
|
552
|
|
|
|
|
|
|
diagnostics_object => undef, |
|
553
|
|
|
|
|
|
|
logger_object => undef, |
|
554
|
|
|
|
|
|
|
starting_level => undef, |
|
555
|
|
|
|
|
|
|
starting_line_number => 1, |
|
556
|
|
|
|
|
|
|
rOpts => {}, |
|
557
|
|
|
|
|
|
|
); |
|
558
|
649
|
|
|
|
|
3499
|
my %args = ( %defaults, @arglist ); |
|
559
|
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
# we are given an object with a get_line() method to supply source lines |
|
561
|
649
|
|
|
|
|
1616
|
my $source_object = $args{source_object}; |
|
562
|
649
|
|
|
|
|
1113
|
my $rOpts = $args{rOpts}; |
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
# Check call args |
|
565
|
649
|
50
|
|
|
|
1582
|
if ( !defined($source_object) ) { |
|
566
|
0
|
|
|
|
|
0
|
Die( |
|
567
|
|
|
|
|
|
|
"Perl::Tidy::Tokenizer::new called without a 'source_object' parameter\n" |
|
568
|
|
|
|
|
|
|
); |
|
569
|
|
|
|
|
|
|
} |
|
570
|
649
|
50
|
|
|
|
2872
|
if ( !ref($source_object) ) { |
|
571
|
0
|
|
|
|
|
0
|
Die(<<EOM); |
|
572
|
|
|
|
|
|
|
sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference; |
|
573
|
|
|
|
|
|
|
'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method |
|
574
|
|
|
|
|
|
|
EOM |
|
575
|
|
|
|
|
|
|
} |
|
576
|
|
|
|
|
|
|
|
|
577
|
649
|
|
|
|
|
1110
|
my $logger_object = $args{logger_object}; |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
# Tokenizer state data is as follows: |
|
580
|
|
|
|
|
|
|
# _rhere_target_list_ reference to list of here-doc targets |
|
581
|
|
|
|
|
|
|
# _here_doc_target_ the target string for a here document |
|
582
|
|
|
|
|
|
|
# _here_quote_character_ the type of here-doc quoting (" ' ` or none) |
|
583
|
|
|
|
|
|
|
# to determine if interpolation is done |
|
584
|
|
|
|
|
|
|
# _quote_target_ character we seek if chasing a quote |
|
585
|
|
|
|
|
|
|
# _line_start_quote_ line where we started looking for a long quote |
|
586
|
|
|
|
|
|
|
# _in_here_doc_ flag indicating if we are in a here-doc |
|
587
|
|
|
|
|
|
|
# _in_pod_ flag set if we are in pod documentation |
|
588
|
|
|
|
|
|
|
# _in_code_skipping_ flag set if we are in a code skipping section |
|
589
|
|
|
|
|
|
|
# _in_format_skipping_ flag set if we are in a format skipping section |
|
590
|
|
|
|
|
|
|
# _in_error_ flag set if we saw severe error (binary in script) |
|
591
|
|
|
|
|
|
|
# _do_not_format_ flag set if formatting should be skipped |
|
592
|
|
|
|
|
|
|
# _warning_count_ number of calls to logger sub warning |
|
593
|
|
|
|
|
|
|
# _html_tag_count_ number of apparent html tags seen (indicates html) |
|
594
|
|
|
|
|
|
|
# _in_data_ flag set if we are in __DATA__ section |
|
595
|
|
|
|
|
|
|
# _in_end_ flag set if we are in __END__ section |
|
596
|
|
|
|
|
|
|
# _in_format_ flag set if we are in a format description |
|
597
|
|
|
|
|
|
|
# _in_attribute_list_ flag telling if we are looking for attributes |
|
598
|
|
|
|
|
|
|
# _in_quote_ flag telling if we are chasing a quote |
|
599
|
|
|
|
|
|
|
# _starting_level_ indentation level of first line |
|
600
|
|
|
|
|
|
|
# _diagnostics_object_ place to write debugging information |
|
601
|
|
|
|
|
|
|
# _unexpected_error_count_ error count used to limit output |
|
602
|
|
|
|
|
|
|
# _lower_case_labels_at_ line numbers where lower case labels seen |
|
603
|
|
|
|
|
|
|
# _hit_bug_ program bug detected |
|
604
|
|
|
|
|
|
|
|
|
605
|
649
|
|
|
|
|
1020
|
my $self = []; |
|
606
|
649
|
|
|
|
|
1389
|
$self->[_rhere_target_list_] = []; |
|
607
|
649
|
|
|
|
|
1340
|
$self->[_in_here_doc_] = 0; |
|
608
|
649
|
|
|
|
|
1489
|
$self->[_here_doc_target_] = EMPTY_STRING; |
|
609
|
649
|
|
|
|
|
1115
|
$self->[_here_quote_character_] = EMPTY_STRING; |
|
610
|
649
|
|
|
|
|
1221
|
$self->[_in_data_] = 0; |
|
611
|
649
|
|
|
|
|
1431
|
$self->[_in_end_] = 0; |
|
612
|
649
|
|
|
|
|
1225
|
$self->[_in_format_] = 0; |
|
613
|
649
|
|
|
|
|
1174
|
$self->[_in_error_] = 0; |
|
614
|
649
|
|
|
|
|
1262
|
$self->[_do_not_format_] = 0; |
|
615
|
649
|
|
|
|
|
1030
|
$self->[_warning_count_] = 0; |
|
616
|
649
|
|
|
|
|
1229
|
$self->[_html_tag_count_] = 0; |
|
617
|
649
|
|
|
|
|
1040
|
$self->[_in_pod_] = 0; |
|
618
|
|
|
|
|
|
|
$self->[_in_code_skipping_] = |
|
619
|
649
|
|
33
|
|
|
1996
|
$rOpts->{'code-skipping-from-start'} && $rOpts_code_skipping; |
|
620
|
649
|
|
|
|
|
1077
|
$self->[_in_format_skipping_] = 0; |
|
621
|
649
|
|
|
|
|
1273
|
$self->[_rformat_skipping_list_] = []; |
|
622
|
649
|
|
|
|
|
1284
|
$self->[_in_attribute_list_] = 0; |
|
623
|
649
|
|
|
|
|
1072
|
$self->[_in_quote_] = 0; |
|
624
|
649
|
|
|
|
|
1162
|
$self->[_quote_target_] = EMPTY_STRING; |
|
625
|
649
|
|
|
|
|
1541
|
$self->[_line_start_quote_] = -1; |
|
626
|
649
|
|
|
|
|
1147
|
$self->[_starting_level_] = $args{starting_level}; |
|
627
|
649
|
|
|
|
|
1379
|
$self->[_know_starting_level_] = defined( $args{starting_level} ); |
|
628
|
649
|
|
|
|
|
1365
|
$self->[_last_line_number_] = $args{starting_line_number} - 1; |
|
629
|
649
|
|
|
|
|
1423
|
$self->[_saw_perl_dash_P_] = 0; |
|
630
|
649
|
|
|
|
|
1299
|
$self->[_saw_perl_dash_w_] = 0; |
|
631
|
649
|
|
|
|
|
1006
|
$self->[_saw_use_strict_] = 0; |
|
632
|
649
|
|
|
|
|
1039
|
$self->[_saw_brace_error_] = 0; |
|
633
|
649
|
|
|
|
|
1039
|
$self->[_hit_bug_] = 0; |
|
634
|
649
|
|
|
|
|
1423
|
$self->[_look_for_autoloader_] = $rOpts_look_for_autoloader; |
|
635
|
649
|
|
|
|
|
1121
|
$self->[_look_for_selfloader_] = $rOpts_look_for_selfloader; |
|
636
|
649
|
|
|
|
|
1056
|
$self->[_saw_autoloader_] = 0; |
|
637
|
649
|
|
|
|
|
1084
|
$self->[_saw_selfloader_] = 0; |
|
638
|
649
|
|
|
|
|
990
|
$self->[_saw_hash_bang_] = 0; |
|
639
|
649
|
|
|
|
|
981
|
$self->[_saw_end_] = 0; |
|
640
|
649
|
|
|
|
|
1376
|
$self->[_saw_data_] = 0; |
|
641
|
649
|
|
|
|
|
1030
|
$self->[_saw_negative_indentation_] = 0; |
|
642
|
649
|
|
|
|
|
987
|
$self->[_started_tokenizing_] = 0; |
|
643
|
649
|
|
|
|
|
1061
|
$self->[_debugger_object_] = $args{debugger_object}; |
|
644
|
649
|
|
|
|
|
1048
|
$self->[_diagnostics_object_] = $args{diagnostics_object}; |
|
645
|
649
|
|
|
|
|
1000
|
$self->[_logger_object_] = $logger_object; |
|
646
|
649
|
|
|
|
|
1232
|
$self->[_unexpected_error_count_] = 0; |
|
647
|
649
|
|
|
|
|
1003
|
$self->[_started_looking_for_here_target_at_] = 0; |
|
648
|
649
|
|
|
|
|
969
|
$self->[_nearly_matched_here_target_at_] = undef; |
|
649
|
649
|
|
|
|
|
1027
|
$self->[_line_of_text_] = EMPTY_STRING; |
|
650
|
649
|
|
|
|
|
1050
|
$self->[_rlower_case_labels_at_] = undef; |
|
651
|
649
|
|
|
|
|
1021
|
$self->[_maximum_level_] = 0; |
|
652
|
649
|
|
|
|
|
940
|
$self->[_true_brace_error_count_] = 0; |
|
653
|
649
|
|
|
|
|
1430
|
$self->[_rnon_indenting_brace_stack_] = []; |
|
654
|
649
|
|
|
|
|
1072
|
$self->[_show_indentation_table_] = 0; |
|
655
|
649
|
|
|
|
|
1152
|
$self->[_rbareword_info_] = {}; |
|
656
|
|
|
|
|
|
|
|
|
657
|
649
|
|
|
|
|
4821
|
$self->[_rclosing_brace_indentation_hash_] = { |
|
658
|
|
|
|
|
|
|
valid => undef, |
|
659
|
|
|
|
|
|
|
rhistory_line_number => [0], |
|
660
|
|
|
|
|
|
|
rhistory_level_diff => [0], |
|
661
|
|
|
|
|
|
|
rhistory_anchor_point => [1], |
|
662
|
|
|
|
|
|
|
}; |
|
663
|
|
|
|
|
|
|
|
|
664
|
649
|
|
|
|
|
1318
|
$self->[_rOpts_] = $rOpts; |
|
665
|
649
|
|
100
|
|
|
3028
|
$self->[_save_logfile_] = |
|
666
|
|
|
|
|
|
|
defined($logger_object) && $logger_object->get_save_logfile(); |
|
667
|
|
|
|
|
|
|
|
|
668
|
649
|
|
|
|
|
1421
|
bless $self, $class; |
|
669
|
|
|
|
|
|
|
|
|
670
|
649
|
|
|
|
|
3311
|
$self->prepare_for_a_new_file($source_object); |
|
671
|
649
|
|
|
|
|
2789
|
$self->find_starting_indentation_level(); |
|
672
|
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
# This is not a full class yet, so die if an attempt is made to |
|
674
|
|
|
|
|
|
|
# create more than one object. |
|
675
|
|
|
|
|
|
|
|
|
676
|
649
|
50
|
|
|
|
1992
|
if ( _increment_count() > 1 ) { |
|
677
|
0
|
|
|
|
|
0
|
confess |
|
678
|
|
|
|
|
|
|
"Attempt to create more than 1 object in $class, which is not a true class yet\n"; |
|
679
|
|
|
|
|
|
|
} |
|
680
|
|
|
|
|
|
|
|
|
681
|
649
|
|
|
|
|
4773
|
return $self; |
|
682
|
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
} ## end sub new |
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
# Called externally |
|
686
|
|
|
|
|
|
|
sub get_unexpected_error_count { |
|
687
|
4
|
|
|
4
|
0
|
10
|
my ($self) = @_; |
|
688
|
4
|
|
|
|
|
16
|
return $self->[_unexpected_error_count_]; |
|
689
|
|
|
|
|
|
|
} |
|
690
|
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
# Called externally |
|
692
|
|
|
|
|
|
|
sub is_keyword { |
|
693
|
4
|
|
|
4
|
0
|
8
|
my ($str) = @_; |
|
694
|
4
|
|
|
|
|
16
|
return $is_keyword{$str}; |
|
695
|
|
|
|
|
|
|
} |
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
|
698
|
|
|
|
|
|
|
# Line input routines, previously handled by the LineBuffer class |
|
699
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
|
700
|
|
|
|
|
|
|
sub make_source_array { |
|
701
|
|
|
|
|
|
|
|
|
702
|
649
|
|
|
649
|
0
|
1323
|
my ( $self, $line_source_object ) = @_; |
|
703
|
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
# Convert the source into an array of lines |
|
705
|
|
|
|
|
|
|
# Given: |
|
706
|
|
|
|
|
|
|
# $line_source_object = the input source stream |
|
707
|
|
|
|
|
|
|
# Task: |
|
708
|
|
|
|
|
|
|
# Convert the source to an array ref and store in $self |
|
709
|
|
|
|
|
|
|
|
|
710
|
649
|
|
|
|
|
1006
|
my $rinput_lines = []; |
|
711
|
|
|
|
|
|
|
|
|
712
|
649
|
|
|
|
|
1224
|
my $rsource = ref($line_source_object); |
|
713
|
649
|
|
|
|
|
1074
|
my $source_string; |
|
714
|
|
|
|
|
|
|
|
|
715
|
649
|
50
|
|
|
|
2912
|
if ( !$rsource ) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
# shouldn't happen: this should have been checked in sub new |
|
718
|
0
|
|
|
|
|
0
|
Fault(<<EOM); |
|
719
|
|
|
|
|
|
|
sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference; |
|
720
|
|
|
|
|
|
|
'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method |
|
721
|
|
|
|
|
|
|
EOM |
|
722
|
|
|
|
|
|
|
} |
|
723
|
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
# handle an ARRAY ref |
|
725
|
|
|
|
|
|
|
elsif ( $rsource eq 'ARRAY' ) { |
|
726
|
0
|
|
|
|
|
0
|
$rinput_lines = $line_source_object; |
|
727
|
0
|
|
|
|
|
0
|
$source_string = join( EMPTY_STRING, @{$line_source_object} ); |
|
|
0
|
|
|
|
|
0
|
|
|
728
|
|
|
|
|
|
|
} |
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
# handle a SCALAR ref |
|
731
|
|
|
|
|
|
|
elsif ( $rsource eq 'SCALAR' ) { |
|
732
|
649
|
|
|
|
|
947
|
$source_string = ${$line_source_object}; |
|
|
649
|
|
|
|
|
1350
|
|
|
733
|
649
|
|
|
|
|
4236
|
my @lines = split /^/, $source_string; |
|
734
|
649
|
|
|
|
|
1668
|
$rinput_lines = \@lines; |
|
735
|
|
|
|
|
|
|
} |
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
# handle an object - must have a get_line method |
|
738
|
|
|
|
|
|
|
else { |
|
739
|
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
# This will die if user's object does have a 'get_line' method |
|
741
|
0
|
|
|
|
|
0
|
my $line; |
|
742
|
0
|
|
|
|
|
0
|
while ( defined( $line = $line_source_object->get_line() ) ) { |
|
743
|
0
|
|
|
|
|
0
|
push( @{$rinput_lines}, $line ); |
|
|
0
|
|
|
|
|
0
|
|
|
744
|
|
|
|
|
|
|
} |
|
745
|
0
|
|
|
|
|
0
|
$source_string = join( EMPTY_STRING, @{$rinput_lines} ); |
|
|
0
|
|
|
|
|
0
|
|
|
746
|
|
|
|
|
|
|
} |
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
# Get trimmed lines. It is much faster to strip leading whitespace from |
|
749
|
|
|
|
|
|
|
# the whole input file at once than line-by-line. |
|
750
|
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
# Add a terminal newline if needed to keep line count unchanged: |
|
752
|
|
|
|
|
|
|
# - avoids problem of losing a last line which is just \r and no \n (c283) |
|
753
|
|
|
|
|
|
|
# - but check input line count to avoid adding line to an empty file (c286) |
|
754
|
649
|
100
|
100
|
|
|
1000
|
if ( @{$rinput_lines} && $source_string !~ /\n$/ ) { |
|
|
649
|
|
|
|
|
5191
|
|
|
755
|
1
|
|
|
|
|
2
|
$source_string .= "\n"; |
|
756
|
|
|
|
|
|
|
} |
|
757
|
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
# Remove leading whitespace except newlines |
|
759
|
649
|
|
|
|
|
7505
|
$source_string =~ s/^ [^\S\n]+ //gxm; |
|
760
|
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
# Then break the string into lines |
|
762
|
649
|
|
|
|
|
3877
|
my @trimmed_lines = split /^/, $source_string; |
|
763
|
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
# Safety check - a change in number of lines would be a disaster |
|
765
|
649
|
50
|
|
|
|
1027
|
if ( @trimmed_lines != @{$rinput_lines} ) { |
|
|
649
|
|
|
|
|
1764
|
|
|
766
|
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
# Shouldn't happen - die in DEVEL_MODE and fix |
|
768
|
0
|
|
|
|
|
0
|
my $ntr = @trimmed_lines; |
|
769
|
0
|
|
|
|
|
0
|
my $utr = @{$rinput_lines}; |
|
|
0
|
|
|
|
|
0
|
|
|
770
|
0
|
|
|
|
|
0
|
DEVEL_MODE |
|
771
|
|
|
|
|
|
|
&& Fault("trimmed / untrimmed line counts differ: $ntr / $utr\n"); |
|
772
|
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
# Otherwise we can safely continue with undefined trimmed lines. They |
|
774
|
|
|
|
|
|
|
# will be detected and fixed later. |
|
775
|
0
|
|
|
|
|
0
|
@trimmed_lines = (); |
|
776
|
|
|
|
|
|
|
} |
|
777
|
|
|
|
|
|
|
|
|
778
|
649
|
|
|
|
|
1408
|
$self->[_rinput_lines_] = $rinput_lines; |
|
779
|
649
|
|
|
|
|
1350
|
$self->[_rtrimmed_input_lines_] = \@trimmed_lines; |
|
780
|
649
|
|
|
|
|
1151
|
$self->[_input_line_index_next_] = 0; |
|
781
|
649
|
|
|
|
|
1358
|
return; |
|
782
|
|
|
|
|
|
|
} ## end sub make_source_array |
|
783
|
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
sub peek_ahead { |
|
785
|
1377
|
|
|
1377
|
0
|
2425
|
my ( $self, $buffer_index ) = @_; |
|
786
|
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
# look $buffer_index lines ahead of the current location in the input |
|
788
|
|
|
|
|
|
|
# stream without disturbing the input |
|
789
|
1377
|
|
|
|
|
1761
|
my $line; |
|
790
|
1377
|
|
|
|
|
1969
|
my $rinput_lines = $self->[_rinput_lines_]; |
|
791
|
1377
|
|
|
|
|
2412
|
my $line_index = $buffer_index + $self->[_input_line_index_next_]; |
|
792
|
1377
|
100
|
|
|
|
1739
|
if ( $line_index < @{$rinput_lines} ) { |
|
|
1377
|
|
|
|
|
3076
|
|
|
793
|
1365
|
|
|
|
|
2151
|
$line = $rinput_lines->[$line_index]; |
|
794
|
|
|
|
|
|
|
} |
|
795
|
1377
|
|
|
|
|
3837
|
return $line; |
|
796
|
|
|
|
|
|
|
} ## end sub peek_ahead |
|
797
|
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
#----------------------------------------- |
|
799
|
|
|
|
|
|
|
# interface to Perl::Tidy::Logger routines |
|
800
|
|
|
|
|
|
|
#----------------------------------------- |
|
801
|
|
|
|
|
|
|
sub warning { |
|
802
|
|
|
|
|
|
|
|
|
803
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
|
804
|
|
|
|
|
|
|
|
|
805
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
|
806
|
0
|
|
|
|
|
0
|
$self->[_warning_count_]++; |
|
807
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
|
808
|
0
|
|
|
|
|
0
|
my $msg_line_number = $self->[_last_line_number_]; |
|
809
|
0
|
|
|
|
|
0
|
$logger_object->warning( $msg, $msg_line_number ); |
|
810
|
|
|
|
|
|
|
} |
|
811
|
0
|
|
|
|
|
0
|
return; |
|
812
|
|
|
|
|
|
|
} ## end sub warning |
|
813
|
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
sub warning_do_not_format { |
|
815
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
|
816
|
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
# Issue a warning message and set a flag to skip formatting this file. |
|
818
|
0
|
|
|
|
|
0
|
$self->warning($msg); |
|
819
|
0
|
|
|
|
|
0
|
$self->[_do_not_format_] = 1; |
|
820
|
0
|
|
|
|
|
0
|
return; |
|
821
|
|
|
|
|
|
|
} ## end sub warning_do_not_format |
|
822
|
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
sub complain { |
|
824
|
|
|
|
|
|
|
|
|
825
|
35
|
|
|
35
|
0
|
80
|
my ( $self, $msg ) = @_; |
|
826
|
|
|
|
|
|
|
|
|
827
|
35
|
|
|
|
|
68
|
my $logger_object = $self->[_logger_object_]; |
|
828
|
35
|
50
|
|
|
|
96
|
if ($logger_object) { |
|
829
|
35
|
|
|
|
|
59
|
my $input_line_number = $self->[_last_line_number_]; |
|
830
|
35
|
|
|
|
|
170
|
$logger_object->complain( $msg, $input_line_number ); |
|
831
|
|
|
|
|
|
|
} |
|
832
|
35
|
|
|
|
|
54
|
return; |
|
833
|
|
|
|
|
|
|
} ## end sub complain |
|
834
|
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
sub write_logfile_entry { |
|
836
|
|
|
|
|
|
|
|
|
837
|
2162
|
|
|
2162
|
0
|
4317
|
my ( $self, $msg ) = @_; |
|
838
|
|
|
|
|
|
|
|
|
839
|
2162
|
|
|
|
|
3374
|
my $logger_object = $self->[_logger_object_]; |
|
840
|
2162
|
100
|
|
|
|
4197
|
if ($logger_object) { |
|
841
|
2156
|
|
|
|
|
5744
|
$logger_object->write_logfile_entry($msg); |
|
842
|
|
|
|
|
|
|
} |
|
843
|
2162
|
|
|
|
|
3530
|
return; |
|
844
|
|
|
|
|
|
|
} ## end sub write_logfile_entry |
|
845
|
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
sub interrupt_logfile { |
|
847
|
|
|
|
|
|
|
|
|
848
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
849
|
|
|
|
|
|
|
|
|
850
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
|
851
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
|
852
|
0
|
|
|
|
|
0
|
$logger_object->interrupt_logfile(); |
|
853
|
|
|
|
|
|
|
} |
|
854
|
0
|
|
|
|
|
0
|
return; |
|
855
|
|
|
|
|
|
|
} ## end sub interrupt_logfile |
|
856
|
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
sub resume_logfile { |
|
858
|
|
|
|
|
|
|
|
|
859
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
860
|
|
|
|
|
|
|
|
|
861
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
|
862
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
|
863
|
0
|
|
|
|
|
0
|
$logger_object->resume_logfile(); |
|
864
|
|
|
|
|
|
|
} |
|
865
|
0
|
|
|
|
|
0
|
return; |
|
866
|
|
|
|
|
|
|
} ## end sub resume_logfile |
|
867
|
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
sub brace_warning { |
|
869
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
|
870
|
0
|
|
|
|
|
0
|
$self->[_saw_brace_error_]++; |
|
871
|
|
|
|
|
|
|
|
|
872
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
|
873
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
|
874
|
0
|
|
|
|
|
0
|
my $msg_line_number = $self->[_last_line_number_]; |
|
875
|
0
|
|
|
|
|
0
|
$logger_object->brace_warning( $msg, $msg_line_number ); |
|
876
|
|
|
|
|
|
|
} |
|
877
|
0
|
|
|
|
|
0
|
return; |
|
878
|
|
|
|
|
|
|
} ## end sub brace_warning |
|
879
|
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
sub increment_brace_error { |
|
881
|
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
# This is same as sub brace_warning but without a message |
|
883
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
884
|
0
|
|
|
|
|
0
|
$self->[_saw_brace_error_]++; |
|
885
|
|
|
|
|
|
|
|
|
886
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
|
887
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
|
888
|
0
|
|
|
|
|
0
|
$logger_object->increment_brace_error(); |
|
889
|
|
|
|
|
|
|
} |
|
890
|
0
|
|
|
|
|
0
|
return; |
|
891
|
|
|
|
|
|
|
} ## end sub increment_brace_error |
|
892
|
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
sub get_saw_brace_error { |
|
894
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
895
|
0
|
|
|
|
|
0
|
return $self->[_saw_brace_error_]; |
|
896
|
|
|
|
|
|
|
} |
|
897
|
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
sub report_definite_bug { |
|
899
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
900
|
0
|
|
|
|
|
0
|
$self->[_hit_bug_] = 1; |
|
901
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
|
902
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
|
903
|
0
|
|
|
|
|
0
|
$logger_object->report_definite_bug(); |
|
904
|
|
|
|
|
|
|
} |
|
905
|
0
|
|
|
|
|
0
|
return; |
|
906
|
|
|
|
|
|
|
} ## end sub report_definite_bug |
|
907
|
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
#------------------------------------- |
|
909
|
|
|
|
|
|
|
# Interface to Perl::Tidy::Diagnostics |
|
910
|
|
|
|
|
|
|
#------------------------------------- |
|
911
|
|
|
|
|
|
|
sub write_diagnostics { |
|
912
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
|
913
|
0
|
|
|
|
|
0
|
my $input_line_number = $self->[_last_line_number_]; |
|
914
|
0
|
|
|
|
|
0
|
my $diagnostics_object = $self->[_diagnostics_object_]; |
|
915
|
0
|
0
|
|
|
|
0
|
if ($diagnostics_object) { |
|
916
|
0
|
|
|
|
|
0
|
$diagnostics_object->write_diagnostics( $msg, $input_line_number ); |
|
917
|
|
|
|
|
|
|
} |
|
918
|
0
|
|
|
|
|
0
|
return; |
|
919
|
|
|
|
|
|
|
} ## end sub write_diagnostics |
|
920
|
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
sub report_tokenization_errors { |
|
922
|
|
|
|
|
|
|
|
|
923
|
649
|
|
|
649
|
0
|
1461
|
my ($self) = @_; |
|
924
|
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
# Report any tokenization errors and return a flag '$severe_error'. |
|
926
|
|
|
|
|
|
|
# Set $severe_error = 1 if the tokenization errors are so severe that |
|
927
|
|
|
|
|
|
|
# the formatter should not attempt to format the file. Instead, it will |
|
928
|
|
|
|
|
|
|
# just output the file verbatim. |
|
929
|
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
# set severe error flag if tokenizer has encountered file reading problems |
|
931
|
|
|
|
|
|
|
# (i.e. unexpected binary characters) |
|
932
|
|
|
|
|
|
|
# or code which may not be formatted correctly (such as 'my sub q') |
|
933
|
|
|
|
|
|
|
# The difference between _in_error_ and _do_not_format_ is that |
|
934
|
|
|
|
|
|
|
# _in_error_ stops the tokenizer immediately whereas |
|
935
|
|
|
|
|
|
|
# _do_not_format_ lets the tokenizer finish so that all errors are seen |
|
936
|
|
|
|
|
|
|
# Both block formatting and cause the input stream to be output verbatim. |
|
937
|
649
|
|
33
|
|
|
3369
|
my $severe_error = $self->[_in_error_] || $self->[_do_not_format_]; |
|
938
|
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
# And do not format if it looks like an html file (c209) |
|
940
|
649
|
|
33
|
|
|
3442
|
$severe_error ||= $self->[_html_tag_count_] && $self->[_warning_count_]; |
|
|
|
|
33
|
|
|
|
|
|
941
|
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
# Inform the logger object on length of input stream |
|
943
|
649
|
|
|
|
|
1295
|
my $logger_object = $self->[_logger_object_]; |
|
944
|
649
|
100
|
|
|
|
1760
|
if ($logger_object) { |
|
945
|
647
|
|
|
|
|
1186
|
my $last_line_number = $self->[_last_line_number_]; |
|
946
|
647
|
|
|
|
|
3205
|
$logger_object->set_last_input_line_number($last_line_number); |
|
947
|
|
|
|
|
|
|
} |
|
948
|
|
|
|
|
|
|
|
|
949
|
649
|
|
|
|
|
1193
|
my $maxle = $rOpts_maximum_level_errors; |
|
950
|
649
|
|
|
|
|
1001
|
my $maxue = $rOpts_maximum_unexpected_errors; |
|
951
|
649
|
50
|
|
|
|
1523
|
$maxle = 1 unless ( defined($maxle) ); |
|
952
|
649
|
50
|
|
|
|
1494
|
$maxue = 0 unless ( defined($maxue) ); |
|
953
|
|
|
|
|
|
|
|
|
954
|
649
|
|
|
|
|
2007
|
my $level = get_indentation_level(); |
|
955
|
649
|
50
|
|
|
|
1979
|
if ( $level != $self->[_starting_level_] ) { |
|
956
|
0
|
|
|
|
|
0
|
$self->warning("final indentation level: $level\n"); |
|
957
|
|
|
|
|
|
|
|
|
958
|
0
|
|
|
|
|
0
|
$self->[_show_indentation_table_] = 1; |
|
959
|
|
|
|
|
|
|
|
|
960
|
0
|
|
|
|
|
0
|
my $level_diff = $self->[_starting_level_] - $level; |
|
961
|
0
|
0
|
|
|
|
0
|
if ( $level_diff < 0 ) { $level_diff = -$level_diff } |
|
|
0
|
|
|
|
|
0
|
|
|
962
|
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
# Set severe error flag if the level error is greater than 1. |
|
964
|
|
|
|
|
|
|
# The formatter can function for any level error but it is probably |
|
965
|
|
|
|
|
|
|
# best not to attempt formatting for a high level error. |
|
966
|
0
|
0
|
0
|
|
|
0
|
if ( $maxle >= 0 && $level_diff > $maxle ) { |
|
967
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
|
968
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
|
969
|
|
|
|
|
|
|
Formatting will be skipped because level error '$level_diff' exceeds -maxle=$maxle; use -maxle=-1 to force formatting |
|
970
|
|
|
|
|
|
|
EOM |
|
971
|
|
|
|
|
|
|
} |
|
972
|
|
|
|
|
|
|
} |
|
973
|
|
|
|
|
|
|
|
|
974
|
649
|
|
|
|
|
2992
|
$self->check_final_nesting_depths(); |
|
975
|
|
|
|
|
|
|
|
|
976
|
649
|
50
|
|
|
|
1712
|
if ( $self->[_show_indentation_table_] ) { |
|
977
|
0
|
|
|
|
|
0
|
$self->show_indentation_table(); |
|
978
|
|
|
|
|
|
|
} |
|
979
|
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
# Likewise, large numbers of brace errors usually indicate non-perl |
|
981
|
|
|
|
|
|
|
# scripts, so set the severe error flag at a low number. This is similar |
|
982
|
|
|
|
|
|
|
# to the level check, but different because braces may balance but be |
|
983
|
|
|
|
|
|
|
# incorrectly interlaced. |
|
984
|
649
|
50
|
|
|
|
1901
|
if ( $self->[_true_brace_error_count_] > 2 ) { |
|
985
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
|
986
|
|
|
|
|
|
|
} |
|
987
|
|
|
|
|
|
|
|
|
988
|
649
|
50
|
66
|
|
|
1881
|
if ( $rOpts_look_for_hash_bang |
|
989
|
|
|
|
|
|
|
&& !$self->[_saw_hash_bang_] ) |
|
990
|
|
|
|
|
|
|
{ |
|
991
|
0
|
|
|
|
|
0
|
$self->warning( |
|
992
|
|
|
|
|
|
|
"hit EOF without seeing hash-bang line; maybe don't need -x?\n"); |
|
993
|
|
|
|
|
|
|
} |
|
994
|
|
|
|
|
|
|
|
|
995
|
649
|
50
|
|
|
|
1710
|
if ( $self->[_in_format_] ) { |
|
996
|
0
|
|
|
|
|
0
|
$self->warning("hit EOF while in format description\n"); |
|
997
|
|
|
|
|
|
|
} |
|
998
|
|
|
|
|
|
|
|
|
999
|
649
|
50
|
|
|
|
1761
|
if ( $self->[_in_code_skipping_] ) { |
|
1000
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
|
1001
|
|
|
|
|
|
|
"hit EOF while in lines skipped with --code-skipping\n"); |
|
1002
|
|
|
|
|
|
|
} |
|
1003
|
|
|
|
|
|
|
|
|
1004
|
649
|
50
|
|
|
|
1718
|
if ( $self->[_in_pod_] ) { |
|
1005
|
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
# Just write log entry if this is after __END__ or __DATA__ |
|
1007
|
|
|
|
|
|
|
# because this happens to often, and it is not likely to be |
|
1008
|
|
|
|
|
|
|
# a parsing error. |
|
1009
|
0
|
0
|
0
|
|
|
0
|
if ( $self->[_saw_data_] || $self->[_saw_end_] ) { |
|
1010
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
|
1011
|
|
|
|
|
|
|
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" |
|
1012
|
|
|
|
|
|
|
); |
|
1013
|
|
|
|
|
|
|
} |
|
1014
|
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
else { |
|
1016
|
0
|
|
|
|
|
0
|
$self->complain( |
|
1017
|
|
|
|
|
|
|
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" |
|
1018
|
|
|
|
|
|
|
); |
|
1019
|
|
|
|
|
|
|
} |
|
1020
|
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
} |
|
1022
|
|
|
|
|
|
|
|
|
1023
|
649
|
50
|
|
|
|
1759
|
if ( $self->[_in_here_doc_] ) { |
|
1024
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
|
1025
|
0
|
|
|
|
|
0
|
my $here_doc_target = $self->[_here_doc_target_]; |
|
1026
|
0
|
|
|
|
|
0
|
my $started_looking_for_here_target_at = |
|
1027
|
|
|
|
|
|
|
$self->[_started_looking_for_here_target_at_]; |
|
1028
|
0
|
0
|
|
|
|
0
|
if ($here_doc_target) { |
|
1029
|
0
|
|
|
|
|
0
|
$self->warning( |
|
1030
|
|
|
|
|
|
|
"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" |
|
1031
|
|
|
|
|
|
|
); |
|
1032
|
|
|
|
|
|
|
} |
|
1033
|
|
|
|
|
|
|
else { |
|
1034
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
|
1035
|
|
|
|
|
|
|
Hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string. |
|
1036
|
|
|
|
|
|
|
(Perl will match to the end of file but this may not be intended). |
|
1037
|
|
|
|
|
|
|
EOM |
|
1038
|
|
|
|
|
|
|
} |
|
1039
|
0
|
|
|
|
|
0
|
my $nearly_matched_here_target_at = |
|
1040
|
|
|
|
|
|
|
$self->[_nearly_matched_here_target_at_]; |
|
1041
|
0
|
0
|
|
|
|
0
|
if ($nearly_matched_here_target_at) { |
|
1042
|
0
|
|
|
|
|
0
|
$self->warning( |
|
1043
|
|
|
|
|
|
|
"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" |
|
1044
|
|
|
|
|
|
|
); |
|
1045
|
|
|
|
|
|
|
} |
|
1046
|
|
|
|
|
|
|
} |
|
1047
|
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
# Something is seriously wrong if we ended inside a quote |
|
1049
|
649
|
50
|
|
|
|
1613
|
if ( $self->[_in_quote_] ) { |
|
1050
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
|
1051
|
0
|
|
|
|
|
0
|
my $line_start_quote = $self->[_line_start_quote_]; |
|
1052
|
0
|
|
|
|
|
0
|
my $quote_target = $self->[_quote_target_]; |
|
1053
|
0
|
0
|
|
|
|
0
|
my $what = |
|
1054
|
|
|
|
|
|
|
( $self->[_in_attribute_list_] ) |
|
1055
|
|
|
|
|
|
|
? "attribute list" |
|
1056
|
|
|
|
|
|
|
: "quote/pattern"; |
|
1057
|
0
|
|
|
|
|
0
|
$self->warning( |
|
1058
|
|
|
|
|
|
|
"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" |
|
1059
|
|
|
|
|
|
|
); |
|
1060
|
|
|
|
|
|
|
} |
|
1061
|
|
|
|
|
|
|
|
|
1062
|
649
|
50
|
|
|
|
1591
|
if ( $self->[_hit_bug_] ) { |
|
1063
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
|
1064
|
|
|
|
|
|
|
} |
|
1065
|
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
# Multiple "unexpected" type tokenization errors usually indicate parsing |
|
1067
|
|
|
|
|
|
|
# non-perl scripts, or that something is seriously wrong, so we should |
|
1068
|
|
|
|
|
|
|
# avoid formatting them. This can happen for example if we run perltidy on |
|
1069
|
|
|
|
|
|
|
# a shell script or an html file. But unfortunately this check can |
|
1070
|
|
|
|
|
|
|
# interfere with some extended syntaxes, such as RPerl, so it has to be off |
|
1071
|
|
|
|
|
|
|
# by default. |
|
1072
|
649
|
|
|
|
|
1161
|
my $ue_count = $self->[_unexpected_error_count_]; |
|
1073
|
649
|
50
|
33
|
|
|
1969
|
if ( $maxue > 0 && $ue_count > $maxue ) { |
|
1074
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
|
1075
|
|
|
|
|
|
|
Formatting will be skipped since unexpected token count = $ue_count > -maxue=$maxue; use -maxue=0 to force formatting |
|
1076
|
|
|
|
|
|
|
EOM |
|
1077
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
|
1078
|
|
|
|
|
|
|
} |
|
1079
|
|
|
|
|
|
|
|
|
1080
|
649
|
100
|
|
|
|
1722
|
if ( !$self->[_saw_perl_dash_w_] ) { |
|
1081
|
632
|
|
|
|
|
2140
|
$self->write_logfile_entry("Suggest including 'use warnings;'\n"); |
|
1082
|
|
|
|
|
|
|
} |
|
1083
|
|
|
|
|
|
|
|
|
1084
|
649
|
50
|
|
|
|
1716
|
if ( $self->[_saw_perl_dash_P_] ) { |
|
1085
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
|
1086
|
|
|
|
|
|
|
"Use of -P parameter for defines is discouraged\n"); |
|
1087
|
|
|
|
|
|
|
} |
|
1088
|
|
|
|
|
|
|
|
|
1089
|
649
|
100
|
|
|
|
1649
|
if ( !$self->[_saw_use_strict_] ) { |
|
1090
|
635
|
|
|
|
|
1495
|
$self->write_logfile_entry("Suggest including 'use strict;'\n"); |
|
1091
|
|
|
|
|
|
|
} |
|
1092
|
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
# it is suggested that labels have at least one upper case character |
|
1094
|
|
|
|
|
|
|
# for legibility and to avoid code breakage as new keywords are introduced |
|
1095
|
649
|
100
|
|
|
|
1915
|
if ( $self->[_rlower_case_labels_at_] ) { |
|
1096
|
12
|
|
|
|
|
19
|
my @lower_case_labels_at = @{ $self->[_rlower_case_labels_at_] }; |
|
|
12
|
|
|
|
|
35
|
|
|
1097
|
12
|
|
|
|
|
36
|
$self->write_logfile_entry( |
|
1098
|
|
|
|
|
|
|
"Suggest using upper case characters in label(s)\n"); |
|
1099
|
12
|
|
|
|
|
25
|
local $LIST_SEPARATOR = ')('; |
|
1100
|
12
|
|
|
|
|
57
|
$self->write_logfile_entry( |
|
1101
|
|
|
|
|
|
|
" defined at line(s): (@lower_case_labels_at)\n"); |
|
1102
|
|
|
|
|
|
|
} |
|
1103
|
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
# Get the text of any leading format skipping tag |
|
1105
|
649
|
|
|
|
|
1065
|
my $early_FS_end_marker; |
|
1106
|
649
|
|
|
|
|
1170
|
my $rformat_skipping_list = $self->[_rformat_skipping_list_]; |
|
1107
|
649
|
100
|
100
|
|
|
1010
|
if ( @{$rformat_skipping_list} && $rformat_skipping_list->[0]->[0] == -1 ) { |
|
|
649
|
|
|
|
|
2103
|
|
|
1108
|
3
|
|
|
|
|
9
|
$early_FS_end_marker = $rformat_skipping_list->[0]->[2]; |
|
1109
|
|
|
|
|
|
|
} |
|
1110
|
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
return { |
|
1112
|
649
|
|
|
|
|
3348
|
severe_error => $severe_error, |
|
1113
|
|
|
|
|
|
|
early_FS_end_marker => $early_FS_end_marker, |
|
1114
|
|
|
|
|
|
|
}; |
|
1115
|
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
} ## end sub report_tokenization_errors |
|
1117
|
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
sub show_indentation_table { |
|
1119
|
0
|
|
|
0
|
0
|
0
|
my ($self) = @_; |
|
1120
|
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
# Output indentation table made at closing braces. This can be helpful for |
|
1122
|
|
|
|
|
|
|
# the case of a missing brace in a previously formatted file. |
|
1123
|
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
# skip if problem reading file |
|
1125
|
0
|
0
|
|
|
|
0
|
return if ( $self->[_in_error_] ); |
|
1126
|
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
# skip if -wc is used (rare); it is too complex to use |
|
1128
|
0
|
0
|
|
|
|
0
|
return if ($rOpts_whitespace_cycle); |
|
1129
|
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
# skip if non-indenting-brace-prefix (very rare, but could be fixed) |
|
1131
|
0
|
0
|
|
|
|
0
|
return if ($rOpts_non_indenting_brace_prefix); |
|
1132
|
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
# skip if starting level is not zero (probably in editor) |
|
1134
|
0
|
0
|
|
|
|
0
|
return if ($rOpts_starting_indentation_level); |
|
1135
|
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
# skip if indentation analysis is not valid |
|
1137
|
0
|
|
|
|
|
0
|
my $rhash = $self->[_rclosing_brace_indentation_hash_]; |
|
1138
|
0
|
0
|
|
|
|
0
|
return if ( !$rhash->{valid} ); |
|
1139
|
|
|
|
|
|
|
|
|
1140
|
0
|
|
|
|
|
0
|
my $rhistory_line_number = $rhash->{rhistory_line_number}; |
|
1141
|
0
|
|
|
|
|
0
|
my $rhistory_level_diff = $rhash->{rhistory_level_diff}; |
|
1142
|
0
|
|
|
|
|
0
|
my $rhistory_anchor_point = $rhash->{rhistory_anchor_point}; |
|
1143
|
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
# Remove the first artificial point from the table |
|
1145
|
0
|
|
|
|
|
0
|
shift @{$rhistory_line_number}; |
|
|
0
|
|
|
|
|
0
|
|
|
1146
|
0
|
|
|
|
|
0
|
shift @{$rhistory_level_diff}; |
|
|
0
|
|
|
|
|
0
|
|
|
1147
|
0
|
|
|
|
|
0
|
shift @{$rhistory_anchor_point}; |
|
|
0
|
|
|
|
|
0
|
|
|
1148
|
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
# Remove dubious points at an anchor point = 2 and beyond |
|
1150
|
|
|
|
|
|
|
# These can occur when non-indenting braces are used |
|
1151
|
0
|
|
|
|
|
0
|
my $num_his = @{$rhistory_level_diff}; |
|
|
0
|
|
|
|
|
0
|
|
|
1152
|
0
|
|
|
|
|
0
|
foreach my $i ( 0 .. $num_his - 1 ) { |
|
1153
|
0
|
0
|
|
|
|
0
|
if ( $rhistory_anchor_point->[$i] == 2 ) { |
|
1154
|
0
|
|
|
|
|
0
|
$num_his = $i; |
|
1155
|
0
|
|
|
|
|
0
|
last; |
|
1156
|
|
|
|
|
|
|
} |
|
1157
|
|
|
|
|
|
|
} |
|
1158
|
0
|
0
|
|
|
|
0
|
return if ( $num_his <= 1 ); |
|
1159
|
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
# Ignore an ending non-anchor point |
|
1161
|
0
|
0
|
|
|
|
0
|
if ( !$rhistory_anchor_point->[-1] ) { |
|
1162
|
0
|
|
|
|
|
0
|
$num_his -= 1; |
|
1163
|
|
|
|
|
|
|
} |
|
1164
|
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
# Ignore an ending point which is the same as the previous point |
|
1166
|
0
|
0
|
|
|
|
0
|
if ( $num_his > 1 ) { |
|
1167
|
0
|
0
|
|
|
|
0
|
if ( $rhistory_level_diff->[ $num_his - 1 ] == |
|
1168
|
|
|
|
|
|
|
$rhistory_level_diff->[ $num_his - 2 ] ) |
|
1169
|
|
|
|
|
|
|
{ |
|
1170
|
0
|
|
|
|
|
0
|
$num_his -= 1; |
|
1171
|
|
|
|
|
|
|
} |
|
1172
|
|
|
|
|
|
|
} |
|
1173
|
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
# Skip if the table does not have at least 2 points to pinpoint an error |
|
1175
|
0
|
0
|
|
|
|
0
|
return if ( $num_his <= 1 ); |
|
1176
|
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
# Skip if first point shows a level error - the analysis may not be valid |
|
1178
|
0
|
0
|
|
|
|
0
|
return if ( $rhistory_level_diff->[0] ); |
|
1179
|
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
# Remove table points which return from negative to zero; they follow |
|
1181
|
|
|
|
|
|
|
# an error and may not be correct. c448. |
|
1182
|
0
|
|
|
|
|
0
|
my $min_lev = $rhistory_level_diff->[0]; |
|
1183
|
0
|
|
|
|
|
0
|
foreach my $ii ( 1 .. $num_his - 1 ) { |
|
1184
|
0
|
|
|
|
|
0
|
my $lev = $rhistory_level_diff->[$ii]; |
|
1185
|
0
|
0
|
|
|
|
0
|
if ( $lev < $min_lev ) { $min_lev = $lev; next } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1186
|
0
|
0
|
0
|
|
|
0
|
if ( $min_lev < 0 && $lev >= 0 ) { |
|
1187
|
0
|
|
|
|
|
0
|
$num_his = $ii; |
|
1188
|
0
|
|
|
|
|
0
|
last; |
|
1189
|
|
|
|
|
|
|
} |
|
1190
|
|
|
|
|
|
|
} |
|
1191
|
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
# Since the table could be arbitrarily large, we will limit the table to N |
|
1193
|
|
|
|
|
|
|
# lines. If there are more lines than that, we will show N-3 lines, then |
|
1194
|
|
|
|
|
|
|
# ..., then the last 2 lines. Allow about 3 lines per error, so a table |
|
1195
|
|
|
|
|
|
|
# limit of 10 can localize up to about 3 errors in a file. |
|
1196
|
0
|
|
|
|
|
0
|
my $nlines_max = 10; |
|
1197
|
0
|
|
|
|
|
0
|
my @pre_indexes = ( 0 .. $num_his - 1 ); |
|
1198
|
0
|
|
|
|
|
0
|
my @post_indexes = (); |
|
1199
|
0
|
0
|
|
|
|
0
|
if ( @pre_indexes > $nlines_max ) { |
|
1200
|
0
|
0
|
|
|
|
0
|
if ( $nlines_max >= 5 ) { |
|
1201
|
0
|
|
|
|
|
0
|
@pre_indexes = ( 0 .. $nlines_max - 4 ); |
|
1202
|
0
|
|
|
|
|
0
|
@post_indexes = ( $num_his - 2, $num_his - 1 ); |
|
1203
|
|
|
|
|
|
|
} |
|
1204
|
|
|
|
|
|
|
else { |
|
1205
|
0
|
|
|
|
|
0
|
@pre_indexes = ( 0 .. $nlines_max - 1 ); |
|
1206
|
|
|
|
|
|
|
} |
|
1207
|
|
|
|
|
|
|
} |
|
1208
|
|
|
|
|
|
|
|
|
1209
|
0
|
|
|
|
|
0
|
my @output_lines; |
|
1210
|
0
|
|
|
|
|
0
|
push @output_lines, <<EOM; |
|
1211
|
|
|
|
|
|
|
Table of initial nesting level differences at closing braces. |
|
1212
|
|
|
|
|
|
|
This might help localize brace errors IF perltidy previously formatted the file. |
|
1213
|
|
|
|
|
|
|
line: error=[new brace level]-[old indentation level] |
|
1214
|
|
|
|
|
|
|
EOM |
|
1215
|
0
|
|
|
|
|
0
|
foreach my $i (@pre_indexes) { |
|
1216
|
0
|
|
|
|
|
0
|
my $lno = $rhistory_line_number->[$i]; |
|
1217
|
0
|
|
|
|
|
0
|
my $diff = $rhistory_level_diff->[$i]; |
|
1218
|
0
|
|
|
|
|
0
|
push @output_lines, <<EOM; |
|
1219
|
|
|
|
|
|
|
$lno: $diff |
|
1220
|
|
|
|
|
|
|
EOM |
|
1221
|
|
|
|
|
|
|
} |
|
1222
|
0
|
0
|
|
|
|
0
|
if (@post_indexes) { |
|
1223
|
0
|
|
|
|
|
0
|
push @output_lines, "...\n"; |
|
1224
|
0
|
|
|
|
|
0
|
foreach my $i (@post_indexes) { |
|
1225
|
0
|
|
|
|
|
0
|
my $lno = $rhistory_line_number->[$i]; |
|
1226
|
0
|
|
|
|
|
0
|
my $diff = $rhistory_level_diff->[$i]; |
|
1227
|
0
|
|
|
|
|
0
|
push @output_lines, <<EOM; |
|
1228
|
|
|
|
|
|
|
$lno: $diff |
|
1229
|
|
|
|
|
|
|
EOM |
|
1230
|
|
|
|
|
|
|
} |
|
1231
|
|
|
|
|
|
|
} |
|
1232
|
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
# Try to give a hint |
|
1234
|
0
|
|
|
|
|
0
|
my $level_diff_1 = $rhistory_level_diff->[1]; |
|
1235
|
0
|
|
|
|
|
0
|
my $ln_0 = $rhistory_line_number->[0]; |
|
1236
|
0
|
|
|
|
|
0
|
my $ln_1 = $rhistory_line_number->[1]; |
|
1237
|
0
|
0
|
|
|
|
0
|
if ( $level_diff_1 < 0 ) { |
|
|
|
0
|
|
|
|
|
|
|
1238
|
0
|
|
|
|
|
0
|
push @output_lines, |
|
1239
|
|
|
|
|
|
|
"There may be an extra '}' or missing '{' between lines $ln_0 and $ln_1\n"; |
|
1240
|
|
|
|
|
|
|
} |
|
1241
|
|
|
|
|
|
|
elsif ( $level_diff_1 > 0 ) { |
|
1242
|
0
|
|
|
|
|
0
|
push @output_lines, |
|
1243
|
|
|
|
|
|
|
"There may be a missing '}' or extra '{' between lines $ln_0 and $ln_1\n"; |
|
1244
|
|
|
|
|
|
|
} |
|
1245
|
|
|
|
|
|
|
else { |
|
1246
|
|
|
|
|
|
|
## two leading zeros in the table - probably can't happen - no hint |
|
1247
|
|
|
|
|
|
|
} |
|
1248
|
|
|
|
|
|
|
|
|
1249
|
0
|
|
|
|
|
0
|
push @output_lines, "\n"; |
|
1250
|
0
|
|
|
|
|
0
|
my $output_str = join EMPTY_STRING, @output_lines; |
|
1251
|
|
|
|
|
|
|
|
|
1252
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
|
1253
|
0
|
|
|
|
|
0
|
$self->warning($output_str); |
|
1254
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
|
1255
|
|
|
|
|
|
|
|
|
1256
|
0
|
|
|
|
|
0
|
return; |
|
1257
|
|
|
|
|
|
|
} ## end sub show_indentation_table |
|
1258
|
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
sub report_v_string { |
|
1260
|
|
|
|
|
|
|
|
|
1261
|
|
|
|
|
|
|
# warn if this version can't handle v-strings |
|
1262
|
2
|
|
|
2
|
0
|
7
|
my ( $self, $tok ) = @_; |
|
1263
|
2
|
50
|
|
|
|
6
|
if ( $] < 5.006 ) { |
|
1264
|
0
|
|
|
|
|
0
|
$self->warning( |
|
1265
|
|
|
|
|
|
|
"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" |
|
1266
|
|
|
|
|
|
|
); |
|
1267
|
|
|
|
|
|
|
} |
|
1268
|
2
|
|
|
|
|
6
|
return; |
|
1269
|
|
|
|
|
|
|
} ## end sub report_v_string |
|
1270
|
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
sub is_valid_token_type { |
|
1272
|
447
|
|
|
447
|
0
|
530
|
my ($type) = @_; |
|
1273
|
447
|
|
|
|
|
1019
|
return $is_valid_token_type{$type}; |
|
1274
|
|
|
|
|
|
|
} |
|
1275
|
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
sub log_numbered_msg { |
|
1277
|
208
|
|
|
208
|
0
|
446
|
my ( $self, $msg ) = @_; |
|
1278
|
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
# write input line number + message to logfile |
|
1280
|
208
|
|
|
|
|
318
|
my $input_line_number = $self->[_last_line_number_]; |
|
1281
|
208
|
|
|
|
|
785
|
$self->write_logfile_entry("Line $input_line_number: $msg"); |
|
1282
|
208
|
|
|
|
|
438
|
return; |
|
1283
|
|
|
|
|
|
|
} ## end sub log_numbered_msg |
|
1284
|
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
sub get_line { |
|
1286
|
|
|
|
|
|
|
|
|
1287
|
9619
|
|
|
9619
|
0
|
13520
|
my $self = shift; |
|
1288
|
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
# Read the next input line and tokenize it |
|
1290
|
|
|
|
|
|
|
# Returns: |
|
1291
|
|
|
|
|
|
|
# $line_of_tokens = ref to hash of info for the tokenized line |
|
1292
|
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: |
|
1294
|
|
|
|
|
|
|
# $brace_depth, $square_bracket_depth, $paren_depth |
|
1295
|
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
# get the next line from the input array |
|
1297
|
9619
|
|
|
|
|
13025
|
my $input_line; |
|
1298
|
|
|
|
|
|
|
my $trimmed_input_line; |
|
1299
|
9619
|
|
|
|
|
13495
|
my $line_index = $self->[_input_line_index_next_]; |
|
1300
|
9619
|
|
|
|
|
12225
|
my $rinput_lines = $self->[_rinput_lines_]; |
|
1301
|
9619
|
100
|
|
|
|
11201
|
if ( $line_index < @{$rinput_lines} ) { |
|
|
9619
|
|
|
|
|
16073
|
|
|
1302
|
8970
|
|
|
|
|
16158
|
$trimmed_input_line = $self->[_rtrimmed_input_lines_]->[$line_index]; |
|
1303
|
8970
|
|
|
|
|
15306
|
$input_line = $rinput_lines->[ $line_index++ ]; |
|
1304
|
8970
|
|
|
|
|
12094
|
$self->[_input_line_index_next_] = $line_index; |
|
1305
|
|
|
|
|
|
|
} |
|
1306
|
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
# End of file .. check if file ends in a binary operator (c565) |
|
1308
|
|
|
|
|
|
|
else { |
|
1309
|
649
|
0
|
33
|
|
|
3566
|
if ( |
|
|
|
|
33
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
$is_binary_or_unary_operator_type{$last_nonblank_type} |
|
1311
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 'k' |
|
1312
|
|
|
|
|
|
|
&& $is_binary_or_unary_keyword{$last_nonblank_token} ) |
|
1313
|
|
|
|
|
|
|
) |
|
1314
|
|
|
|
|
|
|
{ |
|
1315
|
0
|
|
|
|
|
0
|
$self->warning( |
|
1316
|
|
|
|
|
|
|
"Unexpected EOF at operator '$last_nonblank_token'\n"); |
|
1317
|
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
# avoid repeating this message |
|
1319
|
0
|
|
|
|
|
0
|
$last_nonblank_token = ';'; |
|
1320
|
0
|
|
|
|
|
0
|
$last_nonblank_type = ';'; |
|
1321
|
|
|
|
|
|
|
} |
|
1322
|
|
|
|
|
|
|
} |
|
1323
|
|
|
|
|
|
|
|
|
1324
|
9619
|
|
|
|
|
14196
|
$self->[_line_of_text_] = $input_line; |
|
1325
|
|
|
|
|
|
|
|
|
1326
|
9619
|
100
|
|
|
|
17462
|
return if ( !defined($input_line) ); |
|
1327
|
|
|
|
|
|
|
|
|
1328
|
8970
|
|
|
|
|
12230
|
my $input_line_number = ++$self->[_last_line_number_]; |
|
1329
|
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
# Find and remove what characters terminate this line, including any |
|
1331
|
|
|
|
|
|
|
# control r |
|
1332
|
8970
|
|
|
|
|
11761
|
my $input_line_separator = EMPTY_STRING; |
|
1333
|
8970
|
100
|
|
|
|
19802
|
if ( chomp $input_line ) { |
|
1334
|
8969
|
|
|
|
|
18124
|
$input_line_separator = $INPUT_RECORD_SEPARATOR; |
|
1335
|
|
|
|
|
|
|
} |
|
1336
|
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
# The first test here very significantly speeds things up, but be sure to |
|
1338
|
|
|
|
|
|
|
# keep the regex and hash %other_line_endings the same. |
|
1339
|
8970
|
50
|
|
|
|
22354
|
if ( $other_line_endings{ substr( $input_line, -1 ) } ) { |
|
1340
|
0
|
0
|
|
|
|
0
|
if ( $input_line =~ s/([\r\035\032])+$// ) { |
|
1341
|
0
|
|
|
|
|
0
|
$input_line_separator = $1 . $input_line_separator; |
|
1342
|
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
# This could make the trimmed input line incorrect, so the |
|
1344
|
|
|
|
|
|
|
# safe thing to do is to make it undef to force it to be |
|
1345
|
|
|
|
|
|
|
# recomputed later. |
|
1346
|
0
|
|
|
|
|
0
|
$trimmed_input_line = undef; |
|
1347
|
|
|
|
|
|
|
} |
|
1348
|
|
|
|
|
|
|
} |
|
1349
|
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
# For backwards compatibility we keep the line text terminated with |
|
1351
|
|
|
|
|
|
|
# a newline character |
|
1352
|
8970
|
|
|
|
|
12443
|
$input_line .= "\n"; |
|
1353
|
8970
|
|
|
|
|
12011
|
$self->[_line_of_text_] = $input_line; |
|
1354
|
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
# create a data structure describing this line which will be |
|
1356
|
|
|
|
|
|
|
# returned to the caller. |
|
1357
|
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
# _line_type codes are: |
|
1359
|
|
|
|
|
|
|
# SYSTEM - system-specific code before hash-bang line |
|
1360
|
|
|
|
|
|
|
# CODE - line of perl code (including comments) |
|
1361
|
|
|
|
|
|
|
# POD_START - line starting pod, such as '=head' |
|
1362
|
|
|
|
|
|
|
# POD - pod documentation text |
|
1363
|
|
|
|
|
|
|
# POD_END - last line of pod section, '=cut' |
|
1364
|
|
|
|
|
|
|
# HERE - text of here-document |
|
1365
|
|
|
|
|
|
|
# HERE_END - last line of here-doc (target word) |
|
1366
|
|
|
|
|
|
|
# FORMAT - format section |
|
1367
|
|
|
|
|
|
|
# FORMAT_END - last line of format section, '.' |
|
1368
|
|
|
|
|
|
|
# SKIP - code skipping section |
|
1369
|
|
|
|
|
|
|
# SKIP_END - last line of code skipping section, '#>>V' |
|
1370
|
|
|
|
|
|
|
# DATA_START - __DATA__ line |
|
1371
|
|
|
|
|
|
|
# DATA - unidentified text following __DATA__ |
|
1372
|
|
|
|
|
|
|
# END_START - __END__ line |
|
1373
|
|
|
|
|
|
|
# END - unidentified text following __END__ |
|
1374
|
|
|
|
|
|
|
# ERROR - we are in big trouble, probably not a perl script |
|
1375
|
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
# Other variables: |
|
1377
|
|
|
|
|
|
|
# _curly_brace_depth - depth of curly braces at start of line |
|
1378
|
|
|
|
|
|
|
# _square_bracket_depth - depth of square brackets at start of line |
|
1379
|
|
|
|
|
|
|
# _paren_depth - depth of parens at start of line |
|
1380
|
|
|
|
|
|
|
# _starting_in_quote - this line continues a multi-line quote |
|
1381
|
|
|
|
|
|
|
# (so don't trim leading blanks!) |
|
1382
|
|
|
|
|
|
|
# _ending_in_quote - this line ends in a multi-line quote |
|
1383
|
|
|
|
|
|
|
# (so don't trim trailing blanks!) |
|
1384
|
8970
|
|
|
|
|
43010
|
my $line_of_tokens = { |
|
1385
|
|
|
|
|
|
|
_line_type => 'EOF', |
|
1386
|
|
|
|
|
|
|
_line_text => $input_line, |
|
1387
|
|
|
|
|
|
|
_line_number => $input_line_number, |
|
1388
|
|
|
|
|
|
|
_guessed_indentation_level => 0, |
|
1389
|
|
|
|
|
|
|
_curly_brace_depth => $brace_depth, |
|
1390
|
|
|
|
|
|
|
_square_bracket_depth => $square_bracket_depth, |
|
1391
|
|
|
|
|
|
|
_paren_depth => $paren_depth, |
|
1392
|
|
|
|
|
|
|
## Skip these needless initializations for efficiency: |
|
1393
|
|
|
|
|
|
|
## _rtoken_type => undef, |
|
1394
|
|
|
|
|
|
|
## _rtokens => undef, |
|
1395
|
|
|
|
|
|
|
## _rlevels => undef, |
|
1396
|
|
|
|
|
|
|
## _rblock_type => undef, |
|
1397
|
|
|
|
|
|
|
## _rtype_sequence => undef, |
|
1398
|
|
|
|
|
|
|
## _starting_in_quote => 0, |
|
1399
|
|
|
|
|
|
|
## _ending_in_quote => 0, |
|
1400
|
|
|
|
|
|
|
}; |
|
1401
|
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
# must print line unchanged if we are in a here document |
|
1403
|
8970
|
100
|
|
|
|
38527
|
if ( $self->[_in_here_doc_] ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
|
|
1405
|
32
|
|
|
|
|
49
|
$line_of_tokens->{_line_type} = 'HERE'; |
|
1406
|
32
|
|
|
|
|
46
|
my $here_doc_target = $self->[_here_doc_target_]; |
|
1407
|
32
|
|
|
|
|
54
|
my $here_quote_character = $self->[_here_quote_character_]; |
|
1408
|
32
|
|
|
|
|
46
|
my $candidate_target = $input_line; |
|
1409
|
32
|
|
|
|
|
42
|
chomp $candidate_target; |
|
1410
|
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
# Handle <<~ targets, which are indicated here by a leading space on |
|
1412
|
|
|
|
|
|
|
# the here quote character |
|
1413
|
32
|
100
|
|
|
|
84
|
if ( $here_quote_character =~ /^\s/ ) { |
|
1414
|
4
|
|
|
|
|
12
|
$candidate_target =~ s/^\s+//; |
|
1415
|
|
|
|
|
|
|
} |
|
1416
|
32
|
100
|
|
|
|
67
|
if ( $candidate_target eq $here_doc_target ) { |
|
1417
|
13
|
|
|
|
|
25
|
$self->[_nearly_matched_here_target_at_] = undef; |
|
1418
|
13
|
|
|
|
|
22
|
$line_of_tokens->{_line_type} = 'HERE_END'; |
|
1419
|
13
|
|
|
|
|
47
|
$self->log_numbered_msg("Exiting HERE document $here_doc_target\n"); |
|
1420
|
|
|
|
|
|
|
|
|
1421
|
13
|
|
|
|
|
24
|
my $rhere_target_list = $self->[_rhere_target_list_]; |
|
1422
|
13
|
100
|
|
|
|
21
|
if ( @{$rhere_target_list} ) { # there can be multiple here targets |
|
|
13
|
|
|
|
|
38
|
|
|
1423
|
|
|
|
|
|
|
( $here_doc_target, $here_quote_character ) = |
|
1424
|
2
|
|
|
|
|
3
|
@{ shift @{$rhere_target_list} }; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
6
|
|
|
1425
|
2
|
|
|
|
|
4
|
$self->[_here_doc_target_] = $here_doc_target; |
|
1426
|
2
|
|
|
|
|
2
|
$self->[_here_quote_character_] = $here_quote_character; |
|
1427
|
2
|
|
|
|
|
6
|
$self->log_numbered_msg( |
|
1428
|
|
|
|
|
|
|
"Entering HERE document $here_doc_target\n"); |
|
1429
|
2
|
|
|
|
|
4
|
$self->[_nearly_matched_here_target_at_] = undef; |
|
1430
|
2
|
|
|
|
|
4
|
$self->[_started_looking_for_here_target_at_] = |
|
1431
|
|
|
|
|
|
|
$input_line_number; |
|
1432
|
|
|
|
|
|
|
} |
|
1433
|
|
|
|
|
|
|
else { |
|
1434
|
11
|
|
|
|
|
34
|
$self->[_in_here_doc_] = 0; |
|
1435
|
11
|
|
|
|
|
20
|
$self->[_here_doc_target_] = EMPTY_STRING; |
|
1436
|
11
|
|
|
|
|
23
|
$self->[_here_quote_character_] = EMPTY_STRING; |
|
1437
|
|
|
|
|
|
|
} |
|
1438
|
|
|
|
|
|
|
} |
|
1439
|
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
# check for error of extra whitespace |
|
1441
|
|
|
|
|
|
|
# note for PERL6: leading whitespace is allowed |
|
1442
|
|
|
|
|
|
|
else { |
|
1443
|
19
|
|
|
|
|
144
|
$candidate_target =~ s/^ \s+ | \s+ $//gx; # trim both ends |
|
1444
|
19
|
50
|
|
|
|
52
|
if ( $candidate_target eq $here_doc_target ) { |
|
1445
|
0
|
|
|
|
|
0
|
$self->[_nearly_matched_here_target_at_] = $input_line_number; |
|
1446
|
|
|
|
|
|
|
} |
|
1447
|
|
|
|
|
|
|
} |
|
1448
|
32
|
|
|
|
|
96
|
return $line_of_tokens; |
|
1449
|
|
|
|
|
|
|
} |
|
1450
|
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
# Print line unchanged if we are in a format section |
|
1452
|
|
|
|
|
|
|
elsif ( $self->[_in_format_] ) { |
|
1453
|
|
|
|
|
|
|
|
|
1454
|
3
|
100
|
|
|
|
9
|
if ( $input_line =~ /^\.[\s#]*$/ ) { |
|
1455
|
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
# Decrement format depth count at a '.' after a 'format' |
|
1457
|
1
|
|
|
|
|
2
|
$self->[_in_format_]--; |
|
1458
|
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
# This is the end when count reaches 0 |
|
1460
|
1
|
50
|
|
|
|
3
|
if ( !$self->[_in_format_] ) { |
|
1461
|
1
|
|
|
|
|
4
|
$self->log_numbered_msg("Exiting format section\n"); |
|
1462
|
1
|
|
|
|
|
2
|
$line_of_tokens->{_line_type} = 'FORMAT_END'; |
|
1463
|
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
# Make the tokenizer mark an opening brace which follows |
|
1465
|
|
|
|
|
|
|
# as a code block. Fixes issue c202/t032. |
|
1466
|
1
|
|
|
|
|
2
|
$last_nonblank_token = ';'; |
|
1467
|
1
|
|
|
|
|
1
|
$last_nonblank_type = ';'; |
|
1468
|
|
|
|
|
|
|
} |
|
1469
|
|
|
|
|
|
|
} |
|
1470
|
|
|
|
|
|
|
else { |
|
1471
|
2
|
|
|
|
|
4
|
$line_of_tokens->{_line_type} = 'FORMAT'; |
|
1472
|
2
|
50
|
|
|
|
5
|
if ( $input_line =~ /^\s*format\s+\w+/ ) { |
|
1473
|
|
|
|
|
|
|
|
|
1474
|
|
|
|
|
|
|
# Increment format depth count at a 'format' within a 'format' |
|
1475
|
|
|
|
|
|
|
# This is a simple way to handle nested formats (issue c019). |
|
1476
|
0
|
|
|
|
|
0
|
$self->[_in_format_]++; |
|
1477
|
|
|
|
|
|
|
} |
|
1478
|
|
|
|
|
|
|
} |
|
1479
|
3
|
|
|
|
|
7
|
return $line_of_tokens; |
|
1480
|
|
|
|
|
|
|
} |
|
1481
|
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
# must print line unchanged if we are in pod documentation |
|
1483
|
|
|
|
|
|
|
elsif ( $self->[_in_pod_] ) { |
|
1484
|
|
|
|
|
|
|
|
|
1485
|
51
|
|
|
|
|
84
|
$line_of_tokens->{_line_type} = 'POD'; |
|
1486
|
51
|
100
|
|
|
|
141
|
if ( $input_line =~ /^=cut/ ) { |
|
1487
|
22
|
|
|
|
|
48
|
$line_of_tokens->{_line_type} = 'POD_END'; |
|
1488
|
22
|
|
|
|
|
60
|
$self->log_numbered_msg("Exiting POD section\n"); |
|
1489
|
22
|
|
|
|
|
35
|
$self->[_in_pod_] = 0; |
|
1490
|
|
|
|
|
|
|
} |
|
1491
|
51
|
50
|
33
|
|
|
178
|
if ( $input_line =~ /^\#\!.*perl\b/ && !$self->[_in_end_] ) { |
|
1492
|
0
|
|
|
|
|
0
|
$self->warning( |
|
1493
|
|
|
|
|
|
|
"Hash-bang in pod can cause older versions of perl to fail! \n" |
|
1494
|
|
|
|
|
|
|
); |
|
1495
|
|
|
|
|
|
|
} |
|
1496
|
|
|
|
|
|
|
|
|
1497
|
51
|
|
|
|
|
177
|
return $line_of_tokens; |
|
1498
|
|
|
|
|
|
|
} |
|
1499
|
|
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
|
# print line unchanged if in skipped section |
|
1501
|
|
|
|
|
|
|
elsif ( $self->[_in_code_skipping_] ) { |
|
1502
|
|
|
|
|
|
|
|
|
1503
|
8
|
|
|
|
|
12
|
$line_of_tokens->{_line_type} = 'SKIP'; |
|
1504
|
8
|
100
|
|
|
|
118
|
if ( $input_line =~ /$code_skipping_pattern_end/ ) { |
|
|
|
50
|
|
|
|
|
|
|
1505
|
2
|
|
|
|
|
6
|
$line_of_tokens->{_line_type} = 'SKIP_END'; |
|
1506
|
2
|
|
|
|
|
5
|
$self->log_numbered_msg("Exiting code-skipping section\n"); |
|
1507
|
2
|
|
|
|
|
3
|
$self->[_in_code_skipping_] = 0; |
|
1508
|
|
|
|
|
|
|
} |
|
1509
|
|
|
|
|
|
|
elsif ( $input_line =~ /$code_skipping_pattern_begin/ ) { |
|
1510
|
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
# warn of duplicate starting comment lines, git #118 |
|
1512
|
0
|
|
|
|
|
0
|
my $lno = $self->[_in_code_skipping_]; |
|
1513
|
0
|
|
|
|
|
0
|
$self->warning( |
|
1514
|
|
|
|
|
|
|
"Already in code-skipping section which started at line $lno\n" |
|
1515
|
|
|
|
|
|
|
); |
|
1516
|
|
|
|
|
|
|
} |
|
1517
|
|
|
|
|
|
|
else { |
|
1518
|
|
|
|
|
|
|
# not a code-skipping control line |
|
1519
|
|
|
|
|
|
|
} |
|
1520
|
8
|
|
|
|
|
20
|
return $line_of_tokens; |
|
1521
|
|
|
|
|
|
|
} |
|
1522
|
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
# must print line unchanged if we have seen a severe error (i.e., we |
|
1524
|
|
|
|
|
|
|
# are seeing illegal tokens and cannot continue. Syntax errors do |
|
1525
|
|
|
|
|
|
|
# not pass this route). Calling routine can decide what to do, but |
|
1526
|
|
|
|
|
|
|
# the default can be to just pass all lines as if they were after __END__ |
|
1527
|
|
|
|
|
|
|
elsif ( $self->[_in_error_] ) { |
|
1528
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'ERROR'; |
|
1529
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
|
1530
|
|
|
|
|
|
|
} |
|
1531
|
|
|
|
|
|
|
|
|
1532
|
|
|
|
|
|
|
# print line unchanged if we are __DATA__ section |
|
1533
|
|
|
|
|
|
|
elsif ( $self->[_in_data_] ) { |
|
1534
|
|
|
|
|
|
|
|
|
1535
|
|
|
|
|
|
|
# ...but look for POD |
|
1536
|
|
|
|
|
|
|
# Note that the _in_data and _in_end flags remain set |
|
1537
|
|
|
|
|
|
|
# so that we return to that state after seeing the |
|
1538
|
|
|
|
|
|
|
# end of a pod section |
|
1539
|
1
|
50
|
33
|
|
|
11
|
if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) { |
|
1540
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'POD_START'; |
|
1541
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg("Entering POD section\n"); |
|
1542
|
0
|
|
|
|
|
0
|
$self->[_in_pod_] = 1; |
|
1543
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
|
1544
|
|
|
|
|
|
|
} |
|
1545
|
|
|
|
|
|
|
else { |
|
1546
|
1
|
|
|
|
|
5
|
$line_of_tokens->{_line_type} = 'DATA'; |
|
1547
|
1
|
|
|
|
|
6
|
return $line_of_tokens; |
|
1548
|
|
|
|
|
|
|
} |
|
1549
|
|
|
|
|
|
|
} |
|
1550
|
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
# print line unchanged if we are in __END__ section |
|
1552
|
|
|
|
|
|
|
elsif ( $self->[_in_end_] ) { |
|
1553
|
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
# ...but look for POD |
|
1555
|
|
|
|
|
|
|
# Note that the _in_data and _in_end flags remain set |
|
1556
|
|
|
|
|
|
|
# so that we return to that state after seeing the |
|
1557
|
|
|
|
|
|
|
# end of a pod section |
|
1558
|
56
|
100
|
66
|
|
|
197
|
if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) { |
|
1559
|
7
|
|
|
|
|
17
|
$line_of_tokens->{_line_type} = 'POD_START'; |
|
1560
|
7
|
|
|
|
|
25
|
$self->log_numbered_msg("Entering POD section\n"); |
|
1561
|
7
|
|
|
|
|
11
|
$self->[_in_pod_] = 1; |
|
1562
|
7
|
|
|
|
|
23
|
return $line_of_tokens; |
|
1563
|
|
|
|
|
|
|
} |
|
1564
|
|
|
|
|
|
|
else { |
|
1565
|
49
|
|
|
|
|
69
|
$line_of_tokens->{_line_type} = 'END'; |
|
1566
|
49
|
|
|
|
|
119
|
return $line_of_tokens; |
|
1567
|
|
|
|
|
|
|
} |
|
1568
|
|
|
|
|
|
|
} |
|
1569
|
|
|
|
|
|
|
else { |
|
1570
|
|
|
|
|
|
|
# not a special control line |
|
1571
|
|
|
|
|
|
|
} |
|
1572
|
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
# check for a hash-bang line if we haven't seen one |
|
1574
|
8819
|
100
|
100
|
|
|
31965
|
if ( !$self->[_saw_hash_bang_] |
|
|
|
|
66
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
&& substr( $input_line, 0, 2 ) eq '#!' |
|
1576
|
|
|
|
|
|
|
&& $input_line =~ /^\#\!.*perl\b/ ) |
|
1577
|
|
|
|
|
|
|
{ |
|
1578
|
16
|
|
|
|
|
36
|
$self->[_saw_hash_bang_] = $input_line_number; |
|
1579
|
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
# check for -w and -P flags |
|
1581
|
16
|
50
|
|
|
|
78
|
if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { |
|
1582
|
0
|
|
|
|
|
0
|
$self->[_saw_perl_dash_P_] = 1; |
|
1583
|
|
|
|
|
|
|
} |
|
1584
|
|
|
|
|
|
|
|
|
1585
|
16
|
100
|
|
|
|
82
|
if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { |
|
1586
|
9
|
|
|
|
|
18
|
$self->[_saw_perl_dash_w_] = 1; |
|
1587
|
|
|
|
|
|
|
} |
|
1588
|
|
|
|
|
|
|
|
|
1589
|
16
|
100
|
33
|
|
|
114
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
$input_line_number > 1 |
|
1591
|
|
|
|
|
|
|
|
|
1592
|
|
|
|
|
|
|
# leave any hash bang in a BEGIN block alone |
|
1593
|
|
|
|
|
|
|
# i.e. see 'debugger-duck_type.t' |
|
1594
|
|
|
|
|
|
|
&& !( |
|
1595
|
|
|
|
|
|
|
$last_nonblank_block_type |
|
1596
|
|
|
|
|
|
|
&& $last_nonblank_block_type eq 'BEGIN' |
|
1597
|
|
|
|
|
|
|
) |
|
1598
|
|
|
|
|
|
|
&& !$rOpts_look_for_hash_bang |
|
1599
|
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
# Try to avoid giving a false alarm at a simple comment. |
|
1601
|
|
|
|
|
|
|
# These look like valid hash-bang lines: |
|
1602
|
|
|
|
|
|
|
|
|
1603
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
|
1604
|
|
|
|
|
|
|
#! /usr/bin/perl -w |
|
1605
|
|
|
|
|
|
|
#!c:\perl\bin\perl.exe |
|
1606
|
|
|
|
|
|
|
|
|
1607
|
|
|
|
|
|
|
# These are comments: |
|
1608
|
|
|
|
|
|
|
#! I love perl |
|
1609
|
|
|
|
|
|
|
#! sunos does not yet provide a /usr/bin/perl |
|
1610
|
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
# Comments typically have multiple spaces, which suggests |
|
1612
|
|
|
|
|
|
|
# the filter |
|
1613
|
|
|
|
|
|
|
&& $input_line =~ /^\#\!(\s+)?(\S+)?perl/ |
|
1614
|
|
|
|
|
|
|
) |
|
1615
|
|
|
|
|
|
|
{ |
|
1616
|
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
# this is helpful for VMS systems; we may have accidentally |
|
1618
|
|
|
|
|
|
|
# tokenized some DCL commands |
|
1619
|
1
|
50
|
|
|
|
4
|
if ( $self->[_started_tokenizing_] ) { |
|
1620
|
0
|
|
|
|
|
0
|
$self->warning( |
|
1621
|
|
|
|
|
|
|
"There seems to be a hash-bang after line 1; do you need to run with -x ?\n" |
|
1622
|
|
|
|
|
|
|
); |
|
1623
|
|
|
|
|
|
|
} |
|
1624
|
|
|
|
|
|
|
else { |
|
1625
|
1
|
|
|
|
|
4
|
$self->complain("Useless hash-bang after line 1\n"); |
|
1626
|
|
|
|
|
|
|
} |
|
1627
|
|
|
|
|
|
|
} |
|
1628
|
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
# Report the leading hash-bang as a system line |
|
1630
|
|
|
|
|
|
|
# This will prevent -dac from deleting it |
|
1631
|
|
|
|
|
|
|
else { |
|
1632
|
15
|
|
|
|
|
36
|
$line_of_tokens->{_line_type} = 'SYSTEM'; |
|
1633
|
15
|
|
|
|
|
74
|
return $line_of_tokens; |
|
1634
|
|
|
|
|
|
|
} |
|
1635
|
|
|
|
|
|
|
} |
|
1636
|
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
# wait for a hash-bang before parsing if the user invoked us with -x |
|
1638
|
8804
|
100
|
100
|
|
|
17387
|
if ( $rOpts_look_for_hash_bang |
|
1639
|
|
|
|
|
|
|
&& !$self->[_saw_hash_bang_] ) |
|
1640
|
|
|
|
|
|
|
{ |
|
1641
|
5
|
|
|
|
|
8
|
$line_of_tokens->{_line_type} = 'SYSTEM'; |
|
1642
|
5
|
|
|
|
|
12
|
return $line_of_tokens; |
|
1643
|
|
|
|
|
|
|
} |
|
1644
|
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
# a first line of the form ': #' will be marked as SYSTEM |
|
1646
|
|
|
|
|
|
|
# since lines of this form may be used by tcsh |
|
1647
|
8799
|
50
|
66
|
|
|
18695
|
if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { |
|
1648
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'SYSTEM'; |
|
1649
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
|
1650
|
|
|
|
|
|
|
} |
|
1651
|
|
|
|
|
|
|
|
|
1652
|
|
|
|
|
|
|
# now we know that it is ok to tokenize the line... |
|
1653
|
|
|
|
|
|
|
# the line tokenizer will modify any of these private variables: |
|
1654
|
|
|
|
|
|
|
# _rhere_target_list_ |
|
1655
|
|
|
|
|
|
|
# _in_data_ |
|
1656
|
|
|
|
|
|
|
# _in_end_ |
|
1657
|
|
|
|
|
|
|
# _in_format_ |
|
1658
|
|
|
|
|
|
|
# _in_error_ |
|
1659
|
|
|
|
|
|
|
# _in_code_skipping_ |
|
1660
|
|
|
|
|
|
|
# _in_format_skipping_ |
|
1661
|
|
|
|
|
|
|
# _in_pod_ |
|
1662
|
|
|
|
|
|
|
# _in_quote_ |
|
1663
|
|
|
|
|
|
|
|
|
1664
|
8799
|
|
|
|
|
23188
|
$self->tokenize_this_line( $line_of_tokens, $trimmed_input_line ); |
|
1665
|
|
|
|
|
|
|
|
|
1666
|
|
|
|
|
|
|
# Now finish defining the return structure and return it |
|
1667
|
8799
|
|
|
|
|
16932
|
$line_of_tokens->{_ending_in_quote} = $self->[_in_quote_]; |
|
1668
|
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
# handle severe error (binary data in script) |
|
1670
|
8799
|
50
|
|
|
|
15894
|
if ( $self->[_in_error_] ) { |
|
1671
|
0
|
|
|
|
|
0
|
$self->[_in_quote_] = 0; # to avoid any more messages |
|
1672
|
0
|
|
|
|
|
0
|
$self->warning("Giving up after error\n"); |
|
1673
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'ERROR'; |
|
1674
|
0
|
|
|
|
|
0
|
reset_indentation_level(0); # avoid error messages |
|
1675
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
|
1676
|
|
|
|
|
|
|
} |
|
1677
|
|
|
|
|
|
|
|
|
1678
|
|
|
|
|
|
|
# handle start of pod documentation |
|
1679
|
8799
|
100
|
|
|
|
14516
|
if ( $self->[_in_pod_] ) { |
|
1680
|
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
# This gets tricky..above a __DATA__ or __END__ section, perl |
|
1682
|
|
|
|
|
|
|
# accepts '=cut' as the start of pod section. But afterwards, |
|
1683
|
|
|
|
|
|
|
# only pod utilities see it and they may ignore an =cut without |
|
1684
|
|
|
|
|
|
|
# leading =head. In any case, this isn't good. |
|
1685
|
15
|
50
|
|
|
|
62
|
if ( $input_line =~ /^=cut\b/ ) { |
|
1686
|
0
|
0
|
0
|
|
|
0
|
if ( $self->[_saw_data_] || $self->[_saw_end_] ) { |
|
1687
|
0
|
|
|
|
|
0
|
$self->complain("=cut while not in pod ignored\n"); |
|
1688
|
0
|
|
|
|
|
0
|
$self->[_in_pod_] = 0; |
|
1689
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'POD_END'; |
|
1690
|
|
|
|
|
|
|
} |
|
1691
|
|
|
|
|
|
|
else { |
|
1692
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'POD_START'; |
|
1693
|
0
|
|
|
|
|
0
|
if ( !DEVEL_MODE ) { |
|
1694
|
0
|
|
|
|
|
0
|
$self->warning( |
|
1695
|
|
|
|
|
|
|
"=cut starts a pod section .. this can fool pod utilities.\n" |
|
1696
|
|
|
|
|
|
|
); |
|
1697
|
|
|
|
|
|
|
} |
|
1698
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg("Entering POD section\n"); |
|
1699
|
|
|
|
|
|
|
} |
|
1700
|
|
|
|
|
|
|
} |
|
1701
|
|
|
|
|
|
|
|
|
1702
|
|
|
|
|
|
|
else { |
|
1703
|
15
|
|
|
|
|
37
|
$line_of_tokens->{_line_type} = 'POD_START'; |
|
1704
|
15
|
|
|
|
|
53
|
$self->log_numbered_msg("Entering POD section\n"); |
|
1705
|
|
|
|
|
|
|
} |
|
1706
|
|
|
|
|
|
|
|
|
1707
|
15
|
|
|
|
|
52
|
return $line_of_tokens; |
|
1708
|
|
|
|
|
|
|
} |
|
1709
|
|
|
|
|
|
|
|
|
1710
|
|
|
|
|
|
|
# handle start of skipped section |
|
1711
|
8784
|
100
|
|
|
|
14991
|
if ( $self->[_in_code_skipping_] ) { |
|
1712
|
|
|
|
|
|
|
|
|
1713
|
2
|
|
|
|
|
5
|
$line_of_tokens->{_line_type} = 'SKIP'; |
|
1714
|
2
|
|
|
|
|
9
|
$self->log_numbered_msg("Entering code-skipping section\n"); |
|
1715
|
2
|
|
|
|
|
6
|
return $line_of_tokens; |
|
1716
|
|
|
|
|
|
|
} |
|
1717
|
|
|
|
|
|
|
|
|
1718
|
|
|
|
|
|
|
# see if this line contains here doc targets |
|
1719
|
8782
|
|
|
|
|
11733
|
my $rhere_target_list = $self->[_rhere_target_list_]; |
|
1720
|
8782
|
100
|
|
|
|
9514
|
if ( @{$rhere_target_list} ) { |
|
|
8782
|
|
|
|
|
15910
|
|
|
1721
|
|
|
|
|
|
|
|
|
1722
|
|
|
|
|
|
|
my ( $here_doc_target, $here_quote_character ) = |
|
1723
|
11
|
|
|
|
|
20
|
@{ shift @{$rhere_target_list} }; |
|
|
11
|
|
|
|
|
20
|
|
|
|
11
|
|
|
|
|
33
|
|
|
1724
|
11
|
|
|
|
|
29
|
$self->[_in_here_doc_] = 1; |
|
1725
|
11
|
|
|
|
|
24
|
$self->[_here_doc_target_] = $here_doc_target; |
|
1726
|
11
|
|
|
|
|
22
|
$self->[_here_quote_character_] = $here_quote_character; |
|
1727
|
11
|
|
|
|
|
56
|
$self->log_numbered_msg("Entering HERE document $here_doc_target\n"); |
|
1728
|
11
|
|
|
|
|
19
|
$self->[_started_looking_for_here_target_at_] = $input_line_number; |
|
1729
|
|
|
|
|
|
|
} |
|
1730
|
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
# NOTE: __END__ and __DATA__ statements are written unformatted |
|
1732
|
|
|
|
|
|
|
# because they can theoretically contain additional characters |
|
1733
|
|
|
|
|
|
|
# which are not tokenized (and cannot be read with <DATA> either!). |
|
1734
|
8782
|
100
|
|
|
|
18445
|
if ( $self->[_in_data_] ) { |
|
|
|
100
|
|
|
|
|
|
|
1735
|
1
|
|
|
|
|
3
|
$line_of_tokens->{_line_type} = 'DATA_START'; |
|
1736
|
1
|
|
|
|
|
7
|
$self->log_numbered_msg("Starting __DATA__ section\n"); |
|
1737
|
1
|
|
|
|
|
2
|
$self->[_saw_data_] = 1; |
|
1738
|
|
|
|
|
|
|
|
|
1739
|
|
|
|
|
|
|
# keep parsing after __DATA__ if use SelfLoader was seen |
|
1740
|
1
|
50
|
|
|
|
5
|
if ( $self->[_saw_selfloader_] ) { |
|
1741
|
0
|
|
|
|
|
0
|
$self->[_in_data_] = 0; |
|
1742
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg( |
|
1743
|
|
|
|
|
|
|
"SelfLoader seen, continuing; -nlsl deactivates\n"); |
|
1744
|
|
|
|
|
|
|
} |
|
1745
|
|
|
|
|
|
|
|
|
1746
|
1
|
|
|
|
|
7
|
return $line_of_tokens; |
|
1747
|
|
|
|
|
|
|
} |
|
1748
|
|
|
|
|
|
|
|
|
1749
|
|
|
|
|
|
|
elsif ( $self->[_in_end_] ) { |
|
1750
|
7
|
|
|
|
|
19
|
$line_of_tokens->{_line_type} = 'END_START'; |
|
1751
|
7
|
|
|
|
|
23
|
$self->log_numbered_msg("Starting __END__ section\n"); |
|
1752
|
7
|
|
|
|
|
13
|
$self->[_saw_end_] = 1; |
|
1753
|
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
# keep parsing after __END__ if use AutoLoader was seen |
|
1755
|
7
|
50
|
|
|
|
31
|
if ( $self->[_saw_autoloader_] ) { |
|
1756
|
0
|
|
|
|
|
0
|
$self->[_in_end_] = 0; |
|
1757
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg( |
|
1758
|
|
|
|
|
|
|
"AutoLoader seen, continuing; -nlal deactivates\n"); |
|
1759
|
|
|
|
|
|
|
} |
|
1760
|
7
|
|
|
|
|
44
|
return $line_of_tokens; |
|
1761
|
|
|
|
|
|
|
} |
|
1762
|
|
|
|
|
|
|
else { |
|
1763
|
|
|
|
|
|
|
# not in __END__ or __DATA__ |
|
1764
|
|
|
|
|
|
|
} |
|
1765
|
|
|
|
|
|
|
|
|
1766
|
|
|
|
|
|
|
# now, finally, we know that this line is type 'CODE' |
|
1767
|
8774
|
|
|
|
|
13732
|
$line_of_tokens->{_line_type} = 'CODE'; |
|
1768
|
|
|
|
|
|
|
|
|
1769
|
|
|
|
|
|
|
# remember if we have seen any real code |
|
1770
|
8774
|
100
|
100
|
|
|
21974
|
if ( !$self->[_started_tokenizing_] |
|
|
|
|
100
|
|
|
|
|
|
1771
|
|
|
|
|
|
|
&& $input_line !~ /^\s*$/ |
|
1772
|
|
|
|
|
|
|
&& $input_line !~ /^\s*#/ ) |
|
1773
|
|
|
|
|
|
|
{ |
|
1774
|
645
|
|
|
|
|
1280
|
$self->[_started_tokenizing_] = 1; |
|
1775
|
|
|
|
|
|
|
} |
|
1776
|
|
|
|
|
|
|
|
|
1777
|
8774
|
100
|
|
|
|
14977
|
if ( $self->[_debugger_object_] ) { |
|
1778
|
7
|
|
|
|
|
21
|
$self->[_debugger_object_]->write_debug_entry($line_of_tokens); |
|
1779
|
|
|
|
|
|
|
} |
|
1780
|
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
# Note: if keyword 'format' occurs in this line code, it is still CODE |
|
1782
|
|
|
|
|
|
|
# (keyword 'format' need not start a line) |
|
1783
|
8774
|
100
|
|
|
|
14750
|
if ( $self->[_in_format_] ) { |
|
1784
|
1
|
|
|
|
|
4
|
$self->log_numbered_msg("Entering format section\n"); |
|
1785
|
|
|
|
|
|
|
} |
|
1786
|
|
|
|
|
|
|
|
|
1787
|
8774
|
100
|
100
|
|
|
26309
|
if ( $self->[_in_quote_] |
|
|
|
100
|
100
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
and ( $self->[_line_start_quote_] < 0 ) ) |
|
1789
|
|
|
|
|
|
|
{ |
|
1790
|
63
|
100
|
|
|
|
399
|
if ( ( my $quote_target = $self->[_quote_target_] ) !~ /^\s*$/ ) { |
|
1791
|
62
|
|
|
|
|
112
|
$self->[_line_start_quote_] = $input_line_number; |
|
1792
|
62
|
|
|
|
|
274
|
$self->log_numbered_msg( |
|
1793
|
|
|
|
|
|
|
"Start multi-line quote or pattern ending in $quote_target\n"); |
|
1794
|
|
|
|
|
|
|
} |
|
1795
|
|
|
|
|
|
|
} |
|
1796
|
|
|
|
|
|
|
elsif ( ( $self->[_line_start_quote_] >= 0 ) |
|
1797
|
|
|
|
|
|
|
&& !$self->[_in_quote_] ) |
|
1798
|
|
|
|
|
|
|
{ |
|
1799
|
62
|
|
|
|
|
132
|
$self->[_line_start_quote_] = -1; |
|
1800
|
62
|
|
|
|
|
183
|
$self->log_numbered_msg("End of multi-line quote or pattern\n"); |
|
1801
|
|
|
|
|
|
|
} |
|
1802
|
|
|
|
|
|
|
else { |
|
1803
|
|
|
|
|
|
|
# not at the edge of a quote |
|
1804
|
|
|
|
|
|
|
} |
|
1805
|
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
# we are returning a line of CODE |
|
1807
|
8774
|
|
|
|
|
31151
|
return $line_of_tokens; |
|
1808
|
|
|
|
|
|
|
} ## end sub get_line |
|
1809
|
|
|
|
|
|
|
|
|
1810
|
|
|
|
|
|
|
sub find_starting_indentation_level { |
|
1811
|
|
|
|
|
|
|
|
|
1812
|
|
|
|
|
|
|
# We need to find the indentation level of the first line of the |
|
1813
|
|
|
|
|
|
|
# script being formatted. Often it will be zero for an entire file, |
|
1814
|
|
|
|
|
|
|
# but if we are formatting a local block of code (within an editor for |
|
1815
|
|
|
|
|
|
|
# example) it may not be zero. The user may specify this with the |
|
1816
|
|
|
|
|
|
|
# -sil=n parameter but normally doesn't so we have to guess. |
|
1817
|
|
|
|
|
|
|
# |
|
1818
|
649
|
|
|
649
|
0
|
1347
|
my ($self) = @_; |
|
1819
|
649
|
|
|
|
|
1213
|
my $starting_level = 0; |
|
1820
|
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
# use value if given as parameter |
|
1822
|
649
|
100
|
|
|
|
2235
|
if ( $self->[_know_starting_level_] ) { |
|
|
|
100
|
|
|
|
|
|
|
1823
|
1
|
|
|
|
|
2
|
$starting_level = $self->[_starting_level_]; |
|
1824
|
|
|
|
|
|
|
} |
|
1825
|
|
|
|
|
|
|
|
|
1826
|
|
|
|
|
|
|
# if we know there is a hash_bang line, the level must be zero |
|
1827
|
|
|
|
|
|
|
elsif ($rOpts_look_for_hash_bang) { |
|
1828
|
1
|
|
|
|
|
2
|
$self->[_know_starting_level_] = 1; |
|
1829
|
|
|
|
|
|
|
} |
|
1830
|
|
|
|
|
|
|
|
|
1831
|
|
|
|
|
|
|
# otherwise figure it out from the input file |
|
1832
|
|
|
|
|
|
|
else { |
|
1833
|
647
|
|
|
|
|
992
|
my $line; |
|
1834
|
647
|
|
|
|
|
980
|
my $i = 0; |
|
1835
|
|
|
|
|
|
|
|
|
1836
|
|
|
|
|
|
|
# keep looking at lines until we find a hash bang or piece of code |
|
1837
|
|
|
|
|
|
|
# ( or, for now, an =pod line) |
|
1838
|
647
|
|
|
|
|
1075
|
my $msg = EMPTY_STRING; |
|
1839
|
647
|
|
|
|
|
1171
|
my $in_code_skipping; |
|
1840
|
|
|
|
|
|
|
my $line_for_guess; |
|
1841
|
647
|
|
|
|
|
2607
|
while ( defined( $line = $self->peek_ahead( $i++ ) ) ) { |
|
1842
|
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
# if first line is #! then assume starting level is zero |
|
1844
|
973
|
100
|
100
|
|
|
4286
|
if ( $i == 1 && $line =~ /^\#\!/ ) { |
|
1845
|
14
|
|
|
|
|
32
|
$starting_level = 0; |
|
1846
|
14
|
|
|
|
|
31
|
last; |
|
1847
|
|
|
|
|
|
|
} |
|
1848
|
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
# ignore lines fenced off with code-skipping comments |
|
1850
|
959
|
100
|
|
|
|
3398
|
if ( $line =~ /^\s*#/ ) { |
|
1851
|
|
|
|
|
|
|
|
|
1852
|
|
|
|
|
|
|
# use first comment for indentation guess in case of no code |
|
1853
|
310
|
100
|
|
|
|
901
|
if ( !defined($line_for_guess) ) { $line_for_guess = $line } |
|
|
254
|
|
|
|
|
507
|
|
|
1854
|
|
|
|
|
|
|
|
|
1855
|
310
|
50
|
|
|
|
774
|
if ( !$in_code_skipping ) { |
|
1856
|
310
|
50
|
33
|
|
|
3600
|
if ( $rOpts_code_skipping |
|
1857
|
|
|
|
|
|
|
&& $line =~ /$code_skipping_pattern_begin/ ) |
|
1858
|
|
|
|
|
|
|
{ |
|
1859
|
0
|
|
|
|
|
0
|
$in_code_skipping = 1; |
|
1860
|
0
|
|
|
|
|
0
|
next; |
|
1861
|
|
|
|
|
|
|
} |
|
1862
|
|
|
|
|
|
|
} |
|
1863
|
|
|
|
|
|
|
else { |
|
1864
|
0
|
0
|
|
|
|
0
|
if ( $line =~ /$code_skipping_pattern_end/ ) { |
|
1865
|
0
|
|
|
|
|
0
|
$in_code_skipping = 0; |
|
1866
|
|
|
|
|
|
|
} |
|
1867
|
0
|
|
|
|
|
0
|
next; |
|
1868
|
|
|
|
|
|
|
} |
|
1869
|
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
# Note that we could also ignore format-skipping lines here |
|
1871
|
|
|
|
|
|
|
# but it isn't clear if that would be best. |
|
1872
|
|
|
|
|
|
|
# See c326 for example code. |
|
1873
|
|
|
|
|
|
|
|
|
1874
|
310
|
|
|
|
|
808
|
next; |
|
1875
|
|
|
|
|
|
|
} |
|
1876
|
649
|
50
|
|
|
|
1437
|
next if ($in_code_skipping); |
|
1877
|
|
|
|
|
|
|
|
|
1878
|
649
|
100
|
|
|
|
2587
|
next if ( $line =~ /^\s*$/ ); # skip past blank lines |
|
1879
|
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
# use first line of code for indentation guess |
|
1881
|
631
|
|
|
|
|
1031
|
$line_for_guess = $line; |
|
1882
|
631
|
|
|
|
|
1072
|
last; |
|
1883
|
|
|
|
|
|
|
} ## end while ( defined( $line = ...)) |
|
1884
|
|
|
|
|
|
|
|
|
1885
|
647
|
100
|
|
|
|
1455
|
if ( defined($line_for_guess) ) { |
|
1886
|
631
|
|
|
|
|
2060
|
$starting_level = |
|
1887
|
|
|
|
|
|
|
$self->guess_old_indentation_level($line_for_guess); |
|
1888
|
|
|
|
|
|
|
} |
|
1889
|
647
|
|
|
|
|
1510
|
$msg = "Line $i implies starting-indentation-level = $starting_level\n"; |
|
1890
|
647
|
|
|
|
|
2739
|
$self->write_logfile_entry("$msg"); |
|
1891
|
|
|
|
|
|
|
} |
|
1892
|
649
|
|
|
|
|
1482
|
$self->[_starting_level_] = $starting_level; |
|
1893
|
649
|
|
|
|
|
2577
|
reset_indentation_level($starting_level); |
|
1894
|
649
|
|
|
|
|
952
|
return; |
|
1895
|
|
|
|
|
|
|
} ## end sub find_starting_indentation_level |
|
1896
|
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
sub guess_old_indentation_level { |
|
1898
|
634
|
|
|
634
|
0
|
1312
|
my ( $self, $line ) = @_; |
|
1899
|
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
# Guess the indentation level of an input line. |
|
1901
|
|
|
|
|
|
|
# |
|
1902
|
|
|
|
|
|
|
# For the first line of code this result will define the starting |
|
1903
|
|
|
|
|
|
|
# indentation level. It will mainly be non-zero when perltidy is applied |
|
1904
|
|
|
|
|
|
|
# within an editor to a local block of code. |
|
1905
|
|
|
|
|
|
|
# |
|
1906
|
|
|
|
|
|
|
# This is an impossible task in general because we can't know what tabs |
|
1907
|
|
|
|
|
|
|
# meant for the old script and how many spaces were used for one |
|
1908
|
|
|
|
|
|
|
# indentation level in the given input script. For example it may have |
|
1909
|
|
|
|
|
|
|
# been previously formatted with -i=7 -et=3. But we can at least try to |
|
1910
|
|
|
|
|
|
|
# make sure that perltidy guesses correctly if it is applied repeatedly to |
|
1911
|
|
|
|
|
|
|
# a block of code within an editor, so that the block stays at the same |
|
1912
|
|
|
|
|
|
|
# level when perltidy is applied repeatedly. |
|
1913
|
|
|
|
|
|
|
# |
|
1914
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
|
1915
|
634
|
|
|
|
|
1044
|
my $level = 0; |
|
1916
|
|
|
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
# find leading tabs, spaces, and any statement label |
|
1918
|
634
|
|
|
|
|
985
|
my $spaces = 0; |
|
1919
|
634
|
50
|
|
|
|
4065
|
if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) { |
|
1920
|
|
|
|
|
|
|
|
|
1921
|
|
|
|
|
|
|
# If there are leading tabs, we use the tab scheme for this run, if |
|
1922
|
|
|
|
|
|
|
# any, so that the code will remain stable when editing. |
|
1923
|
634
|
100
|
|
|
|
2221
|
if ($1) { $spaces += length($1) * $tabsize } |
|
|
2
|
|
|
|
|
10
|
|
|
1924
|
|
|
|
|
|
|
|
|
1925
|
634
|
100
|
|
|
|
1944
|
if ($2) { $spaces += length($2) } |
|
|
90
|
|
|
|
|
236
|
|
|
1926
|
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
# correct for outdented labels |
|
1928
|
634
|
50
|
66
|
|
|
2523
|
if ( $3 |
|
|
|
|
66
|
|
|
|
|
|
1929
|
|
|
|
|
|
|
&& $rOpts_outdent_labels |
|
1930
|
|
|
|
|
|
|
&& $rOpts_continuation_indentation > 0 ) |
|
1931
|
|
|
|
|
|
|
{ |
|
1932
|
1
|
|
|
|
|
3
|
$spaces += $rOpts_continuation_indentation; |
|
1933
|
|
|
|
|
|
|
} |
|
1934
|
|
|
|
|
|
|
} |
|
1935
|
|
|
|
|
|
|
|
|
1936
|
634
|
|
|
|
|
1893
|
$level = int( $spaces / $rOpts_indent_columns ); |
|
1937
|
634
|
|
|
|
|
1316
|
return ($level); |
|
1938
|
|
|
|
|
|
|
} ## end sub guess_old_indentation_level |
|
1939
|
|
|
|
|
|
|
|
|
1940
|
|
|
|
|
|
|
sub dump_functions { |
|
1941
|
|
|
|
|
|
|
|
|
1942
|
|
|
|
|
|
|
# This is an unused debug routine, save for future use |
|
1943
|
|
|
|
|
|
|
|
|
1944
|
0
|
|
|
0
|
0
|
0
|
my $fh = *STDOUT; |
|
1945
|
0
|
|
|
|
|
0
|
foreach my $pkg ( keys %{$ris_user_function} ) { |
|
|
0
|
|
|
|
|
0
|
|
|
1946
|
0
|
|
|
|
|
0
|
$fh->print("\nnon-constant subs in package $pkg\n"); |
|
1947
|
|
|
|
|
|
|
|
|
1948
|
0
|
|
|
|
|
0
|
foreach my $sub ( keys %{ $ris_user_function->{$pkg} } ) { |
|
|
0
|
|
|
|
|
0
|
|
|
1949
|
0
|
|
|
|
|
0
|
my $msg = EMPTY_STRING; |
|
1950
|
0
|
0
|
|
|
|
0
|
if ( $ris_block_list_function->{$pkg}->{$sub} ) { |
|
1951
|
0
|
|
|
|
|
0
|
$msg = 'block_list'; |
|
1952
|
|
|
|
|
|
|
} |
|
1953
|
|
|
|
|
|
|
|
|
1954
|
0
|
0
|
|
|
|
0
|
if ( $ris_block_function->{$pkg}->{$sub} ) { |
|
1955
|
0
|
|
|
|
|
0
|
$msg = 'block'; |
|
1956
|
|
|
|
|
|
|
} |
|
1957
|
0
|
|
|
|
|
0
|
$fh->print("$sub $msg\n"); |
|
1958
|
|
|
|
|
|
|
} |
|
1959
|
|
|
|
|
|
|
} |
|
1960
|
|
|
|
|
|
|
|
|
1961
|
0
|
|
|
|
|
0
|
foreach my $pkg ( keys %{$ris_constant} ) { |
|
|
0
|
|
|
|
|
0
|
|
|
1962
|
0
|
|
|
|
|
0
|
$fh->print("\nconstants and constant subs in package $pkg\n"); |
|
1963
|
|
|
|
|
|
|
|
|
1964
|
0
|
|
|
|
|
0
|
foreach my $sub ( keys %{ $ris_constant->{$pkg} } ) { |
|
|
0
|
|
|
|
|
0
|
|
|
1965
|
0
|
|
|
|
|
0
|
$fh->print("$sub\n"); |
|
1966
|
|
|
|
|
|
|
} |
|
1967
|
|
|
|
|
|
|
} |
|
1968
|
0
|
|
|
|
|
0
|
return; |
|
1969
|
|
|
|
|
|
|
} ## end sub dump_functions |
|
1970
|
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
sub prepare_for_a_new_file { |
|
1972
|
|
|
|
|
|
|
|
|
1973
|
649
|
|
|
649
|
0
|
1492
|
my ( $self, $source_object ) = @_; |
|
1974
|
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
# copy the source object lines to an array of lines |
|
1976
|
649
|
|
|
|
|
2765
|
$self->make_source_array($source_object); |
|
1977
|
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
# previous tokens needed to determine what to expect next |
|
1979
|
649
|
|
|
|
|
1267
|
$last_nonblank_token = ';'; # the only possible starting state which |
|
1980
|
649
|
|
|
|
|
1142
|
$last_nonblank_type = ';'; # will make a leading brace a code block |
|
1981
|
649
|
|
|
|
|
1140
|
$last_nonblank_block_type = EMPTY_STRING; |
|
1982
|
|
|
|
|
|
|
|
|
1983
|
|
|
|
|
|
|
# scalars for remembering statement types across multiple lines |
|
1984
|
649
|
|
|
|
|
1035
|
$statement_type = EMPTY_STRING; # '' or 'use' or 'sub..' or 'case..' |
|
1985
|
|
|
|
|
|
|
|
|
1986
|
|
|
|
|
|
|
# scalars for remembering where we are in the file |
|
1987
|
649
|
|
|
|
|
1129
|
$current_package = "main"; |
|
1988
|
649
|
|
|
|
|
1045
|
$context = UNKNOWN_CONTEXT; |
|
1989
|
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
# hashes used to remember function information |
|
1991
|
649
|
|
|
|
|
1126
|
$ris_constant = {}; # user-defined constants |
|
1992
|
649
|
|
|
|
|
1535
|
$ris_user_function = {}; # user-defined functions |
|
1993
|
649
|
|
|
|
|
1443
|
$ruser_function_prototype = {}; # their prototypes |
|
1994
|
649
|
|
|
|
|
1208
|
$ris_block_function = {}; |
|
1995
|
649
|
|
|
|
|
1229
|
$ris_block_list_function = {}; |
|
1996
|
649
|
|
|
|
|
1307
|
$rsaw_function_definition = {}; |
|
1997
|
649
|
|
|
|
|
1200
|
$rsaw_use_module = {}; |
|
1998
|
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
# variables used to track depths of various containers |
|
2000
|
|
|
|
|
|
|
# and report nesting errors |
|
2001
|
649
|
|
|
|
|
1264
|
$paren_depth = 0; |
|
2002
|
649
|
|
|
|
|
965
|
$brace_depth = 0; |
|
2003
|
649
|
|
|
|
|
928
|
$square_bracket_depth = 0; |
|
2004
|
649
|
|
|
|
|
2643
|
$rcurrent_depth = [ (0) x scalar(@closing_brace_names) ]; |
|
2005
|
649
|
|
|
|
|
1043
|
$total_depth = 0; |
|
2006
|
649
|
|
|
|
|
970
|
$rtotal_depth = []; |
|
2007
|
649
|
|
|
|
|
1952
|
$rcurrent_sequence_number = []; |
|
2008
|
649
|
|
|
|
|
1815
|
$ris_lexical_sub = {}; |
|
2009
|
649
|
|
|
|
|
1055
|
$next_sequence_number = SEQ_ROOT + 1; |
|
2010
|
|
|
|
|
|
|
|
|
2011
|
649
|
|
|
|
|
992
|
$rparen_type = []; |
|
2012
|
649
|
|
|
|
|
1452
|
$rparen_semicolon_count = []; |
|
2013
|
649
|
|
|
|
|
1281
|
$rparen_vars = []; |
|
2014
|
649
|
|
|
|
|
2097
|
$rbrace_type = []; |
|
2015
|
649
|
|
|
|
|
1649
|
$rbrace_structural_type = []; |
|
2016
|
649
|
|
|
|
|
1379
|
$rbrace_context = []; |
|
2017
|
649
|
|
|
|
|
1344
|
$rbrace_package = []; |
|
2018
|
649
|
|
|
|
|
1347
|
$rsquare_bracket_type = []; |
|
2019
|
649
|
|
|
|
|
1395
|
$rsquare_bracket_structural_type = []; |
|
2020
|
649
|
|
|
|
|
1326
|
$rdepth_array = []; |
|
2021
|
649
|
|
|
|
|
3491
|
$rnested_ternary_flag = []; |
|
2022
|
649
|
|
|
|
|
1138
|
$rnested_statement_type = []; |
|
2023
|
649
|
|
|
|
|
3745
|
$rstarting_line_of_current_depth = []; |
|
2024
|
|
|
|
|
|
|
|
|
2025
|
649
|
|
|
|
|
3679
|
$rparen_type->[$paren_depth] = EMPTY_STRING; |
|
2026
|
649
|
|
|
|
|
1418
|
$rparen_semicolon_count->[$paren_depth] = 0; |
|
2027
|
649
|
|
|
|
|
1352
|
$rparen_vars->[$paren_depth] = []; |
|
2028
|
649
|
|
|
|
|
1371
|
$rbrace_type->[$brace_depth] = ';'; # identify opening brace as code block |
|
2029
|
649
|
|
|
|
|
1274
|
$rbrace_structural_type->[$brace_depth] = EMPTY_STRING; |
|
2030
|
649
|
|
|
|
|
1268
|
$rbrace_context->[$brace_depth] = UNKNOWN_CONTEXT; |
|
2031
|
649
|
|
|
|
|
1369
|
$rbrace_package->[$paren_depth] = $current_package; |
|
2032
|
649
|
|
|
|
|
1328
|
$rsquare_bracket_type->[$square_bracket_depth] = EMPTY_STRING; |
|
2033
|
649
|
|
|
|
|
1194
|
$rsquare_bracket_structural_type->[$square_bracket_depth] = EMPTY_STRING; |
|
2034
|
|
|
|
|
|
|
|
|
2035
|
649
|
|
|
|
|
2252
|
initialize_tokenizer_state(); |
|
2036
|
649
|
|
|
|
|
1008
|
return; |
|
2037
|
|
|
|
|
|
|
} ## end sub prepare_for_a_new_file |
|
2038
|
|
|
|
|
|
|
|
|
2039
|
|
|
|
|
|
|
{ ## closure for sub tokenize_this_line |
|
2040
|
|
|
|
|
|
|
|
|
2041
|
44
|
|
|
44
|
|
385
|
use constant BRACE => 0; |
|
|
44
|
|
|
|
|
77
|
|
|
|
44
|
|
|
|
|
3055
|
|
|
2042
|
44
|
|
|
44
|
|
256
|
use constant SQUARE_BRACKET => 1; |
|
|
44
|
|
|
|
|
82
|
|
|
|
44
|
|
|
|
|
1939
|
|
|
2043
|
44
|
|
|
44
|
|
216
|
use constant PAREN => 2; |
|
|
44
|
|
|
|
|
95
|
|
|
|
44
|
|
|
|
|
1569
|
|
|
2044
|
44
|
|
|
44
|
|
195
|
use constant QUESTION_COLON => 3; |
|
|
44
|
|
|
|
|
78
|
|
|
|
44
|
|
|
|
|
70916
|
|
|
2045
|
|
|
|
|
|
|
|
|
2046
|
|
|
|
|
|
|
# TV1: scalars for processing one LINE. |
|
2047
|
|
|
|
|
|
|
# Re-initialized on each entry to sub tokenize_this_line. |
|
2048
|
|
|
|
|
|
|
my ( |
|
2049
|
|
|
|
|
|
|
$block_type, $container_type, $expecting, |
|
2050
|
|
|
|
|
|
|
$i, $i_tok, $input_line, |
|
2051
|
|
|
|
|
|
|
$input_line_number, $last_nonblank_i, $max_token_index, |
|
2052
|
|
|
|
|
|
|
$next_tok, $next_type, $peeked_ahead, |
|
2053
|
|
|
|
|
|
|
$prototype, $rhere_target_list, $rtoken_map, |
|
2054
|
|
|
|
|
|
|
$rtoken_type, $rtokens, $tok, |
|
2055
|
|
|
|
|
|
|
$type, $type_sequence, $indent_flag, |
|
2056
|
|
|
|
|
|
|
); |
|
2057
|
|
|
|
|
|
|
|
|
2058
|
|
|
|
|
|
|
# TV2: refs to ARRAYS for processing one LINE |
|
2059
|
|
|
|
|
|
|
# Re-initialized on each call. |
|
2060
|
|
|
|
|
|
|
my $routput_token_list = []; # stack of output token indexes |
|
2061
|
|
|
|
|
|
|
my $routput_token_type = []; # token types |
|
2062
|
|
|
|
|
|
|
my $routput_block_type = []; # types of code block |
|
2063
|
|
|
|
|
|
|
my $routput_type_sequence = []; # nesting sequential number |
|
2064
|
|
|
|
|
|
|
my $routput_indent_flag = []; # |
|
2065
|
|
|
|
|
|
|
|
|
2066
|
|
|
|
|
|
|
# TV3: SCALARS for quote variables. These are initialized with a |
|
2067
|
|
|
|
|
|
|
# subroutine call and continually updated as lines are processed. |
|
2068
|
|
|
|
|
|
|
my ( |
|
2069
|
|
|
|
|
|
|
$in_quote, $quote_type, |
|
2070
|
|
|
|
|
|
|
$quote_character, $quote_pos, |
|
2071
|
|
|
|
|
|
|
$quote_depth, $quoted_string_1, |
|
2072
|
|
|
|
|
|
|
$quoted_string_2, $allowed_quote_modifiers, |
|
2073
|
|
|
|
|
|
|
$quote_starting_tok, $quote_here_target_2, |
|
2074
|
|
|
|
|
|
|
); |
|
2075
|
|
|
|
|
|
|
|
|
2076
|
|
|
|
|
|
|
# TV4: SCALARS for multi-line identifiers and |
|
2077
|
|
|
|
|
|
|
# statements. These are initialized with a subroutine call |
|
2078
|
|
|
|
|
|
|
# and continually updated as lines are processed. |
|
2079
|
|
|
|
|
|
|
my ( $id_scan_state, $identifier, $want_paren ); |
|
2080
|
|
|
|
|
|
|
|
|
2081
|
|
|
|
|
|
|
# TV5: SCALARS for tracking indentation level. |
|
2082
|
|
|
|
|
|
|
# Initialized once and continually updated as lines are |
|
2083
|
|
|
|
|
|
|
# processed. |
|
2084
|
|
|
|
|
|
|
my ( |
|
2085
|
|
|
|
|
|
|
$nesting_token_string, $nesting_block_string, |
|
2086
|
|
|
|
|
|
|
$nesting_block_flag, $level_in_tokenizer, |
|
2087
|
|
|
|
|
|
|
); |
|
2088
|
|
|
|
|
|
|
|
|
2089
|
|
|
|
|
|
|
# TV6: SCALARS for remembering several previous |
|
2090
|
|
|
|
|
|
|
# tokens. Initialized once and continually updated as |
|
2091
|
|
|
|
|
|
|
# lines are processed. |
|
2092
|
|
|
|
|
|
|
my ( |
|
2093
|
|
|
|
|
|
|
$last_nonblank_container_type, $last_nonblank_type_sequence, |
|
2094
|
|
|
|
|
|
|
$last_last_nonblank_token, $last_last_nonblank_type, |
|
2095
|
|
|
|
|
|
|
$last_nonblank_prototype, |
|
2096
|
|
|
|
|
|
|
); |
|
2097
|
|
|
|
|
|
|
|
|
2098
|
|
|
|
|
|
|
# ---------------------------------------------------------------- |
|
2099
|
|
|
|
|
|
|
# beginning of tokenizer variable access and manipulation routines |
|
2100
|
|
|
|
|
|
|
# ---------------------------------------------------------------- |
|
2101
|
|
|
|
|
|
|
|
|
2102
|
|
|
|
|
|
|
sub initialize_tokenizer_state { |
|
2103
|
|
|
|
|
|
|
|
|
2104
|
|
|
|
|
|
|
# GV1: initialized once |
|
2105
|
|
|
|
|
|
|
# TV1: initialized on each call |
|
2106
|
|
|
|
|
|
|
# TV2: initialized on each call |
|
2107
|
|
|
|
|
|
|
# TV3: |
|
2108
|
649
|
|
|
649
|
0
|
1066
|
$in_quote = 0; |
|
2109
|
649
|
|
|
|
|
1192
|
$quote_type = 'Q'; |
|
2110
|
649
|
|
|
|
|
1024
|
$quote_character = EMPTY_STRING; |
|
2111
|
649
|
|
|
|
|
964
|
$quote_pos = 0; |
|
2112
|
649
|
|
|
|
|
980
|
$quote_depth = 0; |
|
2113
|
649
|
|
|
|
|
982
|
$quoted_string_1 = EMPTY_STRING; |
|
2114
|
649
|
|
|
|
|
960
|
$quoted_string_2 = EMPTY_STRING; |
|
2115
|
649
|
|
|
|
|
986
|
$allowed_quote_modifiers = EMPTY_STRING; |
|
2116
|
649
|
|
|
|
|
1033
|
$quote_starting_tok = EMPTY_STRING; |
|
2117
|
649
|
|
|
|
|
1048
|
$quote_here_target_2 = undef; |
|
2118
|
|
|
|
|
|
|
|
|
2119
|
|
|
|
|
|
|
# TV4: |
|
2120
|
649
|
|
|
|
|
969
|
$id_scan_state = EMPTY_STRING; |
|
2121
|
649
|
|
|
|
|
1080
|
$identifier = EMPTY_STRING; |
|
2122
|
649
|
|
|
|
|
976
|
$want_paren = EMPTY_STRING; |
|
2123
|
|
|
|
|
|
|
|
|
2124
|
|
|
|
|
|
|
# TV5: |
|
2125
|
649
|
|
|
|
|
991
|
$nesting_token_string = EMPTY_STRING; |
|
2126
|
649
|
|
|
|
|
1012
|
$nesting_block_string = '1'; # initially in a block |
|
2127
|
649
|
|
|
|
|
1010
|
$nesting_block_flag = 1; |
|
2128
|
649
|
|
|
|
|
997
|
$level_in_tokenizer = 0; |
|
2129
|
|
|
|
|
|
|
|
|
2130
|
|
|
|
|
|
|
# TV6: |
|
2131
|
649
|
|
|
|
|
1021
|
$last_nonblank_container_type = EMPTY_STRING; |
|
2132
|
649
|
|
|
|
|
963
|
$last_nonblank_type_sequence = EMPTY_STRING; |
|
2133
|
649
|
|
|
|
|
983
|
$last_last_nonblank_token = ';'; |
|
2134
|
649
|
|
|
|
|
981
|
$last_last_nonblank_type = ';'; |
|
2135
|
649
|
|
|
|
|
1011
|
$last_nonblank_prototype = EMPTY_STRING; |
|
2136
|
649
|
|
|
|
|
946
|
return; |
|
2137
|
|
|
|
|
|
|
} ## end sub initialize_tokenizer_state |
|
2138
|
|
|
|
|
|
|
|
|
2139
|
|
|
|
|
|
|
sub save_tokenizer_state { |
|
2140
|
|
|
|
|
|
|
|
|
2141
|
|
|
|
|
|
|
# Global variables: |
|
2142
|
0
|
|
|
0
|
0
|
0
|
my $rGV1 = [ |
|
2143
|
|
|
|
|
|
|
$brace_depth, |
|
2144
|
|
|
|
|
|
|
$context, |
|
2145
|
|
|
|
|
|
|
$current_package, |
|
2146
|
|
|
|
|
|
|
$last_nonblank_block_type, |
|
2147
|
|
|
|
|
|
|
$last_nonblank_token, |
|
2148
|
|
|
|
|
|
|
$last_nonblank_type, |
|
2149
|
|
|
|
|
|
|
$next_sequence_number, |
|
2150
|
|
|
|
|
|
|
$paren_depth, |
|
2151
|
|
|
|
|
|
|
$rbrace_context, |
|
2152
|
|
|
|
|
|
|
$rbrace_package, |
|
2153
|
|
|
|
|
|
|
$rbrace_structural_type, |
|
2154
|
|
|
|
|
|
|
$rbrace_type, |
|
2155
|
|
|
|
|
|
|
$rcurrent_depth, |
|
2156
|
|
|
|
|
|
|
$rcurrent_sequence_number, |
|
2157
|
|
|
|
|
|
|
$ris_lexical_sub, |
|
2158
|
|
|
|
|
|
|
$rdepth_array, |
|
2159
|
|
|
|
|
|
|
$ris_block_function, |
|
2160
|
|
|
|
|
|
|
$ris_block_list_function, |
|
2161
|
|
|
|
|
|
|
$ris_constant, |
|
2162
|
|
|
|
|
|
|
$ris_user_function, |
|
2163
|
|
|
|
|
|
|
$rnested_statement_type, |
|
2164
|
|
|
|
|
|
|
$rnested_ternary_flag, |
|
2165
|
|
|
|
|
|
|
$rparen_semicolon_count, |
|
2166
|
|
|
|
|
|
|
$rparen_vars, |
|
2167
|
|
|
|
|
|
|
$rparen_type, |
|
2168
|
|
|
|
|
|
|
$rsaw_function_definition, |
|
2169
|
|
|
|
|
|
|
$rsaw_use_module, |
|
2170
|
|
|
|
|
|
|
$rsquare_bracket_structural_type, |
|
2171
|
|
|
|
|
|
|
$rsquare_bracket_type, |
|
2172
|
|
|
|
|
|
|
$rstarting_line_of_current_depth, |
|
2173
|
|
|
|
|
|
|
$rtotal_depth, |
|
2174
|
|
|
|
|
|
|
$ruser_function_prototype, |
|
2175
|
|
|
|
|
|
|
$square_bracket_depth, |
|
2176
|
|
|
|
|
|
|
$statement_type, |
|
2177
|
|
|
|
|
|
|
$total_depth, |
|
2178
|
|
|
|
|
|
|
|
|
2179
|
|
|
|
|
|
|
]; |
|
2180
|
|
|
|
|
|
|
|
|
2181
|
|
|
|
|
|
|
# Tokenizer closure variables: |
|
2182
|
0
|
|
|
|
|
0
|
my $rTV1 = [ |
|
2183
|
|
|
|
|
|
|
$block_type, $container_type, $expecting, |
|
2184
|
|
|
|
|
|
|
$i, $i_tok, $input_line, |
|
2185
|
|
|
|
|
|
|
$input_line_number, $last_nonblank_i, $max_token_index, |
|
2186
|
|
|
|
|
|
|
$next_tok, $next_type, $peeked_ahead, |
|
2187
|
|
|
|
|
|
|
$prototype, $rhere_target_list, $rtoken_map, |
|
2188
|
|
|
|
|
|
|
$rtoken_type, $rtokens, $tok, |
|
2189
|
|
|
|
|
|
|
$type, $type_sequence, $indent_flag, |
|
2190
|
|
|
|
|
|
|
]; |
|
2191
|
|
|
|
|
|
|
|
|
2192
|
0
|
|
|
|
|
0
|
my $rTV2 = [ |
|
2193
|
|
|
|
|
|
|
$routput_token_list, $routput_token_type, |
|
2194
|
|
|
|
|
|
|
$routput_block_type, $routput_type_sequence, |
|
2195
|
|
|
|
|
|
|
$routput_indent_flag, |
|
2196
|
|
|
|
|
|
|
]; |
|
2197
|
|
|
|
|
|
|
|
|
2198
|
0
|
|
|
|
|
0
|
my $rTV3 = [ |
|
2199
|
|
|
|
|
|
|
$in_quote, $quote_type, |
|
2200
|
|
|
|
|
|
|
$quote_character, $quote_pos, |
|
2201
|
|
|
|
|
|
|
$quote_depth, $quoted_string_1, |
|
2202
|
|
|
|
|
|
|
$quoted_string_2, $allowed_quote_modifiers, |
|
2203
|
|
|
|
|
|
|
$quote_starting_tok, $quote_here_target_2, |
|
2204
|
|
|
|
|
|
|
]; |
|
2205
|
|
|
|
|
|
|
|
|
2206
|
0
|
|
|
|
|
0
|
my $rTV4 = [ $id_scan_state, $identifier, $want_paren ]; |
|
2207
|
|
|
|
|
|
|
|
|
2208
|
0
|
|
|
|
|
0
|
my $rTV5 = [ |
|
2209
|
|
|
|
|
|
|
$nesting_token_string, $nesting_block_string, |
|
2210
|
|
|
|
|
|
|
$nesting_block_flag, $level_in_tokenizer, |
|
2211
|
|
|
|
|
|
|
]; |
|
2212
|
|
|
|
|
|
|
|
|
2213
|
0
|
|
|
|
|
0
|
my $rTV6 = [ |
|
2214
|
|
|
|
|
|
|
$last_nonblank_container_type, $last_nonblank_type_sequence, |
|
2215
|
|
|
|
|
|
|
$last_last_nonblank_token, $last_last_nonblank_type, |
|
2216
|
|
|
|
|
|
|
$last_nonblank_prototype, |
|
2217
|
|
|
|
|
|
|
]; |
|
2218
|
0
|
|
|
|
|
0
|
return [ $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; |
|
2219
|
|
|
|
|
|
|
} ## end sub save_tokenizer_state |
|
2220
|
|
|
|
|
|
|
|
|
2221
|
|
|
|
|
|
|
sub restore_tokenizer_state { |
|
2222
|
0
|
|
|
0
|
0
|
0
|
my ($rstate) = @_; |
|
2223
|
0
|
|
|
|
|
0
|
my ( $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; |
|
|
0
|
|
|
|
|
0
|
|
|
2224
|
|
|
|
|
|
|
|
|
2225
|
|
|
|
|
|
|
( |
|
2226
|
|
|
|
|
|
|
$brace_depth, |
|
2227
|
|
|
|
|
|
|
$context, |
|
2228
|
|
|
|
|
|
|
$current_package, |
|
2229
|
|
|
|
|
|
|
$last_nonblank_block_type, |
|
2230
|
|
|
|
|
|
|
$last_nonblank_token, |
|
2231
|
|
|
|
|
|
|
$last_nonblank_type, |
|
2232
|
|
|
|
|
|
|
$next_sequence_number, |
|
2233
|
|
|
|
|
|
|
$paren_depth, |
|
2234
|
|
|
|
|
|
|
$rbrace_context, |
|
2235
|
|
|
|
|
|
|
$rbrace_package, |
|
2236
|
|
|
|
|
|
|
$rbrace_structural_type, |
|
2237
|
|
|
|
|
|
|
$rbrace_type, |
|
2238
|
|
|
|
|
|
|
$rcurrent_depth, |
|
2239
|
|
|
|
|
|
|
$rcurrent_sequence_number, |
|
2240
|
|
|
|
|
|
|
$ris_lexical_sub, |
|
2241
|
|
|
|
|
|
|
$rdepth_array, |
|
2242
|
|
|
|
|
|
|
$ris_block_function, |
|
2243
|
|
|
|
|
|
|
$ris_block_list_function, |
|
2244
|
|
|
|
|
|
|
$ris_constant, |
|
2245
|
|
|
|
|
|
|
$ris_user_function, |
|
2246
|
|
|
|
|
|
|
$rnested_statement_type, |
|
2247
|
|
|
|
|
|
|
$rnested_ternary_flag, |
|
2248
|
|
|
|
|
|
|
$rparen_semicolon_count, |
|
2249
|
|
|
|
|
|
|
$rparen_vars, |
|
2250
|
|
|
|
|
|
|
$rparen_type, |
|
2251
|
|
|
|
|
|
|
$rsaw_function_definition, |
|
2252
|
|
|
|
|
|
|
$rsaw_use_module, |
|
2253
|
|
|
|
|
|
|
$rsquare_bracket_structural_type, |
|
2254
|
|
|
|
|
|
|
$rsquare_bracket_type, |
|
2255
|
|
|
|
|
|
|
$rstarting_line_of_current_depth, |
|
2256
|
|
|
|
|
|
|
$rtotal_depth, |
|
2257
|
|
|
|
|
|
|
$ruser_function_prototype, |
|
2258
|
|
|
|
|
|
|
$square_bracket_depth, |
|
2259
|
|
|
|
|
|
|
$statement_type, |
|
2260
|
|
|
|
|
|
|
$total_depth, |
|
2261
|
|
|
|
|
|
|
|
|
2262
|
0
|
|
|
|
|
0
|
) = @{$rGV1}; |
|
|
0
|
|
|
|
|
0
|
|
|
2263
|
|
|
|
|
|
|
|
|
2264
|
|
|
|
|
|
|
( |
|
2265
|
|
|
|
|
|
|
$block_type, $container_type, $expecting, |
|
2266
|
|
|
|
|
|
|
$i, $i_tok, $input_line, |
|
2267
|
|
|
|
|
|
|
$input_line_number, $last_nonblank_i, $max_token_index, |
|
2268
|
|
|
|
|
|
|
$next_tok, $next_type, $peeked_ahead, |
|
2269
|
|
|
|
|
|
|
$prototype, $rhere_target_list, $rtoken_map, |
|
2270
|
|
|
|
|
|
|
$rtoken_type, $rtokens, $tok, |
|
2271
|
|
|
|
|
|
|
$type, $type_sequence, $indent_flag, |
|
2272
|
0
|
|
|
|
|
0
|
) = @{$rTV1}; |
|
|
0
|
|
|
|
|
0
|
|
|
2273
|
|
|
|
|
|
|
|
|
2274
|
|
|
|
|
|
|
( |
|
2275
|
|
|
|
|
|
|
$routput_token_list, $routput_token_type, |
|
2276
|
|
|
|
|
|
|
$routput_block_type, $routput_type_sequence, |
|
2277
|
|
|
|
|
|
|
$routput_indent_flag, |
|
2278
|
0
|
|
|
|
|
0
|
) = @{$rTV2}; |
|
|
0
|
|
|
|
|
0
|
|
|
2279
|
|
|
|
|
|
|
|
|
2280
|
|
|
|
|
|
|
( |
|
2281
|
|
|
|
|
|
|
$in_quote, $quote_type, |
|
2282
|
|
|
|
|
|
|
$quote_character, $quote_pos, |
|
2283
|
|
|
|
|
|
|
$quote_depth, $quoted_string_1, |
|
2284
|
|
|
|
|
|
|
$quoted_string_2, $allowed_quote_modifiers, |
|
2285
|
|
|
|
|
|
|
$quote_starting_tok, $quote_here_target_2, |
|
2286
|
0
|
|
|
|
|
0
|
) = @{$rTV3}; |
|
|
0
|
|
|
|
|
0
|
|
|
2287
|
|
|
|
|
|
|
|
|
2288
|
0
|
|
|
|
|
0
|
( $id_scan_state, $identifier, $want_paren ) = @{$rTV4}; |
|
|
0
|
|
|
|
|
0
|
|
|
2289
|
|
|
|
|
|
|
|
|
2290
|
|
|
|
|
|
|
( |
|
2291
|
|
|
|
|
|
|
$nesting_token_string, $nesting_block_string, |
|
2292
|
|
|
|
|
|
|
$nesting_block_flag, $level_in_tokenizer, |
|
2293
|
0
|
|
|
|
|
0
|
) = @{$rTV5}; |
|
|
0
|
|
|
|
|
0
|
|
|
2294
|
|
|
|
|
|
|
|
|
2295
|
|
|
|
|
|
|
( |
|
2296
|
|
|
|
|
|
|
$last_nonblank_container_type, $last_nonblank_type_sequence, |
|
2297
|
|
|
|
|
|
|
$last_last_nonblank_token, $last_last_nonblank_type, |
|
2298
|
|
|
|
|
|
|
$last_nonblank_prototype, |
|
2299
|
0
|
|
|
|
|
0
|
) = @{$rTV6}; |
|
|
0
|
|
|
|
|
0
|
|
|
2300
|
0
|
|
|
|
|
0
|
return; |
|
2301
|
|
|
|
|
|
|
} ## end sub restore_tokenizer_state |
|
2302
|
|
|
|
|
|
|
|
|
2303
|
|
|
|
|
|
|
sub split_pretoken { |
|
2304
|
|
|
|
|
|
|
|
|
2305
|
8
|
|
|
8
|
0
|
17
|
my ( $self, $numc ) = @_; |
|
2306
|
|
|
|
|
|
|
|
|
2307
|
|
|
|
|
|
|
# This provides a way to work around the limitations of the |
|
2308
|
|
|
|
|
|
|
# pre-tokenization scheme upon which perltidy is based. It is rarely |
|
2309
|
|
|
|
|
|
|
# needed. |
|
2310
|
|
|
|
|
|
|
|
|
2311
|
|
|
|
|
|
|
# Split the leading $numc characters from the current token (at |
|
2312
|
|
|
|
|
|
|
# index=$i) which is pre-type 'w' and insert the remainder back into |
|
2313
|
|
|
|
|
|
|
# the pretoken stream with appropriate settings. Since we are |
|
2314
|
|
|
|
|
|
|
# splitting a pre-type 'w', there are three cases, depending on if the |
|
2315
|
|
|
|
|
|
|
# remainder starts with a digit: |
|
2316
|
|
|
|
|
|
|
# Case 1: remainder is type 'd', all digits |
|
2317
|
|
|
|
|
|
|
# Case 2: remainder is type 'd' and type 'w': digits & other characters |
|
2318
|
|
|
|
|
|
|
# Case 3: remainder is type 'w' |
|
2319
|
|
|
|
|
|
|
|
|
2320
|
|
|
|
|
|
|
# Examples, for $numc=1: |
|
2321
|
|
|
|
|
|
|
# $tok => $tok_0 $tok_1 $tok_2 |
|
2322
|
|
|
|
|
|
|
# 'x10' => 'x' '10' # case 1 |
|
2323
|
|
|
|
|
|
|
# 'x10if' => 'x' '10' 'if' # case 2 |
|
2324
|
|
|
|
|
|
|
# '0ne => 'O' 'ne' # case 3 |
|
2325
|
|
|
|
|
|
|
|
|
2326
|
|
|
|
|
|
|
# where: |
|
2327
|
|
|
|
|
|
|
# $tok_1 is a possible string of digits (pre-type 'd') |
|
2328
|
|
|
|
|
|
|
# $tok_2 is a possible word (pre-type 'w') |
|
2329
|
|
|
|
|
|
|
|
|
2330
|
|
|
|
|
|
|
# return 1 if successful |
|
2331
|
|
|
|
|
|
|
# return undef if error (shouldn't happen) |
|
2332
|
|
|
|
|
|
|
|
|
2333
|
|
|
|
|
|
|
# Calling routine should update '$type' and '$tok' if successful. |
|
2334
|
|
|
|
|
|
|
|
|
2335
|
8
|
|
|
|
|
15
|
my $pretoken = $rtokens->[$i]; |
|
2336
|
8
|
50
|
33
|
|
|
79
|
if ( $pretoken |
|
|
|
|
33
|
|
|
|
|
|
2337
|
|
|
|
|
|
|
&& length($pretoken) > $numc |
|
2338
|
|
|
|
|
|
|
&& substr( $pretoken, $numc ) =~ /^(\d*)(.*)$/ ) |
|
2339
|
|
|
|
|
|
|
{ |
|
2340
|
|
|
|
|
|
|
|
|
2341
|
|
|
|
|
|
|
# Split $tok into up to 3 tokens: |
|
2342
|
8
|
|
|
|
|
18
|
my $tok_0 = substr( $pretoken, 0, $numc ); |
|
2343
|
8
|
50
|
|
|
|
31
|
my $tok_1 = defined($1) ? $1 : EMPTY_STRING; |
|
2344
|
8
|
50
|
|
|
|
22
|
my $tok_2 = defined($2) ? $2 : EMPTY_STRING; |
|
2345
|
|
|
|
|
|
|
|
|
2346
|
8
|
|
|
|
|
16
|
my $len_0 = length($tok_0); |
|
2347
|
8
|
|
|
|
|
11
|
my $len_1 = length($tok_1); |
|
2348
|
8
|
|
|
|
|
13
|
my $len_2 = length($tok_2); |
|
2349
|
|
|
|
|
|
|
|
|
2350
|
|
|
|
|
|
|
##my $pre_type_0 = 'w'; |
|
2351
|
8
|
|
|
|
|
13
|
my $pre_type_1 = 'd'; |
|
2352
|
8
|
|
|
|
|
12
|
my $pre_type_2 = 'w'; |
|
2353
|
|
|
|
|
|
|
|
|
2354
|
8
|
|
|
|
|
15
|
my $pos_0 = $rtoken_map->[$i]; |
|
2355
|
8
|
|
|
|
|
11
|
my $pos_1 = $pos_0 + $len_0; |
|
2356
|
8
|
|
|
|
|
12
|
my $pos_2 = $pos_1 + $len_1; |
|
2357
|
|
|
|
|
|
|
|
|
2358
|
8
|
|
|
|
|
13
|
my $isplice = $i + 1; |
|
2359
|
|
|
|
|
|
|
|
|
2360
|
|
|
|
|
|
|
# Splice in any digits |
|
2361
|
8
|
100
|
|
|
|
17
|
if ($len_1) { |
|
2362
|
5
|
|
|
|
|
8
|
splice @{$rtoken_map}, $isplice, 0, $pos_1; |
|
|
5
|
|
|
|
|
16
|
|
|
2363
|
5
|
|
|
|
|
10
|
splice @{$rtokens}, $isplice, 0, $tok_1; |
|
|
5
|
|
|
|
|
13
|
|
|
2364
|
5
|
|
|
|
|
9
|
splice @{$rtoken_type}, $isplice, 0, $pre_type_1; |
|
|
5
|
|
|
|
|
10
|
|
|
2365
|
5
|
|
|
|
|
8
|
$max_token_index++; |
|
2366
|
5
|
|
|
|
|
8
|
$isplice++; |
|
2367
|
|
|
|
|
|
|
} |
|
2368
|
|
|
|
|
|
|
|
|
2369
|
|
|
|
|
|
|
# Splice in any trailing word |
|
2370
|
8
|
100
|
|
|
|
22
|
if ($len_2) { |
|
2371
|
4
|
|
|
|
|
5
|
splice @{$rtoken_map}, $isplice, 0, $pos_2; |
|
|
4
|
|
|
|
|
9
|
|
|
2372
|
4
|
|
|
|
|
7
|
splice @{$rtokens}, $isplice, 0, $tok_2; |
|
|
4
|
|
|
|
|
7
|
|
|
2373
|
4
|
|
|
|
|
5
|
splice @{$rtoken_type}, $isplice, 0, $pre_type_2; |
|
|
4
|
|
|
|
|
6
|
|
|
2374
|
4
|
|
|
|
|
6
|
$max_token_index++; |
|
2375
|
|
|
|
|
|
|
} |
|
2376
|
|
|
|
|
|
|
|
|
2377
|
8
|
|
|
|
|
16
|
$rtokens->[$i] = $tok_0; |
|
2378
|
8
|
|
|
|
|
27
|
return 1; |
|
2379
|
|
|
|
|
|
|
} |
|
2380
|
|
|
|
|
|
|
|
|
2381
|
|
|
|
|
|
|
# Shouldn't get here - bad call parameters |
|
2382
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
|
2383
|
|
|
|
|
|
|
Fault(<<EOM); |
|
2384
|
|
|
|
|
|
|
While working near line number $input_line_number, bad arg '$tok' passed to sub split_pretoken() |
|
2385
|
|
|
|
|
|
|
EOM |
|
2386
|
|
|
|
|
|
|
} |
|
2387
|
0
|
|
|
|
|
0
|
return; |
|
2388
|
|
|
|
|
|
|
} ## end sub split_pretoken |
|
2389
|
|
|
|
|
|
|
|
|
2390
|
|
|
|
|
|
|
sub get_indentation_level { |
|
2391
|
649
|
|
|
649
|
0
|
1429
|
return $level_in_tokenizer; |
|
2392
|
|
|
|
|
|
|
} |
|
2393
|
|
|
|
|
|
|
|
|
2394
|
|
|
|
|
|
|
sub reset_indentation_level { |
|
2395
|
649
|
|
|
649
|
0
|
1204
|
$level_in_tokenizer = shift; |
|
2396
|
649
|
|
|
|
|
1014
|
return; |
|
2397
|
|
|
|
|
|
|
} |
|
2398
|
|
|
|
|
|
|
|
|
2399
|
|
|
|
|
|
|
sub peeked_ahead { |
|
2400
|
280
|
|
|
280
|
0
|
477
|
( ( my $flag ) ) = @_; |
|
2401
|
|
|
|
|
|
|
|
|
2402
|
|
|
|
|
|
|
# get or set the closure flag '$peeked_ahead': |
|
2403
|
|
|
|
|
|
|
# - set $peeked_ahead to $flag if given, then |
|
2404
|
|
|
|
|
|
|
# - return current value |
|
2405
|
280
|
100
|
|
|
|
556
|
$peeked_ahead = defined($flag) ? $flag : $peeked_ahead; |
|
2406
|
280
|
|
|
|
|
886
|
return $peeked_ahead; |
|
2407
|
|
|
|
|
|
|
} ## end sub peeked_ahead |
|
2408
|
|
|
|
|
|
|
|
|
2409
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
|
2410
|
|
|
|
|
|
|
# end of tokenizer variable access and manipulation routines |
|
2411
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
|
2412
|
|
|
|
|
|
|
|
|
2413
|
|
|
|
|
|
|
#------------------------------ |
|
2414
|
|
|
|
|
|
|
# beginning of tokenizer hashes |
|
2415
|
|
|
|
|
|
|
#------------------------------ |
|
2416
|
|
|
|
|
|
|
|
|
2417
|
|
|
|
|
|
|
my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); |
|
2418
|
|
|
|
|
|
|
|
|
2419
|
|
|
|
|
|
|
my @q; |
|
2420
|
|
|
|
|
|
|
|
|
2421
|
|
|
|
|
|
|
# 'L' is token for opening { at hash key |
|
2422
|
|
|
|
|
|
|
my %is_opening_type; |
|
2423
|
|
|
|
|
|
|
@q = qw< L { ( [ >; |
|
2424
|
|
|
|
|
|
|
$is_opening_type{$_} = 1 for @q; |
|
2425
|
|
|
|
|
|
|
|
|
2426
|
|
|
|
|
|
|
my %is_opening_or_ternary_type; |
|
2427
|
|
|
|
|
|
|
push @q, '?'; |
|
2428
|
|
|
|
|
|
|
$is_opening_or_ternary_type{$_} = 1 for @q; |
|
2429
|
|
|
|
|
|
|
|
|
2430
|
|
|
|
|
|
|
# 'R' is token for closing } at hash key |
|
2431
|
|
|
|
|
|
|
my %is_closing_type; |
|
2432
|
|
|
|
|
|
|
@q = qw< R } ) ] >; |
|
2433
|
|
|
|
|
|
|
$is_closing_type{$_} = 1 for @q; |
|
2434
|
|
|
|
|
|
|
|
|
2435
|
|
|
|
|
|
|
my %is_closing_or_ternary_type; |
|
2436
|
|
|
|
|
|
|
push @q, ':'; |
|
2437
|
|
|
|
|
|
|
$is_closing_or_ternary_type{$_} = 1 for @q; |
|
2438
|
|
|
|
|
|
|
|
|
2439
|
|
|
|
|
|
|
my %is_redo_last_next_goto; |
|
2440
|
|
|
|
|
|
|
@q = qw( redo last next goto ); |
|
2441
|
|
|
|
|
|
|
$is_redo_last_next_goto{$_} = 1 for @q; |
|
2442
|
|
|
|
|
|
|
|
|
2443
|
|
|
|
|
|
|
my %is_use_require; |
|
2444
|
|
|
|
|
|
|
@q = qw( use require ); |
|
2445
|
|
|
|
|
|
|
$is_use_require{$_} = 1 for @q; |
|
2446
|
|
|
|
|
|
|
|
|
2447
|
|
|
|
|
|
|
# This hash holds the array index in $self for these keywords: |
|
2448
|
|
|
|
|
|
|
# Fix for issue c035: removed 'format' from this hash |
|
2449
|
|
|
|
|
|
|
my %is_END_DATA = ( |
|
2450
|
|
|
|
|
|
|
'__END__' => _in_end_, |
|
2451
|
|
|
|
|
|
|
'__DATA__' => _in_data_, |
|
2452
|
|
|
|
|
|
|
); |
|
2453
|
|
|
|
|
|
|
|
|
2454
|
|
|
|
|
|
|
# table showing how many quoted things to look for after quote operator.. |
|
2455
|
|
|
|
|
|
|
# s, y, tr have 2 (pattern and replacement) |
|
2456
|
|
|
|
|
|
|
# others have 1 (pattern only) |
|
2457
|
|
|
|
|
|
|
my %quote_items = ( |
|
2458
|
|
|
|
|
|
|
's' => 2, |
|
2459
|
|
|
|
|
|
|
'y' => 2, |
|
2460
|
|
|
|
|
|
|
'tr' => 2, |
|
2461
|
|
|
|
|
|
|
'm' => 1, |
|
2462
|
|
|
|
|
|
|
'qr' => 1, |
|
2463
|
|
|
|
|
|
|
'q' => 1, |
|
2464
|
|
|
|
|
|
|
'qq' => 1, |
|
2465
|
|
|
|
|
|
|
'qw' => 1, |
|
2466
|
|
|
|
|
|
|
'qx' => 1, |
|
2467
|
|
|
|
|
|
|
); |
|
2468
|
|
|
|
|
|
|
|
|
2469
|
|
|
|
|
|
|
my %is_for_foreach; |
|
2470
|
|
|
|
|
|
|
@q = qw( for foreach ); |
|
2471
|
|
|
|
|
|
|
$is_for_foreach{$_} = 1 for @q; |
|
2472
|
|
|
|
|
|
|
|
|
2473
|
|
|
|
|
|
|
# These keywords may introduce blocks after parenthesized expressions, |
|
2474
|
|
|
|
|
|
|
# in the form: |
|
2475
|
|
|
|
|
|
|
# keyword ( .... ) { BLOCK } |
|
2476
|
|
|
|
|
|
|
# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' |
|
2477
|
|
|
|
|
|
|
my %is_blocktype_with_paren; |
|
2478
|
|
|
|
|
|
|
@q = |
|
2479
|
|
|
|
|
|
|
qw(if elsif unless while until for foreach switch case given when catch); |
|
2480
|
|
|
|
|
|
|
$is_blocktype_with_paren{$_} = 1 for @q; |
|
2481
|
|
|
|
|
|
|
|
|
2482
|
|
|
|
|
|
|
my %is_case_default; |
|
2483
|
|
|
|
|
|
|
@q = qw( case default ); |
|
2484
|
|
|
|
|
|
|
$is_case_default{$_} = 1 for @q; |
|
2485
|
|
|
|
|
|
|
|
|
2486
|
|
|
|
|
|
|
#------------------------ |
|
2487
|
|
|
|
|
|
|
# end of tokenizer hashes |
|
2488
|
|
|
|
|
|
|
#------------------------ |
|
2489
|
|
|
|
|
|
|
|
|
2490
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
|
2491
|
|
|
|
|
|
|
# beginning of various scanner interface routines |
|
2492
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
|
2493
|
|
|
|
|
|
|
sub scan_replacement_text { |
|
2494
|
|
|
|
|
|
|
|
|
2495
|
|
|
|
|
|
|
# check for here-docs in replacement text invoked by |
|
2496
|
|
|
|
|
|
|
# a substitution operator with executable modifier 'e'. |
|
2497
|
|
|
|
|
|
|
# |
|
2498
|
|
|
|
|
|
|
# given: |
|
2499
|
|
|
|
|
|
|
# $replacement_text |
|
2500
|
|
|
|
|
|
|
# return: |
|
2501
|
|
|
|
|
|
|
# $rht = reference to any here-doc targets |
|
2502
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $replacement_text ) = @_; |
|
2503
|
|
|
|
|
|
|
|
|
2504
|
|
|
|
|
|
|
# quick check |
|
2505
|
0
|
0
|
|
|
|
0
|
return if ( $replacement_text !~ /<</ ); |
|
2506
|
|
|
|
|
|
|
|
|
2507
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
|
2508
|
|
|
|
|
|
|
"scanning replacement text for here-doc targets\n"); |
|
2509
|
|
|
|
|
|
|
|
|
2510
|
|
|
|
|
|
|
# save the logger object for error messages |
|
2511
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
|
2512
|
|
|
|
|
|
|
|
|
2513
|
|
|
|
|
|
|
# save all lexical variables |
|
2514
|
0
|
|
|
|
|
0
|
my $rstate = save_tokenizer_state(); |
|
2515
|
0
|
|
|
|
|
0
|
_decrement_count(); # avoid error check for multiple tokenizers |
|
2516
|
|
|
|
|
|
|
|
|
2517
|
|
|
|
|
|
|
# make a new tokenizer |
|
2518
|
0
|
|
|
|
|
0
|
my $tokenizer = Perl::Tidy::Tokenizer->new( |
|
2519
|
|
|
|
|
|
|
source_object => \$replacement_text, |
|
2520
|
|
|
|
|
|
|
logger_object => $logger_object, |
|
2521
|
|
|
|
|
|
|
starting_line_number => $input_line_number, |
|
2522
|
|
|
|
|
|
|
); |
|
2523
|
|
|
|
|
|
|
|
|
2524
|
|
|
|
|
|
|
# scan the replacement text |
|
2525
|
0
|
|
|
|
|
0
|
while ( $tokenizer->get_line() ) { } |
|
2526
|
|
|
|
|
|
|
|
|
2527
|
|
|
|
|
|
|
# remove any here doc targets |
|
2528
|
0
|
|
|
|
|
0
|
my $rht = undef; |
|
2529
|
0
|
0
|
|
|
|
0
|
if ( $tokenizer->[_in_here_doc_] ) { |
|
2530
|
0
|
|
|
|
|
0
|
$rht = []; |
|
2531
|
0
|
|
|
|
|
0
|
push @{$rht}, |
|
|
0
|
|
|
|
|
0
|
|
|
2532
|
|
|
|
|
|
|
[ |
|
2533
|
|
|
|
|
|
|
$tokenizer->[_here_doc_target_], |
|
2534
|
|
|
|
|
|
|
$tokenizer->[_here_quote_character_], |
|
2535
|
|
|
|
|
|
|
]; |
|
2536
|
0
|
0
|
|
|
|
0
|
if ( $tokenizer->[_rhere_target_list_] ) { |
|
2537
|
0
|
|
|
|
|
0
|
push @{$rht}, @{ $tokenizer->[_rhere_target_list_] }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
2538
|
0
|
|
|
|
|
0
|
$tokenizer->[_rhere_target_list_] = undef; |
|
2539
|
|
|
|
|
|
|
} |
|
2540
|
0
|
|
|
|
|
0
|
$tokenizer->[_in_here_doc_] = undef; |
|
2541
|
|
|
|
|
|
|
} |
|
2542
|
|
|
|
|
|
|
|
|
2543
|
|
|
|
|
|
|
# now its safe to report errors |
|
2544
|
0
|
|
|
|
|
0
|
my $rtokenization_info_uu = $tokenizer->report_tokenization_errors(); |
|
2545
|
|
|
|
|
|
|
|
|
2546
|
|
|
|
|
|
|
# TODO: Could propagate a severe error up |
|
2547
|
|
|
|
|
|
|
|
|
2548
|
|
|
|
|
|
|
# restore all tokenizer lexical variables |
|
2549
|
0
|
|
|
|
|
0
|
restore_tokenizer_state($rstate); |
|
2550
|
|
|
|
|
|
|
|
|
2551
|
|
|
|
|
|
|
# return the here doc targets |
|
2552
|
0
|
|
|
|
|
0
|
return $rht; |
|
2553
|
|
|
|
|
|
|
} ## end sub scan_replacement_text |
|
2554
|
|
|
|
|
|
|
|
|
2555
|
|
|
|
|
|
|
sub scan_bare_identifier { |
|
2556
|
1862
|
|
|
1862
|
0
|
2568
|
my $self = shift; |
|
2557
|
|
|
|
|
|
|
|
|
2558
|
|
|
|
|
|
|
# Scan a token starting with an alphanumeric variable or package |
|
2559
|
|
|
|
|
|
|
# separator, :: or '. |
|
2560
|
|
|
|
|
|
|
|
|
2561
|
1862
|
|
|
|
|
5653
|
( $i, $tok, $type, $prototype ) = $self->scan_bare_identifier_do( |
|
2562
|
|
|
|
|
|
|
|
|
2563
|
|
|
|
|
|
|
$input_line, |
|
2564
|
|
|
|
|
|
|
$i, |
|
2565
|
|
|
|
|
|
|
$tok, |
|
2566
|
|
|
|
|
|
|
$type, |
|
2567
|
|
|
|
|
|
|
$prototype, |
|
2568
|
|
|
|
|
|
|
$rtoken_map, |
|
2569
|
|
|
|
|
|
|
$max_token_index, |
|
2570
|
|
|
|
|
|
|
); |
|
2571
|
1862
|
|
|
|
|
3623
|
return; |
|
2572
|
|
|
|
|
|
|
} ## end sub scan_bare_identifier |
|
2573
|
|
|
|
|
|
|
|
|
2574
|
|
|
|
|
|
|
sub scan_identifier { |
|
2575
|
|
|
|
|
|
|
|
|
2576
|
|
|
|
|
|
|
# Scan for an identifier following a sigil or -> or other |
|
2577
|
|
|
|
|
|
|
# identifier prefix, such as '::' |
|
2578
|
|
|
|
|
|
|
|
|
2579
|
551
|
|
|
551
|
0
|
782
|
my $self = shift; |
|
2580
|
|
|
|
|
|
|
|
|
2581
|
|
|
|
|
|
|
( |
|
2582
|
|
|
|
|
|
|
|
|
2583
|
551
|
|
|
|
|
2096
|
$i, |
|
2584
|
|
|
|
|
|
|
$tok, |
|
2585
|
|
|
|
|
|
|
$type, |
|
2586
|
|
|
|
|
|
|
$id_scan_state, |
|
2587
|
|
|
|
|
|
|
$identifier, |
|
2588
|
|
|
|
|
|
|
my $split_pretoken_flag, |
|
2589
|
|
|
|
|
|
|
|
|
2590
|
|
|
|
|
|
|
) = $self->scan_complex_identifier( |
|
2591
|
|
|
|
|
|
|
|
|
2592
|
|
|
|
|
|
|
$i, |
|
2593
|
|
|
|
|
|
|
$id_scan_state, |
|
2594
|
|
|
|
|
|
|
$identifier, |
|
2595
|
|
|
|
|
|
|
$rtokens, |
|
2596
|
|
|
|
|
|
|
$max_token_index, |
|
2597
|
|
|
|
|
|
|
$expecting, |
|
2598
|
|
|
|
|
|
|
$rparen_type->[$paren_depth], |
|
2599
|
|
|
|
|
|
|
); |
|
2600
|
|
|
|
|
|
|
|
|
2601
|
|
|
|
|
|
|
# Check for signal to fix a special variable adjacent to a keyword, |
|
2602
|
|
|
|
|
|
|
# such as '$^One$0'. |
|
2603
|
551
|
100
|
|
|
|
1330
|
if ($split_pretoken_flag) { |
|
2604
|
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
# Try to fix it by splitting the pretoken |
|
2606
|
3
|
50
|
33
|
|
|
17
|
if ( $i > 0 |
|
|
|
|
33
|
|
|
|
|
|
2607
|
|
|
|
|
|
|
&& $rtokens->[ $i - 1 ] eq '^' |
|
2608
|
|
|
|
|
|
|
&& $self->split_pretoken(1) ) |
|
2609
|
|
|
|
|
|
|
{ |
|
2610
|
3
|
|
|
|
|
4
|
$identifier = substr( $identifier, 0, 3 ); |
|
2611
|
3
|
|
|
|
|
4
|
$tok = $identifier; |
|
2612
|
|
|
|
|
|
|
} |
|
2613
|
|
|
|
|
|
|
else { |
|
2614
|
|
|
|
|
|
|
|
|
2615
|
|
|
|
|
|
|
# This shouldn't happen ... |
|
2616
|
0
|
|
|
|
|
0
|
my $var = substr( $tok, 0, 3 ); |
|
2617
|
0
|
|
|
|
|
0
|
my $excess = substr( $tok, 3 ); |
|
2618
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
|
2619
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
|
2620
|
|
|
|
|
|
|
$input_line_number: Trouble parsing at characters '$excess' after special variable '$var'. |
|
2621
|
|
|
|
|
|
|
A space may be needed after '$var'. |
|
2622
|
|
|
|
|
|
|
EOM |
|
2623
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
|
2624
|
|
|
|
|
|
|
} |
|
2625
|
|
|
|
|
|
|
} |
|
2626
|
551
|
|
|
|
|
846
|
return; |
|
2627
|
|
|
|
|
|
|
} ## end sub scan_identifier |
|
2628
|
|
|
|
|
|
|
|
|
2629
|
44
|
|
|
44
|
|
310
|
use constant VERIFY_FASTSCAN => 0; |
|
|
44
|
|
|
|
|
102
|
|
|
|
44
|
|
|
|
|
4439
|
|
|
2630
|
|
|
|
|
|
|
my %fast_scan_context; |
|
2631
|
|
|
|
|
|
|
|
|
2632
|
|
|
|
|
|
|
BEGIN { |
|
2633
|
44
|
|
|
44
|
|
55109
|
%fast_scan_context = ( |
|
2634
|
|
|
|
|
|
|
'$' => SCALAR_CONTEXT, |
|
2635
|
|
|
|
|
|
|
'*' => SCALAR_CONTEXT, |
|
2636
|
|
|
|
|
|
|
'@' => LIST_CONTEXT, |
|
2637
|
|
|
|
|
|
|
'%' => LIST_CONTEXT, |
|
2638
|
|
|
|
|
|
|
'&' => UNKNOWN_CONTEXT, |
|
2639
|
|
|
|
|
|
|
); |
|
2640
|
|
|
|
|
|
|
} ## end BEGIN |
|
2641
|
|
|
|
|
|
|
|
|
2642
|
|
|
|
|
|
|
sub scan_simple_identifier { |
|
2643
|
|
|
|
|
|
|
|
|
2644
|
5685
|
|
|
5685
|
0
|
7019
|
my $self = shift; |
|
2645
|
|
|
|
|
|
|
|
|
2646
|
|
|
|
|
|
|
# This is a wrapper for sub scan_identifier. It does a fast preliminary |
|
2647
|
|
|
|
|
|
|
# scan for certain common identifiers: |
|
2648
|
|
|
|
|
|
|
# '$var', '@var', %var, *var, &var, '@{...}', '%{...}' |
|
2649
|
|
|
|
|
|
|
# If it does not find one of these, or this is a restart, it calls the |
|
2650
|
|
|
|
|
|
|
# original scanner directly. |
|
2651
|
|
|
|
|
|
|
|
|
2652
|
|
|
|
|
|
|
# This gives the same results as the full scanner in about 1/4 the |
|
2653
|
|
|
|
|
|
|
# total runtime for a typical input stream. |
|
2654
|
|
|
|
|
|
|
|
|
2655
|
|
|
|
|
|
|
# Notation: |
|
2656
|
|
|
|
|
|
|
# $var * 2 |
|
2657
|
|
|
|
|
|
|
# ^^ ^ |
|
2658
|
|
|
|
|
|
|
# || | |
|
2659
|
|
|
|
|
|
|
# || ---- $i_next [= next nonblank pretoken ] |
|
2660
|
|
|
|
|
|
|
# |----$i_plus_1 [= a bareword ] |
|
2661
|
|
|
|
|
|
|
# ---$i_begin [= a sigil] |
|
2662
|
|
|
|
|
|
|
|
|
2663
|
5685
|
|
|
|
|
6790
|
my $i_begin = $i; |
|
2664
|
5685
|
|
|
|
|
6939
|
my $tok_begin = $tok; |
|
2665
|
5685
|
|
|
|
|
7237
|
my $i_plus_1 = $i + 1; |
|
2666
|
5685
|
|
|
|
|
6494
|
my $fast_scan_type; |
|
2667
|
|
|
|
|
|
|
|
|
2668
|
|
|
|
|
|
|
#------------------------------------------------------- |
|
2669
|
|
|
|
|
|
|
# Do full scan for anything following a pointer, such as |
|
2670
|
|
|
|
|
|
|
# $cref->&*; # a postderef |
|
2671
|
|
|
|
|
|
|
#------------------------------------------------------- |
|
2672
|
5685
|
100
|
66
|
|
|
25465
|
if ( $last_nonblank_token eq '->' ) { |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
2673
|
|
|
|
|
|
|
|
|
2674
|
|
|
|
|
|
|
} |
|
2675
|
|
|
|
|
|
|
|
|
2676
|
|
|
|
|
|
|
#------------------------------ |
|
2677
|
|
|
|
|
|
|
# quick scan with leading sigil |
|
2678
|
|
|
|
|
|
|
#------------------------------ |
|
2679
|
|
|
|
|
|
|
elsif ( !$id_scan_state |
|
2680
|
|
|
|
|
|
|
&& $i_plus_1 <= $max_token_index |
|
2681
|
|
|
|
|
|
|
&& $fast_scan_context{$tok} ) |
|
2682
|
|
|
|
|
|
|
{ |
|
2683
|
5571
|
|
|
|
|
7818
|
$context = $fast_scan_context{$tok}; |
|
2684
|
|
|
|
|
|
|
|
|
2685
|
|
|
|
|
|
|
# look for $var, @var, ... |
|
2686
|
5571
|
100
|
100
|
|
|
10583
|
if ( $rtoken_type->[$i_plus_1] eq 'w' ) { |
|
|
|
100
|
66
|
|
|
|
|
|
2687
|
5214
|
|
|
|
|
6880
|
my $pretype_next = EMPTY_STRING; |
|
2688
|
5214
|
100
|
|
|
|
8842
|
if ( $i_plus_1 < $max_token_index ) { |
|
2689
|
5092
|
|
|
|
|
6413
|
my $i_next = $i_plus_1 + 1; |
|
2690
|
5092
|
100
|
100
|
|
|
12986
|
if ( $rtoken_type->[$i_next] eq 'b' |
|
2691
|
|
|
|
|
|
|
&& $i_next < $max_token_index ) |
|
2692
|
|
|
|
|
|
|
{ |
|
2693
|
2045
|
|
|
|
|
2735
|
$i_next += 1; |
|
2694
|
|
|
|
|
|
|
} |
|
2695
|
5092
|
|
|
|
|
7232
|
$pretype_next = $rtoken_type->[$i_next]; |
|
2696
|
|
|
|
|
|
|
} |
|
2697
|
5214
|
100
|
100
|
|
|
14391
|
if ( $pretype_next ne ':' && $pretype_next ne "'" ) { |
|
2698
|
|
|
|
|
|
|
|
|
2699
|
|
|
|
|
|
|
# Found type 'i' like '$var', '@var', or '%var' |
|
2700
|
5098
|
|
|
|
|
8402
|
$identifier = $tok . $rtokens->[$i_plus_1]; |
|
2701
|
5098
|
|
|
|
|
5908
|
$tok = $identifier; |
|
2702
|
5098
|
|
|
|
|
6028
|
$type = 'i'; |
|
2703
|
5098
|
|
|
|
|
5800
|
$i = $i_plus_1; |
|
2704
|
5098
|
|
|
|
|
6821
|
$fast_scan_type = $type; |
|
2705
|
|
|
|
|
|
|
} |
|
2706
|
|
|
|
|
|
|
} |
|
2707
|
|
|
|
|
|
|
|
|
2708
|
|
|
|
|
|
|
# Look for @{ or %{ . |
|
2709
|
|
|
|
|
|
|
# But we must let the full scanner handle things ${ because it may |
|
2710
|
|
|
|
|
|
|
# keep going to get a complete identifier like '${#}' . |
|
2711
|
|
|
|
|
|
|
elsif ( |
|
2712
|
|
|
|
|
|
|
$rtoken_type->[$i_plus_1] eq '{' |
|
2713
|
|
|
|
|
|
|
&& ( $tok_begin eq '@' |
|
2714
|
|
|
|
|
|
|
|| $tok_begin eq '%' ) |
|
2715
|
|
|
|
|
|
|
) |
|
2716
|
|
|
|
|
|
|
{ |
|
2717
|
|
|
|
|
|
|
|
|
2718
|
43
|
|
|
|
|
79
|
$identifier = $tok; |
|
2719
|
43
|
|
|
|
|
76
|
$type = 't'; |
|
2720
|
43
|
|
|
|
|
97
|
$fast_scan_type = $type; |
|
2721
|
|
|
|
|
|
|
} |
|
2722
|
|
|
|
|
|
|
else { |
|
2723
|
|
|
|
|
|
|
## out of tricks |
|
2724
|
|
|
|
|
|
|
} |
|
2725
|
|
|
|
|
|
|
} |
|
2726
|
|
|
|
|
|
|
|
|
2727
|
|
|
|
|
|
|
#--------------------------- |
|
2728
|
|
|
|
|
|
|
# Quick scan with leading -> |
|
2729
|
|
|
|
|
|
|
# Look for ->[ and ->{ |
|
2730
|
|
|
|
|
|
|
#--------------------------- |
|
2731
|
|
|
|
|
|
|
elsif ( |
|
2732
|
|
|
|
|
|
|
$tok eq '->' |
|
2733
|
|
|
|
|
|
|
&& $i < $max_token_index |
|
2734
|
|
|
|
|
|
|
&& ( $rtokens->[$i_plus_1] eq '{' |
|
2735
|
|
|
|
|
|
|
|| $rtokens->[$i_plus_1] eq '[' ) |
|
2736
|
|
|
|
|
|
|
) |
|
2737
|
|
|
|
|
|
|
{ |
|
2738
|
0
|
|
|
|
|
0
|
$type = $tok; |
|
2739
|
0
|
|
|
|
|
0
|
$fast_scan_type = $type; |
|
2740
|
0
|
|
|
|
|
0
|
$identifier = $tok; |
|
2741
|
0
|
|
|
|
|
0
|
$context = UNKNOWN_CONTEXT; |
|
2742
|
|
|
|
|
|
|
} |
|
2743
|
|
|
|
|
|
|
else { |
|
2744
|
|
|
|
|
|
|
## out of tricks |
|
2745
|
|
|
|
|
|
|
} |
|
2746
|
|
|
|
|
|
|
|
|
2747
|
|
|
|
|
|
|
#-------------------------------------- |
|
2748
|
|
|
|
|
|
|
# Verify correctness during development |
|
2749
|
|
|
|
|
|
|
#-------------------------------------- |
|
2750
|
5685
|
|
|
|
|
6365
|
if ( VERIFY_FASTSCAN && $fast_scan_type ) { |
|
2751
|
|
|
|
|
|
|
|
|
2752
|
|
|
|
|
|
|
# We will call the full method |
|
2753
|
|
|
|
|
|
|
my $identifier_simple = $identifier; |
|
2754
|
|
|
|
|
|
|
my $tok_simple = $tok; |
|
2755
|
|
|
|
|
|
|
my $i_simple = $i; |
|
2756
|
|
|
|
|
|
|
my $context_simple = $context; |
|
2757
|
|
|
|
|
|
|
|
|
2758
|
|
|
|
|
|
|
$tok = $tok_begin; |
|
2759
|
|
|
|
|
|
|
$i = $i_begin; |
|
2760
|
|
|
|
|
|
|
$self->scan_identifier(); |
|
2761
|
|
|
|
|
|
|
|
|
2762
|
|
|
|
|
|
|
if ( $tok ne $tok_simple |
|
2763
|
|
|
|
|
|
|
|| $type ne $fast_scan_type |
|
2764
|
|
|
|
|
|
|
|| $i != $i_simple |
|
2765
|
|
|
|
|
|
|
|| $identifier ne $identifier_simple |
|
2766
|
|
|
|
|
|
|
|| $id_scan_state |
|
2767
|
|
|
|
|
|
|
|| $context ne $context_simple ) |
|
2768
|
|
|
|
|
|
|
{ |
|
2769
|
|
|
|
|
|
|
print {*STDERR} <<EOM; |
|
2770
|
|
|
|
|
|
|
scan_simple_identifier differs from scan_identifier: |
|
2771
|
|
|
|
|
|
|
simple: i=$i_simple, tok=$tok_simple, type=$fast_scan_type, ident=$identifier_simple, context='$context_simple |
|
2772
|
|
|
|
|
|
|
full: i=$i, tok=$tok, type=$type, ident=$identifier, context='$context state=$id_scan_state |
|
2773
|
|
|
|
|
|
|
EOM |
|
2774
|
|
|
|
|
|
|
} |
|
2775
|
|
|
|
|
|
|
} |
|
2776
|
|
|
|
|
|
|
|
|
2777
|
|
|
|
|
|
|
#------------------------------------------------- |
|
2778
|
|
|
|
|
|
|
# call full scanner if fast method did not succeed |
|
2779
|
|
|
|
|
|
|
#------------------------------------------------- |
|
2780
|
5685
|
100
|
|
|
|
9389
|
if ( !$fast_scan_type ) { |
|
2781
|
544
|
|
|
|
|
1416
|
$self->scan_identifier(); |
|
2782
|
|
|
|
|
|
|
} |
|
2783
|
5685
|
|
|
|
|
8424
|
return; |
|
2784
|
|
|
|
|
|
|
} ## end sub scan_simple_identifier |
|
2785
|
|
|
|
|
|
|
|
|
2786
|
|
|
|
|
|
|
sub method_ok_here { |
|
2787
|
|
|
|
|
|
|
|
|
2788
|
14
|
|
|
14
|
0
|
32
|
my ( $self, $next_nonblank_token ) = @_; |
|
2789
|
|
|
|
|
|
|
|
|
2790
|
|
|
|
|
|
|
# Return: |
|
2791
|
|
|
|
|
|
|
# false if this is definitely an invalid method declaration |
|
2792
|
|
|
|
|
|
|
# true otherwise (even if not sure) |
|
2793
|
|
|
|
|
|
|
|
|
2794
|
|
|
|
|
|
|
# We are trying to avoid problems with old uses of 'method' |
|
2795
|
|
|
|
|
|
|
# when --use-feature=class is set (rt145706). |
|
2796
|
|
|
|
|
|
|
# For example, this should cause a return of 'false': |
|
2797
|
|
|
|
|
|
|
|
|
2798
|
|
|
|
|
|
|
# method paint => sub { |
|
2799
|
|
|
|
|
|
|
# return; |
|
2800
|
|
|
|
|
|
|
# }; |
|
2801
|
|
|
|
|
|
|
|
|
2802
|
|
|
|
|
|
|
# Assume non-method if an error would occur |
|
2803
|
14
|
50
|
|
|
|
35
|
return if ( $expecting == OPERATOR ); |
|
2804
|
|
|
|
|
|
|
|
|
2805
|
|
|
|
|
|
|
# Currently marking a line-ending 'method' as a bareword (fix c532) |
|
2806
|
14
|
50
|
|
|
|
32
|
return if ( $i_tok >= $max_token_index ); |
|
2807
|
|
|
|
|
|
|
|
|
2808
|
|
|
|
|
|
|
# If a '$' follows 'method'... |
|
2809
|
|
|
|
|
|
|
# Check for possible Object::Pad lexical method like |
|
2810
|
|
|
|
|
|
|
# 'method $var {' |
|
2811
|
|
|
|
|
|
|
# TODO: maybe merge this with the code below by increasing pos by 1 |
|
2812
|
14
|
100
|
66
|
|
|
50
|
if ( $next_nonblank_token eq '$' && new_statement_ok() ) { |
|
2813
|
2
|
|
|
|
|
7
|
return 1; |
|
2814
|
|
|
|
|
|
|
} |
|
2815
|
|
|
|
|
|
|
|
|
2816
|
|
|
|
|
|
|
# Otherwise, not a method if non-word follows .. |
|
2817
|
12
|
100
|
|
|
|
46
|
if ( $next_nonblank_token !~ /^[\w\:]/ ) { return } |
|
|
4
|
|
|
|
|
14
|
|
|
2818
|
|
|
|
|
|
|
|
|
2819
|
|
|
|
|
|
|
# from do_scan_sub: |
|
2820
|
8
|
|
|
|
|
16
|
my $i_beg = $i + 1; |
|
2821
|
8
|
|
|
|
|
16
|
my $pos_beg = $rtoken_map->[$i_beg]; |
|
2822
|
8
|
|
|
|
|
25
|
pos($input_line) = $pos_beg; |
|
2823
|
|
|
|
|
|
|
|
|
2824
|
|
|
|
|
|
|
# TEST 1: look a valid sub NAME |
|
2825
|
8
|
50
|
|
|
|
44
|
if ( |
|
2826
|
|
|
|
|
|
|
$input_line =~ m{\G\s* |
|
2827
|
|
|
|
|
|
|
((?:\w*(?:'|::))*) # package - something that ends in :: or ' |
|
2828
|
|
|
|
|
|
|
(\w+) # NAME - required |
|
2829
|
|
|
|
|
|
|
}gcx |
|
2830
|
|
|
|
|
|
|
) |
|
2831
|
|
|
|
|
|
|
{ |
|
2832
|
|
|
|
|
|
|
# For possible future use.. |
|
2833
|
|
|
|
|
|
|
##my $subname = $2; |
|
2834
|
|
|
|
|
|
|
##my $package = $1 ? $1 : EMPTY_STRING; |
|
2835
|
|
|
|
|
|
|
} |
|
2836
|
|
|
|
|
|
|
else { |
|
2837
|
0
|
|
|
|
|
0
|
return; |
|
2838
|
|
|
|
|
|
|
} |
|
2839
|
|
|
|
|
|
|
|
|
2840
|
|
|
|
|
|
|
# TEST 2: look for invalid characters after name, such as here: |
|
2841
|
|
|
|
|
|
|
# method paint => sub { |
|
2842
|
|
|
|
|
|
|
# ... |
|
2843
|
|
|
|
|
|
|
# } |
|
2844
|
8
|
|
|
|
|
15
|
my $next_char = EMPTY_STRING; |
|
2845
|
8
|
100
|
|
|
|
29
|
if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 } |
|
|
7
|
|
|
|
|
17
|
|
|
2846
|
8
|
100
|
66
|
|
|
36
|
if ( !$next_char || $next_char eq '#' ) { |
|
2847
|
1
|
|
|
|
|
6
|
( $next_char, my $i_next_uu ) = |
|
2848
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
|
2849
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
|
2850
|
|
|
|
|
|
|
} |
|
2851
|
|
|
|
|
|
|
|
|
2852
|
8
|
50
|
|
|
|
18
|
if ( !$next_char ) { |
|
2853
|
|
|
|
|
|
|
|
|
2854
|
|
|
|
|
|
|
# out of characters - give up |
|
2855
|
0
|
|
|
|
|
0
|
return; |
|
2856
|
|
|
|
|
|
|
} |
|
2857
|
|
|
|
|
|
|
|
|
2858
|
|
|
|
|
|
|
# Possibly valid next token types: |
|
2859
|
|
|
|
|
|
|
# '(' could start prototype or signature |
|
2860
|
|
|
|
|
|
|
# ':' could start ATTRIBUTE |
|
2861
|
|
|
|
|
|
|
# '{' cold start BLOCK |
|
2862
|
|
|
|
|
|
|
# ';' or '}' could end a statement |
|
2863
|
8
|
100
|
|
|
|
26
|
if ( $next_char !~ /^[\(\:\{\;\}]/ ) { |
|
2864
|
|
|
|
|
|
|
|
|
2865
|
|
|
|
|
|
|
# This does not match use feature 'class' syntax |
|
2866
|
3
|
|
|
|
|
10
|
return; |
|
2867
|
|
|
|
|
|
|
} |
|
2868
|
|
|
|
|
|
|
|
|
2869
|
|
|
|
|
|
|
# We will stop here and assume that this is valid syntax for |
|
2870
|
|
|
|
|
|
|
# use feature 'class'. |
|
2871
|
5
|
|
|
|
|
19
|
return 1; |
|
2872
|
|
|
|
|
|
|
} ## end sub method_ok_here |
|
2873
|
|
|
|
|
|
|
|
|
2874
|
|
|
|
|
|
|
sub class_ok_here { |
|
2875
|
|
|
|
|
|
|
|
|
2876
|
12
|
|
|
12
|
0
|
19
|
my $self = shift; |
|
2877
|
|
|
|
|
|
|
|
|
2878
|
|
|
|
|
|
|
# Return: |
|
2879
|
|
|
|
|
|
|
# false if this is definitely an invalid class declaration |
|
2880
|
|
|
|
|
|
|
# true otherwise (even if not sure) |
|
2881
|
|
|
|
|
|
|
|
|
2882
|
|
|
|
|
|
|
# We are trying to avoid problems with old uses of 'class' |
|
2883
|
|
|
|
|
|
|
# when --use-feature=class is set (rt145706). We look ahead |
|
2884
|
|
|
|
|
|
|
# see if this use of 'class' is obviously inconsistent with |
|
2885
|
|
|
|
|
|
|
# the syntax of use feature 'class'. This allows the default |
|
2886
|
|
|
|
|
|
|
# setting --use-feature=class to work for old syntax too. |
|
2887
|
|
|
|
|
|
|
|
|
2888
|
|
|
|
|
|
|
# Valid class declarations look like |
|
2889
|
|
|
|
|
|
|
# class NAME ?ATTRS ?VERSION ?BLOCK |
|
2890
|
|
|
|
|
|
|
# where ATTRS VERSION and BLOCK are optional |
|
2891
|
|
|
|
|
|
|
|
|
2892
|
|
|
|
|
|
|
# For example, this should produce a return of 'false': |
|
2893
|
|
|
|
|
|
|
# |
|
2894
|
|
|
|
|
|
|
# class ExtendsBasicAttributes is BasicAttributes{ |
|
2895
|
|
|
|
|
|
|
|
|
2896
|
|
|
|
|
|
|
# TEST 1: class stmt can only go where a new statement can start |
|
2897
|
12
|
50
|
|
|
|
37
|
if ( !new_statement_ok() ) { return } |
|
|
0
|
|
|
|
|
0
|
|
|
2898
|
|
|
|
|
|
|
|
|
2899
|
12
|
|
|
|
|
21
|
my $i_beg = $i + 1; |
|
2900
|
12
|
|
|
|
|
25
|
my $pos_beg = $rtoken_map->[$i_beg]; |
|
2901
|
12
|
|
|
|
|
32
|
pos($input_line) = $pos_beg; |
|
2902
|
|
|
|
|
|
|
|
|
2903
|
|
|
|
|
|
|
# TEST 2: look for a valid NAME |
|
2904
|
12
|
50
|
|
|
|
70
|
if ( |
|
2905
|
|
|
|
|
|
|
$input_line =~ m{\G\s* |
|
2906
|
|
|
|
|
|
|
((?:\w*(?:'|::))*) # package - something that ends in :: or ' |
|
2907
|
|
|
|
|
|
|
(\w+) # NAME - required |
|
2908
|
|
|
|
|
|
|
}gcx |
|
2909
|
|
|
|
|
|
|
) |
|
2910
|
|
|
|
|
|
|
{ |
|
2911
|
|
|
|
|
|
|
# For possible future use.. |
|
2912
|
|
|
|
|
|
|
##my $subname = $2; |
|
2913
|
|
|
|
|
|
|
##my $package = $1 ? $1 : EMPTY_STRING; |
|
2914
|
|
|
|
|
|
|
} |
|
2915
|
|
|
|
|
|
|
else { |
|
2916
|
0
|
|
|
|
|
0
|
return; |
|
2917
|
|
|
|
|
|
|
} |
|
2918
|
|
|
|
|
|
|
|
|
2919
|
|
|
|
|
|
|
# TEST 3: look for valid characters after NAME |
|
2920
|
12
|
|
|
|
|
22
|
my $next_char = EMPTY_STRING; |
|
2921
|
12
|
100
|
|
|
|
44
|
if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 } |
|
|
11
|
|
|
|
|
22
|
|
|
2922
|
12
|
100
|
66
|
|
|
57
|
if ( !$next_char || $next_char eq '#' ) { |
|
2923
|
1
|
|
|
|
|
4
|
( $next_char, my $i_next_uu ) = |
|
2924
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
|
2925
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
|
2926
|
|
|
|
|
|
|
} |
|
2927
|
12
|
50
|
|
|
|
27
|
if ( !$next_char ) { |
|
2928
|
|
|
|
|
|
|
|
|
2929
|
|
|
|
|
|
|
# out of characters - give up |
|
2930
|
0
|
|
|
|
|
0
|
return; |
|
2931
|
|
|
|
|
|
|
} |
|
2932
|
|
|
|
|
|
|
|
|
2933
|
|
|
|
|
|
|
# Must see one of: ATTRIBUTE, VERSION, BLOCK, or end stmt |
|
2934
|
|
|
|
|
|
|
|
|
2935
|
|
|
|
|
|
|
# Possibly valid next token types: |
|
2936
|
|
|
|
|
|
|
# ':' could start ATTRIBUTE |
|
2937
|
|
|
|
|
|
|
# '\d' could start VERSION |
|
2938
|
|
|
|
|
|
|
# '{' cold start BLOCK |
|
2939
|
|
|
|
|
|
|
# ';' could end a statement |
|
2940
|
|
|
|
|
|
|
# '}' could end statement but would be strange |
|
2941
|
|
|
|
|
|
|
|
|
2942
|
12
|
100
|
|
|
|
44
|
if ( $next_char !~ /^[\:\d\{\;\}]/ ) { |
|
2943
|
|
|
|
|
|
|
|
|
2944
|
|
|
|
|
|
|
# This does not match use feature 'class' syntax |
|
2945
|
2
|
|
|
|
|
7
|
return; |
|
2946
|
|
|
|
|
|
|
} |
|
2947
|
|
|
|
|
|
|
|
|
2948
|
|
|
|
|
|
|
# We will stop here and assume that this is valid syntax for |
|
2949
|
|
|
|
|
|
|
# use feature 'class'. |
|
2950
|
10
|
|
|
|
|
28
|
return 1; |
|
2951
|
|
|
|
|
|
|
} ## end sub class_ok_here |
|
2952
|
|
|
|
|
|
|
|
|
2953
|
|
|
|
|
|
|
sub scan_id { |
|
2954
|
405
|
|
|
405
|
0
|
623
|
my $self = shift; |
|
2955
|
|
|
|
|
|
|
|
|
2956
|
|
|
|
|
|
|
# Scan for a sub or package name |
|
2957
|
|
|
|
|
|
|
|
|
2958
|
405
|
|
|
|
|
1634
|
( $i, $tok, $type, $id_scan_state ) = $self->scan_id_do( |
|
2959
|
|
|
|
|
|
|
|
|
2960
|
|
|
|
|
|
|
$input_line, |
|
2961
|
|
|
|
|
|
|
$i, $tok, |
|
2962
|
|
|
|
|
|
|
$rtokens, |
|
2963
|
|
|
|
|
|
|
$rtoken_map, |
|
2964
|
|
|
|
|
|
|
$id_scan_state, |
|
2965
|
|
|
|
|
|
|
$max_token_index, |
|
2966
|
|
|
|
|
|
|
); |
|
2967
|
405
|
|
|
|
|
910
|
return; |
|
2968
|
|
|
|
|
|
|
} ## end sub scan_id |
|
2969
|
|
|
|
|
|
|
|
|
2970
|
|
|
|
|
|
|
sub scan_number { |
|
2971
|
683
|
|
|
683
|
0
|
957
|
my $self = shift; |
|
2972
|
683
|
|
|
|
|
861
|
my $number; |
|
2973
|
683
|
|
|
|
|
1627
|
( $i, $type, $number ) = |
|
2974
|
|
|
|
|
|
|
$self->scan_number_do( $input_line, $i, $rtoken_map, $type, |
|
2975
|
|
|
|
|
|
|
$max_token_index ); |
|
2976
|
683
|
|
|
|
|
1393
|
return $number; |
|
2977
|
|
|
|
|
|
|
} ## end sub scan_number |
|
2978
|
|
|
|
|
|
|
|
|
2979
|
44
|
|
|
44
|
|
364
|
use constant VERIFY_FASTNUM => 0; |
|
|
44
|
|
|
|
|
77
|
|
|
|
44
|
|
|
|
|
26216
|
|
|
2980
|
|
|
|
|
|
|
|
|
2981
|
|
|
|
|
|
|
sub scan_number_fast { |
|
2982
|
|
|
|
|
|
|
|
|
2983
|
2900
|
|
|
2900
|
0
|
3538
|
my $self = shift; |
|
2984
|
|
|
|
|
|
|
|
|
2985
|
|
|
|
|
|
|
# This is a wrapper for sub scan_number. It does a fast preliminary |
|
2986
|
|
|
|
|
|
|
# scan for a simple integer. It calls the original scan_number if it |
|
2987
|
|
|
|
|
|
|
# does not find one. |
|
2988
|
|
|
|
|
|
|
|
|
2989
|
2900
|
|
|
|
|
3555
|
my $i_begin = $i; |
|
2990
|
2900
|
|
|
|
|
3623
|
my $tok_begin = $tok; |
|
2991
|
2900
|
|
|
|
|
3257
|
my $number; |
|
2992
|
|
|
|
|
|
|
|
|
2993
|
|
|
|
|
|
|
#--------------------------------- |
|
2994
|
|
|
|
|
|
|
# Quick check for (signed) integer |
|
2995
|
|
|
|
|
|
|
#--------------------------------- |
|
2996
|
|
|
|
|
|
|
|
|
2997
|
|
|
|
|
|
|
# This will be the string of digits: |
|
2998
|
2900
|
|
|
|
|
3405
|
my $i_d = $i; |
|
2999
|
2900
|
|
|
|
|
3459
|
my $tok_d = $tok; |
|
3000
|
2900
|
|
|
|
|
4357
|
my $typ_d = $rtoken_type->[$i_d]; |
|
3001
|
|
|
|
|
|
|
|
|
3002
|
|
|
|
|
|
|
# check for signed integer |
|
3003
|
2900
|
|
|
|
|
3850
|
my $sign = EMPTY_STRING; |
|
3004
|
2900
|
50
|
66
|
|
|
7288
|
if ( $typ_d ne 'd' |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
3005
|
|
|
|
|
|
|
&& ( $typ_d eq '+' || $typ_d eq '-' ) |
|
3006
|
|
|
|
|
|
|
&& $i_d < $max_token_index ) |
|
3007
|
|
|
|
|
|
|
{ |
|
3008
|
392
|
|
|
|
|
555
|
$sign = $tok_d; |
|
3009
|
392
|
|
|
|
|
499
|
$i_d++; |
|
3010
|
392
|
|
|
|
|
551
|
$tok_d = $rtokens->[$i_d]; |
|
3011
|
392
|
|
|
|
|
576
|
$typ_d = $rtoken_type->[$i_d]; |
|
3012
|
|
|
|
|
|
|
} |
|
3013
|
|
|
|
|
|
|
|
|
3014
|
|
|
|
|
|
|
# Handle integers |
|
3015
|
2900
|
100
|
100
|
|
|
17047
|
if ( |
|
|
|
|
100
|
|
|
|
|
|
3016
|
|
|
|
|
|
|
$typ_d eq 'd' |
|
3017
|
|
|
|
|
|
|
&& ( |
|
3018
|
|
|
|
|
|
|
$i_d == $max_token_index |
|
3019
|
|
|
|
|
|
|
|| ( $i_d < $max_token_index |
|
3020
|
|
|
|
|
|
|
&& $rtoken_type->[ $i_d + 1 ] ne '.' |
|
3021
|
|
|
|
|
|
|
&& $rtoken_type->[ $i_d + 1 ] ne 'w' ) |
|
3022
|
|
|
|
|
|
|
) |
|
3023
|
|
|
|
|
|
|
) |
|
3024
|
|
|
|
|
|
|
{ |
|
3025
|
|
|
|
|
|
|
# Let the full scanner handle multi-digit integers beginning with |
|
3026
|
|
|
|
|
|
|
# '0' because there could be error messages. For example, '009' is |
|
3027
|
|
|
|
|
|
|
# not a valid number. |
|
3028
|
|
|
|
|
|
|
|
|
3029
|
2284
|
100
|
100
|
|
|
7411
|
if ( $tok_d eq '0' || substr( $tok_d, 0, 1 ) ne '0' ) { |
|
3030
|
2227
|
|
|
|
|
3502
|
$number = $sign . $tok_d; |
|
3031
|
2227
|
|
|
|
|
2826
|
$type = 'n'; |
|
3032
|
2227
|
|
|
|
|
2953
|
$i = $i_d; |
|
3033
|
|
|
|
|
|
|
} |
|
3034
|
|
|
|
|
|
|
} |
|
3035
|
|
|
|
|
|
|
|
|
3036
|
|
|
|
|
|
|
#-------------------------------------- |
|
3037
|
|
|
|
|
|
|
# Verify correctness during development |
|
3038
|
|
|
|
|
|
|
#-------------------------------------- |
|
3039
|
2900
|
|
|
|
|
3175
|
if ( VERIFY_FASTNUM && defined($number) ) { |
|
3040
|
|
|
|
|
|
|
|
|
3041
|
|
|
|
|
|
|
# We will call the full method |
|
3042
|
|
|
|
|
|
|
my $type_simple = $type; |
|
3043
|
|
|
|
|
|
|
my $i_simple = $i; |
|
3044
|
|
|
|
|
|
|
my $number_simple = $number; |
|
3045
|
|
|
|
|
|
|
|
|
3046
|
|
|
|
|
|
|
$tok = $tok_begin; |
|
3047
|
|
|
|
|
|
|
$i = $i_begin; |
|
3048
|
|
|
|
|
|
|
$number = $self->scan_number(); |
|
3049
|
|
|
|
|
|
|
|
|
3050
|
|
|
|
|
|
|
if ( $type ne $type_simple |
|
3051
|
|
|
|
|
|
|
|| ( $i != $i_simple && $i <= $max_token_index ) |
|
3052
|
|
|
|
|
|
|
|| $number ne $number_simple ) |
|
3053
|
|
|
|
|
|
|
{ |
|
3054
|
|
|
|
|
|
|
print {*STDERR} <<EOM; |
|
3055
|
|
|
|
|
|
|
scan_number_fast differs from scan_number: |
|
3056
|
|
|
|
|
|
|
simple: i=$i_simple, type=$type_simple, number=$number_simple |
|
3057
|
|
|
|
|
|
|
full: i=$i, type=$type, number=$number |
|
3058
|
|
|
|
|
|
|
EOM |
|
3059
|
|
|
|
|
|
|
} |
|
3060
|
|
|
|
|
|
|
} |
|
3061
|
|
|
|
|
|
|
|
|
3062
|
|
|
|
|
|
|
#---------------------------------------- |
|
3063
|
|
|
|
|
|
|
# call full scanner if may not be integer |
|
3064
|
|
|
|
|
|
|
#---------------------------------------- |
|
3065
|
2900
|
100
|
|
|
|
5086
|
if ( !defined($number) ) { |
|
3066
|
673
|
|
|
|
|
1530
|
$number = $self->scan_number(); |
|
3067
|
|
|
|
|
|
|
} |
|
3068
|
2900
|
|
|
|
|
6146
|
return $number; |
|
3069
|
|
|
|
|
|
|
} ## end sub scan_number_fast |
|
3070
|
|
|
|
|
|
|
|
|
3071
|
|
|
|
|
|
|
sub error_if_expecting_TERM { |
|
3072
|
285
|
|
|
285
|
0
|
542
|
my ($self) = @_; |
|
3073
|
|
|
|
|
|
|
|
|
3074
|
|
|
|
|
|
|
# Issue a warning if a binary operator is missing a term to operate on |
|
3075
|
|
|
|
|
|
|
|
|
3076
|
|
|
|
|
|
|
# This should only be called if a term is expected here |
|
3077
|
285
|
50
|
|
|
|
685
|
if ( $expecting != TERM ) { return } |
|
|
0
|
|
|
|
|
0
|
|
|
3078
|
|
|
|
|
|
|
|
|
3079
|
|
|
|
|
|
|
# Be sure a TERM is definitely required .. |
|
3080
|
285
|
50
|
66
|
|
|
2290
|
if ( |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
3081
|
|
|
|
|
|
|
|
|
3082
|
|
|
|
|
|
|
# .. following a binary operator token type, like '=' |
|
3083
|
|
|
|
|
|
|
$is_binary_or_unary_operator_type{$last_nonblank_type} |
|
3084
|
|
|
|
|
|
|
|
|
3085
|
|
|
|
|
|
|
# .. or following a binary keyword operator, like 'and' |
|
3086
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 'k' |
|
3087
|
|
|
|
|
|
|
&& $is_binary_or_unary_keyword{$last_nonblank_token} ) |
|
3088
|
|
|
|
|
|
|
|
|
3089
|
|
|
|
|
|
|
# .. or for a binary operator following something like a ';' |
|
3090
|
|
|
|
|
|
|
|| ( $is_not_a_TERM_producer_type{$last_nonblank_type} |
|
3091
|
|
|
|
|
|
|
&& $is_binary_operator_type{$tok} ) |
|
3092
|
|
|
|
|
|
|
) |
|
3093
|
|
|
|
|
|
|
{ |
|
3094
|
|
|
|
|
|
|
|
|
3095
|
|
|
|
|
|
|
# We must exclude error checking in sub signatures which have some |
|
3096
|
|
|
|
|
|
|
# unusual syntax. For example the following syntax is okay within |
|
3097
|
|
|
|
|
|
|
# a signature: |
|
3098
|
|
|
|
|
|
|
# sub mysub ($=,) {...} |
|
3099
|
|
|
|
|
|
|
# $ = undef |
|
3100
|
0
|
|
|
|
|
0
|
my $ct = $rparen_type->[$paren_depth]; |
|
3101
|
0
|
0
|
0
|
|
|
0
|
if ( $ct && $ct =~ /^sub\b/ ) { return } |
|
|
0
|
|
|
|
|
0
|
|
|
3102
|
|
|
|
|
|
|
|
|
3103
|
|
|
|
|
|
|
$self->report_unexpected( |
|
3104
|
|
|
|
|
|
|
{ |
|
3105
|
0
|
|
|
|
|
0
|
found => $tok, |
|
3106
|
|
|
|
|
|
|
expecting => "term", |
|
3107
|
|
|
|
|
|
|
i_tok => $i_tok, |
|
3108
|
|
|
|
|
|
|
last_nonblank_i => $last_nonblank_i, |
|
3109
|
|
|
|
|
|
|
rpretoken_map => $rtoken_map, |
|
3110
|
|
|
|
|
|
|
rpretoken_type => $rtoken_type, |
|
3111
|
|
|
|
|
|
|
input_line => $input_line, |
|
3112
|
|
|
|
|
|
|
} |
|
3113
|
|
|
|
|
|
|
); |
|
3114
|
0
|
|
|
|
|
0
|
return 1; |
|
3115
|
|
|
|
|
|
|
} |
|
3116
|
285
|
|
|
|
|
736
|
return; |
|
3117
|
|
|
|
|
|
|
} ## end sub error_if_expecting_TERM |
|
3118
|
|
|
|
|
|
|
|
|
3119
|
|
|
|
|
|
|
# a sub to warn if token found where operator expected |
|
3120
|
|
|
|
|
|
|
sub error_if_expecting_OPERATOR { |
|
3121
|
|
|
|
|
|
|
|
|
3122
|
809
|
|
|
809
|
0
|
1368
|
my ( $self, ($thing) ) = @_; |
|
3123
|
|
|
|
|
|
|
|
|
3124
|
|
|
|
|
|
|
# Issue warning on error if expecting operator |
|
3125
|
|
|
|
|
|
|
# Given: |
|
3126
|
|
|
|
|
|
|
# $thing = the unexpected token or issue |
|
3127
|
|
|
|
|
|
|
# = undef to use current pre-token |
|
3128
|
|
|
|
|
|
|
|
|
3129
|
809
|
50
|
|
|
|
1588
|
if ( $expecting == OPERATOR ) { |
|
3130
|
0
|
0
|
|
|
|
0
|
if ( !defined($thing) ) { $thing = $tok } |
|
|
0
|
|
|
|
|
0
|
|
|
3131
|
|
|
|
|
|
|
$self->report_unexpected( |
|
3132
|
|
|
|
|
|
|
{ |
|
3133
|
0
|
|
|
|
|
0
|
found => $thing, |
|
3134
|
|
|
|
|
|
|
expecting => "operator", |
|
3135
|
|
|
|
|
|
|
i_tok => $i_tok, |
|
3136
|
|
|
|
|
|
|
last_nonblank_i => $last_nonblank_i, |
|
3137
|
|
|
|
|
|
|
rpretoken_map => $rtoken_map, |
|
3138
|
|
|
|
|
|
|
rpretoken_type => $rtoken_type, |
|
3139
|
|
|
|
|
|
|
input_line => $input_line, |
|
3140
|
|
|
|
|
|
|
} |
|
3141
|
|
|
|
|
|
|
); |
|
3142
|
0
|
0
|
|
|
|
0
|
if ( $i_tok == 0 ) { |
|
3143
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
|
3144
|
0
|
|
|
|
|
0
|
$self->warning("Missing ';' or ',' above?\n"); |
|
3145
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
|
3146
|
|
|
|
|
|
|
} |
|
3147
|
0
|
|
|
|
|
0
|
return 1; |
|
3148
|
|
|
|
|
|
|
} |
|
3149
|
809
|
|
|
|
|
1272
|
return; |
|
3150
|
|
|
|
|
|
|
} ## end sub error_if_expecting_OPERATOR |
|
3151
|
|
|
|
|
|
|
|
|
3152
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
|
3153
|
|
|
|
|
|
|
# end scanner interfaces |
|
3154
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
|
3155
|
|
|
|
|
|
|
|
|
3156
|
|
|
|
|
|
|
#------------------ |
|
3157
|
|
|
|
|
|
|
# Tokenization subs |
|
3158
|
|
|
|
|
|
|
#------------------ |
|
3159
|
|
|
|
|
|
|
# An identifier in possible indirect object location followed by any of |
|
3160
|
|
|
|
|
|
|
# these tokens: -> , ; } (plus others) is not an indirect object. Fix c257. |
|
3161
|
|
|
|
|
|
|
my %Z_test_hash; |
|
3162
|
|
|
|
|
|
|
|
|
3163
|
|
|
|
|
|
|
BEGIN { |
|
3164
|
44
|
|
|
44
|
|
313
|
my @qZ = qw# |
|
3165
|
|
|
|
|
|
|
-> ; } ) ] |
|
3166
|
|
|
|
|
|
|
=> =~ = == !~ || >= != *= .. && |= .= -= += <= %= |
|
3167
|
|
|
|
|
|
|
^= &&= ||= //= <=> |
|
3168
|
|
|
|
|
|
|
#; |
|
3169
|
44
|
|
|
|
|
115
|
push @qZ, COMMA; |
|
3170
|
44
|
|
|
|
|
252358
|
$Z_test_hash{$_} = 1 for @qZ; |
|
3171
|
|
|
|
|
|
|
} |
|
3172
|
|
|
|
|
|
|
|
|
3173
|
|
|
|
|
|
|
sub do_DOLLAR_SIGN { |
|
3174
|
|
|
|
|
|
|
|
|
3175
|
4833
|
|
|
4833
|
0
|
6389
|
my $self = shift; |
|
3176
|
|
|
|
|
|
|
|
|
3177
|
|
|
|
|
|
|
# '$' |
|
3178
|
|
|
|
|
|
|
# start looking for a scalar |
|
3179
|
4833
|
50
|
|
|
|
8469
|
$self->error_if_expecting_OPERATOR("Scalar") |
|
3180
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
|
3181
|
4833
|
|
|
|
|
13402
|
$self->scan_simple_identifier(); |
|
3182
|
|
|
|
|
|
|
|
|
3183
|
4833
|
100
|
|
|
|
8641
|
if ( $identifier eq '$^W' ) { |
|
3184
|
1
|
|
|
|
|
3
|
$self->[_saw_perl_dash_w_] = 1; |
|
3185
|
|
|
|
|
|
|
} |
|
3186
|
|
|
|
|
|
|
|
|
3187
|
|
|
|
|
|
|
# Check for identifier in indirect object slot |
|
3188
|
|
|
|
|
|
|
# (vorboard.pl, sort.t). Something like: |
|
3189
|
|
|
|
|
|
|
# /^(print|printf|sort|exec|system)$/ |
|
3190
|
4833
|
100
|
66
|
|
|
30514
|
if ( |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
3191
|
|
|
|
|
|
|
$is_indirect_object_taker{$last_nonblank_token} |
|
3192
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'k' |
|
3193
|
|
|
|
|
|
|
|| ( ( $last_nonblank_token eq '(' ) |
|
3194
|
|
|
|
|
|
|
&& $is_indirect_object_taker{ $rparen_type->[$paren_depth] } ) |
|
3195
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 'w' |
|
3196
|
|
|
|
|
|
|
|| $last_nonblank_type eq 'U' ) # possible object |
|
3197
|
|
|
|
|
|
|
) |
|
3198
|
|
|
|
|
|
|
{ |
|
3199
|
|
|
|
|
|
|
|
|
3200
|
|
|
|
|
|
|
# An identifier followed by '->' is not indirect object; |
|
3201
|
|
|
|
|
|
|
# fixes b1175, b1176. Fix c257: Likewise for other tokens like |
|
3202
|
|
|
|
|
|
|
# comma, semicolon, closing brace, and single space. |
|
3203
|
104
|
|
|
|
|
384
|
my ( $next_nonblank_token, $i_next_uu ) = |
|
3204
|
|
|
|
|
|
|
$self->find_next_noncomment_token( $i, $rtokens, |
|
3205
|
|
|
|
|
|
|
$max_token_index ); |
|
3206
|
104
|
100
|
|
|
|
343
|
$type = 'Z' if ( !$Z_test_hash{$next_nonblank_token} ); |
|
3207
|
|
|
|
|
|
|
} |
|
3208
|
4833
|
|
|
|
|
6561
|
return; |
|
3209
|
|
|
|
|
|
|
} ## end sub do_DOLLAR_SIGN |
|
3210
|
|
|
|
|
|
|
|
|
3211
|
|
|
|
|
|
|
sub do_LEFT_PARENTHESIS { |
|
3212
|
|
|
|
|
|
|
|
|
3213
|
2423
|
|
|
2423
|
0
|
3332
|
my $self = shift; |
|
3214
|
|
|
|
|
|
|
|
|
3215
|
|
|
|
|
|
|
# '(' |
|
3216
|
2423
|
|
|
|
|
3116
|
++$paren_depth; |
|
3217
|
|
|
|
|
|
|
|
|
3218
|
|
|
|
|
|
|
# variable to enable check for brace after closing paren (c230) |
|
3219
|
2423
|
|
|
|
|
3473
|
my $want_brace = EMPTY_STRING; |
|
3220
|
|
|
|
|
|
|
|
|
3221
|
2423
|
100
|
66
|
|
|
7900
|
if ($want_paren) { |
|
|
|
100
|
|
|
|
|
|
|
3222
|
289
|
|
|
|
|
462
|
$container_type = $want_paren; |
|
3223
|
289
|
|
|
|
|
449
|
$want_brace = $want_paren; |
|
3224
|
289
|
|
|
|
|
458
|
$want_paren = EMPTY_STRING; |
|
3225
|
|
|
|
|
|
|
} |
|
3226
|
|
|
|
|
|
|
elsif ( substr( $statement_type, 0, 3 ) eq 'sub' |
|
3227
|
|
|
|
|
|
|
&& $statement_type =~ /^sub\b/ ) |
|
3228
|
|
|
|
|
|
|
{ |
|
3229
|
20
|
|
|
|
|
38
|
$container_type = $statement_type; |
|
3230
|
|
|
|
|
|
|
} |
|
3231
|
|
|
|
|
|
|
else { |
|
3232
|
2114
|
|
|
|
|
2871
|
$container_type = $last_nonblank_token; |
|
3233
|
|
|
|
|
|
|
|
|
3234
|
|
|
|
|
|
|
# We can check for a syntax error here of unexpected '(', |
|
3235
|
|
|
|
|
|
|
# but this is going to get messy... |
|
3236
|
2114
|
100
|
100
|
|
|
7134
|
if ( |
|
3237
|
|
|
|
|
|
|
$expecting == OPERATOR |
|
3238
|
|
|
|
|
|
|
|
|
3239
|
|
|
|
|
|
|
# Be sure this is not a method call of the form |
|
3240
|
|
|
|
|
|
|
# &method(...), $method->(..), &{method}(...), |
|
3241
|
|
|
|
|
|
|
# $ref[2](list) is ok & short for $ref[2]->(list) |
|
3242
|
|
|
|
|
|
|
# NOTE: at present, braces in something like &{ xxx } |
|
3243
|
|
|
|
|
|
|
# are not marked as a block, we might have a method call. |
|
3244
|
|
|
|
|
|
|
# Added ')' to fix case c017, something like ()()() |
|
3245
|
|
|
|
|
|
|
&& $last_nonblank_token !~ /^(?:[\]\}\)\&]|\-\>)/ |
|
3246
|
|
|
|
|
|
|
) |
|
3247
|
|
|
|
|
|
|
{ |
|
3248
|
|
|
|
|
|
|
|
|
3249
|
|
|
|
|
|
|
# ref: camel 3 p 703. |
|
3250
|
3
|
50
|
|
|
|
14
|
if ( $last_last_nonblank_token eq 'do' ) { |
|
3251
|
0
|
|
|
|
|
0
|
$self->complain( |
|
3252
|
|
|
|
|
|
|
"do SUBROUTINE is deprecated; consider & or -> notation\n" |
|
3253
|
|
|
|
|
|
|
); |
|
3254
|
|
|
|
|
|
|
} |
|
3255
|
|
|
|
|
|
|
else { |
|
3256
|
|
|
|
|
|
|
|
|
3257
|
|
|
|
|
|
|
# if this is an empty list, (), then it is not an |
|
3258
|
|
|
|
|
|
|
# error; for example, we might have a constant pi and |
|
3259
|
|
|
|
|
|
|
# invoke it with pi() or just pi; |
|
3260
|
3
|
|
|
|
|
15
|
my ( $next_nonblank_token, $i_next_uu ) = |
|
3261
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, |
|
3262
|
|
|
|
|
|
|
$max_token_index ); |
|
3263
|
|
|
|
|
|
|
|
|
3264
|
|
|
|
|
|
|
# Patch for c029: give up error check if |
|
3265
|
|
|
|
|
|
|
# a side comment follows |
|
3266
|
3
|
50
|
33
|
|
|
20
|
if ( $next_nonblank_token ne ')' |
|
3267
|
|
|
|
|
|
|
&& $next_nonblank_token ne '#' ) |
|
3268
|
|
|
|
|
|
|
{ |
|
3269
|
0
|
|
|
|
|
0
|
my $hint; |
|
3270
|
|
|
|
|
|
|
|
|
3271
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR('('); |
|
3272
|
|
|
|
|
|
|
|
|
3273
|
0
|
0
|
|
|
|
0
|
if ( $last_nonblank_type eq 'C' ) { |
|
|
|
0
|
|
|
|
|
|
|
3274
|
0
|
|
|
|
|
0
|
$hint = |
|
3275
|
|
|
|
|
|
|
"$last_nonblank_token has a void prototype\n"; |
|
3276
|
|
|
|
|
|
|
} |
|
3277
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'i' ) { |
|
3278
|
0
|
0
|
0
|
|
|
0
|
if ( $i_tok > 0 |
|
3279
|
|
|
|
|
|
|
&& $last_nonblank_token =~ /^\$/ ) |
|
3280
|
|
|
|
|
|
|
{ |
|
3281
|
0
|
|
|
|
|
0
|
$hint = |
|
3282
|
|
|
|
|
|
|
"Do you mean '$last_nonblank_token->(' ?\n"; |
|
3283
|
|
|
|
|
|
|
} |
|
3284
|
|
|
|
|
|
|
} |
|
3285
|
|
|
|
|
|
|
else { |
|
3286
|
|
|
|
|
|
|
## no hint |
|
3287
|
|
|
|
|
|
|
} |
|
3288
|
0
|
0
|
|
|
|
0
|
if ($hint) { |
|
3289
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
|
3290
|
0
|
|
|
|
|
0
|
$self->warning($hint); |
|
3291
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
|
3292
|
|
|
|
|
|
|
} |
|
3293
|
|
|
|
|
|
|
} ## end if ( $next_nonblank_token... |
|
3294
|
|
|
|
|
|
|
} ## end else [ if ( $last_last_nonblank_token... |
|
3295
|
|
|
|
|
|
|
} ## end if ( $expecting == OPERATOR... |
|
3296
|
|
|
|
|
|
|
} |
|
3297
|
|
|
|
|
|
|
|
|
3298
|
2423
|
|
|
|
|
7611
|
( $type_sequence, $indent_flag ) = |
|
3299
|
|
|
|
|
|
|
$self->increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); |
|
3300
|
|
|
|
|
|
|
|
|
3301
|
|
|
|
|
|
|
# propagate types down through nested parens |
|
3302
|
|
|
|
|
|
|
# for example: the second paren in 'if ((' would be structural |
|
3303
|
|
|
|
|
|
|
# since the first is. |
|
3304
|
|
|
|
|
|
|
|
|
3305
|
2423
|
100
|
|
|
|
4524
|
if ( $last_nonblank_token eq '(' ) { |
|
3306
|
61
|
|
|
|
|
111
|
$type = $last_nonblank_type; |
|
3307
|
|
|
|
|
|
|
} |
|
3308
|
|
|
|
|
|
|
|
|
3309
|
|
|
|
|
|
|
# We exclude parens as structural after a ',' because it |
|
3310
|
|
|
|
|
|
|
# causes subtle problems with continuation indentation for |
|
3311
|
|
|
|
|
|
|
# something like this, where the first 'or' will not get |
|
3312
|
|
|
|
|
|
|
# indented. |
|
3313
|
|
|
|
|
|
|
# |
|
3314
|
|
|
|
|
|
|
# assert( |
|
3315
|
|
|
|
|
|
|
# __LINE__, |
|
3316
|
|
|
|
|
|
|
# ( not defined $check ) |
|
3317
|
|
|
|
|
|
|
# or ref $check |
|
3318
|
|
|
|
|
|
|
# or $check eq "new" |
|
3319
|
|
|
|
|
|
|
# or $check eq "old", |
|
3320
|
|
|
|
|
|
|
# ); |
|
3321
|
|
|
|
|
|
|
# |
|
3322
|
|
|
|
|
|
|
# Likewise, we exclude parens where a statement can start |
|
3323
|
|
|
|
|
|
|
# because of problems with continuation indentation, like |
|
3324
|
|
|
|
|
|
|
# these: |
|
3325
|
|
|
|
|
|
|
# |
|
3326
|
|
|
|
|
|
|
# ($firstline =~ /^#\!.*perl/) |
|
3327
|
|
|
|
|
|
|
# and (print $File::Find::name, "\n") |
|
3328
|
|
|
|
|
|
|
# and (return 1); |
|
3329
|
|
|
|
|
|
|
# |
|
3330
|
|
|
|
|
|
|
# (ref($usage_fref) =~ /CODE/) |
|
3331
|
|
|
|
|
|
|
# ? &$usage_fref |
|
3332
|
|
|
|
|
|
|
# : (&blast_usage, &blast_params, &blast_general_params); |
|
3333
|
|
|
|
|
|
|
|
|
3334
|
|
|
|
|
|
|
else { |
|
3335
|
2362
|
|
|
|
|
3226
|
$type = '{'; |
|
3336
|
|
|
|
|
|
|
} |
|
3337
|
|
|
|
|
|
|
|
|
3338
|
2423
|
50
|
|
|
|
4557
|
if ( $last_nonblank_type eq ')' ) { |
|
3339
|
0
|
|
|
|
|
0
|
$self->warning( |
|
3340
|
|
|
|
|
|
|
"Syntax error? found token '$last_nonblank_type' then '('\n"); |
|
3341
|
|
|
|
|
|
|
} |
|
3342
|
|
|
|
|
|
|
|
|
3343
|
|
|
|
|
|
|
# git #105: Copy container type and want-brace flag at ') ('; |
|
3344
|
|
|
|
|
|
|
# propagate the container type onward so that any subsequent brace gets |
|
3345
|
|
|
|
|
|
|
# correctly marked. I have implemented this as a general rule, which |
|
3346
|
|
|
|
|
|
|
# should be safe, but if necessary it could be restricted to certain |
|
3347
|
|
|
|
|
|
|
# container statement types such as 'for'. |
|
3348
|
2423
|
100
|
|
|
|
4780
|
if ( $last_nonblank_token eq ')' ) { |
|
3349
|
1
|
|
|
|
|
2
|
my $rvars = $rparen_vars->[$paren_depth]; |
|
3350
|
1
|
50
|
|
|
|
3
|
if ( defined($rvars) ) { |
|
3351
|
1
|
|
|
|
|
2
|
$container_type = $rparen_type->[$paren_depth]; |
|
3352
|
1
|
|
|
|
|
2
|
( my $type_lp_uu, $want_brace ) = @{$rvars}; |
|
|
1
|
|
|
|
|
3
|
|
|
3353
|
|
|
|
|
|
|
} |
|
3354
|
|
|
|
|
|
|
} |
|
3355
|
|
|
|
|
|
|
|
|
3356
|
2423
|
|
|
|
|
4125
|
$rparen_type->[$paren_depth] = $container_type; |
|
3357
|
2423
|
|
|
|
|
5795
|
$rparen_vars->[$paren_depth] = [ $type, $want_brace ]; |
|
3358
|
2423
|
|
|
|
|
4030
|
$rparen_semicolon_count->[$paren_depth] = 0; |
|
3359
|
|
|
|
|
|
|
|
|
3360
|
2423
|
|
|
|
|
3495
|
return; |
|
3361
|
|
|
|
|
|
|
|
|
3362
|
|
|
|
|
|
|
} ## end sub do_LEFT_PARENTHESIS |
|
3363
|
|
|
|
|
|
|
|
|
3364
|
|
|
|
|
|
|
sub do_RIGHT_PARENTHESIS { |
|
3365
|
|
|
|
|
|
|
|
|
3366
|
2423
|
|
|
2423
|
0
|
3216
|
my $self = shift; |
|
3367
|
|
|
|
|
|
|
|
|
3368
|
|
|
|
|
|
|
# ')' |
|
3369
|
2423
|
|
|
|
|
6700
|
( $type_sequence, $indent_flag ) = |
|
3370
|
|
|
|
|
|
|
$self->decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); |
|
3371
|
|
|
|
|
|
|
|
|
3372
|
2423
|
|
|
|
|
4147
|
my $rvars = $rparen_vars->[$paren_depth]; |
|
3373
|
2423
|
50
|
|
|
|
4755
|
if ( defined($rvars) ) { |
|
3374
|
2423
|
|
|
|
|
2911
|
my ( $type_lp, $want_brace_uu ) = @{$rvars}; |
|
|
2423
|
|
|
|
|
4642
|
|
|
3375
|
2423
|
50
|
33
|
|
|
8328
|
if ( $type_lp && $type_lp eq '{' ) { |
|
3376
|
2423
|
|
|
|
|
3702
|
$type = '}'; |
|
3377
|
|
|
|
|
|
|
} |
|
3378
|
|
|
|
|
|
|
} |
|
3379
|
|
|
|
|
|
|
|
|
3380
|
2423
|
|
|
|
|
3698
|
$container_type = $rparen_type->[$paren_depth]; |
|
3381
|
|
|
|
|
|
|
|
|
3382
|
|
|
|
|
|
|
# restore statement type as 'sub' at closing paren of a signature |
|
3383
|
|
|
|
|
|
|
# so that a subsequent ':' is identified as an attribute |
|
3384
|
2423
|
100
|
100
|
|
|
6635
|
if ( substr( $container_type, 0, 3 ) eq 'sub' |
|
3385
|
|
|
|
|
|
|
&& $container_type =~ /^sub\b/ ) |
|
3386
|
|
|
|
|
|
|
{ |
|
3387
|
30
|
|
|
|
|
60
|
$statement_type = $container_type; |
|
3388
|
|
|
|
|
|
|
} |
|
3389
|
|
|
|
|
|
|
|
|
3390
|
2423
|
100
|
|
|
|
5947
|
if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) { |
|
3391
|
79
|
|
|
|
|
141
|
my $num_sc = $rparen_semicolon_count->[$paren_depth]; |
|
3392
|
79
|
50
|
66
|
|
|
345
|
if ( $num_sc > 0 && $num_sc != 2 ) { |
|
3393
|
0
|
|
|
|
|
0
|
$self->warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); |
|
3394
|
|
|
|
|
|
|
} |
|
3395
|
|
|
|
|
|
|
} |
|
3396
|
|
|
|
|
|
|
|
|
3397
|
2423
|
50
|
|
|
|
4373
|
if ( $paren_depth > 0 ) { $paren_depth-- } |
|
|
2423
|
|
|
|
|
3007
|
|
|
3398
|
2423
|
|
|
|
|
3680
|
return; |
|
3399
|
|
|
|
|
|
|
} ## end sub do_RIGHT_PARENTHESIS |
|
3400
|
|
|
|
|
|
|
|
|
3401
|
|
|
|
|
|
|
sub do_COMMA { |
|
3402
|
|
|
|
|
|
|
|
|
3403
|
3688
|
|
|
3688
|
0
|
4602
|
my $self = shift; |
|
3404
|
|
|
|
|
|
|
|
|
3405
|
|
|
|
|
|
|
# ',' |
|
3406
|
3688
|
100
|
33
|
|
|
9756
|
if ( $last_nonblank_type eq COMMA ) { |
|
|
|
50
|
|
|
|
|
|
|
3407
|
10
|
|
|
|
|
22
|
$self->complain("Repeated ','s \n"); |
|
3408
|
|
|
|
|
|
|
} |
|
3409
|
|
|
|
|
|
|
|
|
3410
|
|
|
|
|
|
|
# Note that we have to check both token and type here because a |
|
3411
|
|
|
|
|
|
|
# comma following a qw list can have last token='(' but type = 'q' |
|
3412
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' ) { |
|
3413
|
0
|
|
|
|
|
0
|
$self->warning("Unexpected leading ',' after a '('\n"); |
|
3414
|
|
|
|
|
|
|
} |
|
3415
|
|
|
|
|
|
|
else { |
|
3416
|
|
|
|
|
|
|
## Error check added in update c565, moved to end of $code loop. |
|
3417
|
|
|
|
|
|
|
} |
|
3418
|
|
|
|
|
|
|
|
|
3419
|
|
|
|
|
|
|
# patch for operator_expected: note if we are in the list (use.t) |
|
3420
|
3688
|
100
|
|
|
|
6242
|
if ( $statement_type eq 'use' ) { $statement_type = '_use' } |
|
|
6
|
|
|
|
|
11
|
|
|
3421
|
3688
|
|
|
|
|
4531
|
return; |
|
3422
|
|
|
|
|
|
|
|
|
3423
|
|
|
|
|
|
|
} ## end sub do_COMMA |
|
3424
|
|
|
|
|
|
|
|
|
3425
|
|
|
|
|
|
|
sub do_SEMICOLON { |
|
3426
|
|
|
|
|
|
|
|
|
3427
|
2890
|
|
|
2890
|
0
|
3869
|
my $self = shift; |
|
3428
|
|
|
|
|
|
|
|
|
3429
|
|
|
|
|
|
|
# ';' |
|
3430
|
2890
|
|
|
|
|
3719
|
$context = UNKNOWN_CONTEXT; |
|
3431
|
2890
|
|
|
|
|
3759
|
$statement_type = EMPTY_STRING; |
|
3432
|
2890
|
|
|
|
|
3744
|
$want_paren = EMPTY_STRING; |
|
3433
|
|
|
|
|
|
|
|
|
3434
|
2890
|
100
|
|
|
|
7004
|
if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) |
|
3435
|
|
|
|
|
|
|
{ # mark ; in for loop |
|
3436
|
|
|
|
|
|
|
|
|
3437
|
|
|
|
|
|
|
# Be careful: we do not want a semicolon such as the |
|
3438
|
|
|
|
|
|
|
# following to be included: |
|
3439
|
|
|
|
|
|
|
# |
|
3440
|
|
|
|
|
|
|
# for (sort {strcoll($a,$b);} keys %investments) { |
|
3441
|
|
|
|
|
|
|
|
|
3442
|
35
|
100
|
66
|
|
|
197
|
if ( $brace_depth == $rdepth_array->[PAREN]->[BRACE]->[$paren_depth] |
|
3443
|
|
|
|
|
|
|
&& $square_bracket_depth == |
|
3444
|
|
|
|
|
|
|
$rdepth_array->[PAREN]->[SQUARE_BRACKET]->[$paren_depth] ) |
|
3445
|
|
|
|
|
|
|
{ |
|
3446
|
|
|
|
|
|
|
|
|
3447
|
34
|
|
|
|
|
54
|
$type = 'f'; |
|
3448
|
34
|
|
|
|
|
52
|
$rparen_semicolon_count->[$paren_depth]++; |
|
3449
|
|
|
|
|
|
|
} |
|
3450
|
|
|
|
|
|
|
} |
|
3451
|
|
|
|
|
|
|
else { |
|
3452
|
|
|
|
|
|
|
## Error check added in update c565, moved to end of $code loop. |
|
3453
|
|
|
|
|
|
|
} |
|
3454
|
2890
|
|
|
|
|
3868
|
return; |
|
3455
|
|
|
|
|
|
|
} ## end sub do_SEMICOLON |
|
3456
|
|
|
|
|
|
|
|
|
3457
|
|
|
|
|
|
|
sub do_QUOTATION_MARK { |
|
3458
|
|
|
|
|
|
|
|
|
3459
|
1253
|
|
|
1253
|
0
|
1727
|
my $self = shift; |
|
3460
|
|
|
|
|
|
|
|
|
3461
|
|
|
|
|
|
|
# '"' |
|
3462
|
1253
|
50
|
|
|
|
2474
|
$self->error_if_expecting_OPERATOR("String") |
|
3463
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
|
3464
|
1253
|
|
|
|
|
1711
|
$in_quote = 1; |
|
3465
|
1253
|
|
|
|
|
1639
|
$type = 'Q'; |
|
3466
|
1253
|
|
|
|
|
1601
|
$allowed_quote_modifiers = EMPTY_STRING; |
|
3467
|
1253
|
|
|
|
|
1556
|
$quote_starting_tok = $tok; |
|
3468
|
1253
|
|
|
|
|
1644
|
$quote_here_target_2 = undef; |
|
3469
|
1253
|
|
|
|
|
1666
|
return; |
|
3470
|
|
|
|
|
|
|
} ## end sub do_QUOTATION_MARK |
|
3471
|
|
|
|
|
|
|
|
|
3472
|
|
|
|
|
|
|
sub do_APOSTROPHE { |
|
3473
|
|
|
|
|
|
|
|
|
3474
|
1335
|
|
|
1335
|
0
|
1817
|
my $self = shift; |
|
3475
|
|
|
|
|
|
|
|
|
3476
|
|
|
|
|
|
|
# "'" |
|
3477
|
1335
|
50
|
|
|
|
2555
|
$self->error_if_expecting_OPERATOR("String") |
|
3478
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
|
3479
|
1335
|
|
|
|
|
1698
|
$in_quote = 1; |
|
3480
|
1335
|
|
|
|
|
1725
|
$type = 'Q'; |
|
3481
|
1335
|
|
|
|
|
1636
|
$allowed_quote_modifiers = EMPTY_STRING; |
|
3482
|
1335
|
|
|
|
|
1612
|
$quote_starting_tok = $tok; |
|
3483
|
1335
|
|
|
|
|
1745
|
$quote_here_target_2 = undef; |
|
3484
|
1335
|
|
|
|
|
1778
|
return; |
|
3485
|
|
|
|
|
|
|
} ## end sub do_APOSTROPHE |
|
3486
|
|
|
|
|
|
|
|
|
3487
|
|
|
|
|
|
|
sub do_BACKTICK { |
|
3488
|
|
|
|
|
|
|
|
|
3489
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
3490
|
|
|
|
|
|
|
|
|
3491
|
|
|
|
|
|
|
# '`' |
|
3492
|
0
|
0
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR("String") |
|
3493
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
|
3494
|
0
|
|
|
|
|
0
|
$in_quote = 1; |
|
3495
|
0
|
|
|
|
|
0
|
$type = 'Q'; |
|
3496
|
0
|
|
|
|
|
0
|
$allowed_quote_modifiers = EMPTY_STRING; |
|
3497
|
0
|
|
|
|
|
0
|
$quote_starting_tok = $tok; |
|
3498
|
0
|
|
|
|
|
0
|
$quote_here_target_2 = undef; |
|
3499
|
0
|
|
|
|
|
0
|
return; |
|
3500
|
|
|
|
|
|
|
} ## end sub do_BACKTICK |
|
3501
|
|
|
|
|
|
|
|
|
3502
|
|
|
|
|
|
|
sub do_SLASH { |
|
3503
|
|
|
|
|
|
|
|
|
3504
|
225
|
|
|
225
|
0
|
351
|
my $self = shift; |
|
3505
|
|
|
|
|
|
|
|
|
3506
|
|
|
|
|
|
|
# '/' |
|
3507
|
225
|
|
|
|
|
380
|
my $is_pattern; |
|
3508
|
|
|
|
|
|
|
|
|
3509
|
|
|
|
|
|
|
# a pattern cannot follow certain keywords which take optional |
|
3510
|
|
|
|
|
|
|
# arguments, like 'shift' and 'pop'. See also '?'. |
|
3511
|
225
|
50
|
66
|
|
|
1033
|
if ( |
|
|
|
50
|
|
|
|
|
|
|
3512
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' |
|
3513
|
|
|
|
|
|
|
&& $is_keyword_rejecting_slash_as_pattern_delimiter{ |
|
3514
|
|
|
|
|
|
|
$last_nonblank_token} |
|
3515
|
|
|
|
|
|
|
) |
|
3516
|
|
|
|
|
|
|
{ |
|
3517
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
3518
|
|
|
|
|
|
|
} |
|
3519
|
|
|
|
|
|
|
elsif ( $expecting == UNKNOWN ) { # indeterminate, must guess.. |
|
3520
|
|
|
|
|
|
|
( |
|
3521
|
0
|
|
|
|
|
0
|
$is_pattern, |
|
3522
|
|
|
|
|
|
|
my $msg, |
|
3523
|
|
|
|
|
|
|
|
|
3524
|
|
|
|
|
|
|
) = $self->guess_if_pattern_or_division( |
|
3525
|
|
|
|
|
|
|
|
|
3526
|
|
|
|
|
|
|
$i, |
|
3527
|
|
|
|
|
|
|
$rtokens, |
|
3528
|
|
|
|
|
|
|
$rtoken_type, |
|
3529
|
|
|
|
|
|
|
$rtoken_map, |
|
3530
|
|
|
|
|
|
|
$max_token_index, |
|
3531
|
|
|
|
|
|
|
); |
|
3532
|
|
|
|
|
|
|
|
|
3533
|
0
|
0
|
|
|
|
0
|
if ($msg) { |
|
3534
|
0
|
|
|
|
|
0
|
if ( 0 && DEBUG_GUESS_MODE ) { |
|
3535
|
|
|
|
|
|
|
$self->warning("DEBUG_GUESS_MODE message:\n$msg\n"); |
|
3536
|
|
|
|
|
|
|
} |
|
3537
|
0
|
|
|
|
|
0
|
$self->write_diagnostics("DIVIDE:$msg\n"); |
|
3538
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry($msg); |
|
3539
|
|
|
|
|
|
|
} |
|
3540
|
|
|
|
|
|
|
} |
|
3541
|
225
|
|
|
|
|
460
|
else { $is_pattern = ( $expecting == TERM ) } |
|
3542
|
|
|
|
|
|
|
|
|
3543
|
225
|
100
|
|
|
|
508
|
if ($is_pattern) { |
|
3544
|
88
|
|
|
|
|
134
|
$in_quote = 1; |
|
3545
|
88
|
|
|
|
|
137
|
$type = 'Q'; |
|
3546
|
88
|
|
|
|
|
233
|
$allowed_quote_modifiers = $quote_modifiers{'m'}; |
|
3547
|
88
|
|
|
|
|
129
|
$quote_starting_tok = 'm'; |
|
3548
|
88
|
|
|
|
|
149
|
$quote_here_target_2 = undef; |
|
3549
|
|
|
|
|
|
|
} |
|
3550
|
|
|
|
|
|
|
else { # not a pattern; check for a /= token |
|
3551
|
|
|
|
|
|
|
|
|
3552
|
137
|
100
|
|
|
|
436
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /= |
|
3553
|
4
|
|
|
|
|
7
|
$i++; |
|
3554
|
4
|
|
|
|
|
9
|
$tok = '/='; |
|
3555
|
4
|
|
|
|
|
8
|
$type = $tok; |
|
3556
|
|
|
|
|
|
|
} |
|
3557
|
|
|
|
|
|
|
|
|
3558
|
|
|
|
|
|
|
#DEBUG - collecting info on what tokens follow a divide |
|
3559
|
|
|
|
|
|
|
# for development of guessing algorithm |
|
3560
|
|
|
|
|
|
|
## if ( |
|
3561
|
|
|
|
|
|
|
## $self->is_possible_numerator( $i, $rtokens, |
|
3562
|
|
|
|
|
|
|
## $max_token_index ) < 0 |
|
3563
|
|
|
|
|
|
|
## ) |
|
3564
|
|
|
|
|
|
|
## { |
|
3565
|
|
|
|
|
|
|
## $self->write_diagnostics("DIVIDE? $input_line\n"); |
|
3566
|
|
|
|
|
|
|
## } |
|
3567
|
|
|
|
|
|
|
} |
|
3568
|
225
|
|
|
|
|
381
|
return; |
|
3569
|
|
|
|
|
|
|
} ## end sub do_SLASH |
|
3570
|
|
|
|
|
|
|
|
|
3571
|
|
|
|
|
|
|
sub do_LEFT_CURLY_BRACKET { |
|
3572
|
|
|
|
|
|
|
|
|
3573
|
2016
|
|
|
2016
|
0
|
2856
|
my $self = shift; |
|
3574
|
|
|
|
|
|
|
|
|
3575
|
|
|
|
|
|
|
# '{' |
|
3576
|
|
|
|
|
|
|
# if we just saw a ')', we will label this block with |
|
3577
|
|
|
|
|
|
|
# its type. We need to do this to allow sub |
|
3578
|
|
|
|
|
|
|
# code_block_type to determine if this brace starts a |
|
3579
|
|
|
|
|
|
|
# code block or anonymous hash. (The type of a paren |
|
3580
|
|
|
|
|
|
|
# pair is the preceding token, such as 'if', 'else', |
|
3581
|
|
|
|
|
|
|
# etc). |
|
3582
|
2016
|
|
|
|
|
2890
|
$container_type = EMPTY_STRING; |
|
3583
|
|
|
|
|
|
|
|
|
3584
|
|
|
|
|
|
|
# ATTRS: for a '{' following an attribute list, reset |
|
3585
|
|
|
|
|
|
|
# things to look like we just saw a sub name |
|
3586
|
|
|
|
|
|
|
# Added 'package' (can be 'class') for --use-feature=class (rt145706) |
|
3587
|
2016
|
100
|
100
|
|
|
15044
|
if ( substr( $statement_type, 0, 3 ) eq 'sub' ) { |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
3588
|
36
|
|
|
|
|
70
|
$last_nonblank_token = $statement_type; |
|
3589
|
36
|
|
|
|
|
50
|
$last_nonblank_type = 'S'; # c250 change |
|
3590
|
36
|
|
|
|
|
64
|
$statement_type = EMPTY_STRING; |
|
3591
|
|
|
|
|
|
|
} |
|
3592
|
|
|
|
|
|
|
elsif ( substr( $statement_type, 0, 7 ) eq 'package' ) { |
|
3593
|
10
|
|
|
|
|
16
|
$last_nonblank_token = $statement_type; |
|
3594
|
10
|
|
|
|
|
13
|
$last_nonblank_type = 'P'; # c250 change |
|
3595
|
10
|
|
|
|
|
14
|
$statement_type = EMPTY_STRING; |
|
3596
|
|
|
|
|
|
|
} |
|
3597
|
|
|
|
|
|
|
|
|
3598
|
|
|
|
|
|
|
# patch for SWITCH/CASE: hide these keywords from an immediately |
|
3599
|
|
|
|
|
|
|
# following opening brace |
|
3600
|
|
|
|
|
|
|
elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) |
|
3601
|
|
|
|
|
|
|
&& $statement_type eq $last_nonblank_token ) |
|
3602
|
|
|
|
|
|
|
{ |
|
3603
|
0
|
|
|
|
|
0
|
$last_nonblank_token = ";"; |
|
3604
|
|
|
|
|
|
|
} |
|
3605
|
|
|
|
|
|
|
|
|
3606
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq ')' ) { |
|
3607
|
295
|
|
|
|
|
629
|
$last_nonblank_token = $rparen_type->[ $paren_depth + 1 ]; |
|
3608
|
|
|
|
|
|
|
|
|
3609
|
|
|
|
|
|
|
# defensive move in case of a nesting error (pbug.t) |
|
3610
|
|
|
|
|
|
|
# in which this ')' had no previous '(' |
|
3611
|
|
|
|
|
|
|
# this nesting error will have been caught |
|
3612
|
295
|
50
|
|
|
|
732
|
if ( !defined($last_nonblank_token) ) { |
|
3613
|
0
|
|
|
|
|
0
|
$last_nonblank_token = 'if'; |
|
3614
|
|
|
|
|
|
|
} |
|
3615
|
|
|
|
|
|
|
|
|
3616
|
|
|
|
|
|
|
# Syntax check at '){' |
|
3617
|
295
|
100
|
|
|
|
818
|
if ( $is_blocktype_with_paren{$last_nonblank_token} ) { |
|
3618
|
|
|
|
|
|
|
|
|
3619
|
279
|
|
|
|
|
538
|
my $rvars = $rparen_vars->[ $paren_depth + 1 ]; |
|
3620
|
279
|
50
|
|
|
|
689
|
if ( defined($rvars) ) { |
|
3621
|
279
|
|
|
|
|
429
|
my ( $type_lp_uu, $want_brace ) = @{$rvars}; |
|
|
279
|
|
|
|
|
596
|
|
|
3622
|
|
|
|
|
|
|
|
|
3623
|
|
|
|
|
|
|
# OLD: Now verify that this is not a trailing form |
|
3624
|
|
|
|
|
|
|
# FIX for git #124: we have to skip this check because |
|
3625
|
|
|
|
|
|
|
# the 'gather' keyword of List::Gather can operate on |
|
3626
|
|
|
|
|
|
|
# a full statement, so it isn't possible to be sure |
|
3627
|
|
|
|
|
|
|
# this is a trailing form. |
|
3628
|
279
|
|
|
|
|
571
|
if ( 0 && !$want_brace ) { |
|
3629
|
|
|
|
|
|
|
$self->warning( |
|
3630
|
|
|
|
|
|
|
"syntax error at ') {', unexpected '{' after closing ')' of a trailing '$last_nonblank_token'\n" |
|
3631
|
|
|
|
|
|
|
); |
|
3632
|
|
|
|
|
|
|
} |
|
3633
|
|
|
|
|
|
|
} |
|
3634
|
|
|
|
|
|
|
} |
|
3635
|
|
|
|
|
|
|
else { |
|
3636
|
16
|
50
|
|
|
|
46
|
if ($rOpts_extended_syntax) { |
|
3637
|
|
|
|
|
|
|
|
|
3638
|
|
|
|
|
|
|
# we append a trailing () to mark this as an unknown |
|
3639
|
|
|
|
|
|
|
# block type. This allows perltidy to format some |
|
3640
|
|
|
|
|
|
|
# common extensions of perl syntax. |
|
3641
|
|
|
|
|
|
|
# This is used by sub code_block_type |
|
3642
|
16
|
|
|
|
|
63
|
$last_nonblank_token .= '()'; |
|
3643
|
|
|
|
|
|
|
} |
|
3644
|
|
|
|
|
|
|
else { |
|
3645
|
0
|
|
|
|
|
0
|
my $list = |
|
3646
|
|
|
|
|
|
|
join( SPACE, sort keys %is_blocktype_with_paren ); |
|
3647
|
0
|
|
|
|
|
0
|
$self->warning( |
|
3648
|
|
|
|
|
|
|
"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n" |
|
3649
|
|
|
|
|
|
|
); |
|
3650
|
|
|
|
|
|
|
} |
|
3651
|
|
|
|
|
|
|
} |
|
3652
|
|
|
|
|
|
|
} |
|
3653
|
|
|
|
|
|
|
|
|
3654
|
|
|
|
|
|
|
# patch for paren-less for/foreach glitch, part 2. |
|
3655
|
|
|
|
|
|
|
# see note below under 'qw' |
|
3656
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq 'qw' |
|
3657
|
|
|
|
|
|
|
&& $is_for_foreach{$want_paren} ) |
|
3658
|
|
|
|
|
|
|
{ |
|
3659
|
0
|
|
|
|
|
0
|
$last_nonblank_token = $want_paren; |
|
3660
|
0
|
0
|
|
|
|
0
|
if ( $last_last_nonblank_token eq $want_paren ) { |
|
3661
|
0
|
|
|
|
|
0
|
$self->warning( |
|
3662
|
|
|
|
|
|
|
"syntax error at '$want_paren .. {' -- missing \$ loop variable\n" |
|
3663
|
|
|
|
|
|
|
); |
|
3664
|
|
|
|
|
|
|
|
|
3665
|
|
|
|
|
|
|
} |
|
3666
|
0
|
|
|
|
|
0
|
$want_paren = EMPTY_STRING; |
|
3667
|
|
|
|
|
|
|
} |
|
3668
|
|
|
|
|
|
|
else { |
|
3669
|
|
|
|
|
|
|
# not special |
|
3670
|
|
|
|
|
|
|
} |
|
3671
|
|
|
|
|
|
|
|
|
3672
|
|
|
|
|
|
|
# now identify which of the three possible types of |
|
3673
|
|
|
|
|
|
|
# curly braces we have: hash index container, anonymous |
|
3674
|
|
|
|
|
|
|
# hash reference, or code block. |
|
3675
|
|
|
|
|
|
|
|
|
3676
|
|
|
|
|
|
|
# Patch for Object::Pad "field $var BLOCK" |
|
3677
|
2016
|
100
|
66
|
|
|
8111
|
if ( $statement_type eq 'field' |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
3678
|
|
|
|
|
|
|
&& $last_last_nonblank_token eq 'field' |
|
3679
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'i' |
|
3680
|
|
|
|
|
|
|
&& $last_last_nonblank_type eq 'k' ) |
|
3681
|
|
|
|
|
|
|
{ |
|
3682
|
8
|
|
|
|
|
9
|
$type = '{'; |
|
3683
|
8
|
|
|
|
|
10
|
$block_type = $statement_type; |
|
3684
|
|
|
|
|
|
|
} |
|
3685
|
|
|
|
|
|
|
|
|
3686
|
|
|
|
|
|
|
# non-structural (hash index) curly brace pair |
|
3687
|
|
|
|
|
|
|
# get marked 'L' and 'R' |
|
3688
|
|
|
|
|
|
|
elsif ( is_non_structural_brace() ) { |
|
3689
|
564
|
|
|
|
|
875
|
$type = 'L'; |
|
3690
|
|
|
|
|
|
|
|
|
3691
|
|
|
|
|
|
|
# patch for SWITCH/CASE: |
|
3692
|
|
|
|
|
|
|
# allow paren-less identifier after 'when' |
|
3693
|
|
|
|
|
|
|
# if the brace is preceded by a space |
|
3694
|
564
|
0
|
33
|
|
|
1506
|
if ( $statement_type eq 'when' |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3695
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'i' |
|
3696
|
|
|
|
|
|
|
&& $last_last_nonblank_type eq 'k' |
|
3697
|
|
|
|
|
|
|
&& ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) |
|
3698
|
|
|
|
|
|
|
{ |
|
3699
|
0
|
|
|
|
|
0
|
$type = '{'; |
|
3700
|
0
|
|
|
|
|
0
|
$block_type = $statement_type; |
|
3701
|
|
|
|
|
|
|
} |
|
3702
|
|
|
|
|
|
|
} |
|
3703
|
|
|
|
|
|
|
|
|
3704
|
|
|
|
|
|
|
# code and anonymous hash have the same type, '{', but are |
|
3705
|
|
|
|
|
|
|
# distinguished by 'block_type', |
|
3706
|
|
|
|
|
|
|
# which will be blank for an anonymous hash |
|
3707
|
|
|
|
|
|
|
else { |
|
3708
|
1444
|
|
|
|
|
4176
|
$block_type = |
|
3709
|
|
|
|
|
|
|
$self->code_block_type( $i_tok, $rtokens, $rtoken_type, |
|
3710
|
|
|
|
|
|
|
$max_token_index ); |
|
3711
|
|
|
|
|
|
|
|
|
3712
|
|
|
|
|
|
|
# Is a new lexical sub looking for its block sequence number? |
|
3713
|
|
|
|
|
|
|
# This is indicated with a special '911' signal. |
|
3714
|
1444
|
0
|
66
|
|
|
5687
|
if ( $block_type |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
3715
|
|
|
|
|
|
|
&& $ris_lexical_sub->{911} |
|
3716
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'S' |
|
3717
|
|
|
|
|
|
|
&& substr( $block_type, 0, 3 ) eq 'sub' ) |
|
3718
|
|
|
|
|
|
|
{ |
|
3719
|
0
|
|
|
|
|
0
|
my ( $subname, $package ) = @{ $ris_lexical_sub->{911} }; |
|
|
0
|
|
|
|
|
0
|
|
|
3720
|
0
|
0
|
0
|
|
|
0
|
if ( $block_type =~ /^sub $subname/ |
|
3721
|
|
|
|
|
|
|
&& $is_my_our_state{$last_last_nonblank_token} ) |
|
3722
|
|
|
|
|
|
|
{ |
|
3723
|
0
|
|
|
|
|
0
|
$ris_lexical_sub->{$subname}->{$package} = |
|
3724
|
|
|
|
|
|
|
$next_sequence_number; |
|
3725
|
|
|
|
|
|
|
} |
|
3726
|
|
|
|
|
|
|
|
|
3727
|
|
|
|
|
|
|
# Turn the signal off, even if we did not find the block being |
|
3728
|
|
|
|
|
|
|
# sought - it may not exist if the sub statement was a simple |
|
3729
|
|
|
|
|
|
|
# declaration without a block definition. |
|
3730
|
0
|
|
|
|
|
0
|
$ris_lexical_sub->{911} = undef; |
|
3731
|
|
|
|
|
|
|
} |
|
3732
|
|
|
|
|
|
|
|
|
3733
|
|
|
|
|
|
|
# patch to promote bareword type to function taking block |
|
3734
|
1444
|
100
|
100
|
|
|
5065
|
if ( $block_type |
|
|
|
|
66
|
|
|
|
|
|
3735
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'w' |
|
3736
|
|
|
|
|
|
|
&& $last_nonblank_i >= 0 ) |
|
3737
|
|
|
|
|
|
|
{ |
|
3738
|
36
|
50
|
|
|
|
105
|
if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) { |
|
3739
|
|
|
|
|
|
|
$routput_token_type->[$last_nonblank_i] = |
|
3740
|
36
|
100
|
|
|
|
131
|
$is_grep_alias{$block_type} ? 'k' : 'G'; |
|
3741
|
|
|
|
|
|
|
} |
|
3742
|
|
|
|
|
|
|
} |
|
3743
|
|
|
|
|
|
|
|
|
3744
|
|
|
|
|
|
|
# patch for SWITCH/CASE: if we find a stray opening block brace |
|
3745
|
|
|
|
|
|
|
# where we might accept a 'case' or 'when' block, then take it |
|
3746
|
1444
|
100
|
100
|
|
|
4736
|
if ( $statement_type eq 'case' |
|
3747
|
|
|
|
|
|
|
|| $statement_type eq 'when' ) |
|
3748
|
|
|
|
|
|
|
{ |
|
3749
|
48
|
100
|
66
|
|
|
201
|
if ( !$block_type || $block_type eq '}' ) { |
|
3750
|
4
|
|
|
|
|
6
|
$block_type = $statement_type; |
|
3751
|
|
|
|
|
|
|
} |
|
3752
|
|
|
|
|
|
|
} |
|
3753
|
|
|
|
|
|
|
} |
|
3754
|
|
|
|
|
|
|
|
|
3755
|
2016
|
|
|
|
|
3852
|
$rbrace_type->[ ++$brace_depth ] = $block_type; |
|
3756
|
|
|
|
|
|
|
|
|
3757
|
|
|
|
|
|
|
# Patch for CLASS BLOCK definitions: do not update the package for the |
|
3758
|
|
|
|
|
|
|
# current depth if this is a BLOCK type definition. |
|
3759
|
|
|
|
|
|
|
# TODO: should make 'class' separate from 'package' and only do |
|
3760
|
|
|
|
|
|
|
# this for 'class' |
|
3761
|
2016
|
100
|
|
|
|
5519
|
$rbrace_package->[$brace_depth] = $current_package |
|
3762
|
|
|
|
|
|
|
if ( substr( $block_type, 0, 8 ) ne 'package ' ); |
|
3763
|
|
|
|
|
|
|
|
|
3764
|
2016
|
|
|
|
|
3342
|
$rbrace_structural_type->[$brace_depth] = $type; |
|
3765
|
2016
|
|
|
|
|
3251
|
$rbrace_context->[$brace_depth] = $context; |
|
3766
|
2016
|
|
|
|
|
5086
|
( $type_sequence, $indent_flag ) = |
|
3767
|
|
|
|
|
|
|
$self->increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); |
|
3768
|
|
|
|
|
|
|
|
|
3769
|
2016
|
|
|
|
|
3212
|
return; |
|
3770
|
|
|
|
|
|
|
} ## end sub do_LEFT_CURLY_BRACKET |
|
3771
|
|
|
|
|
|
|
|
|
3772
|
|
|
|
|
|
|
sub do_RIGHT_CURLY_BRACKET { |
|
3773
|
|
|
|
|
|
|
|
|
3774
|
2016
|
|
|
2016
|
0
|
2900
|
my $self = shift; |
|
3775
|
|
|
|
|
|
|
|
|
3776
|
|
|
|
|
|
|
# '}' |
|
3777
|
2016
|
|
|
|
|
3388
|
$block_type = $rbrace_type->[$brace_depth]; |
|
3778
|
2016
|
100
|
|
|
|
4072
|
if ($block_type) { $statement_type = EMPTY_STRING } |
|
|
1115
|
|
|
|
|
1635
|
|
|
3779
|
2016
|
100
|
|
|
|
3903
|
if ( defined( $rbrace_package->[$brace_depth] ) ) { |
|
3780
|
2006
|
|
|
|
|
3290
|
$current_package = $rbrace_package->[$brace_depth]; |
|
3781
|
|
|
|
|
|
|
} |
|
3782
|
|
|
|
|
|
|
|
|
3783
|
|
|
|
|
|
|
# can happen on brace error (caught elsewhere) |
|
3784
|
|
|
|
|
|
|
else { |
|
3785
|
|
|
|
|
|
|
} |
|
3786
|
2016
|
|
|
|
|
5579
|
( $type_sequence, $indent_flag ) = |
|
3787
|
|
|
|
|
|
|
$self->decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); |
|
3788
|
|
|
|
|
|
|
|
|
3789
|
2016
|
100
|
|
|
|
4764
|
if ( $rbrace_structural_type->[$brace_depth] eq 'L' ) { |
|
3790
|
564
|
|
|
|
|
831
|
$type = 'R'; |
|
3791
|
|
|
|
|
|
|
} |
|
3792
|
|
|
|
|
|
|
|
|
3793
|
|
|
|
|
|
|
# propagate type information for 'do' and 'eval' blocks, and also |
|
3794
|
|
|
|
|
|
|
# for smartmatch operator. This is necessary to enable us to know |
|
3795
|
|
|
|
|
|
|
# if an operator or term is expected next. |
|
3796
|
2016
|
100
|
|
|
|
4724
|
if ( $is_block_operator{$block_type} ) { |
|
3797
|
85
|
|
|
|
|
138
|
$tok = $block_type; |
|
3798
|
|
|
|
|
|
|
} |
|
3799
|
|
|
|
|
|
|
|
|
3800
|
|
|
|
|
|
|
# pop non-indenting brace stack if sequence number matches |
|
3801
|
2016
|
100
|
100
|
|
|
2451
|
if ( @{ $self->[_rnon_indenting_brace_stack_] } |
|
|
2016
|
|
|
|
|
5180
|
|
|
3802
|
|
|
|
|
|
|
&& $self->[_rnon_indenting_brace_stack_]->[-1] eq $type_sequence ) |
|
3803
|
|
|
|
|
|
|
{ |
|
3804
|
6
|
|
|
|
|
14
|
pop @{ $self->[_rnon_indenting_brace_stack_] }; |
|
|
6
|
|
|
|
|
14
|
|
|
3805
|
|
|
|
|
|
|
} |
|
3806
|
|
|
|
|
|
|
|
|
3807
|
2016
|
|
|
|
|
3220
|
$context = $rbrace_context->[$brace_depth]; |
|
3808
|
2016
|
50
|
|
|
|
3885
|
if ( $brace_depth > 0 ) { $brace_depth--; } |
|
|
2016
|
|
|
|
|
2693
|
|
|
3809
|
2016
|
|
|
|
|
2816
|
return; |
|
3810
|
|
|
|
|
|
|
} ## end sub do_RIGHT_CURLY_BRACKET |
|
3811
|
|
|
|
|
|
|
|
|
3812
|
|
|
|
|
|
|
sub do_AMPERSAND { |
|
3813
|
|
|
|
|
|
|
|
|
3814
|
126
|
|
|
126
|
0
|
190
|
my $self = shift; |
|
3815
|
|
|
|
|
|
|
|
|
3816
|
|
|
|
|
|
|
# '&' = maybe sub call? start looking |
|
3817
|
|
|
|
|
|
|
# We have to check for sub call unless we are sure we |
|
3818
|
|
|
|
|
|
|
# are expecting an operator. This example from s2p |
|
3819
|
|
|
|
|
|
|
# got mistaken as a q operator in an early version: |
|
3820
|
|
|
|
|
|
|
# print BODY &q(<<'EOT'); |
|
3821
|
126
|
100
|
|
|
|
306
|
if ( $expecting != OPERATOR ) { |
|
3822
|
|
|
|
|
|
|
|
|
3823
|
|
|
|
|
|
|
# But only look for a sub call if we are expecting a term or |
|
3824
|
|
|
|
|
|
|
# if there is no existing space after the &. |
|
3825
|
|
|
|
|
|
|
# For example we probably don't want & as sub call here: |
|
3826
|
|
|
|
|
|
|
# Fcntl::S_IRUSR & $mode; |
|
3827
|
106
|
100
|
66
|
|
|
450
|
if ( $expecting == TERM || $next_type ne 'b' ) { |
|
3828
|
103
|
|
|
|
|
271
|
$self->scan_simple_identifier(); |
|
3829
|
|
|
|
|
|
|
} |
|
3830
|
|
|
|
|
|
|
} |
|
3831
|
|
|
|
|
|
|
else { |
|
3832
|
|
|
|
|
|
|
} |
|
3833
|
126
|
|
|
|
|
195
|
return; |
|
3834
|
|
|
|
|
|
|
} ## end sub do_AMPERSAND |
|
3835
|
|
|
|
|
|
|
|
|
3836
|
|
|
|
|
|
|
sub do_LESS_THAN_SIGN { |
|
3837
|
|
|
|
|
|
|
|
|
3838
|
33
|
|
|
33
|
0
|
69
|
my $self = shift; |
|
3839
|
|
|
|
|
|
|
|
|
3840
|
|
|
|
|
|
|
# '<' - angle operator or less than? |
|
3841
|
33
|
100
|
|
|
|
109
|
if ( $expecting != OPERATOR ) { |
|
3842
|
8
|
|
|
|
|
39
|
( $i, $type ) = $self->find_angle_operator_termination( |
|
3843
|
|
|
|
|
|
|
|
|
3844
|
|
|
|
|
|
|
$input_line, |
|
3845
|
|
|
|
|
|
|
$i, |
|
3846
|
|
|
|
|
|
|
$rtoken_map, |
|
3847
|
|
|
|
|
|
|
$expecting, |
|
3848
|
|
|
|
|
|
|
$max_token_index, |
|
3849
|
|
|
|
|
|
|
); |
|
3850
|
8
|
|
|
|
|
15
|
if ( DEBUG_GUESS_MODE && $expecting == UNKNOWN ) { |
|
3851
|
|
|
|
|
|
|
my $msg = "guessing that '<' is "; |
|
3852
|
|
|
|
|
|
|
$msg .= |
|
3853
|
|
|
|
|
|
|
$type eq 'Q' ? "an angle operator" : "a less than symbol"; |
|
3854
|
|
|
|
|
|
|
if ( $type eq 'Q' ) { |
|
3855
|
|
|
|
|
|
|
$self->warning("DEBUG_GUESS_MODE message:\n$msg\n"); |
|
3856
|
|
|
|
|
|
|
} |
|
3857
|
|
|
|
|
|
|
} |
|
3858
|
|
|
|
|
|
|
} |
|
3859
|
|
|
|
|
|
|
else { |
|
3860
|
|
|
|
|
|
|
} |
|
3861
|
33
|
|
|
|
|
64
|
return; |
|
3862
|
|
|
|
|
|
|
} ## end sub do_LESS_THAN_SIGN |
|
3863
|
|
|
|
|
|
|
|
|
3864
|
|
|
|
|
|
|
sub do_QUESTION_MARK { |
|
3865
|
|
|
|
|
|
|
|
|
3866
|
193
|
|
|
193
|
0
|
340
|
my $self = shift; |
|
3867
|
|
|
|
|
|
|
|
|
3868
|
|
|
|
|
|
|
# '?' = conditional or starting pattern? |
|
3869
|
193
|
|
|
|
|
335
|
my $is_pattern; |
|
3870
|
|
|
|
|
|
|
|
|
3871
|
|
|
|
|
|
|
# Patch for rt #126965 |
|
3872
|
|
|
|
|
|
|
# a pattern cannot follow certain keywords which take optional |
|
3873
|
|
|
|
|
|
|
# arguments, like 'shift' and 'pop'. See also '/'. |
|
3874
|
193
|
100
|
66
|
|
|
1474
|
if ( |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
3875
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' |
|
3876
|
|
|
|
|
|
|
&& $is_keyword_rejecting_question_as_pattern_delimiter{ |
|
3877
|
|
|
|
|
|
|
$last_nonblank_token} |
|
3878
|
|
|
|
|
|
|
) |
|
3879
|
|
|
|
|
|
|
{ |
|
3880
|
1
|
|
|
|
|
2
|
$is_pattern = 0; |
|
3881
|
|
|
|
|
|
|
} |
|
3882
|
|
|
|
|
|
|
|
|
3883
|
|
|
|
|
|
|
# patch for RT#131288, user constant function without prototype |
|
3884
|
|
|
|
|
|
|
# last type is 'U' followed by ?. |
|
3885
|
|
|
|
|
|
|
elsif ( $last_nonblank_type =~ /^[FUY]$/ ) { |
|
3886
|
1
|
|
|
|
|
2
|
$is_pattern = 0; |
|
3887
|
|
|
|
|
|
|
} |
|
3888
|
|
|
|
|
|
|
elsif ( $expecting == UNKNOWN ) { |
|
3889
|
|
|
|
|
|
|
|
|
3890
|
|
|
|
|
|
|
# In older versions of Perl, a bare ? can be a pattern |
|
3891
|
|
|
|
|
|
|
# delimiter. In perl version 5.22 this was |
|
3892
|
|
|
|
|
|
|
# dropped, but we have to support it in order to format |
|
3893
|
|
|
|
|
|
|
# older programs. See: |
|
3894
|
|
|
|
|
|
|
## https://perl.developpez.com/documentations/en/5.22.0/perl5211delta.html |
|
3895
|
|
|
|
|
|
|
# For example, the following line worked |
|
3896
|
|
|
|
|
|
|
# at one time: |
|
3897
|
|
|
|
|
|
|
# ?(.*)? && (print $1,"\n"); |
|
3898
|
|
|
|
|
|
|
# In current versions it would have to be written with slashes: |
|
3899
|
|
|
|
|
|
|
# /(.*)/ && (print $1,"\n"); |
|
3900
|
|
|
|
|
|
|
( |
|
3901
|
12
|
|
|
|
|
60
|
$is_pattern, |
|
3902
|
|
|
|
|
|
|
my $msg, |
|
3903
|
|
|
|
|
|
|
|
|
3904
|
|
|
|
|
|
|
) = $self->guess_if_pattern_or_conditional( |
|
3905
|
|
|
|
|
|
|
|
|
3906
|
|
|
|
|
|
|
$i, |
|
3907
|
|
|
|
|
|
|
$rtokens, |
|
3908
|
|
|
|
|
|
|
$rtoken_type, |
|
3909
|
|
|
|
|
|
|
$rtoken_map, |
|
3910
|
|
|
|
|
|
|
$max_token_index, |
|
3911
|
|
|
|
|
|
|
); |
|
3912
|
|
|
|
|
|
|
|
|
3913
|
12
|
50
|
|
|
|
39
|
if ($msg) { |
|
3914
|
12
|
|
|
|
|
54
|
$self->write_logfile_entry($msg); |
|
3915
|
12
|
|
|
|
|
24
|
if ( DEBUG_GUESS_MODE && $is_pattern ) { |
|
3916
|
|
|
|
|
|
|
$self->warning("DEBUG_GUESS_MODE message:\n$msg\n"); |
|
3917
|
|
|
|
|
|
|
} |
|
3918
|
|
|
|
|
|
|
} |
|
3919
|
|
|
|
|
|
|
} |
|
3920
|
179
|
|
|
|
|
430
|
else { $is_pattern = ( $expecting == TERM ) } |
|
3921
|
|
|
|
|
|
|
|
|
3922
|
193
|
50
|
|
|
|
488
|
if ($is_pattern) { |
|
3923
|
0
|
|
|
|
|
0
|
$in_quote = 1; |
|
3924
|
0
|
|
|
|
|
0
|
$type = 'Q'; |
|
3925
|
0
|
|
|
|
|
0
|
$allowed_quote_modifiers = $quote_modifiers{'m'}; |
|
3926
|
0
|
|
|
|
|
0
|
$quote_starting_tok = 'm'; |
|
3927
|
0
|
|
|
|
|
0
|
$quote_here_target_2 = undef; |
|
3928
|
|
|
|
|
|
|
} |
|
3929
|
|
|
|
|
|
|
else { |
|
3930
|
193
|
|
|
|
|
620
|
( $type_sequence, $indent_flag ) = |
|
3931
|
|
|
|
|
|
|
$self->increase_nesting_depth( QUESTION_COLON, |
|
3932
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
|
3933
|
|
|
|
|
|
|
} |
|
3934
|
193
|
|
|
|
|
355
|
return; |
|
3935
|
|
|
|
|
|
|
} ## end sub do_QUESTION_MARK |
|
3936
|
|
|
|
|
|
|
|
|
3937
|
|
|
|
|
|
|
sub do_STAR { |
|
3938
|
|
|
|
|
|
|
|
|
3939
|
254
|
|
|
254
|
0
|
395
|
my $self = shift; |
|
3940
|
|
|
|
|
|
|
|
|
3941
|
|
|
|
|
|
|
# '*' = typeglob, or multiply? |
|
3942
|
|
|
|
|
|
|
|
|
3943
|
|
|
|
|
|
|
# Guess based on next token. See also c036, and versions before 2026- |
|
3944
|
254
|
100
|
100
|
|
|
877
|
if ( $expecting == UNKNOWN && $next_type ne 'b' ) { |
|
3945
|
|
|
|
|
|
|
|
|
3946
|
|
|
|
|
|
|
# Check for a normal glob, like *OUT: |
|
3947
|
6
|
50
|
|
|
|
32
|
if ( $next_tok =~ /^[_A-Za-z]/ ) { |
|
3948
|
0
|
|
|
|
|
0
|
$expecting = TERM; |
|
3949
|
|
|
|
|
|
|
} |
|
3950
|
|
|
|
|
|
|
else { |
|
3951
|
|
|
|
|
|
|
## Could check for glob of a simple punctuation variable here |
|
3952
|
|
|
|
|
|
|
## by looking ahead one more character |
|
3953
|
|
|
|
|
|
|
} |
|
3954
|
|
|
|
|
|
|
} |
|
3955
|
|
|
|
|
|
|
|
|
3956
|
254
|
100
|
|
|
|
688
|
if ( $expecting == TERM ) { |
|
3957
|
21
|
|
|
|
|
72
|
$self->scan_simple_identifier(); |
|
3958
|
|
|
|
|
|
|
} |
|
3959
|
|
|
|
|
|
|
else { |
|
3960
|
|
|
|
|
|
|
|
|
3961
|
233
|
100
|
|
|
|
844
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { |
|
|
|
100
|
|
|
|
|
|
|
3962
|
2
|
|
|
|
|
3
|
$tok = '*='; |
|
3963
|
2
|
|
|
|
|
4
|
$type = $tok; |
|
3964
|
2
|
|
|
|
|
4
|
$i++; |
|
3965
|
|
|
|
|
|
|
} |
|
3966
|
|
|
|
|
|
|
elsif ( $rtokens->[ $i + 1 ] eq '*' ) { |
|
3967
|
42
|
|
|
|
|
72
|
$tok = '**'; |
|
3968
|
42
|
|
|
|
|
71
|
$type = $tok; |
|
3969
|
42
|
|
|
|
|
67
|
$i++; |
|
3970
|
42
|
100
|
|
|
|
144
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { |
|
3971
|
2
|
|
|
|
|
3
|
$tok = '**='; |
|
3972
|
2
|
|
|
|
|
3
|
$type = $tok; |
|
3973
|
2
|
|
|
|
|
4
|
$i++; |
|
3974
|
|
|
|
|
|
|
} |
|
3975
|
|
|
|
|
|
|
} |
|
3976
|
|
|
|
|
|
|
else { |
|
3977
|
|
|
|
|
|
|
## not multiple characters |
|
3978
|
|
|
|
|
|
|
} |
|
3979
|
|
|
|
|
|
|
} |
|
3980
|
254
|
|
|
|
|
421
|
return; |
|
3981
|
|
|
|
|
|
|
} ## end sub do_STAR |
|
3982
|
|
|
|
|
|
|
|
|
3983
|
|
|
|
|
|
|
sub do_DOT { |
|
3984
|
|
|
|
|
|
|
|
|
3985
|
168
|
|
|
168
|
0
|
252
|
my $self = shift; |
|
3986
|
|
|
|
|
|
|
|
|
3987
|
|
|
|
|
|
|
# '.' = what kind of . ? |
|
3988
|
168
|
100
|
|
|
|
386
|
if ( $expecting != OPERATOR ) { |
|
3989
|
10
|
|
|
|
|
37
|
$self->scan_number(); |
|
3990
|
|
|
|
|
|
|
} |
|
3991
|
168
|
|
|
|
|
1463
|
return; |
|
3992
|
|
|
|
|
|
|
} ## end sub do_DOT |
|
3993
|
|
|
|
|
|
|
|
|
3994
|
|
|
|
|
|
|
sub do_COLON { |
|
3995
|
|
|
|
|
|
|
|
|
3996
|
285
|
|
|
285
|
0
|
516
|
my $self = shift; |
|
3997
|
|
|
|
|
|
|
|
|
3998
|
|
|
|
|
|
|
# ':' = label, ternary, attribute, ? |
|
3999
|
|
|
|
|
|
|
|
|
4000
|
|
|
|
|
|
|
# if this is the first nonblank character, call it a label |
|
4001
|
|
|
|
|
|
|
# since perl seems to just swallow it |
|
4002
|
285
|
50
|
66
|
|
|
3403
|
if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
4003
|
0
|
|
|
|
|
0
|
$type = 'J'; |
|
4004
|
|
|
|
|
|
|
} |
|
4005
|
|
|
|
|
|
|
|
|
4006
|
|
|
|
|
|
|
# ATTRS: check for a ':' which introduces an attribute list |
|
4007
|
|
|
|
|
|
|
# either after a 'sub' keyword or within a paren list |
|
4008
|
|
|
|
|
|
|
# Added 'package' (can be 'class') for --use-feature=class (rt145706) |
|
4009
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^(sub|package)\b/ ) { |
|
4010
|
22
|
|
|
|
|
41
|
$type = 'A'; |
|
4011
|
22
|
|
|
|
|
36
|
$self->[_in_attribute_list_] = 1; |
|
4012
|
|
|
|
|
|
|
} |
|
4013
|
|
|
|
|
|
|
|
|
4014
|
|
|
|
|
|
|
# Within a signature, unless we are in a ternary. For example, |
|
4015
|
|
|
|
|
|
|
# from 't/filter_example.t': |
|
4016
|
|
|
|
|
|
|
# method foo4 ( $class: $bar ) { $class->bar($bar) } |
|
4017
|
|
|
|
|
|
|
elsif ( $rparen_type->[$paren_depth] =~ /^sub\b/ |
|
4018
|
|
|
|
|
|
|
&& !is_balanced_closing_container(QUESTION_COLON) ) |
|
4019
|
|
|
|
|
|
|
{ |
|
4020
|
1
|
|
|
|
|
2
|
$type = 'A'; |
|
4021
|
1
|
|
|
|
|
1
|
$self->[_in_attribute_list_] = 1; |
|
4022
|
|
|
|
|
|
|
} |
|
4023
|
|
|
|
|
|
|
|
|
4024
|
|
|
|
|
|
|
# check for scalar attribute, such as |
|
4025
|
|
|
|
|
|
|
# my $foo : shared = 1; |
|
4026
|
|
|
|
|
|
|
elsif ($is_my_our_state{$statement_type} |
|
4027
|
|
|
|
|
|
|
&& $rcurrent_depth->[QUESTION_COLON] == 0 ) |
|
4028
|
|
|
|
|
|
|
{ |
|
4029
|
17
|
|
|
|
|
31
|
$type = 'A'; |
|
4030
|
17
|
|
|
|
|
27
|
$self->[_in_attribute_list_] = 1; |
|
4031
|
|
|
|
|
|
|
} |
|
4032
|
|
|
|
|
|
|
|
|
4033
|
|
|
|
|
|
|
# Look for Switch::Plain syntax if an error would otherwise occur |
|
4034
|
|
|
|
|
|
|
# here. Note that we do not need to check if the extended syntax |
|
4035
|
|
|
|
|
|
|
# flag is set because otherwise an error would occur, and we would |
|
4036
|
|
|
|
|
|
|
# then have to output a message telling the user to set the |
|
4037
|
|
|
|
|
|
|
# extended syntax flag to avoid the error. |
|
4038
|
|
|
|
|
|
|
# case 1: { |
|
4039
|
|
|
|
|
|
|
# default: { |
|
4040
|
|
|
|
|
|
|
# default: |
|
4041
|
|
|
|
|
|
|
# Note that the line 'default:' will be parsed as a label elsewhere. |
|
4042
|
|
|
|
|
|
|
elsif ( $is_case_default{$statement_type} |
|
4043
|
|
|
|
|
|
|
&& !is_balanced_closing_container(QUESTION_COLON) ) |
|
4044
|
|
|
|
|
|
|
{ |
|
4045
|
|
|
|
|
|
|
# mark it as a perltidy label type |
|
4046
|
46
|
|
|
|
|
80
|
$type = 'J'; |
|
4047
|
|
|
|
|
|
|
} |
|
4048
|
|
|
|
|
|
|
|
|
4049
|
|
|
|
|
|
|
# mark colon as attribute if an error would occur otherwise; git #162 |
|
4050
|
|
|
|
|
|
|
elsif ( !$rcurrent_depth->[QUESTION_COLON] ) { |
|
4051
|
6
|
|
|
|
|
9
|
$type = 'A'; |
|
4052
|
6
|
|
|
|
|
9
|
$self->[_in_attribute_list_] = 1; |
|
4053
|
|
|
|
|
|
|
} |
|
4054
|
|
|
|
|
|
|
|
|
4055
|
|
|
|
|
|
|
# otherwise, it should be part of a ?/: operator |
|
4056
|
|
|
|
|
|
|
else { |
|
4057
|
193
|
|
|
|
|
657
|
( $type_sequence, $indent_flag ) = |
|
4058
|
|
|
|
|
|
|
$self->decrease_nesting_depth( QUESTION_COLON, |
|
4059
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
|
4060
|
193
|
50
|
|
|
|
561
|
if ( $last_nonblank_token eq '?' ) { |
|
4061
|
0
|
|
|
|
|
0
|
$self->warning("Syntax error near ? :\n"); |
|
4062
|
|
|
|
|
|
|
} |
|
4063
|
|
|
|
|
|
|
} |
|
4064
|
285
|
|
|
|
|
486
|
return; |
|
4065
|
|
|
|
|
|
|
} ## end sub do_COLON |
|
4066
|
|
|
|
|
|
|
|
|
4067
|
|
|
|
|
|
|
sub do_PLUS_SIGN { |
|
4068
|
|
|
|
|
|
|
|
|
4069
|
240
|
|
|
240
|
0
|
399
|
my $self = shift; |
|
4070
|
|
|
|
|
|
|
|
|
4071
|
|
|
|
|
|
|
# '+' = what kind of plus? |
|
4072
|
240
|
100
|
|
|
|
988
|
if ( $expecting == TERM ) { |
|
|
|
100
|
|
|
|
|
|
|
4073
|
14
|
|
|
|
|
53
|
my $number = $self->scan_number_fast(); |
|
4074
|
|
|
|
|
|
|
|
|
4075
|
|
|
|
|
|
|
# unary plus is safest assumption if not a number |
|
4076
|
14
|
50
|
|
|
|
41
|
if ( !defined($number) ) { $type = 'p'; } |
|
|
14
|
|
|
|
|
25
|
|
|
4077
|
|
|
|
|
|
|
} |
|
4078
|
|
|
|
|
|
|
elsif ( $expecting == OPERATOR ) { |
|
4079
|
|
|
|
|
|
|
} |
|
4080
|
|
|
|
|
|
|
else { |
|
4081
|
2
|
50
|
33
|
|
|
7
|
if ( $next_type eq 'w' || $next_type eq '{' ) { $type = 'p' } |
|
|
2
|
|
|
|
|
3
|
|
|
4082
|
|
|
|
|
|
|
} |
|
4083
|
240
|
|
|
|
|
386
|
return; |
|
4084
|
|
|
|
|
|
|
} ## end sub do_PLUS_SIGN |
|
4085
|
|
|
|
|
|
|
|
|
4086
|
|
|
|
|
|
|
sub do_AT_SIGN { |
|
4087
|
|
|
|
|
|
|
|
|
4088
|
524
|
|
|
524
|
0
|
892
|
my $self = shift; |
|
4089
|
|
|
|
|
|
|
|
|
4090
|
|
|
|
|
|
|
# '@' = sigil for array? |
|
4091
|
524
|
50
|
|
|
|
1362
|
$self->error_if_expecting_OPERATOR("Array") |
|
4092
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
|
4093
|
524
|
|
|
|
|
1771
|
$self->scan_simple_identifier(); |
|
4094
|
524
|
|
|
|
|
747
|
return; |
|
4095
|
|
|
|
|
|
|
} ## end sub do_AT_SIGN |
|
4096
|
|
|
|
|
|
|
|
|
4097
|
|
|
|
|
|
|
sub do_PERCENT_SIGN { |
|
4098
|
|
|
|
|
|
|
|
|
4099
|
214
|
|
|
214
|
0
|
352
|
my $self = shift; |
|
4100
|
|
|
|
|
|
|
|
|
4101
|
|
|
|
|
|
|
# '%' = hash or modulo? |
|
4102
|
|
|
|
|
|
|
# first guess is hash if no following blank or paren |
|
4103
|
214
|
50
|
|
|
|
588
|
if ( $expecting == UNKNOWN ) { |
|
4104
|
0
|
0
|
0
|
|
|
0
|
if ( $next_type ne 'b' && $next_type ne '(' ) { |
|
4105
|
0
|
|
|
|
|
0
|
$expecting = TERM; |
|
4106
|
|
|
|
|
|
|
} |
|
4107
|
|
|
|
|
|
|
} |
|
4108
|
214
|
100
|
|
|
|
537
|
if ( $expecting == TERM ) { |
|
4109
|
204
|
|
|
|
|
662
|
$self->scan_simple_identifier(); |
|
4110
|
|
|
|
|
|
|
} |
|
4111
|
214
|
|
|
|
|
341
|
return; |
|
4112
|
|
|
|
|
|
|
} ## end sub do_PERCENT_SIGN |
|
4113
|
|
|
|
|
|
|
|
|
4114
|
|
|
|
|
|
|
sub do_LEFT_SQUARE_BRACKET { |
|
4115
|
|
|
|
|
|
|
|
|
4116
|
814
|
|
|
814
|
0
|
1111
|
my $self = shift; |
|
4117
|
|
|
|
|
|
|
|
|
4118
|
|
|
|
|
|
|
# '[' |
|
4119
|
814
|
|
|
|
|
1539
|
$rsquare_bracket_type->[ ++$square_bracket_depth ] = |
|
4120
|
|
|
|
|
|
|
$last_nonblank_token; |
|
4121
|
814
|
|
|
|
|
2200
|
( $type_sequence, $indent_flag ) = |
|
4122
|
|
|
|
|
|
|
$self->increase_nesting_depth( SQUARE_BRACKET, |
|
4123
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
|
4124
|
|
|
|
|
|
|
|
|
4125
|
|
|
|
|
|
|
# It may seem odd, but structural square brackets have |
|
4126
|
|
|
|
|
|
|
# type '{' and '}'. This simplifies the indentation logic. |
|
4127
|
814
|
100
|
|
|
|
1868
|
if ( !is_non_structural_brace() ) { |
|
4128
|
374
|
|
|
|
|
597
|
$type = '{'; |
|
4129
|
|
|
|
|
|
|
} |
|
4130
|
814
|
|
|
|
|
1475
|
$rsquare_bracket_structural_type->[$square_bracket_depth] = $type; |
|
4131
|
814
|
|
|
|
|
1188
|
return; |
|
4132
|
|
|
|
|
|
|
} ## end sub do_LEFT_SQUARE_BRACKET |
|
4133
|
|
|
|
|
|
|
|
|
4134
|
|
|
|
|
|
|
sub do_RIGHT_SQUARE_BRACKET { |
|
4135
|
|
|
|
|
|
|
|
|
4136
|
814
|
|
|
814
|
0
|
1147
|
my $self = shift; |
|
4137
|
|
|
|
|
|
|
|
|
4138
|
|
|
|
|
|
|
# ']' |
|
4139
|
814
|
|
|
|
|
2251
|
( $type_sequence, $indent_flag ) = |
|
4140
|
|
|
|
|
|
|
$self->decrease_nesting_depth( SQUARE_BRACKET, |
|
4141
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
|
4142
|
|
|
|
|
|
|
|
|
4143
|
814
|
100
|
|
|
|
1961
|
if ( $rsquare_bracket_structural_type->[$square_bracket_depth] eq '{' ) |
|
4144
|
|
|
|
|
|
|
{ |
|
4145
|
374
|
|
|
|
|
552
|
$type = '}'; |
|
4146
|
|
|
|
|
|
|
} |
|
4147
|
|
|
|
|
|
|
|
|
4148
|
|
|
|
|
|
|
# propagate type information for smartmatch operator. This is |
|
4149
|
|
|
|
|
|
|
# necessary to enable us to know if an operator or term is expected |
|
4150
|
|
|
|
|
|
|
# next. |
|
4151
|
814
|
100
|
|
|
|
1828
|
if ( $rsquare_bracket_type->[$square_bracket_depth] eq '~~' ) { |
|
4152
|
20
|
|
|
|
|
32
|
$tok = $rsquare_bracket_type->[$square_bracket_depth]; |
|
4153
|
|
|
|
|
|
|
} |
|
4154
|
|
|
|
|
|
|
|
|
4155
|
814
|
50
|
|
|
|
1695
|
if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } |
|
|
814
|
|
|
|
|
1099
|
|
|
4156
|
814
|
|
|
|
|
1191
|
return; |
|
4157
|
|
|
|
|
|
|
} ## end sub do_RIGHT_SQUARE_BRACKET |
|
4158
|
|
|
|
|
|
|
|
|
4159
|
|
|
|
|
|
|
sub do_MINUS_SIGN { |
|
4160
|
|
|
|
|
|
|
|
|
4161
|
491
|
|
|
491
|
0
|
730
|
my $self = shift; |
|
4162
|
|
|
|
|
|
|
|
|
4163
|
|
|
|
|
|
|
# '-' = what kind of minus? |
|
4164
|
491
|
100
|
100
|
|
|
2744
|
if ( ( $expecting != OPERATOR ) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
4165
|
|
|
|
|
|
|
&& $is_file_test_operator{$next_tok} ) |
|
4166
|
|
|
|
|
|
|
{ |
|
4167
|
12
|
|
|
|
|
61
|
my ( $next_nonblank_token, $i_next_uu ) = |
|
4168
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i + 1, $rtokens, |
|
4169
|
|
|
|
|
|
|
$max_token_index ); |
|
4170
|
|
|
|
|
|
|
|
|
4171
|
|
|
|
|
|
|
# check for a quoted word like "-w=>xx"; |
|
4172
|
|
|
|
|
|
|
# it is sufficient to just check for a following '=' |
|
4173
|
12
|
50
|
|
|
|
39
|
if ( $next_nonblank_token eq '=' ) { |
|
4174
|
0
|
|
|
|
|
0
|
$type = 'm'; |
|
4175
|
|
|
|
|
|
|
} |
|
4176
|
|
|
|
|
|
|
else { |
|
4177
|
12
|
|
|
|
|
20
|
$i++; |
|
4178
|
12
|
|
|
|
|
25
|
$tok .= $next_tok; |
|
4179
|
12
|
|
|
|
|
24
|
$type = 'F'; |
|
4180
|
|
|
|
|
|
|
} |
|
4181
|
|
|
|
|
|
|
} |
|
4182
|
|
|
|
|
|
|
elsif ( $expecting == TERM ) { |
|
4183
|
378
|
|
|
|
|
1020
|
my $number = $self->scan_number_fast(); |
|
4184
|
|
|
|
|
|
|
|
|
4185
|
|
|
|
|
|
|
# maybe part of bareword token? unary is safest |
|
4186
|
378
|
100
|
|
|
|
799
|
if ( !defined($number) ) { $type = 'm'; } |
|
|
288
|
|
|
|
|
402
|
|
|
4187
|
|
|
|
|
|
|
|
|
4188
|
|
|
|
|
|
|
} |
|
4189
|
|
|
|
|
|
|
elsif ( $expecting == OPERATOR ) { |
|
4190
|
|
|
|
|
|
|
} |
|
4191
|
|
|
|
|
|
|
else { |
|
4192
|
4
|
50
|
|
|
|
13
|
if ( $next_type eq 'w' ) { |
|
4193
|
4
|
|
|
|
|
8
|
$type = 'm'; |
|
4194
|
|
|
|
|
|
|
} |
|
4195
|
|
|
|
|
|
|
} |
|
4196
|
491
|
|
|
|
|
732
|
return; |
|
4197
|
|
|
|
|
|
|
} ## end sub do_MINUS_SIGN |
|
4198
|
|
|
|
|
|
|
|
|
4199
|
|
|
|
|
|
|
sub do_CARAT_SIGN { |
|
4200
|
|
|
|
|
|
|
|
|
4201
|
12
|
|
|
12
|
0
|
20
|
my $self = shift; |
|
4202
|
|
|
|
|
|
|
|
|
4203
|
|
|
|
|
|
|
# '^' |
|
4204
|
|
|
|
|
|
|
# check for special variables like ${^WARNING_BITS} |
|
4205
|
12
|
100
|
|
|
|
27
|
if ( $expecting == TERM ) { |
|
4206
|
|
|
|
|
|
|
|
|
4207
|
5
|
50
|
33
|
|
|
45
|
if ( $last_nonblank_token eq '{' |
|
|
|
|
33
|
|
|
|
|
|
4208
|
|
|
|
|
|
|
&& ( $next_tok !~ /^\d/ ) |
|
4209
|
|
|
|
|
|
|
&& ( $next_tok =~ /^\w/ ) ) |
|
4210
|
|
|
|
|
|
|
{ |
|
4211
|
|
|
|
|
|
|
|
|
4212
|
5
|
100
|
|
|
|
14
|
if ( $next_tok eq 'W' ) { |
|
4213
|
1
|
|
|
|
|
2
|
$self->[_saw_perl_dash_w_] = 1; |
|
4214
|
|
|
|
|
|
|
} |
|
4215
|
5
|
|
|
|
|
9
|
$tok = $tok . $next_tok; |
|
4216
|
5
|
|
|
|
|
10
|
$i = $i + 1; |
|
4217
|
5
|
|
|
|
|
8
|
$type = 'w'; |
|
4218
|
|
|
|
|
|
|
|
|
4219
|
|
|
|
|
|
|
# Optional coding to try to catch syntax errors. This can |
|
4220
|
|
|
|
|
|
|
# be removed if it ever causes incorrect warning messages. |
|
4221
|
|
|
|
|
|
|
# The '{^' should be preceded by either by a type or '$#' |
|
4222
|
|
|
|
|
|
|
# Examples: |
|
4223
|
|
|
|
|
|
|
# $#{^CAPTURE} ok |
|
4224
|
|
|
|
|
|
|
# *${^LAST_FH}{NAME} ok |
|
4225
|
|
|
|
|
|
|
# @{^HOWDY} ok |
|
4226
|
|
|
|
|
|
|
# $hash{^HOWDY} error |
|
4227
|
|
|
|
|
|
|
|
|
4228
|
|
|
|
|
|
|
# Note that a type sigil '$' may be tokenized as 'Z' |
|
4229
|
|
|
|
|
|
|
# after something like 'print', so allow type 'Z' |
|
4230
|
5
|
0
|
33
|
|
|
15
|
if ( $last_last_nonblank_type ne 't' |
|
|
|
|
33
|
|
|
|
|
|
4231
|
|
|
|
|
|
|
&& $last_last_nonblank_type ne 'Z' |
|
4232
|
|
|
|
|
|
|
&& $last_last_nonblank_token ne '$#' ) |
|
4233
|
|
|
|
|
|
|
{ |
|
4234
|
0
|
|
|
|
|
0
|
$self->warning("Possible syntax error near '{^'\n"); |
|
4235
|
|
|
|
|
|
|
} |
|
4236
|
|
|
|
|
|
|
} |
|
4237
|
|
|
|
|
|
|
} |
|
4238
|
12
|
|
|
|
|
20
|
return; |
|
4239
|
|
|
|
|
|
|
} ## end sub do_CARAT_SIGN |
|
4240
|
|
|
|
|
|
|
|
|
4241
|
|
|
|
|
|
|
sub do_DOUBLE_COLON { |
|
4242
|
|
|
|
|
|
|
|
|
4243
|
9
|
|
|
9
|
0
|
12
|
my $self = shift; |
|
4244
|
|
|
|
|
|
|
|
|
4245
|
|
|
|
|
|
|
# '::' = probably a sub call |
|
4246
|
9
|
|
|
|
|
20
|
$self->scan_bare_identifier(); |
|
4247
|
9
|
|
|
|
|
13
|
return; |
|
4248
|
|
|
|
|
|
|
} ## end sub do_DOUBLE_COLON |
|
4249
|
|
|
|
|
|
|
|
|
4250
|
|
|
|
|
|
|
sub do_LEFT_SHIFT { |
|
4251
|
|
|
|
|
|
|
|
|
4252
|
7
|
|
|
7
|
0
|
18
|
my $self = shift; |
|
4253
|
|
|
|
|
|
|
|
|
4254
|
|
|
|
|
|
|
# '<<' = maybe a here-doc? |
|
4255
|
7
|
50
|
|
|
|
117
|
if ( $expecting != OPERATOR ) { |
|
4256
|
|
|
|
|
|
|
my ( |
|
4257
|
7
|
|
|
|
|
41
|
$found_target, |
|
4258
|
|
|
|
|
|
|
$here_doc_target, |
|
4259
|
|
|
|
|
|
|
$here_quote_character, |
|
4260
|
|
|
|
|
|
|
$i_return, |
|
4261
|
|
|
|
|
|
|
$saw_error, |
|
4262
|
|
|
|
|
|
|
|
|
4263
|
|
|
|
|
|
|
) = $self->find_here_doc( |
|
4264
|
|
|
|
|
|
|
|
|
4265
|
|
|
|
|
|
|
$expecting, |
|
4266
|
|
|
|
|
|
|
$i, |
|
4267
|
|
|
|
|
|
|
$rtokens, |
|
4268
|
|
|
|
|
|
|
$rtoken_type, |
|
4269
|
|
|
|
|
|
|
$rtoken_map, |
|
4270
|
|
|
|
|
|
|
$max_token_index, |
|
4271
|
|
|
|
|
|
|
); |
|
4272
|
7
|
|
|
|
|
16
|
$i = $i_return; |
|
4273
|
|
|
|
|
|
|
|
|
4274
|
7
|
50
|
|
|
|
20
|
if ($found_target) { |
|
|
|
0
|
|
|
|
|
|
|
4275
|
7
|
|
|
|
|
14
|
push @{$rhere_target_list}, |
|
|
7
|
|
|
|
|
21
|
|
|
4276
|
|
|
|
|
|
|
[ $here_doc_target, $here_quote_character ]; |
|
4277
|
7
|
|
|
|
|
15
|
$type = 'h'; |
|
4278
|
7
|
50
|
|
|
|
71
|
if ( length($here_doc_target) > 80 ) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
4279
|
0
|
|
|
|
|
0
|
my $truncated = substr( $here_doc_target, 0, 80 ); |
|
4280
|
0
|
|
|
|
|
0
|
$self->complain("Long here-target: '$truncated' ...\n"); |
|
4281
|
|
|
|
|
|
|
} |
|
4282
|
|
|
|
|
|
|
elsif ( !$here_doc_target ) { |
|
4283
|
0
|
0
|
|
|
|
0
|
$self->warning( |
|
4284
|
|
|
|
|
|
|
'Use of bare << to mean <<"" is deprecated' . "\n" ) |
|
4285
|
|
|
|
|
|
|
if ( !$here_quote_character ); |
|
4286
|
|
|
|
|
|
|
} |
|
4287
|
|
|
|
|
|
|
elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { |
|
4288
|
2
|
|
|
|
|
9
|
$self->complain( |
|
4289
|
|
|
|
|
|
|
"Unconventional here-target: '$here_doc_target'\n"); |
|
4290
|
|
|
|
|
|
|
} |
|
4291
|
|
|
|
|
|
|
else { |
|
4292
|
|
|
|
|
|
|
# nothing to complain about |
|
4293
|
|
|
|
|
|
|
} |
|
4294
|
|
|
|
|
|
|
} |
|
4295
|
|
|
|
|
|
|
elsif ( $expecting == TERM ) { |
|
4296
|
0
|
0
|
|
|
|
0
|
if ( !$saw_error ) { |
|
4297
|
|
|
|
|
|
|
|
|
4298
|
|
|
|
|
|
|
# shouldn't happen..arriving here implies an error in |
|
4299
|
|
|
|
|
|
|
# the logic in sub 'find_here_doc' |
|
4300
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
|
4301
|
|
|
|
|
|
|
Fault(<<EOM); |
|
4302
|
|
|
|
|
|
|
Program bug; didn't find here doc target |
|
4303
|
|
|
|
|
|
|
EOM |
|
4304
|
|
|
|
|
|
|
} |
|
4305
|
|
|
|
|
|
|
$self->warning( |
|
4306
|
0
|
|
|
|
|
0
|
"Possible program error: didn't find here doc target\n" |
|
4307
|
|
|
|
|
|
|
); |
|
4308
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
|
4309
|
|
|
|
|
|
|
} |
|
4310
|
|
|
|
|
|
|
} |
|
4311
|
|
|
|
|
|
|
|
|
4312
|
|
|
|
|
|
|
# target not found, expecting == UNKNOWN |
|
4313
|
|
|
|
|
|
|
else { |
|
4314
|
|
|
|
|
|
|
# assume it is a shift |
|
4315
|
|
|
|
|
|
|
} |
|
4316
|
|
|
|
|
|
|
} |
|
4317
|
|
|
|
|
|
|
else { |
|
4318
|
|
|
|
|
|
|
} |
|
4319
|
7
|
|
|
|
|
12
|
return; |
|
4320
|
|
|
|
|
|
|
} ## end sub do_LEFT_SHIFT |
|
4321
|
|
|
|
|
|
|
|
|
4322
|
|
|
|
|
|
|
sub do_NEW_HERE_DOC { |
|
4323
|
|
|
|
|
|
|
|
|
4324
|
|
|
|
|
|
|
# '<<~' = a here-doc, new type added in v26 |
|
4325
|
|
|
|
|
|
|
|
|
4326
|
2
|
|
|
2
|
0
|
5
|
my $self = shift; |
|
4327
|
|
|
|
|
|
|
|
|
4328
|
|
|
|
|
|
|
return |
|
4329
|
2
|
50
|
|
|
|
6
|
if ( $i >= $max_token_index ); # here-doc not possible if end of line |
|
4330
|
2
|
50
|
|
|
|
7
|
if ( $expecting != OPERATOR ) { |
|
4331
|
|
|
|
|
|
|
my ( |
|
4332
|
2
|
|
|
|
|
12
|
$found_target, |
|
4333
|
|
|
|
|
|
|
$here_doc_target, |
|
4334
|
|
|
|
|
|
|
$here_quote_character, |
|
4335
|
|
|
|
|
|
|
$i_return, |
|
4336
|
|
|
|
|
|
|
$saw_error, |
|
4337
|
|
|
|
|
|
|
|
|
4338
|
|
|
|
|
|
|
) = $self->find_here_doc( |
|
4339
|
|
|
|
|
|
|
|
|
4340
|
|
|
|
|
|
|
$expecting, |
|
4341
|
|
|
|
|
|
|
$i, |
|
4342
|
|
|
|
|
|
|
$rtokens, |
|
4343
|
|
|
|
|
|
|
$rtoken_type, |
|
4344
|
|
|
|
|
|
|
$rtoken_map, |
|
4345
|
|
|
|
|
|
|
$max_token_index, |
|
4346
|
|
|
|
|
|
|
); |
|
4347
|
2
|
|
|
|
|
4
|
$i = $i_return; |
|
4348
|
|
|
|
|
|
|
|
|
4349
|
2
|
50
|
|
|
|
6
|
if ($found_target) { |
|
|
|
0
|
|
|
|
|
|
|
4350
|
|
|
|
|
|
|
|
|
4351
|
2
|
50
|
|
|
|
14
|
if ( length($here_doc_target) > 80 ) { |
|
|
|
50
|
|
|
|
|
|
|
4352
|
0
|
|
|
|
|
0
|
my $truncated = substr( $here_doc_target, 0, 80 ); |
|
4353
|
0
|
|
|
|
|
0
|
$self->complain("Long here-target: '$truncated' ...\n"); |
|
4354
|
|
|
|
|
|
|
} |
|
4355
|
|
|
|
|
|
|
elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { |
|
4356
|
0
|
|
|
|
|
0
|
$self->complain( |
|
4357
|
|
|
|
|
|
|
"Unconventional here-target: '$here_doc_target'\n"); |
|
4358
|
|
|
|
|
|
|
} |
|
4359
|
|
|
|
|
|
|
else { |
|
4360
|
|
|
|
|
|
|
# nothing to complain about |
|
4361
|
|
|
|
|
|
|
} |
|
4362
|
|
|
|
|
|
|
|
|
4363
|
|
|
|
|
|
|
# Note that we put a leading space on the here quote |
|
4364
|
|
|
|
|
|
|
# character indicate that it may be preceded by spaces |
|
4365
|
2
|
|
|
|
|
5
|
$here_quote_character = SPACE . $here_quote_character; |
|
4366
|
2
|
|
|
|
|
4
|
push @{$rhere_target_list}, |
|
|
2
|
|
|
|
|
7
|
|
|
4367
|
|
|
|
|
|
|
[ $here_doc_target, $here_quote_character ]; |
|
4368
|
2
|
|
|
|
|
4
|
$type = 'h'; |
|
4369
|
|
|
|
|
|
|
} |
|
4370
|
|
|
|
|
|
|
|
|
4371
|
|
|
|
|
|
|
# target not found .. |
|
4372
|
|
|
|
|
|
|
elsif ( $expecting == TERM ) { |
|
4373
|
0
|
0
|
|
|
|
0
|
if ( !$saw_error ) { |
|
4374
|
|
|
|
|
|
|
|
|
4375
|
|
|
|
|
|
|
# shouldn't happen..arriving here implies an error in |
|
4376
|
|
|
|
|
|
|
# the logic in sub 'find_here_doc' |
|
4377
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
|
4378
|
|
|
|
|
|
|
Fault(<<EOM); |
|
4379
|
|
|
|
|
|
|
Program bug; didn't find here doc target |
|
4380
|
|
|
|
|
|
|
EOM |
|
4381
|
|
|
|
|
|
|
} |
|
4382
|
|
|
|
|
|
|
$self->warning( |
|
4383
|
0
|
|
|
|
|
0
|
"Possible program error: didn't find here doc target\n" |
|
4384
|
|
|
|
|
|
|
); |
|
4385
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
|
4386
|
|
|
|
|
|
|
} |
|
4387
|
|
|
|
|
|
|
} |
|
4388
|
|
|
|
|
|
|
|
|
4389
|
|
|
|
|
|
|
# Target not found, expecting==UNKNOWN |
|
4390
|
|
|
|
|
|
|
else { |
|
4391
|
0
|
|
|
|
|
0
|
$self->warning("didn't find here doc target after '<<~'\n"); |
|
4392
|
|
|
|
|
|
|
} |
|
4393
|
|
|
|
|
|
|
} |
|
4394
|
|
|
|
|
|
|
else { |
|
4395
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR(); |
|
4396
|
|
|
|
|
|
|
} |
|
4397
|
2
|
|
|
|
|
22
|
return; |
|
4398
|
|
|
|
|
|
|
} ## end sub do_NEW_HERE_DOC |
|
4399
|
|
|
|
|
|
|
|
|
4400
|
|
|
|
|
|
|
sub do_POINTER { |
|
4401
|
|
|
|
|
|
|
|
|
4402
|
|
|
|
|
|
|
# '->' |
|
4403
|
1173
|
|
|
1173
|
0
|
1577
|
return; |
|
4404
|
|
|
|
|
|
|
} |
|
4405
|
|
|
|
|
|
|
|
|
4406
|
|
|
|
|
|
|
sub do_PLUS_PLUS { |
|
4407
|
|
|
|
|
|
|
|
|
4408
|
50
|
|
|
50
|
0
|
102
|
my $self = shift; |
|
4409
|
|
|
|
|
|
|
|
|
4410
|
|
|
|
|
|
|
# '++' |
|
4411
|
|
|
|
|
|
|
# type = 'pp' for pre-increment, '++' for post-increment |
|
4412
|
50
|
100
|
|
|
|
170
|
if ( $expecting == OPERATOR ) { $type = '++' } |
|
|
41
|
100
|
|
|
|
83
|
|
|
4413
|
7
|
|
|
|
|
20
|
elsif ( $expecting == TERM ) { $type = 'pp' } |
|
4414
|
|
|
|
|
|
|
|
|
4415
|
|
|
|
|
|
|
# handle ( $expecting == UNKNOWN ) |
|
4416
|
|
|
|
|
|
|
else { |
|
4417
|
|
|
|
|
|
|
|
|
4418
|
|
|
|
|
|
|
# look ahead .. |
|
4419
|
2
|
|
|
|
|
5
|
my ( $next_nonblank_token, $i_next ) = |
|
4420
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
|
4421
|
|
|
|
|
|
|
|
|
4422
|
|
|
|
|
|
|
# Fix for c042: look past a side comment |
|
4423
|
2
|
50
|
|
|
|
7
|
if ( $next_nonblank_token eq '#' ) { |
|
4424
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
|
4425
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
|
4426
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
|
4427
|
|
|
|
|
|
|
} |
|
4428
|
|
|
|
|
|
|
|
|
4429
|
2
|
50
|
|
|
|
5
|
if ( $next_nonblank_token eq '$' ) { $type = 'pp' } |
|
|
0
|
|
|
|
|
0
|
|
|
4430
|
|
|
|
|
|
|
} |
|
4431
|
50
|
|
|
|
|
116
|
return; |
|
4432
|
|
|
|
|
|
|
} ## end sub do_PLUS_PLUS |
|
4433
|
|
|
|
|
|
|
|
|
4434
|
|
|
|
|
|
|
sub do_FAT_COMMA { |
|
4435
|
|
|
|
|
|
|
|
|
4436
|
1103
|
|
|
1103
|
0
|
1521
|
my $self = shift; |
|
4437
|
|
|
|
|
|
|
|
|
4438
|
|
|
|
|
|
|
# '=>' |
|
4439
|
1103
|
50
|
|
|
|
2141
|
if ( $last_nonblank_type eq $tok ) { |
|
4440
|
0
|
|
|
|
|
0
|
$self->complain("Repeated '=>'s \n"); |
|
4441
|
|
|
|
|
|
|
} |
|
4442
|
|
|
|
|
|
|
|
|
4443
|
|
|
|
|
|
|
# patch for operator_expected: note if we are in the list (use.t) |
|
4444
|
|
|
|
|
|
|
# TODO: make version numbers a new token type |
|
4445
|
1103
|
100
|
|
|
|
2135
|
if ( $statement_type eq 'use' ) { $statement_type = '_use' } |
|
|
18
|
|
|
|
|
32
|
|
|
4446
|
1103
|
|
|
|
|
1488
|
return; |
|
4447
|
|
|
|
|
|
|
} ## end sub do_FAT_COMMA |
|
4448
|
|
|
|
|
|
|
|
|
4449
|
|
|
|
|
|
|
sub do_MINUS_MINUS { |
|
4450
|
|
|
|
|
|
|
|
|
4451
|
2
|
|
|
2
|
0
|
4
|
my $self = shift; |
|
4452
|
|
|
|
|
|
|
|
|
4453
|
|
|
|
|
|
|
# '--' |
|
4454
|
|
|
|
|
|
|
# type = 'mm' for pre-decrement, '--' for post-decrement |
|
4455
|
|
|
|
|
|
|
|
|
4456
|
2
|
50
|
|
|
|
10
|
if ( $expecting == OPERATOR ) { $type = '--' } |
|
|
0
|
50
|
|
|
|
0
|
|
|
4457
|
2
|
|
|
|
|
6
|
elsif ( $expecting == TERM ) { $type = 'mm' } |
|
4458
|
|
|
|
|
|
|
|
|
4459
|
|
|
|
|
|
|
# handle ( $expecting == UNKNOWN ) |
|
4460
|
|
|
|
|
|
|
else { |
|
4461
|
|
|
|
|
|
|
|
|
4462
|
|
|
|
|
|
|
# look ahead .. |
|
4463
|
0
|
|
|
|
|
0
|
my ( $next_nonblank_token, $i_next ) = |
|
4464
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
|
4465
|
|
|
|
|
|
|
|
|
4466
|
|
|
|
|
|
|
# Fix for c042: look past a side comment |
|
4467
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token eq '#' ) { |
|
4468
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
|
4469
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
|
4470
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
|
4471
|
|
|
|
|
|
|
} |
|
4472
|
|
|
|
|
|
|
|
|
4473
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token eq '$' ) { $type = 'mm' } |
|
|
0
|
|
|
|
|
0
|
|
|
4474
|
|
|
|
|
|
|
} |
|
4475
|
|
|
|
|
|
|
|
|
4476
|
2
|
|
|
|
|
4
|
return; |
|
4477
|
|
|
|
|
|
|
} ## end sub do_MINUS_MINUS |
|
4478
|
|
|
|
|
|
|
|
|
4479
|
|
|
|
|
|
|
sub do_DIGITS { |
|
4480
|
|
|
|
|
|
|
|
|
4481
|
2508
|
|
|
2508
|
0
|
3258
|
my $self = shift; |
|
4482
|
|
|
|
|
|
|
|
|
4483
|
|
|
|
|
|
|
# 'd' = string of digits |
|
4484
|
2508
|
50
|
|
|
|
4390
|
$self->error_if_expecting_OPERATOR("Number") |
|
4485
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
|
4486
|
|
|
|
|
|
|
|
|
4487
|
2508
|
|
|
|
|
5564
|
my $number = $self->scan_number_fast(); |
|
4488
|
2508
|
50
|
|
|
|
4868
|
if ( !defined($number) ) { |
|
4489
|
|
|
|
|
|
|
|
|
4490
|
|
|
|
|
|
|
# shouldn't happen - we should always get a number |
|
4491
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
|
4492
|
|
|
|
|
|
|
Fault(<<EOM); |
|
4493
|
|
|
|
|
|
|
non-number beginning with digit--program bug |
|
4494
|
|
|
|
|
|
|
EOM |
|
4495
|
|
|
|
|
|
|
} |
|
4496
|
|
|
|
|
|
|
$self->warning( |
|
4497
|
0
|
|
|
|
|
0
|
"Unexpected error condition: non-number beginning with digit\n" |
|
4498
|
|
|
|
|
|
|
); |
|
4499
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
|
4500
|
|
|
|
|
|
|
} |
|
4501
|
2508
|
|
|
|
|
3327
|
return; |
|
4502
|
|
|
|
|
|
|
} ## end sub do_DIGITS |
|
4503
|
|
|
|
|
|
|
|
|
4504
|
|
|
|
|
|
|
sub do_ATTRIBUTE_LIST { |
|
4505
|
|
|
|
|
|
|
|
|
4506
|
45
|
|
|
45
|
0
|
105
|
my ( $self, $next_nonblank_token ) = @_; |
|
4507
|
|
|
|
|
|
|
|
|
4508
|
|
|
|
|
|
|
# Called at a bareword encountered while in an attribute list |
|
4509
|
|
|
|
|
|
|
# returns 'is_attribute': |
|
4510
|
|
|
|
|
|
|
# true if attribute found |
|
4511
|
|
|
|
|
|
|
# false if an attribute (continue parsing bareword) |
|
4512
|
|
|
|
|
|
|
|
|
4513
|
|
|
|
|
|
|
# treat bare word followed by open paren like qw( |
|
4514
|
45
|
100
|
|
|
|
112
|
if ( $next_nonblank_token eq '(' ) { |
|
4515
|
|
|
|
|
|
|
|
|
4516
|
|
|
|
|
|
|
# For something like: |
|
4517
|
|
|
|
|
|
|
# : prototype($$) |
|
4518
|
|
|
|
|
|
|
# we should let do_scan_sub see it so that it can see |
|
4519
|
|
|
|
|
|
|
# the prototype. All other attributes get parsed as a |
|
4520
|
|
|
|
|
|
|
# quoted string. |
|
4521
|
20
|
100
|
|
|
|
328
|
if ( $tok eq 'prototype' ) { |
|
4522
|
2
|
|
|
|
|
4
|
$id_scan_state = 'prototype'; |
|
4523
|
|
|
|
|
|
|
|
|
4524
|
|
|
|
|
|
|
# start just after the word 'prototype' |
|
4525
|
2
|
|
|
|
|
4
|
my $i_beg = $i + 1; |
|
4526
|
2
|
|
|
|
|
18
|
( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub( |
|
4527
|
|
|
|
|
|
|
{ |
|
4528
|
|
|
|
|
|
|
input_line => $input_line, |
|
4529
|
|
|
|
|
|
|
i => $i, |
|
4530
|
|
|
|
|
|
|
i_beg => $i_beg, |
|
4531
|
|
|
|
|
|
|
tok => $tok, |
|
4532
|
|
|
|
|
|
|
type => $type, |
|
4533
|
|
|
|
|
|
|
rtokens => $rtokens, |
|
4534
|
|
|
|
|
|
|
rtoken_map => $rtoken_map, |
|
4535
|
|
|
|
|
|
|
id_scan_state => $id_scan_state, |
|
4536
|
|
|
|
|
|
|
max_token_index => $max_token_index, |
|
4537
|
|
|
|
|
|
|
} |
|
4538
|
|
|
|
|
|
|
); |
|
4539
|
|
|
|
|
|
|
|
|
4540
|
|
|
|
|
|
|
# If successful, mark as type 'q' to be consistent |
|
4541
|
|
|
|
|
|
|
# with other attributes. Type 'w' would also work. |
|
4542
|
2
|
50
|
|
|
|
12
|
if ( $i > $i_beg ) { |
|
4543
|
2
|
|
|
|
|
4
|
$type = 'q'; |
|
4544
|
2
|
|
|
|
|
5
|
return 1; |
|
4545
|
|
|
|
|
|
|
} |
|
4546
|
|
|
|
|
|
|
|
|
4547
|
|
|
|
|
|
|
# If not successful, continue and parse as a quote. |
|
4548
|
|
|
|
|
|
|
} |
|
4549
|
|
|
|
|
|
|
|
|
4550
|
|
|
|
|
|
|
# All other attribute lists must be parsed as quotes |
|
4551
|
|
|
|
|
|
|
# (see 'signatures.t' for good examples) |
|
4552
|
18
|
|
|
|
|
39
|
$in_quote = $quote_items{'q'}; |
|
4553
|
18
|
|
|
|
|
34
|
$allowed_quote_modifiers = $quote_modifiers{'q'}; |
|
4554
|
18
|
|
|
|
|
26
|
$quote_starting_tok = 'q'; |
|
4555
|
18
|
|
|
|
|
29
|
$type = 'q'; |
|
4556
|
18
|
|
|
|
|
27
|
$quote_type = 'q'; |
|
4557
|
18
|
|
|
|
|
27
|
$quote_here_target_2 = undef; |
|
4558
|
18
|
|
|
|
|
32
|
return 1; |
|
4559
|
|
|
|
|
|
|
} |
|
4560
|
|
|
|
|
|
|
|
|
4561
|
|
|
|
|
|
|
# handle bareword not followed by open paren |
|
4562
|
|
|
|
|
|
|
else { |
|
4563
|
25
|
|
|
|
|
39
|
$type = 'w'; |
|
4564
|
25
|
|
|
|
|
50
|
return 1; |
|
4565
|
|
|
|
|
|
|
} |
|
4566
|
|
|
|
|
|
|
|
|
4567
|
|
|
|
|
|
|
# attribute not found |
|
4568
|
0
|
|
|
|
|
0
|
return; |
|
4569
|
|
|
|
|
|
|
} ## end sub do_ATTRIBUTE_LIST |
|
4570
|
|
|
|
|
|
|
|
|
4571
|
|
|
|
|
|
|
sub do_X_OPERATOR { |
|
4572
|
|
|
|
|
|
|
|
|
4573
|
17
|
|
|
17
|
0
|
37
|
my $self = shift; |
|
4574
|
|
|
|
|
|
|
|
|
4575
|
|
|
|
|
|
|
# We are at a pretoken starting with 'x' where an operator is expected |
|
4576
|
|
|
|
|
|
|
|
|
4577
|
17
|
100
|
|
|
|
70
|
if ( $tok eq 'x' ) { |
|
4578
|
15
|
50
|
|
|
|
55
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { # x= |
|
4579
|
0
|
|
|
|
|
0
|
$tok = 'x='; |
|
4580
|
0
|
|
|
|
|
0
|
$type = $tok; |
|
4581
|
0
|
|
|
|
|
0
|
$i++; |
|
4582
|
|
|
|
|
|
|
} |
|
4583
|
|
|
|
|
|
|
else { |
|
4584
|
15
|
|
|
|
|
31
|
$type = 'x'; |
|
4585
|
|
|
|
|
|
|
} |
|
4586
|
|
|
|
|
|
|
} |
|
4587
|
|
|
|
|
|
|
else { |
|
4588
|
|
|
|
|
|
|
|
|
4589
|
|
|
|
|
|
|
# Split a pretoken like 'x10' into 'x' and '10'. |
|
4590
|
|
|
|
|
|
|
# Note: In previous versions of perltidy it was marked |
|
4591
|
|
|
|
|
|
|
# as a number, $type = 'n', and fixed downstream by the |
|
4592
|
|
|
|
|
|
|
# Formatter. |
|
4593
|
2
|
|
|
|
|
3
|
$type = 'n'; |
|
4594
|
2
|
50
|
|
|
|
6
|
if ( $self->split_pretoken(1) ) { |
|
4595
|
2
|
|
|
|
|
3
|
$type = 'x'; |
|
4596
|
2
|
|
|
|
|
3
|
$tok = 'x'; |
|
4597
|
|
|
|
|
|
|
} |
|
4598
|
|
|
|
|
|
|
} |
|
4599
|
17
|
|
|
|
|
39
|
return; |
|
4600
|
|
|
|
|
|
|
} ## end sub do_X_OPERATOR |
|
4601
|
|
|
|
|
|
|
|
|
4602
|
|
|
|
|
|
|
sub do_USE_CONSTANT { |
|
4603
|
|
|
|
|
|
|
|
|
4604
|
16
|
|
|
16
|
0
|
29
|
my $self = shift; |
|
4605
|
|
|
|
|
|
|
|
|
4606
|
|
|
|
|
|
|
# We just saw 'use constant' and must look ahead |
|
4607
|
|
|
|
|
|
|
|
|
4608
|
16
|
|
|
|
|
59
|
$self->scan_bare_identifier(); |
|
4609
|
16
|
|
|
|
|
63
|
my ( $next_nonblank_tok2, $i_next2_uu ) = |
|
4610
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
|
4611
|
|
|
|
|
|
|
|
|
4612
|
16
|
50
|
|
|
|
47
|
if ($next_nonblank_tok2) { |
|
4613
|
|
|
|
|
|
|
|
|
4614
|
16
|
100
|
|
|
|
53
|
if ( $is_keyword{$next_nonblank_tok2} ) { |
|
4615
|
|
|
|
|
|
|
|
|
4616
|
|
|
|
|
|
|
# Assume qw is used as a quote and okay, as in: |
|
4617
|
|
|
|
|
|
|
# use constant qw{ DEBUG 0 }; |
|
4618
|
|
|
|
|
|
|
# Not worth trying to parse for just a warning |
|
4619
|
|
|
|
|
|
|
|
|
4620
|
|
|
|
|
|
|
# NOTE: This warning is deactivated because recent |
|
4621
|
|
|
|
|
|
|
# versions of perl do not complain here, but |
|
4622
|
|
|
|
|
|
|
# the coding is retained for reference. |
|
4623
|
1
|
|
|
|
|
4
|
if ( 0 && $next_nonblank_tok2 ne 'qw' ) { |
|
4624
|
|
|
|
|
|
|
$self->warning( |
|
4625
|
|
|
|
|
|
|
"Attempting to define constant '$next_nonblank_tok2' which is a perl keyword\n" |
|
4626
|
|
|
|
|
|
|
); |
|
4627
|
|
|
|
|
|
|
} |
|
4628
|
|
|
|
|
|
|
} |
|
4629
|
|
|
|
|
|
|
|
|
4630
|
|
|
|
|
|
|
else { |
|
4631
|
15
|
|
|
|
|
42
|
$ris_constant->{$current_package}->{$next_nonblank_tok2} = 1; |
|
4632
|
|
|
|
|
|
|
} |
|
4633
|
|
|
|
|
|
|
} |
|
4634
|
16
|
|
|
|
|
30
|
return; |
|
4635
|
|
|
|
|
|
|
} ## end sub do_USE_CONSTANT |
|
4636
|
|
|
|
|
|
|
|
|
4637
|
|
|
|
|
|
|
sub do_KEYWORD { |
|
4638
|
|
|
|
|
|
|
|
|
4639
|
3060
|
|
|
3060
|
0
|
4080
|
my $self = shift; |
|
4640
|
|
|
|
|
|
|
|
|
4641
|
|
|
|
|
|
|
# found a keyword - set any associated flags |
|
4642
|
3060
|
|
|
|
|
4318
|
$type = 'k'; |
|
4643
|
|
|
|
|
|
|
|
|
4644
|
|
|
|
|
|
|
# Since for and foreach may not be followed immediately |
|
4645
|
|
|
|
|
|
|
# by an opening paren, we have to remember which keyword |
|
4646
|
|
|
|
|
|
|
# is associated with the next '(' |
|
4647
|
|
|
|
|
|
|
# Previously, before update c230 : if ( $is_for_foreach{$tok} ) { |
|
4648
|
|
|
|
|
|
|
##(if elsif unless while until for foreach switch case given when catch) |
|
4649
|
3060
|
100
|
|
|
|
6569
|
if ( $is_blocktype_with_paren{$tok} ) { |
|
4650
|
495
|
100
|
|
|
|
1385
|
if ( new_statement_ok() ) { |
|
4651
|
356
|
|
|
|
|
664
|
$want_paren = $tok; |
|
4652
|
|
|
|
|
|
|
} |
|
4653
|
|
|
|
|
|
|
} |
|
4654
|
|
|
|
|
|
|
|
|
4655
|
|
|
|
|
|
|
# Catch some unexpected keyword errors; c517. |
|
4656
|
|
|
|
|
|
|
# Note that we only check keywords for OPERATOR expected, not TERM. |
|
4657
|
|
|
|
|
|
|
# This is because a large number of keywords which normally expect |
|
4658
|
|
|
|
|
|
|
# a TERM will also take an OPERATOR. |
|
4659
|
3060
|
50
|
66
|
|
|
6947
|
if ( $expecting == OPERATOR && $is_TERM_keyword{$tok} ) { |
|
4660
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR(); |
|
4661
|
|
|
|
|
|
|
} |
|
4662
|
|
|
|
|
|
|
|
|
4663
|
|
|
|
|
|
|
# recognize 'use' statements, which are special |
|
4664
|
3060
|
100
|
100
|
|
|
17768
|
if ( $is_use_require{$tok} ) { |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
4665
|
178
|
|
|
|
|
298
|
$statement_type = $tok; |
|
4666
|
|
|
|
|
|
|
} |
|
4667
|
|
|
|
|
|
|
|
|
4668
|
|
|
|
|
|
|
# remember my and our to check for trailing ": shared" |
|
4669
|
|
|
|
|
|
|
elsif ( $is_my_our_state{$tok} ) { |
|
4670
|
747
|
|
|
|
|
1267
|
$statement_type = $tok; |
|
4671
|
|
|
|
|
|
|
} |
|
4672
|
|
|
|
|
|
|
|
|
4673
|
|
|
|
|
|
|
# Check for unexpected 'elsif' |
|
4674
|
|
|
|
|
|
|
elsif ( $tok eq 'elsif' ) { |
|
4675
|
33
|
0
|
0
|
|
|
422
|
if ( |
|
|
|
|
33
|
|
|
|
|
|
4676
|
|
|
|
|
|
|
|
|
4677
|
|
|
|
|
|
|
!$is_if_elsif_unless{$last_nonblank_block_type} |
|
4678
|
|
|
|
|
|
|
|
|
4679
|
|
|
|
|
|
|
# Allow isolated blocks of any kind during editing |
|
4680
|
|
|
|
|
|
|
# by checking for a last noblank token of ';' and no |
|
4681
|
|
|
|
|
|
|
# sequence numbers having been issued (c272). The check |
|
4682
|
|
|
|
|
|
|
# on sequence number is not perfect but good enough. |
|
4683
|
|
|
|
|
|
|
&& !( |
|
4684
|
|
|
|
|
|
|
$last_nonblank_token eq ';' |
|
4685
|
|
|
|
|
|
|
&& $next_sequence_number == SEQ_ROOT + 1 |
|
4686
|
|
|
|
|
|
|
) |
|
4687
|
|
|
|
|
|
|
|
|
4688
|
|
|
|
|
|
|
) |
|
4689
|
|
|
|
|
|
|
{ |
|
4690
|
|
|
|
|
|
|
## prevent formatting and avoid instability (b1553) |
|
4691
|
0
|
|
|
|
|
0
|
$self->warning_do_not_format( |
|
4692
|
|
|
|
|
|
|
"expecting '$tok' to follow one of 'if|elsif|unless'\n"); |
|
4693
|
|
|
|
|
|
|
} |
|
4694
|
|
|
|
|
|
|
} |
|
4695
|
|
|
|
|
|
|
|
|
4696
|
|
|
|
|
|
|
# Check for unexpected 'else' |
|
4697
|
|
|
|
|
|
|
elsif ( $tok eq 'else' ) { |
|
4698
|
|
|
|
|
|
|
|
|
4699
|
|
|
|
|
|
|
# patched for SWITCH/CASE |
|
4700
|
49
|
0
|
66
|
|
|
231
|
if ( |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
4701
|
|
|
|
|
|
|
|
|
4702
|
|
|
|
|
|
|
!$is_if_elsif_unless_case_when{$last_nonblank_block_type} |
|
4703
|
|
|
|
|
|
|
|
|
4704
|
|
|
|
|
|
|
# patch to avoid an unwanted error message for |
|
4705
|
|
|
|
|
|
|
# the case of a parenless 'case' (RT 105484): |
|
4706
|
|
|
|
|
|
|
# switch ( 1 ) { case x { 2 } else { } } |
|
4707
|
|
|
|
|
|
|
&& !$is_if_elsif_unless_case_when{$statement_type} |
|
4708
|
|
|
|
|
|
|
|
|
4709
|
|
|
|
|
|
|
# Allow isolated blocks of any kind during editing (c272) |
|
4710
|
|
|
|
|
|
|
&& !( |
|
4711
|
|
|
|
|
|
|
$last_nonblank_token eq ';' |
|
4712
|
|
|
|
|
|
|
&& $next_sequence_number == SEQ_ROOT + 1 |
|
4713
|
|
|
|
|
|
|
) |
|
4714
|
|
|
|
|
|
|
|
|
4715
|
|
|
|
|
|
|
) |
|
4716
|
|
|
|
|
|
|
{ |
|
4717
|
|
|
|
|
|
|
## prevent formatting and avoid instability (b1553) |
|
4718
|
0
|
|
|
|
|
0
|
$self->warning_do_not_format( |
|
4719
|
|
|
|
|
|
|
"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" |
|
4720
|
|
|
|
|
|
|
); |
|
4721
|
|
|
|
|
|
|
} |
|
4722
|
|
|
|
|
|
|
} |
|
4723
|
|
|
|
|
|
|
|
|
4724
|
|
|
|
|
|
|
# patch for SWITCH/CASE if 'case' and 'when are |
|
4725
|
|
|
|
|
|
|
# treated as keywords. Also 'default' for Switch::Plain |
|
4726
|
|
|
|
|
|
|
elsif ($tok eq 'when' |
|
4727
|
|
|
|
|
|
|
|| $tok eq 'case' |
|
4728
|
|
|
|
|
|
|
|| $tok eq 'default' ) |
|
4729
|
|
|
|
|
|
|
{ |
|
4730
|
70
|
|
|
|
|
109
|
$statement_type = $tok; # next '{' is block |
|
4731
|
|
|
|
|
|
|
} |
|
4732
|
|
|
|
|
|
|
|
|
4733
|
|
|
|
|
|
|
# feature 'err' was removed in Perl 5.10. So mark this as |
|
4734
|
|
|
|
|
|
|
# a bareword unless an operator is expected (see c158). |
|
4735
|
|
|
|
|
|
|
elsif ( $tok eq 'err' ) { |
|
4736
|
1
|
50
|
|
|
|
3
|
if ( $expecting != OPERATOR ) { $type = 'w' } |
|
|
1
|
|
|
|
|
2
|
|
|
4737
|
|
|
|
|
|
|
} |
|
4738
|
|
|
|
|
|
|
else { |
|
4739
|
|
|
|
|
|
|
## no special treatment needed |
|
4740
|
|
|
|
|
|
|
} |
|
4741
|
|
|
|
|
|
|
|
|
4742
|
3060
|
|
|
|
|
5052
|
return; |
|
4743
|
|
|
|
|
|
|
} ## end sub do_KEYWORD |
|
4744
|
|
|
|
|
|
|
|
|
4745
|
|
|
|
|
|
|
sub do_QUOTE_OPERATOR { |
|
4746
|
|
|
|
|
|
|
|
|
4747
|
232
|
|
|
232
|
0
|
365
|
my $self = shift; |
|
4748
|
|
|
|
|
|
|
|
|
4749
|
|
|
|
|
|
|
# We have arrived at a quote operator: q, qq, qw, qx, qr, s, y, tr, m |
|
4750
|
|
|
|
|
|
|
|
|
4751
|
232
|
50
|
|
|
|
596
|
if ( $expecting == OPERATOR ) { |
|
4752
|
|
|
|
|
|
|
|
|
4753
|
|
|
|
|
|
|
# Be careful not to call an error for a qw quote |
|
4754
|
|
|
|
|
|
|
# where a parenthesized list is allowed. For example, |
|
4755
|
|
|
|
|
|
|
# it could also be a for/foreach construct such as |
|
4756
|
|
|
|
|
|
|
# |
|
4757
|
|
|
|
|
|
|
# foreach my $key qw\Uno Due Tres Quadro\ { |
|
4758
|
|
|
|
|
|
|
# print "Set $key\n"; |
|
4759
|
|
|
|
|
|
|
# } |
|
4760
|
|
|
|
|
|
|
# |
|
4761
|
|
|
|
|
|
|
|
|
4762
|
|
|
|
|
|
|
# Or it could be a function call. |
|
4763
|
|
|
|
|
|
|
# NOTE: Braces in something like &{ xxx } are not |
|
4764
|
|
|
|
|
|
|
# marked as a block, we might have a method call. |
|
4765
|
|
|
|
|
|
|
# &method(...), $method->(..), &{method}(...), |
|
4766
|
|
|
|
|
|
|
# $ref[2](list) is ok & short for $ref[2]->(list) |
|
4767
|
|
|
|
|
|
|
# |
|
4768
|
|
|
|
|
|
|
# See notes in 'sub code_block_type' and |
|
4769
|
|
|
|
|
|
|
# 'sub is_non_structural_brace' |
|
4770
|
|
|
|
|
|
|
|
|
4771
|
|
|
|
|
|
|
my $paren_list_possible = $tok eq 'qw' |
|
4772
|
|
|
|
|
|
|
&& ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/ |
|
4773
|
0
|
|
0
|
|
|
0
|
|| $is_for_foreach{$want_paren} ); |
|
4774
|
|
|
|
|
|
|
|
|
4775
|
0
|
0
|
|
|
|
0
|
if ( !$paren_list_possible ) { |
|
4776
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR(); |
|
4777
|
|
|
|
|
|
|
} |
|
4778
|
|
|
|
|
|
|
} |
|
4779
|
232
|
|
|
|
|
478
|
$in_quote = $quote_items{$tok}; |
|
4780
|
232
|
|
|
|
|
466
|
$allowed_quote_modifiers = $quote_modifiers{$tok}; |
|
4781
|
232
|
|
|
|
|
349
|
$quote_starting_tok = $tok; |
|
4782
|
232
|
|
|
|
|
368
|
$quote_here_target_2 = undef; |
|
4783
|
|
|
|
|
|
|
|
|
4784
|
|
|
|
|
|
|
# All quote types are 'Q' except possibly qw quotes. |
|
4785
|
|
|
|
|
|
|
# qw quotes are special in that they may generally be trimmed |
|
4786
|
|
|
|
|
|
|
# of leading and trailing whitespace. So they are given a |
|
4787
|
|
|
|
|
|
|
# separate type, 'q', unless requested otherwise. |
|
4788
|
232
|
100
|
66
|
|
|
966
|
$type = |
|
4789
|
|
|
|
|
|
|
( $tok eq 'qw' && $rOpts_trim_qw ) |
|
4790
|
|
|
|
|
|
|
? 'q' |
|
4791
|
|
|
|
|
|
|
: 'Q'; |
|
4792
|
232
|
|
|
|
|
431
|
$quote_type = $type; |
|
4793
|
232
|
|
|
|
|
385
|
return; |
|
4794
|
|
|
|
|
|
|
} ## end sub do_QUOTE_OPERATOR |
|
4795
|
|
|
|
|
|
|
|
|
4796
|
|
|
|
|
|
|
sub do_UNKNOWN_BAREWORD { |
|
4797
|
|
|
|
|
|
|
|
|
4798
|
1030
|
|
|
1030
|
0
|
2085
|
my ( $self, $next_nonblank_token ) = @_; |
|
4799
|
|
|
|
|
|
|
|
|
4800
|
|
|
|
|
|
|
# We have encountered a bareword which needs more work to classify |
|
4801
|
|
|
|
|
|
|
|
|
4802
|
1030
|
|
|
|
|
3072
|
$self->scan_bare_identifier(); |
|
4803
|
|
|
|
|
|
|
|
|
4804
|
1030
|
100
|
100
|
|
|
3060
|
if ( $statement_type eq 'use' |
|
4805
|
|
|
|
|
|
|
&& $last_nonblank_token eq 'use' ) |
|
4806
|
|
|
|
|
|
|
{ |
|
4807
|
111
|
|
|
|
|
356
|
$rsaw_use_module->{$current_package}->{$tok} = 1; |
|
4808
|
|
|
|
|
|
|
} |
|
4809
|
|
|
|
|
|
|
|
|
4810
|
1030
|
100
|
|
|
|
2159
|
if ( $type eq 'w' ) { |
|
4811
|
|
|
|
|
|
|
|
|
4812
|
1005
|
100
|
|
|
|
2149
|
if ( $expecting == OPERATOR ) { |
|
4813
|
|
|
|
|
|
|
|
|
4814
|
|
|
|
|
|
|
# Patch to avoid error message for RPerl overloaded |
|
4815
|
|
|
|
|
|
|
# operator functions: use overload |
|
4816
|
|
|
|
|
|
|
# '+' => \&sse_add, |
|
4817
|
|
|
|
|
|
|
# '-' => \&sse_sub, |
|
4818
|
|
|
|
|
|
|
# '*' => \&sse_mul, |
|
4819
|
|
|
|
|
|
|
# '/' => \&sse_div; |
|
4820
|
|
|
|
|
|
|
# TODO: this could eventually be generalized |
|
4821
|
2
|
50
|
33
|
|
|
18
|
if ( $rsaw_use_module->{$current_package}->{'RPerl'} |
|
|
|
50
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4822
|
|
|
|
|
|
|
&& $tok =~ /^sse_(mul|div|add|sub)$/ ) |
|
4823
|
|
|
|
|
|
|
{ |
|
4824
|
|
|
|
|
|
|
|
|
4825
|
|
|
|
|
|
|
} |
|
4826
|
|
|
|
|
|
|
|
|
4827
|
|
|
|
|
|
|
# patch for Syntax::Operator::In, git #162 |
|
4828
|
|
|
|
|
|
|
elsif ( $tok eq 'in' && $next_nonblank_token eq ':' ) { |
|
4829
|
|
|
|
|
|
|
|
|
4830
|
|
|
|
|
|
|
} |
|
4831
|
|
|
|
|
|
|
|
|
4832
|
|
|
|
|
|
|
# Fix part 1 for git #63 in which a comment falls |
|
4833
|
|
|
|
|
|
|
# between an -> and the following word. An |
|
4834
|
|
|
|
|
|
|
# alternate fix would be to change operator_expected |
|
4835
|
|
|
|
|
|
|
# to return an UNKNOWN for this type. |
|
4836
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq '->' ) { |
|
4837
|
|
|
|
|
|
|
|
|
4838
|
|
|
|
|
|
|
} |
|
4839
|
|
|
|
|
|
|
|
|
4840
|
|
|
|
|
|
|
# don't complain about possible indirect object |
|
4841
|
|
|
|
|
|
|
# notation. |
|
4842
|
|
|
|
|
|
|
# For example: |
|
4843
|
|
|
|
|
|
|
# package main; |
|
4844
|
|
|
|
|
|
|
# sub new($) { ... } |
|
4845
|
|
|
|
|
|
|
# $b = new A::; # calls A::new |
|
4846
|
|
|
|
|
|
|
# $c = new A; # same thing but suspicious |
|
4847
|
|
|
|
|
|
|
# This will call A::new but we have a 'new' in |
|
4848
|
|
|
|
|
|
|
# main:: which looks like a constant. |
|
4849
|
|
|
|
|
|
|
# |
|
4850
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'C' ) { |
|
4851
|
0
|
0
|
|
|
|
0
|
if ( $tok !~ /::$/ ) { |
|
4852
|
0
|
|
|
|
|
0
|
$self->complain(<<EOM); |
|
4853
|
|
|
|
|
|
|
Expecting operator after '$last_nonblank_token' but found bare word '$tok' |
|
4854
|
|
|
|
|
|
|
Maybe indirect object notation? |
|
4855
|
|
|
|
|
|
|
EOM |
|
4856
|
|
|
|
|
|
|
} |
|
4857
|
|
|
|
|
|
|
} |
|
4858
|
|
|
|
|
|
|
else { |
|
4859
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR("bareword"); |
|
4860
|
|
|
|
|
|
|
} |
|
4861
|
|
|
|
|
|
|
} |
|
4862
|
|
|
|
|
|
|
|
|
4863
|
|
|
|
|
|
|
# mark bare words immediately followed by a paren as |
|
4864
|
|
|
|
|
|
|
# functions |
|
4865
|
1005
|
|
|
|
|
1793
|
$next_tok = $rtokens->[ $i + 1 ]; |
|
4866
|
1005
|
100
|
|
|
|
1987
|
if ( $next_tok eq '(' ) { |
|
4867
|
|
|
|
|
|
|
|
|
4868
|
|
|
|
|
|
|
# Patch for issue c151, where we are processing a snippet and |
|
4869
|
|
|
|
|
|
|
# have not seen that SPACE is a constant. In this case 'x' is |
|
4870
|
|
|
|
|
|
|
# probably an operator. The only disadvantage with an incorrect |
|
4871
|
|
|
|
|
|
|
# guess is that the space after it may be incorrect. For example |
|
4872
|
|
|
|
|
|
|
# $str .= SPACE x ( 16 - length($str) ); See also b1410. |
|
4873
|
294
|
50
|
33
|
|
|
1185
|
if ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { $type = 'x' } |
|
|
0
|
50
|
|
|
|
0
|
|
|
4874
|
|
|
|
|
|
|
|
|
4875
|
|
|
|
|
|
|
# Fix part 2 for git #63. Leave type as 'w' to keep |
|
4876
|
|
|
|
|
|
|
# the type the same as if the -> were not separated |
|
4877
|
294
|
|
|
|
|
615
|
elsif ( $last_nonblank_type ne '->' ) { $type = 'U' } |
|
4878
|
|
|
|
|
|
|
|
|
4879
|
|
|
|
|
|
|
# not a special case |
|
4880
|
|
|
|
|
|
|
else { } |
|
4881
|
|
|
|
|
|
|
|
|
4882
|
|
|
|
|
|
|
} |
|
4883
|
|
|
|
|
|
|
|
|
4884
|
|
|
|
|
|
|
# underscore after file test operator is file handle |
|
4885
|
1005
|
50
|
66
|
|
|
2445
|
if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { |
|
4886
|
0
|
|
|
|
|
0
|
$type = 'Z'; |
|
4887
|
|
|
|
|
|
|
} |
|
4888
|
|
|
|
|
|
|
|
|
4889
|
|
|
|
|
|
|
# patch for SWITCH/CASE if 'case' and 'when are |
|
4890
|
|
|
|
|
|
|
# not treated as keywords: |
|
4891
|
1005
|
50
|
33
|
|
|
4397
|
if ( |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
4892
|
|
|
|
|
|
|
( $tok eq 'case' && $rbrace_type->[$brace_depth] eq 'switch' ) |
|
4893
|
|
|
|
|
|
|
|| ( $tok eq 'when' |
|
4894
|
|
|
|
|
|
|
&& $rbrace_type->[$brace_depth] eq 'given' ) |
|
4895
|
|
|
|
|
|
|
) |
|
4896
|
|
|
|
|
|
|
{ |
|
4897
|
0
|
|
|
|
|
0
|
$statement_type = $tok; # next '{' is block |
|
4898
|
0
|
|
|
|
|
0
|
$type = 'k'; # for keyword syntax coloring |
|
4899
|
|
|
|
|
|
|
} |
|
4900
|
1005
|
100
|
|
|
|
2096
|
if ( $next_nonblank_token eq '(' ) { |
|
4901
|
|
|
|
|
|
|
|
|
4902
|
|
|
|
|
|
|
# patch for SWITCH/CASE if switch and given not keywords |
|
4903
|
|
|
|
|
|
|
# Switch is not a perl 5 keyword, but we will gamble |
|
4904
|
|
|
|
|
|
|
# and mark switch followed by paren as a keyword. This |
|
4905
|
|
|
|
|
|
|
# is only necessary to get html syntax coloring nice, |
|
4906
|
|
|
|
|
|
|
# and does not commit this as being a switch/case. |
|
4907
|
259
|
50
|
33
|
|
|
1617
|
if ( $tok eq 'switch' || $tok eq 'given' ) { |
|
|
|
50
|
33
|
|
|
|
|
|
4908
|
0
|
|
|
|
|
0
|
$type = 'k'; # for keyword syntax coloring |
|
4909
|
|
|
|
|
|
|
} |
|
4910
|
|
|
|
|
|
|
|
|
4911
|
|
|
|
|
|
|
# mark 'x' as operator for something like this (see b1410) |
|
4912
|
|
|
|
|
|
|
# my $line = join( LD_X, map { LD_H x ( $_ + 2 ) } @$widths ); |
|
4913
|
|
|
|
|
|
|
elsif ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { |
|
4914
|
0
|
|
|
|
|
0
|
$type = 'x'; |
|
4915
|
|
|
|
|
|
|
} |
|
4916
|
|
|
|
|
|
|
else { |
|
4917
|
|
|
|
|
|
|
## not a special case |
|
4918
|
|
|
|
|
|
|
} |
|
4919
|
|
|
|
|
|
|
} |
|
4920
|
|
|
|
|
|
|
} |
|
4921
|
1030
|
|
|
|
|
1654
|
return; |
|
4922
|
|
|
|
|
|
|
} ## end sub do_UNKNOWN_BAREWORD |
|
4923
|
|
|
|
|
|
|
|
|
4924
|
|
|
|
|
|
|
sub sub_attribute_ok_here { |
|
4925
|
|
|
|
|
|
|
|
|
4926
|
37
|
|
|
37
|
0
|
108
|
my ( $self, $tok_kw, $next_nonblank_token, $i_next ) = @_; |
|
4927
|
|
|
|
|
|
|
|
|
4928
|
|
|
|
|
|
|
# Decide if a ':' can introduce an attribute. For example, |
|
4929
|
|
|
|
|
|
|
# something like 'sub :' |
|
4930
|
|
|
|
|
|
|
|
|
4931
|
|
|
|
|
|
|
# Given: |
|
4932
|
|
|
|
|
|
|
# $tok_kw = a bareword token |
|
4933
|
|
|
|
|
|
|
# $next_nonblank_token = a following ':' being examined |
|
4934
|
|
|
|
|
|
|
# $i_next = the index of the following ':' |
|
4935
|
|
|
|
|
|
|
|
|
4936
|
|
|
|
|
|
|
# We will decide based on if the colon is followed by a bareword |
|
4937
|
|
|
|
|
|
|
# which is not a keyword. Changed inext+1 to inext to fixed case |
|
4938
|
|
|
|
|
|
|
# b1190. |
|
4939
|
37
|
|
|
|
|
62
|
my $sub_attribute_ok_here; |
|
4940
|
37
|
50
|
66
|
|
|
148
|
if ( $is_sub{$tok_kw} |
|
|
|
|
66
|
|
|
|
|
|
4941
|
|
|
|
|
|
|
&& $expecting != OPERATOR |
|
4942
|
|
|
|
|
|
|
&& $next_nonblank_token eq ':' ) |
|
4943
|
|
|
|
|
|
|
{ |
|
4944
|
3
|
|
|
|
|
15
|
my ( $nn_nonblank_token, $i_nn_uu ) = |
|
4945
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i_next, $rtokens, |
|
4946
|
|
|
|
|
|
|
$max_token_index ); |
|
4947
|
|
|
|
|
|
|
$sub_attribute_ok_here = |
|
4948
|
|
|
|
|
|
|
$nn_nonblank_token =~ /^\w/ |
|
4949
|
|
|
|
|
|
|
&& $nn_nonblank_token !~ /^\d/ |
|
4950
|
3
|
|
66
|
|
|
46
|
&& !$is_keyword{$nn_nonblank_token}; |
|
4951
|
|
|
|
|
|
|
} |
|
4952
|
37
|
|
|
|
|
226
|
return $sub_attribute_ok_here; |
|
4953
|
|
|
|
|
|
|
} ## end sub sub_attribute_ok_here |
|
4954
|
|
|
|
|
|
|
|
|
4955
|
44
|
|
|
44
|
|
400
|
use constant DEBUG_BAREWORD => 0; |
|
|
44
|
|
|
|
|
77
|
|
|
|
44
|
|
|
|
|
13070
|
|
|
4956
|
|
|
|
|
|
|
|
|
4957
|
|
|
|
|
|
|
sub saw_bareword_function { |
|
4958
|
957
|
|
|
957
|
0
|
1787
|
my ( $self, $bareword ) = @_; |
|
4959
|
|
|
|
|
|
|
$self->[_rbareword_info_]->{$current_package}->{$bareword} |
|
4960
|
957
|
|
|
|
|
3723
|
->{function_count}++; |
|
4961
|
957
|
|
|
|
|
1702
|
return; |
|
4962
|
|
|
|
|
|
|
} ## end sub saw_bareword_function |
|
4963
|
|
|
|
|
|
|
|
|
4964
|
|
|
|
|
|
|
sub saw_bareword_constant { |
|
4965
|
180
|
|
|
180
|
0
|
345
|
my ( $self, $bareword ) = @_; |
|
4966
|
|
|
|
|
|
|
$self->[_rbareword_info_]->{$current_package}->{$bareword} |
|
4967
|
180
|
|
|
|
|
589
|
->{constant_count}++; |
|
4968
|
180
|
|
|
|
|
319
|
return; |
|
4969
|
|
|
|
|
|
|
} ## end sub saw_bareword_constant |
|
4970
|
|
|
|
|
|
|
|
|
4971
|
|
|
|
|
|
|
sub get_bareword_counts { |
|
4972
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $bareword ) = @_; |
|
4973
|
|
|
|
|
|
|
|
|
4974
|
|
|
|
|
|
|
# Given: |
|
4975
|
|
|
|
|
|
|
# $bareword = a bareword |
|
4976
|
|
|
|
|
|
|
# Return: |
|
4977
|
|
|
|
|
|
|
# $function_count = number of times seen as function taking >0 args |
|
4978
|
|
|
|
|
|
|
# $constant_count = number of times seen as function taking 0 args |
|
4979
|
|
|
|
|
|
|
# Note: |
|
4980
|
|
|
|
|
|
|
# $function_count > 0 implies that a TERM should come next |
|
4981
|
|
|
|
|
|
|
# $constant_count > 0 implies that an OPERATOR **may** come next, |
|
4982
|
|
|
|
|
|
|
# but this can be incorrect if $bareword can take 0 or more args. |
|
4983
|
|
|
|
|
|
|
# This is used to help guess tokenization around unknown barewords. |
|
4984
|
0
|
|
|
|
|
0
|
my $function_count; |
|
4985
|
|
|
|
|
|
|
my $constant_count; |
|
4986
|
0
|
|
|
|
|
0
|
my $rbareword_info_tok = $self->[_rbareword_info_]->{$current_package}; |
|
4987
|
0
|
0
|
|
|
|
0
|
if ($rbareword_info_tok) { |
|
4988
|
0
|
|
|
|
|
0
|
$rbareword_info_tok = $rbareword_info_tok->{$bareword}; |
|
4989
|
0
|
0
|
|
|
|
0
|
if ($rbareword_info_tok) { |
|
4990
|
0
|
|
|
|
|
0
|
$function_count = $rbareword_info_tok->{function_count}; |
|
4991
|
0
|
|
|
|
|
0
|
$constant_count = $rbareword_info_tok->{constant_count}; |
|
4992
|
|
|
|
|
|
|
|
|
4993
|
|
|
|
|
|
|
# a positive function count overrides a constant count |
|
4994
|
0
|
0
|
|
|
|
0
|
if ($function_count) { $constant_count = 0 } |
|
|
0
|
|
|
|
|
0
|
|
|
4995
|
|
|
|
|
|
|
} |
|
4996
|
|
|
|
|
|
|
} |
|
4997
|
0
|
0
|
|
|
|
0
|
if ( !defined($function_count) ) { $function_count = 0 } |
|
|
0
|
|
|
|
|
0
|
|
|
4998
|
0
|
0
|
|
|
|
0
|
if ( !defined($constant_count) ) { $constant_count = 0 } |
|
|
0
|
|
|
|
|
0
|
|
|
4999
|
0
|
|
|
|
|
0
|
return ( $function_count, $constant_count ); |
|
5000
|
|
|
|
|
|
|
} ## end sub get_bareword_counts |
|
5001
|
|
|
|
|
|
|
|
|
5002
|
|
|
|
|
|
|
# hashes used to help determine a bareword type |
|
5003
|
|
|
|
|
|
|
my %is_wiUC; |
|
5004
|
|
|
|
|
|
|
my %is_function_follower; |
|
5005
|
|
|
|
|
|
|
my %is_constant_follower; |
|
5006
|
|
|
|
|
|
|
my %is_use_require_no; |
|
5007
|
|
|
|
|
|
|
|
|
5008
|
|
|
|
|
|
|
BEGIN { |
|
5009
|
44
|
|
|
44
|
|
221
|
my @qz = qw( w i U C ); |
|
5010
|
44
|
|
|
|
|
286
|
$is_wiUC{$_} = 1 for @qz; |
|
5011
|
|
|
|
|
|
|
|
|
5012
|
44
|
|
|
|
|
110
|
@qz = qw( use require no ); |
|
5013
|
44
|
|
|
|
|
172
|
$is_use_require_no{$_} = 1 for @qz; |
|
5014
|
|
|
|
|
|
|
|
|
5015
|
|
|
|
|
|
|
# These pre-token types after a bareword imply that it |
|
5016
|
|
|
|
|
|
|
# is not a constant, except when '(' is followed by ')'. |
|
5017
|
44
|
|
|
|
|
139
|
@qz = qw# ( [ { $ @ " ' m #; |
|
5018
|
44
|
|
|
|
|
301
|
$is_function_follower{$_} = 1 for @qz; |
|
5019
|
|
|
|
|
|
|
|
|
5020
|
|
|
|
|
|
|
# These pre-token types after a bareword imply that it |
|
5021
|
|
|
|
|
|
|
# MIGHT be a constant, but it also might be a function taking |
|
5022
|
|
|
|
|
|
|
# 0 or more call args. |
|
5023
|
44
|
|
|
|
|
110
|
@qz = qw# ; ) ] } if unless #; |
|
5024
|
44
|
|
|
|
|
115
|
push @qz, COMMA; |
|
5025
|
44
|
|
|
|
|
112595
|
$is_constant_follower{$_} = 1 for @qz; |
|
5026
|
|
|
|
|
|
|
} |
|
5027
|
|
|
|
|
|
|
|
|
5028
|
|
|
|
|
|
|
sub do_BAREWORD { |
|
5029
|
|
|
|
|
|
|
|
|
5030
|
6616
|
|
|
6616
|
0
|
10033
|
my ($self) = @_; |
|
5031
|
|
|
|
|
|
|
|
|
5032
|
|
|
|
|
|
|
# handle a bareword token: |
|
5033
|
|
|
|
|
|
|
# returns |
|
5034
|
|
|
|
|
|
|
# true if this token ends the current line |
|
5035
|
|
|
|
|
|
|
# false otherwise |
|
5036
|
|
|
|
|
|
|
|
|
5037
|
6616
|
|
|
|
|
7704
|
my $next_nonblank_token; |
|
5038
|
6616
|
|
|
|
|
8340
|
my $i_next = $i + 1; |
|
5039
|
6616
|
100
|
100
|
|
|
19849
|
if ( $i_next <= $max_token_index && $rtoken_type->[$i_next] eq 'b' ) { |
|
5040
|
4001
|
|
|
|
|
5505
|
$i_next++; |
|
5041
|
|
|
|
|
|
|
} |
|
5042
|
6616
|
100
|
|
|
|
10515
|
if ( $i_next <= $max_token_index ) { |
|
5043
|
6532
|
|
|
|
|
9768
|
$next_nonblank_token = $rtokens->[$i_next]; |
|
5044
|
|
|
|
|
|
|
} |
|
5045
|
|
|
|
|
|
|
else { |
|
5046
|
84
|
|
|
|
|
374
|
( $next_nonblank_token, $i_next ) = |
|
5047
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
|
5048
|
|
|
|
|
|
|
} |
|
5049
|
|
|
|
|
|
|
|
|
5050
|
|
|
|
|
|
|
# Fix for git #182: If --use-feature=class is set, then |
|
5051
|
|
|
|
|
|
|
# a colon between words 'ADJUST' and 'params', and on the same line, |
|
5052
|
|
|
|
|
|
|
# does not form the label 'ADJUST:'. It will get marked as type 'A'. |
|
5053
|
6616
|
|
|
|
|
7742
|
my $is_not_label; |
|
5054
|
6616
|
100
|
66
|
|
|
11515
|
if ( $tok eq 'ADJUST' |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
5055
|
|
|
|
|
|
|
&& $is_code_block_token{$tok} |
|
5056
|
|
|
|
|
|
|
&& $rtokens->[$i_next] eq ':' |
|
5057
|
|
|
|
|
|
|
&& $i_next < $max_token_index ) |
|
5058
|
|
|
|
|
|
|
{ |
|
5059
|
2
|
50
|
|
|
|
9
|
my $i_next2 = |
|
5060
|
|
|
|
|
|
|
$rtoken_type->[ $i_next + 1 ] eq 'b' ? $i_next + 2 : $i_next + 1; |
|
5061
|
2
|
|
33
|
|
|
18
|
$is_not_label = |
|
5062
|
|
|
|
|
|
|
( $i_next2 <= $max_token_index |
|
5063
|
|
|
|
|
|
|
&& $rtoken_type->[$i_next2] eq 'w' |
|
5064
|
|
|
|
|
|
|
&& $rtokens->[$i_next2] eq 'params' ); |
|
5065
|
|
|
|
|
|
|
} |
|
5066
|
|
|
|
|
|
|
|
|
5067
|
|
|
|
|
|
|
# a bare word immediately followed by :: is not a keyword; |
|
5068
|
|
|
|
|
|
|
# use $tok_kw when testing for keywords to avoid a mistake |
|
5069
|
6616
|
|
|
|
|
8602
|
my $tok_kw = $tok; |
|
5070
|
6616
|
100
|
100
|
|
|
14446
|
if ( $rtokens->[ $i + 1 ] eq ':' |
|
5071
|
|
|
|
|
|
|
&& $rtokens->[ $i + 2 ] eq ':' ) |
|
5072
|
|
|
|
|
|
|
{ |
|
5073
|
272
|
|
|
|
|
486
|
$tok_kw .= '::'; |
|
5074
|
|
|
|
|
|
|
} |
|
5075
|
|
|
|
|
|
|
|
|
5076
|
6616
|
100
|
|
|
|
12161
|
if ( $self->[_in_attribute_list_] ) { |
|
5077
|
45
|
|
|
|
|
149
|
my $is_attribute = $self->do_ATTRIBUTE_LIST($next_nonblank_token); |
|
5078
|
45
|
50
|
|
|
|
127
|
return if ($is_attribute); |
|
5079
|
|
|
|
|
|
|
} |
|
5080
|
|
|
|
|
|
|
|
|
5081
|
|
|
|
|
|
|
#----------------------------------------- |
|
5082
|
|
|
|
|
|
|
# Preliminary check for a lexical sub name |
|
5083
|
|
|
|
|
|
|
#----------------------------------------- |
|
5084
|
6571
|
|
|
|
|
7706
|
my $is_lexical_sub_type; |
|
5085
|
|
|
|
|
|
|
|
|
5086
|
|
|
|
|
|
|
# Has this name been seen as a lexical sub? |
|
5087
|
6571
|
50
|
|
|
|
13171
|
if ( my $rseqno_hash = $ris_lexical_sub->{$tok_kw} ) { |
|
5088
|
|
|
|
|
|
|
|
|
5089
|
|
|
|
|
|
|
# Look back up the stack to see if it is still in scope. |
|
5090
|
|
|
|
|
|
|
# Use the deepest we find if there are multiple versions. |
|
5091
|
0
|
|
|
|
|
0
|
my @seqno_tested; |
|
5092
|
0
|
|
|
|
|
0
|
my $cd_aa = $rcurrent_depth->[BRACE]; |
|
5093
|
0
|
|
|
|
|
0
|
foreach my $cd ( reverse( 0 .. $cd_aa ) ) { |
|
5094
|
0
|
0
|
|
|
|
0
|
my $p_seqno = |
|
5095
|
|
|
|
|
|
|
$cd |
|
5096
|
|
|
|
|
|
|
? $rcurrent_sequence_number->[BRACE]->[$cd] |
|
5097
|
|
|
|
|
|
|
: SEQ_ROOT; |
|
5098
|
|
|
|
|
|
|
|
|
5099
|
0
|
|
|
|
|
0
|
push @seqno_tested, $p_seqno; |
|
5100
|
|
|
|
|
|
|
|
|
5101
|
|
|
|
|
|
|
# Lexical subs use their containing sequence number as package |
|
5102
|
0
|
0
|
|
|
|
0
|
if ( my $seqno_brace = $rseqno_hash->{$p_seqno} ) { |
|
5103
|
|
|
|
|
|
|
|
|
5104
|
|
|
|
|
|
|
# This sub is in scope .. lookup its type |
|
5105
|
|
|
|
|
|
|
$is_lexical_sub_type = |
|
5106
|
|
|
|
|
|
|
$ris_constant->{$p_seqno}->{$tok_kw} ? 'C' |
|
5107
|
|
|
|
|
|
|
: $ris_block_function->{$p_seqno}->{$tok_kw} ? 'G' |
|
5108
|
|
|
|
|
|
|
: $ris_block_list_function->{$p_seqno}->{$tok_kw} ? 'G' |
|
5109
|
0
|
0
|
|
|
|
0
|
: $ris_user_function->{$p_seqno}->{$tok_kw} ? 'U' |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5110
|
|
|
|
|
|
|
: 'U'; |
|
5111
|
|
|
|
|
|
|
|
|
5112
|
|
|
|
|
|
|
# But lexical subs do not apply within their defining code |
|
5113
|
0
|
|
|
|
|
0
|
foreach (@seqno_tested) { |
|
5114
|
0
|
0
|
|
|
|
0
|
next if ( $_ != $seqno_brace ); |
|
5115
|
0
|
|
|
|
|
0
|
$is_lexical_sub_type = undef; |
|
5116
|
0
|
|
|
|
|
0
|
last; |
|
5117
|
|
|
|
|
|
|
} |
|
5118
|
|
|
|
|
|
|
|
|
5119
|
0
|
|
|
|
|
0
|
last; |
|
5120
|
|
|
|
|
|
|
} |
|
5121
|
|
|
|
|
|
|
} |
|
5122
|
|
|
|
|
|
|
} |
|
5123
|
|
|
|
|
|
|
|
|
5124
|
|
|
|
|
|
|
#---------------------------------------- |
|
5125
|
|
|
|
|
|
|
# Starting final if-elsif- chain of tests |
|
5126
|
|
|
|
|
|
|
#---------------------------------------- |
|
5127
|
|
|
|
|
|
|
|
|
5128
|
|
|
|
|
|
|
# This is the return flag: |
|
5129
|
|
|
|
|
|
|
# true => this is the last token on the line |
|
5130
|
|
|
|
|
|
|
# false => keep tokenizing the line |
|
5131
|
6571
|
|
|
|
|
7701
|
my $is_last; |
|
5132
|
|
|
|
|
|
|
|
|
5133
|
|
|
|
|
|
|
# The following blocks of code must update these vars: |
|
5134
|
|
|
|
|
|
|
# $type - the final token type, must always be set |
|
5135
|
|
|
|
|
|
|
|
|
5136
|
|
|
|
|
|
|
# In addition, if additional pretokens are added: |
|
5137
|
|
|
|
|
|
|
# $tok - the final token |
|
5138
|
|
|
|
|
|
|
# $i - the index of the last pretoken |
|
5139
|
|
|
|
|
|
|
|
|
5140
|
|
|
|
|
|
|
# They may also need to check and set various flags |
|
5141
|
|
|
|
|
|
|
|
|
5142
|
|
|
|
|
|
|
# Scan a bare word following a -> as an identifier; it could |
|
5143
|
|
|
|
|
|
|
# have a long package name. Fixes c037, c041. |
|
5144
|
6571
|
100
|
100
|
|
|
86220
|
if ( $last_nonblank_token eq '->' ) { |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
|
50
|
100
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
5145
|
786
|
|
|
|
|
2124
|
$self->scan_bare_identifier(); |
|
5146
|
|
|
|
|
|
|
|
|
5147
|
|
|
|
|
|
|
# a bareward after '->' gets type 'i' |
|
5148
|
786
|
|
|
|
|
1127
|
$type = 'i'; |
|
5149
|
|
|
|
|
|
|
} |
|
5150
|
|
|
|
|
|
|
|
|
5151
|
|
|
|
|
|
|
# Quote a word followed by => operator |
|
5152
|
|
|
|
|
|
|
elsif ( |
|
5153
|
|
|
|
|
|
|
( $next_nonblank_token eq '=' && $rtokens->[ $i_next + 1 ] eq '>' ) |
|
5154
|
|
|
|
|
|
|
|
|
5155
|
|
|
|
|
|
|
# unless the word is __END__ or __DATA__ and is the only word on |
|
5156
|
|
|
|
|
|
|
# the line. |
|
5157
|
|
|
|
|
|
|
&& ( !defined( $is_END_DATA{$tok_kw} ) |
|
5158
|
|
|
|
|
|
|
|| $input_line !~ /^\s*__(?:END|DATA)__\s*$/ ) |
|
5159
|
|
|
|
|
|
|
) |
|
5160
|
|
|
|
|
|
|
{ |
|
5161
|
|
|
|
|
|
|
# Bareword followed by a fat comma - see 'git18.in' |
|
5162
|
|
|
|
|
|
|
# This code was previously sub do_QUOTED_BAREWORD: see c316, c317 |
|
5163
|
|
|
|
|
|
|
|
|
5164
|
|
|
|
|
|
|
# Older perl: |
|
5165
|
|
|
|
|
|
|
# 'v25=>1' is a v-string key! |
|
5166
|
|
|
|
|
|
|
# '-v25=>1' is also a v-string key! |
|
5167
|
|
|
|
|
|
|
# Deactivated: this is no longer true; see git #165 |
|
5168
|
812
|
100
|
|
|
|
2456
|
if ( 0 && $tok =~ /^v\d+$/ ) { |
|
5169
|
|
|
|
|
|
|
$type = 'v'; |
|
5170
|
|
|
|
|
|
|
$self->complain("v-string used as hash key\n"); |
|
5171
|
|
|
|
|
|
|
$self->report_v_string($tok); |
|
5172
|
|
|
|
|
|
|
} |
|
5173
|
|
|
|
|
|
|
|
|
5174
|
|
|
|
|
|
|
# If tok is something like 'x17' then it could |
|
5175
|
|
|
|
|
|
|
# actually be operator x followed by number 17. |
|
5176
|
|
|
|
|
|
|
# For example, here: |
|
5177
|
|
|
|
|
|
|
# 123x17 => [ 792, 1224 ], |
|
5178
|
|
|
|
|
|
|
# (a key of 123 repeated 17 times, perhaps not |
|
5179
|
|
|
|
|
|
|
# what was intended). We will mark x17 as type |
|
5180
|
|
|
|
|
|
|
# 'n' and it will be split. If the previous token |
|
5181
|
|
|
|
|
|
|
# was also a bareword then it is not very clear is |
|
5182
|
|
|
|
|
|
|
# going on. In this case we will not be sure that |
|
5183
|
|
|
|
|
|
|
# an operator is expected, so we just mark it as a |
|
5184
|
|
|
|
|
|
|
# bareword. Perl is a little murky in what it does |
|
5185
|
|
|
|
|
|
|
# with stuff like this, and its behavior can change |
|
5186
|
|
|
|
|
|
|
# over time. Something like |
|
5187
|
|
|
|
|
|
|
# a x18 => [792, 1224], will compile as |
|
5188
|
|
|
|
|
|
|
# a key with 18 a's. But something like |
|
5189
|
|
|
|
|
|
|
# push @array, a x18; |
|
5190
|
|
|
|
|
|
|
# is a syntax error. |
|
5191
|
0
|
100
|
33
|
|
|
0
|
elsif ( |
|
|
|
|
66
|
|
|
|
|
|
5192
|
|
|
|
|
|
|
$expecting == OPERATOR |
|
5193
|
|
|
|
|
|
|
&& substr( $tok, 0, 1 ) eq 'x' |
|
5194
|
|
|
|
|
|
|
&& ( length($tok) == 1 |
|
5195
|
|
|
|
|
|
|
|| substr( $tok, 1, 1 ) =~ /^\d/ ) |
|
5196
|
|
|
|
|
|
|
) |
|
5197
|
|
|
|
|
|
|
{ |
|
5198
|
3
|
|
|
|
|
9
|
$type = 'n'; |
|
5199
|
3
|
50
|
|
|
|
11
|
if ( $self->split_pretoken(1) ) { |
|
5200
|
3
|
|
|
|
|
5
|
$type = 'x'; |
|
5201
|
3
|
|
|
|
|
6
|
$tok = 'x'; |
|
5202
|
|
|
|
|
|
|
} |
|
5203
|
3
|
|
|
|
|
12
|
$self->complain("x operator in hash key\n"); |
|
5204
|
|
|
|
|
|
|
} |
|
5205
|
|
|
|
|
|
|
else { |
|
5206
|
|
|
|
|
|
|
|
|
5207
|
|
|
|
|
|
|
# git #18 |
|
5208
|
809
|
|
|
|
|
1157
|
$type = 'w'; |
|
5209
|
809
|
|
|
|
|
1916
|
$self->error_if_expecting_OPERATOR(); |
|
5210
|
|
|
|
|
|
|
} |
|
5211
|
|
|
|
|
|
|
} |
|
5212
|
|
|
|
|
|
|
|
|
5213
|
|
|
|
|
|
|
# quote a bare word within braces..like xxx->{s}; note that we |
|
5214
|
|
|
|
|
|
|
# must be sure this is not a structural brace, to avoid |
|
5215
|
|
|
|
|
|
|
# mistaking {s} in the following for a quoted bare word: |
|
5216
|
|
|
|
|
|
|
# for(@[){s}bla}BLA} |
|
5217
|
|
|
|
|
|
|
# Also treat q in something like var{-q} as a bare word, not |
|
5218
|
|
|
|
|
|
|
# a quote operator |
|
5219
|
|
|
|
|
|
|
elsif ( |
|
5220
|
|
|
|
|
|
|
$next_nonblank_token eq '}' |
|
5221
|
|
|
|
|
|
|
&& ( |
|
5222
|
|
|
|
|
|
|
$last_nonblank_type eq 'L' |
|
5223
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 'm' |
|
5224
|
|
|
|
|
|
|
&& $last_last_nonblank_type eq 'L' ) |
|
5225
|
|
|
|
|
|
|
) |
|
5226
|
|
|
|
|
|
|
) |
|
5227
|
|
|
|
|
|
|
{ |
|
5228
|
138
|
|
|
|
|
253
|
$type = 'w'; |
|
5229
|
|
|
|
|
|
|
} |
|
5230
|
|
|
|
|
|
|
|
|
5231
|
|
|
|
|
|
|
# handle operator x (now we know it isn't $x=) |
|
5232
|
|
|
|
|
|
|
elsif ( |
|
5233
|
|
|
|
|
|
|
$expecting == OPERATOR |
|
5234
|
|
|
|
|
|
|
&& substr( $tok, 0, 1 ) eq 'x' |
|
5235
|
|
|
|
|
|
|
&& ( length($tok) == 1 |
|
5236
|
|
|
|
|
|
|
|| substr( $tok, 1, 1 ) =~ /^\d/ ) |
|
5237
|
|
|
|
|
|
|
) |
|
5238
|
|
|
|
|
|
|
{ |
|
5239
|
17
|
|
|
|
|
79
|
$self->do_X_OPERATOR(); |
|
5240
|
|
|
|
|
|
|
} |
|
5241
|
|
|
|
|
|
|
elsif ( $tok_kw eq 'CORE::' ) { |
|
5242
|
3
|
|
|
|
|
5
|
$type = $tok = $tok_kw; |
|
5243
|
3
|
|
|
|
|
5
|
$i += 2; |
|
5244
|
|
|
|
|
|
|
} |
|
5245
|
|
|
|
|
|
|
elsif ( ( $tok eq 'strict' ) |
|
5246
|
|
|
|
|
|
|
and ( $last_nonblank_token eq 'use' ) ) |
|
5247
|
|
|
|
|
|
|
{ |
|
5248
|
14
|
|
|
|
|
40
|
$self->[_saw_use_strict_] = 1; |
|
5249
|
14
|
|
|
|
|
56
|
$self->scan_bare_identifier(); |
|
5250
|
|
|
|
|
|
|
} |
|
5251
|
|
|
|
|
|
|
|
|
5252
|
|
|
|
|
|
|
elsif ( ( $tok eq 'warnings' ) |
|
5253
|
|
|
|
|
|
|
and ( $last_nonblank_token eq 'use' ) ) |
|
5254
|
|
|
|
|
|
|
{ |
|
5255
|
7
|
|
|
|
|
17
|
$self->[_saw_perl_dash_w_] = 1; |
|
5256
|
|
|
|
|
|
|
|
|
5257
|
|
|
|
|
|
|
# scan as identifier, so that we pick up something like: |
|
5258
|
|
|
|
|
|
|
# use warnings::register |
|
5259
|
7
|
|
|
|
|
20
|
$self->scan_bare_identifier(); |
|
5260
|
|
|
|
|
|
|
} |
|
5261
|
|
|
|
|
|
|
|
|
5262
|
|
|
|
|
|
|
elsif ( |
|
5263
|
|
|
|
|
|
|
$tok eq 'AutoLoader' |
|
5264
|
|
|
|
|
|
|
&& $self->[_look_for_autoloader_] |
|
5265
|
|
|
|
|
|
|
&& ( |
|
5266
|
|
|
|
|
|
|
$last_nonblank_token eq 'use' |
|
5267
|
|
|
|
|
|
|
|
|
5268
|
|
|
|
|
|
|
# these regexes are from AutoSplit.pm, which we want |
|
5269
|
|
|
|
|
|
|
# to mimic |
|
5270
|
|
|
|
|
|
|
|| $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ |
|
5271
|
|
|
|
|
|
|
|| $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ |
|
5272
|
|
|
|
|
|
|
) |
|
5273
|
|
|
|
|
|
|
) |
|
5274
|
|
|
|
|
|
|
{ |
|
5275
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); |
|
5276
|
0
|
|
|
|
|
0
|
$self->[_saw_autoloader_] = 1; |
|
5277
|
0
|
|
|
|
|
0
|
$self->[_look_for_autoloader_] = 0; |
|
5278
|
0
|
|
|
|
|
0
|
$self->scan_bare_identifier(); |
|
5279
|
|
|
|
|
|
|
} |
|
5280
|
|
|
|
|
|
|
|
|
5281
|
|
|
|
|
|
|
elsif ( |
|
5282
|
|
|
|
|
|
|
$tok eq 'SelfLoader' |
|
5283
|
|
|
|
|
|
|
&& $self->[_look_for_selfloader_] |
|
5284
|
|
|
|
|
|
|
&& ( $last_nonblank_token eq 'use' |
|
5285
|
|
|
|
|
|
|
|| $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ |
|
5286
|
|
|
|
|
|
|
|| $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) |
|
5287
|
|
|
|
|
|
|
) |
|
5288
|
|
|
|
|
|
|
{ |
|
5289
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); |
|
5290
|
0
|
|
|
|
|
0
|
$self->[_saw_selfloader_] = 1; |
|
5291
|
0
|
|
|
|
|
0
|
$self->[_look_for_selfloader_] = 0; |
|
5292
|
0
|
|
|
|
|
0
|
$self->scan_bare_identifier(); |
|
5293
|
|
|
|
|
|
|
} |
|
5294
|
|
|
|
|
|
|
|
|
5295
|
|
|
|
|
|
|
elsif ( ( $tok eq 'constant' ) |
|
5296
|
|
|
|
|
|
|
and ( $last_nonblank_token eq 'use' ) ) |
|
5297
|
|
|
|
|
|
|
{ |
|
5298
|
16
|
|
|
|
|
54
|
$self->do_USE_CONSTANT(); |
|
5299
|
|
|
|
|
|
|
} |
|
5300
|
|
|
|
|
|
|
|
|
5301
|
|
|
|
|
|
|
# Lexical sub names override keywords, labels. Based on testing, |
|
5302
|
|
|
|
|
|
|
# this seems to be the correct location for this check. |
|
5303
|
|
|
|
|
|
|
elsif ($is_lexical_sub_type) { |
|
5304
|
0
|
|
|
|
|
0
|
$type = $is_lexical_sub_type; |
|
5305
|
|
|
|
|
|
|
} |
|
5306
|
|
|
|
|
|
|
|
|
5307
|
|
|
|
|
|
|
# various quote operators |
|
5308
|
|
|
|
|
|
|
elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { |
|
5309
|
232
|
|
|
|
|
757
|
$self->do_QUOTE_OPERATOR(); |
|
5310
|
|
|
|
|
|
|
} |
|
5311
|
|
|
|
|
|
|
|
|
5312
|
|
|
|
|
|
|
# check for a statement label |
|
5313
|
|
|
|
|
|
|
elsif ( |
|
5314
|
|
|
|
|
|
|
( $next_nonblank_token eq ':' ) |
|
5315
|
|
|
|
|
|
|
&& !$is_not_label |
|
5316
|
|
|
|
|
|
|
&& ( $rtokens->[ $i_next + 1 ] ne ':' ) |
|
5317
|
|
|
|
|
|
|
&& ( $i_next <= $max_token_index ) # colon on same line |
|
5318
|
|
|
|
|
|
|
|
|
5319
|
|
|
|
|
|
|
# like 'sub : lvalue' ? |
|
5320
|
|
|
|
|
|
|
&& !$self->sub_attribute_ok_here( $tok_kw, $next_nonblank_token, |
|
5321
|
|
|
|
|
|
|
$i_next ) |
|
5322
|
|
|
|
|
|
|
&& new_statement_ok() |
|
5323
|
|
|
|
|
|
|
) |
|
5324
|
|
|
|
|
|
|
{ |
|
5325
|
33
|
100
|
|
|
|
179
|
if ( $tok !~ /[A-Z]/ ) { |
|
5326
|
15
|
|
|
|
|
26
|
push @{ $self->[_rlower_case_labels_at_] }, $input_line_number; |
|
|
15
|
|
|
|
|
45
|
|
|
5327
|
|
|
|
|
|
|
} |
|
5328
|
33
|
|
|
|
|
56
|
$type = 'J'; |
|
5329
|
33
|
|
|
|
|
78
|
$tok .= ':'; |
|
5330
|
33
|
|
|
|
|
53
|
$i = $i_next; |
|
5331
|
|
|
|
|
|
|
} |
|
5332
|
|
|
|
|
|
|
|
|
5333
|
|
|
|
|
|
|
# 'sub' or other sub alias |
|
5334
|
|
|
|
|
|
|
elsif ( $is_sub{$tok_kw} ) { |
|
5335
|
|
|
|
|
|
|
|
|
5336
|
|
|
|
|
|
|
# Guess what to do for unknown word 'method': |
|
5337
|
|
|
|
|
|
|
# Updated for --use-feature=class (rt145706): |
|
5338
|
352
|
100
|
100
|
|
|
1955
|
if ( $tok_kw eq 'method' |
|
|
|
|
100
|
|
|
|
|
|
5339
|
|
|
|
|
|
|
&& $guess_if_method |
|
5340
|
|
|
|
|
|
|
&& !$self->method_ok_here($next_nonblank_token) ) |
|
5341
|
|
|
|
|
|
|
{ |
|
5342
|
7
|
|
|
|
|
23
|
$self->do_UNKNOWN_BAREWORD($next_nonblank_token); |
|
5343
|
|
|
|
|
|
|
} |
|
5344
|
|
|
|
|
|
|
else { |
|
5345
|
345
|
50
|
|
|
|
829
|
$self->error_if_expecting_OPERATOR() |
|
5346
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
|
5347
|
345
|
|
|
|
|
2349
|
initialize_subname(); |
|
5348
|
345
|
|
|
|
|
1092
|
$self->scan_id(); |
|
5349
|
|
|
|
|
|
|
} |
|
5350
|
|
|
|
|
|
|
} |
|
5351
|
|
|
|
|
|
|
|
|
5352
|
|
|
|
|
|
|
# 'package' |
|
5353
|
|
|
|
|
|
|
elsif ( $is_package{$tok_kw} ) { |
|
5354
|
|
|
|
|
|
|
|
|
5355
|
|
|
|
|
|
|
# Update for --use-feature=class (rt145706): |
|
5356
|
|
|
|
|
|
|
# We have to be extra careful because 'class' may be used for other |
|
5357
|
|
|
|
|
|
|
# purposes on older code; i.e. |
|
5358
|
|
|
|
|
|
|
# class($x) - valid sub call |
|
5359
|
|
|
|
|
|
|
# package($x) - error |
|
5360
|
54
|
100
|
|
|
|
175
|
if ( $tok_kw eq 'class' ) { |
|
5361
|
14
|
100
|
66
|
|
|
119
|
if ( $expecting == OPERATOR |
|
|
|
|
100
|
|
|
|
|
|
5362
|
|
|
|
|
|
|
|| $next_nonblank_token !~ /^[\w\:]/ |
|
5363
|
|
|
|
|
|
|
|| !$self->class_ok_here() ) |
|
5364
|
|
|
|
|
|
|
{ |
|
5365
|
4
|
|
|
|
|
12
|
$self->do_UNKNOWN_BAREWORD($next_nonblank_token); |
|
5366
|
|
|
|
|
|
|
} |
|
5367
|
10
|
|
|
|
|
24
|
else { $self->scan_id() } |
|
5368
|
|
|
|
|
|
|
} |
|
5369
|
|
|
|
|
|
|
else { |
|
5370
|
40
|
50
|
|
|
|
106
|
$self->error_if_expecting_OPERATOR() |
|
5371
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
|
5372
|
40
|
|
|
|
|
136
|
$self->scan_id(); |
|
5373
|
|
|
|
|
|
|
} |
|
5374
|
|
|
|
|
|
|
} |
|
5375
|
|
|
|
|
|
|
|
|
5376
|
|
|
|
|
|
|
# Fix for c035: split 'format' from 'is_format_END_DATA' to be |
|
5377
|
|
|
|
|
|
|
# more restrictive. Require a new statement to be ok here. |
|
5378
|
|
|
|
|
|
|
elsif ( $tok_kw eq 'format' && new_statement_ok() ) { |
|
5379
|
1
|
|
|
|
|
2
|
$type = ';'; # make tokenizer look for TERM next |
|
5380
|
1
|
|
|
|
|
3
|
$self->[_in_format_] = 1; |
|
5381
|
1
|
|
|
|
|
2
|
$is_last = 1; ## is last token on this line |
|
5382
|
|
|
|
|
|
|
} |
|
5383
|
|
|
|
|
|
|
|
|
5384
|
|
|
|
|
|
|
# Note on token types for format, __DATA__, __END__: |
|
5385
|
|
|
|
|
|
|
# It simplifies things to give these type ';', so that when we |
|
5386
|
|
|
|
|
|
|
# start rescanning we will be expecting a token of type TERM. |
|
5387
|
|
|
|
|
|
|
# We will switch to type 'k' before outputting the tokens. |
|
5388
|
|
|
|
|
|
|
elsif ( defined( $is_END_DATA{$tok_kw} ) ) { |
|
5389
|
|
|
|
|
|
|
|
|
5390
|
|
|
|
|
|
|
# Warn if this follows an operator expecting a term (c565) |
|
5391
|
8
|
50
|
|
|
|
57
|
$self->error_if_expecting_TERM() |
|
5392
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
|
5393
|
|
|
|
|
|
|
|
|
5394
|
8
|
|
|
|
|
17
|
$type = ';'; # make tokenizer look for TERM next |
|
5395
|
|
|
|
|
|
|
|
|
5396
|
|
|
|
|
|
|
# Remember that we are in one of these three sections |
|
5397
|
8
|
|
|
|
|
25
|
$self->[ $is_END_DATA{$tok_kw} ] = 1; |
|
5398
|
8
|
|
|
|
|
16
|
$is_last = 1; ## is last token on this line |
|
5399
|
|
|
|
|
|
|
} |
|
5400
|
|
|
|
|
|
|
elsif ( $is_keyword{$tok_kw} ) { |
|
5401
|
3060
|
|
|
|
|
8635
|
$self->do_KEYWORD(); |
|
5402
|
|
|
|
|
|
|
} |
|
5403
|
|
|
|
|
|
|
|
|
5404
|
|
|
|
|
|
|
# check for inline label following |
|
5405
|
|
|
|
|
|
|
# /^(redo|last|next|goto)$/ |
|
5406
|
|
|
|
|
|
|
elsif (( $last_nonblank_type eq 'k' ) |
|
5407
|
|
|
|
|
|
|
&& ( $is_redo_last_next_goto{$last_nonblank_token} ) ) |
|
5408
|
|
|
|
|
|
|
{ |
|
5409
|
19
|
|
|
|
|
42
|
$type = 'j'; |
|
5410
|
|
|
|
|
|
|
} |
|
5411
|
|
|
|
|
|
|
|
|
5412
|
|
|
|
|
|
|
# something else -- |
|
5413
|
|
|
|
|
|
|
else { |
|
5414
|
1019
|
|
|
|
|
2980
|
$self->do_UNKNOWN_BAREWORD($next_nonblank_token); |
|
5415
|
|
|
|
|
|
|
} |
|
5416
|
|
|
|
|
|
|
|
|
5417
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
|
5418
|
|
|
|
|
|
|
# Save info for use in later guessing. Even for types 'i' and 'U' |
|
5419
|
|
|
|
|
|
|
# because those may be marked as type 'w' (barewords) elsewhere. |
|
5420
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
|
5421
|
6571
|
100
|
100
|
|
|
22055
|
if ( $is_wiUC{$type} |
|
|
|
|
100
|
|
|
|
|
|
5422
|
|
|
|
|
|
|
&& $statement_type ne 'use' |
|
5423
|
|
|
|
|
|
|
&& $statement_type ne '_use' ) |
|
5424
|
|
|
|
|
|
|
{ |
|
5425
|
2626
|
|
|
|
|
3678
|
my $result = "unknown"; |
|
5426
|
|
|
|
|
|
|
|
|
5427
|
|
|
|
|
|
|
# Words are marked 'function' if they appear in a role which |
|
5428
|
|
|
|
|
|
|
# is not consistent with a constant value. Typically they are |
|
5429
|
|
|
|
|
|
|
# function calls. |
|
5430
|
2626
|
100
|
100
|
|
|
11941
|
if ( $type eq 'U' |
|
|
|
100
|
66
|
|
|
|
|
|
5431
|
|
|
|
|
|
|
|| $is_function_follower{$next_nonblank_token} ) |
|
5432
|
|
|
|
|
|
|
{ |
|
5433
|
|
|
|
|
|
|
|
|
5434
|
1015
|
|
|
|
|
1556
|
my $empty_parens = 0; |
|
5435
|
1015
|
100
|
100
|
|
|
3403
|
if ( $next_nonblank_token eq '(' && $i_next < $max_token_index ) |
|
5436
|
|
|
|
|
|
|
{ |
|
5437
|
592
|
|
|
|
|
1173
|
my $tok_next_p1 = $rtokens->[ $i_next + 1 ]; |
|
5438
|
592
|
100
|
100
|
|
|
2329
|
if ( substr( $tok_next_p1, 0, 1 ) eq SPACE |
|
5439
|
|
|
|
|
|
|
&& $i_next + 2 <= $max_token_index ) |
|
5440
|
|
|
|
|
|
|
{ |
|
5441
|
282
|
|
|
|
|
576
|
$tok_next_p1 = $rtokens->[ $i_next + 2 ]; |
|
5442
|
|
|
|
|
|
|
} |
|
5443
|
592
|
|
|
|
|
1055
|
$empty_parens = $tok_next_p1 eq ')'; |
|
5444
|
|
|
|
|
|
|
} |
|
5445
|
|
|
|
|
|
|
|
|
5446
|
1015
|
100
|
|
|
|
2158
|
if ( !$empty_parens ) { |
|
5447
|
|
|
|
|
|
|
|
|
5448
|
|
|
|
|
|
|
# not a constant term - probably a function |
|
5449
|
957
|
|
|
|
|
1418
|
$result = "function"; |
|
5450
|
957
|
|
|
|
|
2420
|
$self->saw_bareword_function($tok); |
|
5451
|
|
|
|
|
|
|
} |
|
5452
|
|
|
|
|
|
|
} |
|
5453
|
|
|
|
|
|
|
|
|
5454
|
|
|
|
|
|
|
# Words are marked 'constant' if they appear in a role |
|
5455
|
|
|
|
|
|
|
# consistent with a constant value. However, they may simply |
|
5456
|
|
|
|
|
|
|
# be functions which optionally take zero args. So if a word |
|
5457
|
|
|
|
|
|
|
# appears as both constant and function, it is not a constant. |
|
5458
|
|
|
|
|
|
|
elsif ($type eq 'C' |
|
5459
|
|
|
|
|
|
|
|| $is_constant_follower{$next_nonblank_token} ) |
|
5460
|
|
|
|
|
|
|
{ |
|
5461
|
|
|
|
|
|
|
|
|
5462
|
443
|
|
100
|
|
|
1780
|
my $is_hash_key = $next_nonblank_token eq '}' |
|
5463
|
|
|
|
|
|
|
&& ( |
|
5464
|
|
|
|
|
|
|
$last_nonblank_type eq 'L' |
|
5465
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 'm' |
|
5466
|
|
|
|
|
|
|
&& $last_last_nonblank_type eq 'L' ) |
|
5467
|
|
|
|
|
|
|
); |
|
5468
|
|
|
|
|
|
|
|
|
5469
|
443
|
100
|
100
|
|
|
2440
|
if ( |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
5470
|
|
|
|
|
|
|
|
|
5471
|
|
|
|
|
|
|
# not a hash key like {bareword} or {-bareword} |
|
5472
|
|
|
|
|
|
|
!$is_hash_key |
|
5473
|
|
|
|
|
|
|
|
|
5474
|
|
|
|
|
|
|
# not a package name, etc |
|
5475
|
|
|
|
|
|
|
&& ( $last_nonblank_type ne 'k' |
|
5476
|
|
|
|
|
|
|
|| !$is_use_require_no{$last_nonblank_token} ) |
|
5477
|
|
|
|
|
|
|
|
|
5478
|
|
|
|
|
|
|
# skip arrow calls, which can go either way |
|
5479
|
|
|
|
|
|
|
&& $last_nonblank_token ne '->' |
|
5480
|
|
|
|
|
|
|
) |
|
5481
|
|
|
|
|
|
|
{ |
|
5482
|
|
|
|
|
|
|
# possibly a constant or constant function |
|
5483
|
180
|
|
|
|
|
260
|
$result = "constant"; |
|
5484
|
180
|
|
|
|
|
525
|
$self->saw_bareword_constant($tok); |
|
5485
|
|
|
|
|
|
|
} |
|
5486
|
|
|
|
|
|
|
else { |
|
5487
|
263
|
|
|
|
|
455
|
$result = "other bareword"; |
|
5488
|
|
|
|
|
|
|
} |
|
5489
|
|
|
|
|
|
|
} |
|
5490
|
|
|
|
|
|
|
else { |
|
5491
|
|
|
|
|
|
|
} |
|
5492
|
|
|
|
|
|
|
|
|
5493
|
2626
|
|
|
|
|
3412
|
if ( DEBUG_BAREWORD && $result ne 'other bareword' ) { |
|
5494
|
|
|
|
|
|
|
print |
|
5495
|
|
|
|
|
|
|
"$input_line_number: $result: $tok: type=$type : last_tok=$last_nonblank_token : next_tok='$next_nonblank_token'\n"; |
|
5496
|
|
|
|
|
|
|
} |
|
5497
|
|
|
|
|
|
|
} |
|
5498
|
6571
|
|
|
|
|
11808
|
return $is_last; |
|
5499
|
|
|
|
|
|
|
|
|
5500
|
|
|
|
|
|
|
} ## end sub do_BAREWORD |
|
5501
|
|
|
|
|
|
|
|
|
5502
|
|
|
|
|
|
|
# Table of quote types checked for interpolated here targets. |
|
5503
|
|
|
|
|
|
|
# Issue 310 has extensive test cases. |
|
5504
|
|
|
|
|
|
|
my %is_interpolated_quote = ( |
|
5505
|
|
|
|
|
|
|
q{'} => 0, |
|
5506
|
|
|
|
|
|
|
q{`} => 1, |
|
5507
|
|
|
|
|
|
|
q{"} => 1, |
|
5508
|
|
|
|
|
|
|
qq => 1, |
|
5509
|
|
|
|
|
|
|
qx => 1, |
|
5510
|
|
|
|
|
|
|
m => 1, |
|
5511
|
|
|
|
|
|
|
qr => 1, |
|
5512
|
|
|
|
|
|
|
q => 0, |
|
5513
|
|
|
|
|
|
|
qw => 0, |
|
5514
|
|
|
|
|
|
|
s => 1, |
|
5515
|
|
|
|
|
|
|
y => 0, |
|
5516
|
|
|
|
|
|
|
tr => 0, |
|
5517
|
|
|
|
|
|
|
); |
|
5518
|
|
|
|
|
|
|
|
|
5519
|
|
|
|
|
|
|
sub push_here_targets { |
|
5520
|
2
|
|
|
2
|
0
|
4
|
my ($rht) = @_; |
|
5521
|
|
|
|
|
|
|
|
|
5522
|
|
|
|
|
|
|
# Push here targets found in a quote onto the here target list |
|
5523
|
2
|
|
|
|
|
2
|
push @{$rhere_target_list}, @{$rht}; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
4
|
|
|
5524
|
|
|
|
|
|
|
|
|
5525
|
|
|
|
|
|
|
# Change type from 'Q' to 'h' for quotes with here-doc targets so that |
|
5526
|
|
|
|
|
|
|
# the formatter (see sub process_line_of_CODE) will not make any line |
|
5527
|
|
|
|
|
|
|
# breaks after this point. |
|
5528
|
2
|
|
|
|
|
3
|
$type = 'h'; |
|
5529
|
2
|
100
|
|
|
|
6
|
if ( $i_tok < 0 ) { |
|
5530
|
1
|
|
|
|
|
2
|
my $ilast = $routput_token_list->[-1]; |
|
5531
|
1
|
|
|
|
|
2
|
$routput_token_type->[$ilast] = $type; |
|
5532
|
|
|
|
|
|
|
} |
|
5533
|
2
|
|
|
|
|
4
|
return; |
|
5534
|
|
|
|
|
|
|
} ## end sub push_here_targets |
|
5535
|
|
|
|
|
|
|
|
|
5536
|
|
|
|
|
|
|
sub do_FOLLOW_QUOTE { |
|
5537
|
|
|
|
|
|
|
|
|
5538
|
3170
|
|
|
3170
|
0
|
3883
|
my $self = shift; |
|
5539
|
|
|
|
|
|
|
|
|
5540
|
|
|
|
|
|
|
# Continue following a quote on a new line |
|
5541
|
3170
|
|
|
|
|
3945
|
$type = $quote_type; |
|
5542
|
|
|
|
|
|
|
|
|
5543
|
|
|
|
|
|
|
# initialize if continuation line |
|
5544
|
3170
|
100
|
|
|
|
3514
|
if ( !@{$routput_token_list} ) { |
|
|
3170
|
|
|
|
|
5938
|
|
|
5545
|
245
|
|
|
|
|
337
|
push( @{$routput_token_list}, $i ); |
|
|
245
|
|
|
|
|
381
|
|
|
5546
|
245
|
|
|
|
|
425
|
$routput_token_type->[$i] = $type; |
|
5547
|
|
|
|
|
|
|
} |
|
5548
|
|
|
|
|
|
|
|
|
5549
|
|
|
|
|
|
|
# Save starting lengths for here target search |
|
5550
|
3170
|
|
|
|
|
4360
|
my $len_qs1 = length($quoted_string_1); |
|
5551
|
3170
|
|
|
|
|
3768
|
my $len_qs2 = length($quoted_string_2); |
|
5552
|
3170
|
|
|
|
|
3917
|
my $in_quote_start = $in_quote; |
|
5553
|
|
|
|
|
|
|
|
|
5554
|
|
|
|
|
|
|
# scan for the end of the quote or pattern |
|
5555
|
|
|
|
|
|
|
( |
|
5556
|
3170
|
|
|
|
|
8459
|
$i, |
|
5557
|
|
|
|
|
|
|
$in_quote, |
|
5558
|
|
|
|
|
|
|
$quote_character, |
|
5559
|
|
|
|
|
|
|
$quote_pos, |
|
5560
|
|
|
|
|
|
|
$quote_depth, |
|
5561
|
|
|
|
|
|
|
$quoted_string_1, |
|
5562
|
|
|
|
|
|
|
$quoted_string_2, |
|
5563
|
|
|
|
|
|
|
|
|
5564
|
|
|
|
|
|
|
) = $self->do_quote( |
|
5565
|
|
|
|
|
|
|
|
|
5566
|
|
|
|
|
|
|
$i, |
|
5567
|
|
|
|
|
|
|
$in_quote, |
|
5568
|
|
|
|
|
|
|
$quote_character, |
|
5569
|
|
|
|
|
|
|
$quote_pos, |
|
5570
|
|
|
|
|
|
|
$quote_depth, |
|
5571
|
|
|
|
|
|
|
$quoted_string_1, |
|
5572
|
|
|
|
|
|
|
$quoted_string_2, |
|
5573
|
|
|
|
|
|
|
$rtokens, |
|
5574
|
|
|
|
|
|
|
$rtoken_type, |
|
5575
|
|
|
|
|
|
|
$rtoken_map, |
|
5576
|
|
|
|
|
|
|
$max_token_index, |
|
5577
|
|
|
|
|
|
|
|
|
5578
|
|
|
|
|
|
|
); |
|
5579
|
|
|
|
|
|
|
|
|
5580
|
|
|
|
|
|
|
# Save pattern and replacement text for rescanning for /e |
|
5581
|
3170
|
|
|
|
|
5391
|
my $qs1_for_e_scan = $quoted_string_1; |
|
5582
|
|
|
|
|
|
|
|
|
5583
|
|
|
|
|
|
|
# Check for possible here targets in an interpolated quote |
|
5584
|
3170
|
100
|
100
|
|
|
9302
|
if ( $is_interpolated_quote{$quote_starting_tok} |
|
5585
|
|
|
|
|
|
|
&& $in_quote < $in_quote_start ) |
|
5586
|
|
|
|
|
|
|
{ |
|
5587
|
|
|
|
|
|
|
|
|
5588
|
|
|
|
|
|
|
# post any saved target of a 2-part quote if the end is reached |
|
5589
|
1431
|
100
|
100
|
|
|
4553
|
if ( !$in_quote && defined($quote_here_target_2) ) { |
|
5590
|
|
|
|
|
|
|
|
|
5591
|
|
|
|
|
|
|
# Safety check |
|
5592
|
1
|
50
|
|
|
|
4
|
if ( $quote_items{$quote_starting_tok} == 2 ) { |
|
5593
|
1
|
|
|
|
|
3
|
push_here_targets($quote_here_target_2); |
|
5594
|
|
|
|
|
|
|
} |
|
5595
|
|
|
|
|
|
|
else { |
|
5596
|
0
|
|
|
|
|
0
|
DEVEL_MODE |
|
5597
|
|
|
|
|
|
|
&& Fault( |
|
5598
|
|
|
|
|
|
|
"unexpected saved here target near line $input_line_number\n" |
|
5599
|
|
|
|
|
|
|
); |
|
5600
|
|
|
|
|
|
|
} |
|
5601
|
1
|
|
|
|
|
2
|
$quote_here_target_2 = undef; |
|
5602
|
|
|
|
|
|
|
} |
|
5603
|
|
|
|
|
|
|
|
|
5604
|
|
|
|
|
|
|
# Single part quotes: use $quoted_string_1, and |
|
5605
|
|
|
|
|
|
|
# $in_quote drops from 1 to 0 when the end is found |
|
5606
|
|
|
|
|
|
|
# Dual part quotes ('s'): first part is in $quoted_string_2, and |
|
5607
|
|
|
|
|
|
|
# $in_quote: |
|
5608
|
|
|
|
|
|
|
# drops from 2 to 1 when the the first part is found |
|
5609
|
|
|
|
|
|
|
# drops 1 to 0 when the the second part is found |
|
5610
|
|
|
|
|
|
|
# drops from 2 to 0 if both parts are found in this call |
|
5611
|
|
|
|
|
|
|
# The tricky part is that we must search for here targets whenever |
|
5612
|
|
|
|
|
|
|
# $in_quote drops, but we can only post here targets after the end |
|
5613
|
|
|
|
|
|
|
# of the last part is found (in_quote==0). See test 'here4.in'. |
|
5614
|
|
|
|
|
|
|
# Update c310 added interpolated here docs and has many test cases. |
|
5615
|
|
|
|
|
|
|
|
|
5616
|
|
|
|
|
|
|
# Initialize for the normal case of a single quote |
|
5617
|
1431
|
|
|
|
|
2052
|
my $qs = $quoted_string_1; |
|
5618
|
1431
|
|
|
|
|
1834
|
my $len_qs = $len_qs1; |
|
5619
|
1431
|
|
|
|
|
2119
|
my $num_quotes = $in_quote_start - $in_quote; |
|
5620
|
|
|
|
|
|
|
|
|
5621
|
|
|
|
|
|
|
# Dual part quotes (type 's') have first part in $quoted_string_2 |
|
5622
|
1431
|
100
|
|
|
|
2574
|
if ( $in_quote_start == 2 ) { |
|
5623
|
31
|
|
|
|
|
49
|
$qs = $quoted_string_2; |
|
5624
|
31
|
|
|
|
|
61
|
$len_qs = $len_qs2; |
|
5625
|
|
|
|
|
|
|
} |
|
5626
|
|
|
|
|
|
|
|
|
5627
|
|
|
|
|
|
|
# Loop to search 1 or 2 quotes for here targets |
|
5628
|
1431
|
|
|
|
|
3301
|
foreach ( 1 .. $num_quotes ) { |
|
5629
|
|
|
|
|
|
|
|
|
5630
|
|
|
|
|
|
|
# Perform quick tests to avoid a sub call: |
|
5631
|
1460
|
|
|
|
|
2870
|
my $pos_shift = rindex( $qs, '<<' ); |
|
5632
|
1460
|
50
|
100
|
|
|
3329
|
if ( |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
5633
|
|
|
|
|
|
|
|
|
5634
|
|
|
|
|
|
|
# '<<' in the last line |
|
5635
|
|
|
|
|
|
|
$pos_shift >= $len_qs |
|
5636
|
|
|
|
|
|
|
|
|
5637
|
|
|
|
|
|
|
# followed by a '}' |
|
5638
|
|
|
|
|
|
|
&& rindex( $qs, '}' ) > $pos_shift |
|
5639
|
|
|
|
|
|
|
|
|
5640
|
|
|
|
|
|
|
# preceded by '$' or '@' |
|
5641
|
|
|
|
|
|
|
&& ( rindex( $qs, '$', $pos_shift ) >= 0 |
|
5642
|
|
|
|
|
|
|
|| rindex( $qs, '@', $pos_shift ) >= 0 ) |
|
5643
|
|
|
|
|
|
|
) |
|
5644
|
|
|
|
|
|
|
{ |
|
5645
|
|
|
|
|
|
|
|
|
5646
|
|
|
|
|
|
|
# scan the quote for here targets |
|
5647
|
2
|
|
|
|
|
9
|
my ( $rht, $qs_mod ) = |
|
5648
|
|
|
|
|
|
|
$self->find_interpolated_here_targets( $qs, $len_qs ); |
|
5649
|
2
|
50
|
|
|
|
8
|
if ($rht) { |
|
5650
|
|
|
|
|
|
|
|
|
5651
|
|
|
|
|
|
|
# only post here targets when end of quote is found |
|
5652
|
2
|
100
|
|
|
|
5
|
if ($in_quote) { |
|
5653
|
1
|
|
|
|
|
2
|
$quote_here_target_2 = $rht; |
|
5654
|
|
|
|
|
|
|
} |
|
5655
|
|
|
|
|
|
|
else { |
|
5656
|
1
|
|
|
|
|
5
|
push_here_targets($rht); |
|
5657
|
|
|
|
|
|
|
|
|
5658
|
|
|
|
|
|
|
# Replace the string with the modified version |
|
5659
|
|
|
|
|
|
|
# in case it is re-scanned due to a /e modifier |
|
5660
|
1
|
|
|
|
|
2
|
$qs1_for_e_scan = $qs_mod; |
|
5661
|
|
|
|
|
|
|
} |
|
5662
|
|
|
|
|
|
|
} |
|
5663
|
|
|
|
|
|
|
} |
|
5664
|
|
|
|
|
|
|
|
|
5665
|
|
|
|
|
|
|
# re-initialize for next pass |
|
5666
|
1460
|
|
|
|
|
1922
|
$qs = $quoted_string_1; |
|
5667
|
1460
|
|
|
|
|
2637
|
$len_qs = $len_qs1; |
|
5668
|
|
|
|
|
|
|
} ## end while ( $num_quotes-- > 0) |
|
5669
|
|
|
|
|
|
|
} |
|
5670
|
|
|
|
|
|
|
|
|
5671
|
3170
|
100
|
|
|
|
5352
|
if ($in_quote) { return } |
|
|
244
|
|
|
|
|
427
|
|
|
5672
|
|
|
|
|
|
|
|
|
5673
|
|
|
|
|
|
|
# All done with this quote... |
|
5674
|
|
|
|
|
|
|
|
|
5675
|
|
|
|
|
|
|
# re-initialize for next search |
|
5676
|
2926
|
|
|
|
|
3677
|
$quote_character = EMPTY_STRING; |
|
5677
|
2926
|
|
|
|
|
3482
|
$quote_pos = 0; |
|
5678
|
2926
|
|
|
|
|
3526
|
$quote_type = 'Q'; |
|
5679
|
2926
|
|
|
|
|
3502
|
$quoted_string_1 = EMPTY_STRING; |
|
5680
|
2926
|
|
|
|
|
3457
|
$quoted_string_2 = EMPTY_STRING; |
|
5681
|
2926
|
100
|
|
|
|
5050
|
if ( ++$i > $max_token_index ) { return } |
|
|
126
|
|
|
|
|
256
|
|
|
5682
|
|
|
|
|
|
|
|
|
5683
|
|
|
|
|
|
|
# look for any modifiers |
|
5684
|
2800
|
100
|
|
|
|
4789
|
if ($allowed_quote_modifiers) { |
|
5685
|
|
|
|
|
|
|
|
|
5686
|
|
|
|
|
|
|
# check for exact quote modifiers |
|
5687
|
162
|
100
|
|
|
|
679
|
if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) { |
|
5688
|
31
|
|
|
|
|
53
|
my $str = $rtokens->[$i]; |
|
5689
|
31
|
|
|
|
|
47
|
my $saw_modifier_e; |
|
5690
|
31
|
|
|
|
|
731
|
while ( $str =~ /\G$allowed_quote_modifiers/gc ) { |
|
5691
|
49
|
|
|
|
|
82
|
my $pos = pos($str); |
|
5692
|
49
|
|
|
|
|
94
|
my $char = substr( $str, $pos - 1, 1 ); |
|
5693
|
49
|
|
100
|
|
|
255
|
$saw_modifier_e ||= ( $char eq 'e' ); |
|
5694
|
|
|
|
|
|
|
} |
|
5695
|
|
|
|
|
|
|
|
|
5696
|
|
|
|
|
|
|
# For an 'e' quote modifier we must scan the replacement |
|
5697
|
|
|
|
|
|
|
# text for here-doc targets... |
|
5698
|
|
|
|
|
|
|
# but if the modifier starts a new line we must skip |
|
5699
|
|
|
|
|
|
|
# this because either the here doc will be fully |
|
5700
|
|
|
|
|
|
|
# contained in the replacement text (so we can |
|
5701
|
|
|
|
|
|
|
# ignore it) or Perl will not find it. The modifier will have a |
|
5702
|
|
|
|
|
|
|
# pretoken index $i=1 if it starts a new line, so we only look |
|
5703
|
|
|
|
|
|
|
# for a here doc if $i>1. See test 'here2.in'. |
|
5704
|
31
|
50
|
66
|
|
|
93
|
if ( $saw_modifier_e && $i > 1 ) { |
|
5705
|
0
|
|
|
|
|
0
|
my $rht = $self->scan_replacement_text($qs1_for_e_scan); |
|
5706
|
0
|
0
|
|
|
|
0
|
if ($rht) { |
|
5707
|
0
|
|
|
|
|
0
|
push_here_targets($rht); |
|
5708
|
|
|
|
|
|
|
} |
|
5709
|
|
|
|
|
|
|
} |
|
5710
|
|
|
|
|
|
|
|
|
5711
|
31
|
50
|
|
|
|
70
|
if ( defined( pos($str) ) ) { |
|
5712
|
|
|
|
|
|
|
|
|
5713
|
|
|
|
|
|
|
# matched |
|
5714
|
31
|
50
|
|
|
|
78
|
if ( pos($str) == length($str) ) { |
|
5715
|
31
|
50
|
|
|
|
94
|
if ( ++$i > $max_token_index ) { return } |
|
|
0
|
|
|
|
|
0
|
|
|
5716
|
|
|
|
|
|
|
} |
|
5717
|
|
|
|
|
|
|
|
|
5718
|
|
|
|
|
|
|
# Looks like a joined quote modifier |
|
5719
|
|
|
|
|
|
|
# and keyword, maybe something like |
|
5720
|
|
|
|
|
|
|
# s/xxx/yyy/gefor @k=... |
|
5721
|
|
|
|
|
|
|
# Example is "galgen.pl". Would have to split |
|
5722
|
|
|
|
|
|
|
# the word and insert a new token in the |
|
5723
|
|
|
|
|
|
|
# pre-token list. This is so rare that I haven't |
|
5724
|
|
|
|
|
|
|
# done it. Will just issue a warning citation. |
|
5725
|
|
|
|
|
|
|
|
|
5726
|
|
|
|
|
|
|
# This error might also be triggered if my quote |
|
5727
|
|
|
|
|
|
|
# modifier characters are incomplete |
|
5728
|
|
|
|
|
|
|
else { |
|
5729
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
|
5730
|
|
|
|
|
|
|
|
|
5731
|
|
|
|
|
|
|
Partial match to quote modifier $allowed_quote_modifiers at word: '$str' |
|
5732
|
|
|
|
|
|
|
Please put a space between quote modifiers and trailing keywords. |
|
5733
|
|
|
|
|
|
|
EOM |
|
5734
|
|
|
|
|
|
|
|
|
5735
|
|
|
|
|
|
|
# print "token $rtokens->[$i]\n"; |
|
5736
|
|
|
|
|
|
|
# my $num = length($str) - pos($str); |
|
5737
|
|
|
|
|
|
|
# $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num); |
|
5738
|
|
|
|
|
|
|
# print "continuing with new token $rtokens->[$i]\n"; |
|
5739
|
|
|
|
|
|
|
|
|
5740
|
|
|
|
|
|
|
# skipping past this token does least damage |
|
5741
|
0
|
0
|
|
|
|
0
|
if ( ++$i > $max_token_index ) { return } |
|
|
0
|
|
|
|
|
0
|
|
|
5742
|
|
|
|
|
|
|
} |
|
5743
|
|
|
|
|
|
|
} |
|
5744
|
|
|
|
|
|
|
else { |
|
5745
|
|
|
|
|
|
|
|
|
5746
|
|
|
|
|
|
|
# example file: rokicki4.pl |
|
5747
|
|
|
|
|
|
|
# This error might also be triggered if my quote |
|
5748
|
|
|
|
|
|
|
# modifier characters are incomplete |
|
5749
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
|
5750
|
|
|
|
|
|
|
"Note: found word $str at quote modifier location\n"); |
|
5751
|
|
|
|
|
|
|
} |
|
5752
|
|
|
|
|
|
|
} |
|
5753
|
|
|
|
|
|
|
|
|
5754
|
|
|
|
|
|
|
# re-initialize |
|
5755
|
162
|
|
|
|
|
261
|
$allowed_quote_modifiers = EMPTY_STRING; |
|
5756
|
|
|
|
|
|
|
} |
|
5757
|
2800
|
|
|
|
|
4041
|
return; |
|
5758
|
|
|
|
|
|
|
} ## end sub do_FOLLOW_QUOTE |
|
5759
|
|
|
|
|
|
|
|
|
5760
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
|
5761
|
|
|
|
|
|
|
# begin hash of code for handling most token types |
|
5762
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
|
5763
|
|
|
|
|
|
|
my $tokenization_code = { |
|
5764
|
|
|
|
|
|
|
|
|
5765
|
|
|
|
|
|
|
'$' => \&do_DOLLAR_SIGN, |
|
5766
|
|
|
|
|
|
|
'(' => \&do_LEFT_PARENTHESIS, |
|
5767
|
|
|
|
|
|
|
')' => \&do_RIGHT_PARENTHESIS, |
|
5768
|
|
|
|
|
|
|
',' => \&do_COMMA, |
|
5769
|
|
|
|
|
|
|
';' => \&do_SEMICOLON, |
|
5770
|
|
|
|
|
|
|
'"' => \&do_QUOTATION_MARK, |
|
5771
|
|
|
|
|
|
|
"'" => \&do_APOSTROPHE, |
|
5772
|
|
|
|
|
|
|
'`' => \&do_BACKTICK, |
|
5773
|
|
|
|
|
|
|
'/' => \&do_SLASH, |
|
5774
|
|
|
|
|
|
|
'{' => \&do_LEFT_CURLY_BRACKET, |
|
5775
|
|
|
|
|
|
|
'}' => \&do_RIGHT_CURLY_BRACKET, |
|
5776
|
|
|
|
|
|
|
'&' => \&do_AMPERSAND, |
|
5777
|
|
|
|
|
|
|
'<' => \&do_LESS_THAN_SIGN, |
|
5778
|
|
|
|
|
|
|
'?' => \&do_QUESTION_MARK, |
|
5779
|
|
|
|
|
|
|
'*' => \&do_STAR, |
|
5780
|
|
|
|
|
|
|
'.' => \&do_DOT, |
|
5781
|
|
|
|
|
|
|
':' => \&do_COLON, |
|
5782
|
|
|
|
|
|
|
'+' => \&do_PLUS_SIGN, |
|
5783
|
|
|
|
|
|
|
'@' => \&do_AT_SIGN, |
|
5784
|
|
|
|
|
|
|
'%' => \&do_PERCENT_SIGN, |
|
5785
|
|
|
|
|
|
|
'[' => \&do_LEFT_SQUARE_BRACKET, |
|
5786
|
|
|
|
|
|
|
']' => \&do_RIGHT_SQUARE_BRACKET, |
|
5787
|
|
|
|
|
|
|
'-' => \&do_MINUS_SIGN, |
|
5788
|
|
|
|
|
|
|
'^' => \&do_CARAT_SIGN, |
|
5789
|
|
|
|
|
|
|
'::' => \&do_DOUBLE_COLON, |
|
5790
|
|
|
|
|
|
|
'<<' => \&do_LEFT_SHIFT, |
|
5791
|
|
|
|
|
|
|
'<<~' => \&do_NEW_HERE_DOC, |
|
5792
|
|
|
|
|
|
|
'->' => \&do_POINTER, |
|
5793
|
|
|
|
|
|
|
'++' => \&do_PLUS_PLUS, |
|
5794
|
|
|
|
|
|
|
'=>' => \&do_FAT_COMMA, |
|
5795
|
|
|
|
|
|
|
'--' => \&do_MINUS_MINUS, |
|
5796
|
|
|
|
|
|
|
|
|
5797
|
|
|
|
|
|
|
# No special code for these types yet, but syntax checks |
|
5798
|
|
|
|
|
|
|
# could be added. |
|
5799
|
|
|
|
|
|
|
## '&&' => \&do_LOGICAL_AND, |
|
5800
|
|
|
|
|
|
|
## '||' => \&do_LOGICAL_OR, |
|
5801
|
|
|
|
|
|
|
## '>' => \&do_GREATER_THAN_SIGN, |
|
5802
|
|
|
|
|
|
|
## '|' => \&do_VERTICAL_LINE, |
|
5803
|
|
|
|
|
|
|
## '//' => \&do_SLASH_SLASH, |
|
5804
|
|
|
|
|
|
|
## '!' => undef, |
|
5805
|
|
|
|
|
|
|
## '!=' => undef, |
|
5806
|
|
|
|
|
|
|
## '!~' => undef, |
|
5807
|
|
|
|
|
|
|
## '%=' => undef, |
|
5808
|
|
|
|
|
|
|
## '&&=' => undef, |
|
5809
|
|
|
|
|
|
|
## '&=' => undef, |
|
5810
|
|
|
|
|
|
|
## '+=' => undef, |
|
5811
|
|
|
|
|
|
|
## '-=' => undef, |
|
5812
|
|
|
|
|
|
|
## '..' => undef, |
|
5813
|
|
|
|
|
|
|
## '..' => undef, |
|
5814
|
|
|
|
|
|
|
## '...' => undef, |
|
5815
|
|
|
|
|
|
|
## '.=' => undef, |
|
5816
|
|
|
|
|
|
|
## '<<=' => undef, |
|
5817
|
|
|
|
|
|
|
## '<=' => undef, |
|
5818
|
|
|
|
|
|
|
## '<=>' => undef, |
|
5819
|
|
|
|
|
|
|
## '<>' => undef, |
|
5820
|
|
|
|
|
|
|
## '=' => undef, |
|
5821
|
|
|
|
|
|
|
## '==' => undef, |
|
5822
|
|
|
|
|
|
|
## '=~' => undef, |
|
5823
|
|
|
|
|
|
|
## '>=' => undef, |
|
5824
|
|
|
|
|
|
|
## '>>' => undef, |
|
5825
|
|
|
|
|
|
|
## '>>=' => undef, |
|
5826
|
|
|
|
|
|
|
## '\\' => undef, |
|
5827
|
|
|
|
|
|
|
## '^=' => undef, |
|
5828
|
|
|
|
|
|
|
## '|=' => undef, |
|
5829
|
|
|
|
|
|
|
## '||=' => undef, |
|
5830
|
|
|
|
|
|
|
## '//=' => undef, |
|
5831
|
|
|
|
|
|
|
## '~' => undef, |
|
5832
|
|
|
|
|
|
|
## '~~' => undef, |
|
5833
|
|
|
|
|
|
|
## '!~~' => undef, |
|
5834
|
|
|
|
|
|
|
|
|
5835
|
|
|
|
|
|
|
}; |
|
5836
|
|
|
|
|
|
|
|
|
5837
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
|
5838
|
|
|
|
|
|
|
# end hash of code for handling individual token types |
|
5839
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
|
5840
|
|
|
|
|
|
|
|
|
5841
|
44
|
|
|
44
|
|
388
|
use constant DEBUG_TOKENIZE => 0; |
|
|
44
|
|
|
|
|
79
|
|
|
|
44
|
|
|
|
|
4639
|
|
|
5842
|
|
|
|
|
|
|
|
|
5843
|
|
|
|
|
|
|
my %is_arrow_or_Z; |
|
5844
|
|
|
|
|
|
|
|
|
5845
|
|
|
|
|
|
|
BEGIN { |
|
5846
|
44
|
|
|
44
|
|
244
|
my @qZ = qw( -> Z ); |
|
5847
|
44
|
|
|
|
|
161157
|
$is_arrow_or_Z{$_} = 1 for @qZ; |
|
5848
|
|
|
|
|
|
|
} |
|
5849
|
|
|
|
|
|
|
|
|
5850
|
|
|
|
|
|
|
sub tokenize_this_line { |
|
5851
|
|
|
|
|
|
|
|
|
5852
|
8799
|
|
|
8799
|
0
|
14822
|
my ( $self, $line_of_tokens, $trimmed_input_line ) = @_; |
|
5853
|
|
|
|
|
|
|
|
|
5854
|
|
|
|
|
|
|
# This routine tokenizes one line. The results are stored in |
|
5855
|
|
|
|
|
|
|
# the hash ref '$line_of_tokens'. |
|
5856
|
|
|
|
|
|
|
|
|
5857
|
|
|
|
|
|
|
# Given: |
|
5858
|
|
|
|
|
|
|
# $line_of_tokens = ref to hash of values being filled for this line |
|
5859
|
|
|
|
|
|
|
# $trimmed_input_line |
|
5860
|
|
|
|
|
|
|
# = the input line without leading whitespace, OR |
|
5861
|
|
|
|
|
|
|
# = undef if not available |
|
5862
|
|
|
|
|
|
|
# Returns: |
|
5863
|
|
|
|
|
|
|
# nothing |
|
5864
|
|
|
|
|
|
|
|
|
5865
|
8799
|
|
|
|
|
13325
|
my $untrimmed_input_line = $line_of_tokens->{_line_text}; |
|
5866
|
|
|
|
|
|
|
|
|
5867
|
|
|
|
|
|
|
# Extract line number for use in error messages |
|
5868
|
8799
|
|
|
|
|
12184
|
$input_line_number = $line_of_tokens->{_line_number}; |
|
5869
|
|
|
|
|
|
|
|
|
5870
|
|
|
|
|
|
|
#------------------------------------- |
|
5871
|
|
|
|
|
|
|
# Check for start of pod documentation |
|
5872
|
|
|
|
|
|
|
#------------------------------------- |
|
5873
|
8799
|
100
|
100
|
|
|
27581
|
if ( !$in_quote |
|
|
|
|
66
|
|
|
|
|
|
5874
|
|
|
|
|
|
|
&& substr( $untrimmed_input_line, 0, 1 ) eq '=' |
|
5875
|
|
|
|
|
|
|
&& $untrimmed_input_line =~ /^=[A-Za-z_]/ ) |
|
5876
|
|
|
|
|
|
|
{ |
|
5877
|
|
|
|
|
|
|
|
|
5878
|
|
|
|
|
|
|
# Must not be in an equation where an '=' could be expected. |
|
5879
|
|
|
|
|
|
|
# Perl has additional restrictions which are not checked here. |
|
5880
|
15
|
|
|
|
|
29
|
my $blank_after_Z = 1; |
|
5881
|
15
|
|
|
|
|
56
|
$expecting = $self->operator_expected( '=', 'b', $blank_after_Z ); |
|
5882
|
15
|
50
|
|
|
|
63
|
if ( $expecting == TERM ) { |
|
5883
|
15
|
|
|
|
|
28
|
$self->[_in_pod_] = 1; |
|
5884
|
15
|
|
|
|
|
35
|
return; |
|
5885
|
|
|
|
|
|
|
} |
|
5886
|
|
|
|
|
|
|
} |
|
5887
|
|
|
|
|
|
|
|
|
5888
|
|
|
|
|
|
|
#-------------------------- |
|
5889
|
|
|
|
|
|
|
# Trim leading whitespace ? |
|
5890
|
|
|
|
|
|
|
#-------------------------- |
|
5891
|
|
|
|
|
|
|
# Use untrimmed line if we are continuing in a type 'Q' quote |
|
5892
|
8784
|
100
|
100
|
|
|
17727
|
if ( $in_quote && $quote_type eq 'Q' ) { |
|
5893
|
58
|
|
|
|
|
102
|
$line_of_tokens->{_starting_in_quote} = 1; |
|
5894
|
58
|
|
|
|
|
82
|
$input_line = $untrimmed_input_line; |
|
5895
|
58
|
|
|
|
|
98
|
chomp $input_line; |
|
5896
|
|
|
|
|
|
|
} |
|
5897
|
|
|
|
|
|
|
|
|
5898
|
|
|
|
|
|
|
# Trim start of this line if we are not continuing a quoted line. |
|
5899
|
|
|
|
|
|
|
# Do not trim end because we might end in a quote (test: deken4.pl) |
|
5900
|
|
|
|
|
|
|
# Perl::Tidy::Formatter will delete needless trailing blanks |
|
5901
|
|
|
|
|
|
|
else { |
|
5902
|
8726
|
|
|
|
|
13600
|
$line_of_tokens->{_starting_in_quote} = 0; |
|
5903
|
|
|
|
|
|
|
|
|
5904
|
|
|
|
|
|
|
# Use the pre-computed trimmed line if defined (most efficient) |
|
5905
|
8726
|
|
|
|
|
11401
|
$input_line = $trimmed_input_line; |
|
5906
|
|
|
|
|
|
|
|
|
5907
|
|
|
|
|
|
|
# otherwise trim the raw input line (much less efficient) |
|
5908
|
8726
|
50
|
|
|
|
19499
|
if ( !defined($input_line) ) { |
|
5909
|
0
|
|
|
|
|
0
|
$input_line = $untrimmed_input_line; |
|
5910
|
0
|
|
|
|
|
0
|
$input_line =~ s/^\s+//; |
|
5911
|
|
|
|
|
|
|
} |
|
5912
|
|
|
|
|
|
|
|
|
5913
|
8726
|
|
|
|
|
11869
|
chomp $input_line; |
|
5914
|
|
|
|
|
|
|
|
|
5915
|
|
|
|
|
|
|
# define 'guessed_indentation_level' if logfile will be saved |
|
5916
|
8726
|
100
|
100
|
|
|
17970
|
if ( $self->[_save_logfile_] && length($input_line) ) { |
|
5917
|
3
|
|
|
|
|
5
|
my $guess = |
|
5918
|
|
|
|
|
|
|
$self->guess_old_indentation_level($untrimmed_input_line); |
|
5919
|
3
|
|
|
|
|
5
|
$line_of_tokens->{_guessed_indentation_level} = $guess; |
|
5920
|
|
|
|
|
|
|
} |
|
5921
|
|
|
|
|
|
|
} |
|
5922
|
|
|
|
|
|
|
|
|
5923
|
|
|
|
|
|
|
#------------ |
|
5924
|
|
|
|
|
|
|
# Blank lines |
|
5925
|
|
|
|
|
|
|
#------------ |
|
5926
|
8784
|
100
|
|
|
|
15192
|
if ( !length($input_line) ) { |
|
5927
|
1040
|
|
|
|
|
1856
|
$line_of_tokens->{_line_type} = 'CODE'; |
|
5928
|
1040
|
|
|
|
|
1933
|
$line_of_tokens->{_rtokens} = []; |
|
5929
|
1040
|
|
|
|
|
1817
|
$line_of_tokens->{_rtoken_type} = []; |
|
5930
|
1040
|
|
|
|
|
2199
|
$line_of_tokens->{_rlevels} = []; |
|
5931
|
1040
|
|
|
|
|
1886
|
$line_of_tokens->{_rblock_type} = []; |
|
5932
|
1040
|
|
|
|
|
1983
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
|
5933
|
1040
|
|
|
|
|
1852
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string; |
|
5934
|
1040
|
|
|
|
|
1795
|
return; |
|
5935
|
|
|
|
|
|
|
} |
|
5936
|
|
|
|
|
|
|
|
|
5937
|
|
|
|
|
|
|
#--------- |
|
5938
|
|
|
|
|
|
|
# Comments |
|
5939
|
|
|
|
|
|
|
#--------- |
|
5940
|
7744
|
100
|
100
|
|
|
21184
|
if ( !$in_quote && substr( $input_line, 0, 1 ) eq '#' ) { |
|
5941
|
|
|
|
|
|
|
|
|
5942
|
|
|
|
|
|
|
# and check for skipped section |
|
5943
|
884
|
50
|
66
|
|
|
4398
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
5944
|
|
|
|
|
|
|
( |
|
5945
|
|
|
|
|
|
|
substr( $input_line, 0, 4 ) eq '#<<V' |
|
5946
|
|
|
|
|
|
|
|| $rOpts_code_skipping_begin |
|
5947
|
|
|
|
|
|
|
) |
|
5948
|
|
|
|
|
|
|
&& $rOpts_code_skipping |
|
5949
|
|
|
|
|
|
|
|
|
5950
|
|
|
|
|
|
|
# note that the code_skipping_patterns require a newline |
|
5951
|
|
|
|
|
|
|
&& ( $input_line . SPACE ) =~ /$code_skipping_pattern_begin/ |
|
5952
|
|
|
|
|
|
|
) |
|
5953
|
|
|
|
|
|
|
{ |
|
5954
|
2
|
|
|
|
|
7
|
$self->[_in_code_skipping_] = $self->[_last_line_number_]; |
|
5955
|
2
|
|
|
|
|
4
|
return; |
|
5956
|
|
|
|
|
|
|
} |
|
5957
|
|
|
|
|
|
|
|
|
5958
|
|
|
|
|
|
|
# Look for format skipping tags, but just normal mode. |
|
5959
|
|
|
|
|
|
|
# It will be used for these purposes: |
|
5960
|
|
|
|
|
|
|
# - to inform the formatter of an end token with no begin token |
|
5961
|
|
|
|
|
|
|
# - for making a hint when a brace error is detected |
|
5962
|
882
|
100
|
100
|
|
|
8960
|
if ( |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
5963
|
|
|
|
|
|
|
( |
|
5964
|
|
|
|
|
|
|
substr( $input_line, 0, 4 ) eq '#<<<' |
|
5965
|
|
|
|
|
|
|
|| $rOpts_format_skipping_begin |
|
5966
|
|
|
|
|
|
|
) |
|
5967
|
|
|
|
|
|
|
&& $rOpts_format_skipping |
|
5968
|
|
|
|
|
|
|
|
|
5969
|
|
|
|
|
|
|
# note that the format_skipping_patterns require a space |
|
5970
|
|
|
|
|
|
|
&& ( $input_line . SPACE ) =~ /$format_skipping_pattern_begin/ |
|
5971
|
|
|
|
|
|
|
|
|
5972
|
|
|
|
|
|
|
# allow same token for begin and end |
|
5973
|
|
|
|
|
|
|
&& ( |
|
5974
|
|
|
|
|
|
|
!$self->[_in_format_skipping_] |
|
5975
|
|
|
|
|
|
|
|| ( $format_skipping_pattern_begin ne |
|
5976
|
|
|
|
|
|
|
$format_skipping_pattern_end ) |
|
5977
|
|
|
|
|
|
|
) |
|
5978
|
|
|
|
|
|
|
) |
|
5979
|
|
|
|
|
|
|
{ |
|
5980
|
17
|
|
|
|
|
76
|
my $on_off = 1; |
|
5981
|
17
|
|
|
|
|
33
|
my $lno = $self->[_last_line_number_]; |
|
5982
|
17
|
|
|
|
|
34
|
my $rformat_skipping_list = $self->[_rformat_skipping_list_]; |
|
5983
|
|
|
|
|
|
|
|
|
5984
|
|
|
|
|
|
|
# format markers must alternate between on and off |
|
5985
|
17
|
50
|
66
|
|
|
23
|
if ( @{$rformat_skipping_list} |
|
|
17
|
|
|
|
|
91
|
|
|
5986
|
|
|
|
|
|
|
&& $rformat_skipping_list->[-1]->[0] == $on_off ) |
|
5987
|
|
|
|
|
|
|
{ |
|
5988
|
0
|
|
|
|
|
0
|
my $lno_last = $rformat_skipping_list->[-1]->[1]; |
|
5989
|
0
|
|
|
|
|
0
|
$self->warning_do_not_format( |
|
5990
|
|
|
|
|
|
|
"consecutive format-skipping start markers - see line $lno_last\n" |
|
5991
|
|
|
|
|
|
|
); |
|
5992
|
|
|
|
|
|
|
} |
|
5993
|
17
|
|
|
|
|
36
|
push @{$rformat_skipping_list}, [ $on_off, $lno, $input_line ]; |
|
|
17
|
|
|
|
|
55
|
|
|
5994
|
17
|
|
|
|
|
38
|
$self->[_in_format_skipping_] = $lno; |
|
5995
|
|
|
|
|
|
|
} |
|
5996
|
|
|
|
|
|
|
elsif ( |
|
5997
|
|
|
|
|
|
|
( |
|
5998
|
|
|
|
|
|
|
substr( $input_line, 0, 4 ) eq '#>>>' |
|
5999
|
|
|
|
|
|
|
|| $rOpts_format_skipping_end |
|
6000
|
|
|
|
|
|
|
) |
|
6001
|
|
|
|
|
|
|
&& $rOpts_format_skipping |
|
6002
|
|
|
|
|
|
|
|
|
6003
|
|
|
|
|
|
|
# note that the format_skipping_patterns require a newline |
|
6004
|
|
|
|
|
|
|
&& ( $input_line . SPACE ) =~ /$format_skipping_pattern_end/ |
|
6005
|
|
|
|
|
|
|
) |
|
6006
|
|
|
|
|
|
|
{ |
|
6007
|
20
|
|
|
|
|
54
|
my $lno = $self->[_last_line_number_]; |
|
6008
|
20
|
|
|
|
|
40
|
my $rformat_skipping_list = $self->[_rformat_skipping_list_]; |
|
6009
|
20
|
|
|
|
|
31
|
my $on_off = -1; |
|
6010
|
|
|
|
|
|
|
|
|
6011
|
|
|
|
|
|
|
# markers must alternate between on and off |
|
6012
|
20
|
50
|
66
|
|
|
37
|
if ( @{$rformat_skipping_list} |
|
|
20
|
|
|
|
|
136
|
|
|
6013
|
|
|
|
|
|
|
&& $rformat_skipping_list->[-1]->[0] == $on_off ) |
|
6014
|
|
|
|
|
|
|
{ |
|
6015
|
0
|
|
|
|
|
0
|
my $lno_last = $rformat_skipping_list->[-1]->[1]; |
|
6016
|
0
|
|
|
|
|
0
|
$self->warning_do_not_format( |
|
6017
|
|
|
|
|
|
|
"consecutive format-skipping end markers - see line $lno_last\n" |
|
6018
|
|
|
|
|
|
|
); |
|
6019
|
|
|
|
|
|
|
} |
|
6020
|
|
|
|
|
|
|
|
|
6021
|
20
|
|
|
|
|
38
|
push @{$rformat_skipping_list}, [ $on_off, $lno, $input_line ]; |
|
|
20
|
|
|
|
|
76
|
|
|
6022
|
20
|
|
|
|
|
51
|
$self->[_in_format_skipping_] = 0; |
|
6023
|
|
|
|
|
|
|
} |
|
6024
|
|
|
|
|
|
|
else { |
|
6025
|
|
|
|
|
|
|
# not a format skipping comment |
|
6026
|
|
|
|
|
|
|
} |
|
6027
|
|
|
|
|
|
|
|
|
6028
|
|
|
|
|
|
|
# Optional fast processing of a block comment |
|
6029
|
882
|
|
|
|
|
1617
|
$line_of_tokens->{_line_type} = 'CODE'; |
|
6030
|
882
|
|
|
|
|
2165
|
$line_of_tokens->{_rtokens} = [$input_line]; |
|
6031
|
882
|
|
|
|
|
2026
|
$line_of_tokens->{_rtoken_type} = ['#']; |
|
6032
|
882
|
|
|
|
|
2534
|
$line_of_tokens->{_rlevels} = [$level_in_tokenizer]; |
|
6033
|
882
|
|
|
|
|
2064
|
$line_of_tokens->{_rblock_type} = [EMPTY_STRING]; |
|
6034
|
882
|
|
|
|
|
1855
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
|
6035
|
882
|
|
|
|
|
1765
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string; |
|
6036
|
882
|
|
|
|
|
1756
|
return; |
|
6037
|
|
|
|
|
|
|
} |
|
6038
|
|
|
|
|
|
|
|
|
6039
|
|
|
|
|
|
|
#------------------------------------- |
|
6040
|
|
|
|
|
|
|
# Loop to find all tokens on this line |
|
6041
|
|
|
|
|
|
|
#------------------------------------- |
|
6042
|
|
|
|
|
|
|
|
|
6043
|
|
|
|
|
|
|
# Update the copy of the line for use in error messages |
|
6044
|
|
|
|
|
|
|
# This must be exactly what we give the pre_tokenizer |
|
6045
|
6860
|
|
|
|
|
9740
|
$self->[_line_of_text_] = $input_line; |
|
6046
|
|
|
|
|
|
|
|
|
6047
|
|
|
|
|
|
|
# re-initialize for the main loop |
|
6048
|
6860
|
|
|
|
|
10029
|
$routput_token_list = []; # stack of output token indexes |
|
6049
|
6860
|
|
|
|
|
12173
|
$routput_token_type = []; # token types |
|
6050
|
6860
|
|
|
|
|
17675
|
$routput_block_type = []; # types of code block |
|
6051
|
6860
|
|
|
|
|
16106
|
$routput_type_sequence = []; # nesting sequential number |
|
6052
|
|
|
|
|
|
|
|
|
6053
|
6860
|
|
|
|
|
13960
|
$rhere_target_list = []; |
|
6054
|
|
|
|
|
|
|
|
|
6055
|
6860
|
|
|
|
|
8993
|
$tok = $last_nonblank_token; |
|
6056
|
6860
|
|
|
|
|
8738
|
$type = $last_nonblank_type; |
|
6057
|
6860
|
|
|
|
|
8267
|
$prototype = $last_nonblank_prototype; |
|
6058
|
6860
|
|
|
|
|
8205
|
$last_nonblank_i = -1; |
|
6059
|
6860
|
|
|
|
|
8442
|
$block_type = $last_nonblank_block_type; |
|
6060
|
6860
|
|
|
|
|
8002
|
$container_type = $last_nonblank_container_type; |
|
6061
|
6860
|
|
|
|
|
7922
|
$type_sequence = $last_nonblank_type_sequence; |
|
6062
|
6860
|
|
|
|
|
7547
|
$indent_flag = 0; |
|
6063
|
6860
|
|
|
|
|
7613
|
$peeked_ahead = 0; |
|
6064
|
|
|
|
|
|
|
|
|
6065
|
6860
|
|
|
|
|
15579
|
$self->tokenizer_main_loop(); |
|
6066
|
|
|
|
|
|
|
|
|
6067
|
|
|
|
|
|
|
#------------------------------------------------- |
|
6068
|
|
|
|
|
|
|
# Done tokenizing this line ... package the result |
|
6069
|
|
|
|
|
|
|
#------------------------------------------------- |
|
6070
|
6860
|
|
|
|
|
18189
|
$self->tokenizer_wrapup_line($line_of_tokens); |
|
6071
|
|
|
|
|
|
|
|
|
6072
|
6860
|
|
|
|
|
10690
|
return; |
|
6073
|
|
|
|
|
|
|
} ## end sub tokenize_this_line |
|
6074
|
|
|
|
|
|
|
|
|
6075
|
|
|
|
|
|
|
sub tokenizer_main_loop { |
|
6076
|
|
|
|
|
|
|
|
|
6077
|
6860
|
|
|
6860
|
0
|
9953
|
my ($self) = @_; |
|
6078
|
|
|
|
|
|
|
|
|
6079
|
|
|
|
|
|
|
# Break one input line into tokens |
|
6080
|
|
|
|
|
|
|
# We are working on closure variables. |
|
6081
|
|
|
|
|
|
|
|
|
6082
|
|
|
|
|
|
|
# Start by breaking the line into pre-tokens |
|
6083
|
6860
|
|
|
|
|
15521
|
( $rtokens, $rtoken_map, $rtoken_type ) = pre_tokenize($input_line); |
|
6084
|
|
|
|
|
|
|
|
|
6085
|
|
|
|
|
|
|
# Verify that all leading whitespace has been trimmed |
|
6086
|
|
|
|
|
|
|
# except for quotes of type 'Q' (c273). |
|
6087
|
6860
|
50
|
66
|
|
|
25658
|
if ( @{$rtokens} |
|
|
6860
|
|
33
|
|
|
26494
|
|
|
|
|
|
66
|
|
|
|
|
|
6088
|
|
|
|
|
|
|
&& $rtoken_type->[0] eq 'b' |
|
6089
|
|
|
|
|
|
|
&& !( $in_quote && $quote_type eq 'Q' ) ) |
|
6090
|
|
|
|
|
|
|
{ |
|
6091
|
|
|
|
|
|
|
|
|
6092
|
|
|
|
|
|
|
# Shouldn't happen if calling sub did trim operation correctly. |
|
6093
|
0
|
|
|
|
|
0
|
DEVEL_MODE && Fault(<<EOM); |
|
6094
|
|
|
|
|
|
|
leading blank at line |
|
6095
|
|
|
|
|
|
|
$input_line |
|
6096
|
|
|
|
|
|
|
EOM |
|
6097
|
|
|
|
|
|
|
|
|
6098
|
|
|
|
|
|
|
# Fix by removing the leading blank token. This fix has been |
|
6099
|
|
|
|
|
|
|
# tested and works correctly even if no whitespaces was trimmed. |
|
6100
|
|
|
|
|
|
|
# But it is an inefficient way to do things because, for example, |
|
6101
|
|
|
|
|
|
|
# it forces all comments to be processed by sub pre_tokenize. |
|
6102
|
|
|
|
|
|
|
# And it may cause indented code-skipping comments to be missed. |
|
6103
|
0
|
|
|
|
|
0
|
shift @{$rtokens}; |
|
|
0
|
|
|
|
|
0
|
|
|
6104
|
0
|
|
|
|
|
0
|
shift @{$rtoken_map}; |
|
|
0
|
|
|
|
|
0
|
|
|
6105
|
0
|
|
|
|
|
0
|
shift @{$rtoken_type}; |
|
|
0
|
|
|
|
|
0
|
|
|
6106
|
|
|
|
|
|
|
} |
|
6107
|
|
|
|
|
|
|
|
|
6108
|
6860
|
|
|
|
|
8330
|
$max_token_index = scalar( @{$rtokens} ) - 1; |
|
|
6860
|
|
|
|
|
9551
|
|
|
6109
|
6860
|
|
|
|
|
8062
|
push( @{$rtokens}, SPACE, SPACE, SPACE ) |
|
|
6860
|
|
|
|
|
14237
|
|
|
6110
|
|
|
|
|
|
|
; # extra whitespace simplifies logic |
|
6111
|
6860
|
|
|
|
|
7961
|
push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced |
|
|
6860
|
|
|
|
|
11613
|
|
|
6112
|
6860
|
|
|
|
|
7967
|
push( @{$rtoken_type}, 'b', 'b', 'b' ); |
|
|
6860
|
|
|
|
|
12334
|
|
|
6113
|
|
|
|
|
|
|
|
|
6114
|
|
|
|
|
|
|
# initialize for main loop |
|
6115
|
6860
|
|
|
|
|
7993
|
if (0) { #<<< this is not necessary |
|
6116
|
|
|
|
|
|
|
foreach my $ii ( 0 .. $max_token_index + 3 ) { |
|
6117
|
|
|
|
|
|
|
$routput_token_type->[$ii] = EMPTY_STRING; |
|
6118
|
|
|
|
|
|
|
$routput_block_type->[$ii] = EMPTY_STRING; |
|
6119
|
|
|
|
|
|
|
$routput_type_sequence->[$ii] = EMPTY_STRING; |
|
6120
|
|
|
|
|
|
|
$routput_indent_flag->[$ii] = 0; |
|
6121
|
|
|
|
|
|
|
} |
|
6122
|
|
|
|
|
|
|
} |
|
6123
|
|
|
|
|
|
|
|
|
6124
|
6860
|
|
|
|
|
8018
|
$i = -1; |
|
6125
|
6860
|
|
|
|
|
8340
|
$i_tok = -1; |
|
6126
|
|
|
|
|
|
|
|
|
6127
|
|
|
|
|
|
|
#----------------------- |
|
6128
|
|
|
|
|
|
|
# main tokenization loop |
|
6129
|
|
|
|
|
|
|
#----------------------- |
|
6130
|
|
|
|
|
|
|
|
|
6131
|
|
|
|
|
|
|
# we are looking at each pre-token of one line and combining them |
|
6132
|
|
|
|
|
|
|
# into tokens |
|
6133
|
6860
|
|
|
|
|
12701
|
while ( ++$i <= $max_token_index ) { |
|
6134
|
|
|
|
|
|
|
|
|
6135
|
|
|
|
|
|
|
# continue looking for the end of a quote |
|
6136
|
59584
|
100
|
|
|
|
82393
|
if ($in_quote) { |
|
6137
|
3170
|
|
|
|
|
7822
|
$self->do_FOLLOW_QUOTE(); |
|
6138
|
3170
|
100
|
100
|
|
|
8844
|
last if ( $in_quote || $i > $max_token_index ); |
|
6139
|
|
|
|
|
|
|
} |
|
6140
|
|
|
|
|
|
|
|
|
6141
|
59214
|
100
|
100
|
|
|
124275
|
if ( $type ne 'b' && $type ne 'CORE::' ) { |
|
6142
|
|
|
|
|
|
|
|
|
6143
|
|
|
|
|
|
|
# try to catch some common errors |
|
6144
|
41499
|
100
|
100
|
|
|
69369
|
if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { |
|
6145
|
|
|
|
|
|
|
|
|
6146
|
2065
|
100
|
|
|
|
4637
|
if ( $last_nonblank_token eq 'eq' ) { |
|
|
|
50
|
|
|
|
|
|
|
6147
|
9
|
|
|
|
|
38
|
$self->complain("Should 'eq' be '==' here ?\n"); |
|
6148
|
|
|
|
|
|
|
} |
|
6149
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq 'ne' ) { |
|
6150
|
0
|
|
|
|
|
0
|
$self->complain("Should 'ne' be '!=' here ?\n"); |
|
6151
|
|
|
|
|
|
|
} |
|
6152
|
|
|
|
|
|
|
else { |
|
6153
|
|
|
|
|
|
|
# that's all |
|
6154
|
|
|
|
|
|
|
} |
|
6155
|
|
|
|
|
|
|
} |
|
6156
|
|
|
|
|
|
|
|
|
6157
|
|
|
|
|
|
|
# fix c090, only rotate vars if a new token will be stored |
|
6158
|
41499
|
100
|
|
|
|
59952
|
if ( $i_tok >= 0 ) { |
|
6159
|
|
|
|
|
|
|
|
|
6160
|
34828
|
|
|
|
|
39549
|
$last_last_nonblank_token = $last_nonblank_token; |
|
6161
|
34828
|
|
|
|
|
37251
|
$last_last_nonblank_type = $last_nonblank_type; |
|
6162
|
|
|
|
|
|
|
|
|
6163
|
34828
|
|
|
|
|
38039
|
$last_nonblank_prototype = $prototype; |
|
6164
|
34828
|
|
|
|
|
38098
|
$last_nonblank_block_type = $block_type; |
|
6165
|
34828
|
|
|
|
|
37220
|
$last_nonblank_container_type = $container_type; |
|
6166
|
34828
|
|
|
|
|
38563
|
$last_nonblank_type_sequence = $type_sequence; |
|
6167
|
34828
|
|
|
|
|
35590
|
$last_nonblank_i = $i_tok; |
|
6168
|
34828
|
|
|
|
|
35317
|
$last_nonblank_token = $tok; |
|
6169
|
34828
|
|
|
|
|
36504
|
$last_nonblank_type = $type; |
|
6170
|
|
|
|
|
|
|
} |
|
6171
|
|
|
|
|
|
|
|
|
6172
|
|
|
|
|
|
|
# Check for patches |
|
6173
|
41499
|
100
|
|
|
|
68796
|
if ( $is_arrow_or_Z{$last_last_nonblank_type} ) { |
|
6174
|
|
|
|
|
|
|
|
|
6175
|
|
|
|
|
|
|
# Patch for c030: Fix things in case a '->' got separated |
|
6176
|
|
|
|
|
|
|
# from the subsequent identifier by a side comment. We |
|
6177
|
|
|
|
|
|
|
# need the last_nonblank_token to have a leading -> to |
|
6178
|
|
|
|
|
|
|
# avoid triggering an operator expected error message at |
|
6179
|
|
|
|
|
|
|
# the next '('. See also fix for git #63. |
|
6180
|
1218
|
100
|
|
|
|
2511
|
if ( $last_last_nonblank_type eq '->' ) { |
|
|
|
50
|
|
|
|
|
|
|
6181
|
1172
|
100
|
66
|
|
|
4001
|
if ( $last_nonblank_type eq 'w' |
|
6182
|
|
|
|
|
|
|
|| $last_nonblank_type eq 'i' ) |
|
6183
|
|
|
|
|
|
|
{ |
|
6184
|
793
|
|
|
|
|
1351
|
$last_nonblank_token = '->' . $last_nonblank_token; |
|
6185
|
793
|
|
|
|
|
1186
|
$last_nonblank_type = 'i'; |
|
6186
|
|
|
|
|
|
|
} |
|
6187
|
|
|
|
|
|
|
} |
|
6188
|
|
|
|
|
|
|
|
|
6189
|
|
|
|
|
|
|
# Fix part #3 for git82: propagate type 'Z' though L-R pair |
|
6190
|
|
|
|
|
|
|
elsif ( $last_last_nonblank_type eq 'Z' ) { |
|
6191
|
46
|
100
|
|
|
|
138
|
if ( $last_nonblank_type eq 'R' ) { |
|
6192
|
1
|
|
|
|
|
3
|
$last_nonblank_type = $last_last_nonblank_type; |
|
6193
|
1
|
|
|
|
|
1
|
$last_nonblank_token = $last_last_nonblank_token; |
|
6194
|
|
|
|
|
|
|
} |
|
6195
|
|
|
|
|
|
|
} |
|
6196
|
|
|
|
|
|
|
else { |
|
6197
|
|
|
|
|
|
|
# No other patches |
|
6198
|
|
|
|
|
|
|
} |
|
6199
|
|
|
|
|
|
|
} |
|
6200
|
|
|
|
|
|
|
} |
|
6201
|
|
|
|
|
|
|
|
|
6202
|
|
|
|
|
|
|
# store previous token type |
|
6203
|
59214
|
100
|
|
|
|
80553
|
if ( $i_tok >= 0 ) { |
|
6204
|
52543
|
|
|
|
|
83840
|
$routput_token_type->[$i_tok] = $type; |
|
6205
|
52543
|
|
|
|
|
74068
|
$routput_block_type->[$i_tok] = $block_type; |
|
6206
|
52543
|
|
|
|
|
69132
|
$routput_type_sequence->[$i_tok] = $type_sequence; |
|
6207
|
52543
|
|
|
|
|
62772
|
$routput_indent_flag->[$i_tok] = $indent_flag; |
|
6208
|
|
|
|
|
|
|
} |
|
6209
|
|
|
|
|
|
|
|
|
6210
|
|
|
|
|
|
|
# get the next pre-token and type |
|
6211
|
|
|
|
|
|
|
# $tok and $type will be modified to make the output token |
|
6212
|
59214
|
|
|
|
|
77133
|
my $pre_tok = $tok = $rtokens->[$i]; # get the next pre-token |
|
6213
|
59214
|
|
|
|
|
73829
|
my $pre_type = $type = $rtoken_type->[$i]; # and type |
|
6214
|
|
|
|
|
|
|
|
|
6215
|
|
|
|
|
|
|
# re-initialize various flags for the next output token |
|
6216
|
|
|
|
|
|
|
( |
|
6217
|
|
|
|
|
|
|
|
|
6218
|
|
|
|
|
|
|
# remember the starting index of this token; we will update $i |
|
6219
|
59214
|
|
|
|
|
91052
|
$i_tok, |
|
6220
|
|
|
|
|
|
|
$block_type, |
|
6221
|
|
|
|
|
|
|
$container_type, |
|
6222
|
|
|
|
|
|
|
$type_sequence, |
|
6223
|
|
|
|
|
|
|
$indent_flag, |
|
6224
|
|
|
|
|
|
|
$prototype, |
|
6225
|
|
|
|
|
|
|
) |
|
6226
|
|
|
|
|
|
|
= ( |
|
6227
|
|
|
|
|
|
|
|
|
6228
|
|
|
|
|
|
|
$i, |
|
6229
|
|
|
|
|
|
|
EMPTY_STRING, |
|
6230
|
|
|
|
|
|
|
EMPTY_STRING, |
|
6231
|
|
|
|
|
|
|
EMPTY_STRING, |
|
6232
|
|
|
|
|
|
|
0, |
|
6233
|
|
|
|
|
|
|
EMPTY_STRING, |
|
6234
|
|
|
|
|
|
|
); |
|
6235
|
|
|
|
|
|
|
|
|
6236
|
|
|
|
|
|
|
# this pre-token will start an output token |
|
6237
|
59214
|
|
|
|
|
59789
|
push( @{$routput_token_list}, $i_tok ); |
|
|
59214
|
|
|
|
|
79284
|
|
|
6238
|
|
|
|
|
|
|
|
|
6239
|
|
|
|
|
|
|
#--------------------------------------------------- |
|
6240
|
|
|
|
|
|
|
# The token search leads to one of 5 main END NODES: |
|
6241
|
|
|
|
|
|
|
#--------------------------------------------------- |
|
6242
|
|
|
|
|
|
|
|
|
6243
|
|
|
|
|
|
|
#----------------------- |
|
6244
|
|
|
|
|
|
|
# END NODE 1: whitespace |
|
6245
|
|
|
|
|
|
|
#----------------------- |
|
6246
|
59214
|
100
|
|
|
|
95378
|
next if ( $pre_type eq 'b' ); |
|
6247
|
|
|
|
|
|
|
|
|
6248
|
|
|
|
|
|
|
#---------------------- |
|
6249
|
|
|
|
|
|
|
# END NODE 2: a comment |
|
6250
|
|
|
|
|
|
|
#---------------------- |
|
6251
|
41354
|
100
|
|
|
|
60099
|
if ( $pre_type eq '#' ) { |
|
6252
|
|
|
|
|
|
|
|
|
6253
|
|
|
|
|
|
|
# push non-indenting brace stack Look for a possible |
|
6254
|
|
|
|
|
|
|
# non-indenting brace. This is only used to give a hint in |
|
6255
|
|
|
|
|
|
|
# case the file is unbalanced. |
|
6256
|
|
|
|
|
|
|
# Hardwired to '#<<<' for efficiency. We will not use the |
|
6257
|
|
|
|
|
|
|
# result later if the pattern has been changed (very unusual). |
|
6258
|
363
|
100
|
66
|
|
|
1653
|
if ( $last_nonblank_token eq '{' |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
6259
|
|
|
|
|
|
|
&& $last_nonblank_block_type |
|
6260
|
|
|
|
|
|
|
&& $last_nonblank_type_sequence |
|
6261
|
|
|
|
|
|
|
&& !$self->[_in_format_skipping_] |
|
6262
|
|
|
|
|
|
|
&& $rOpts_non_indenting_braces ) |
|
6263
|
|
|
|
|
|
|
{ |
|
6264
|
46
|
|
|
|
|
90
|
my $offset = $rtoken_map->[$i_tok]; |
|
6265
|
46
|
|
|
|
|
122
|
my $text = substr( $input_line, $offset, 5 ); |
|
6266
|
46
|
|
|
|
|
77
|
my $len = length($text); |
|
6267
|
46
|
100
|
66
|
|
|
358
|
if ( $len == 4 && $text eq '#<<<' |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
6268
|
|
|
|
|
|
|
|| $len > 4 && $text eq '#<<< ' ) |
|
6269
|
|
|
|
|
|
|
{ |
|
6270
|
6
|
|
|
|
|
9
|
push @{ $self->[_rnon_indenting_brace_stack_] }, |
|
|
6
|
|
|
|
|
16
|
|
|
6271
|
|
|
|
|
|
|
$last_nonblank_type_sequence; |
|
6272
|
|
|
|
|
|
|
} |
|
6273
|
|
|
|
|
|
|
} |
|
6274
|
363
|
|
|
|
|
651
|
last; |
|
6275
|
|
|
|
|
|
|
} |
|
6276
|
|
|
|
|
|
|
|
|
6277
|
|
|
|
|
|
|
# continue gathering identifier if necessary |
|
6278
|
40991
|
100
|
|
|
|
59659
|
if ($id_scan_state) { |
|
6279
|
|
|
|
|
|
|
|
|
6280
|
17
|
100
|
66
|
|
|
76
|
if ( $is_sub{$id_scan_state} || $is_package{$id_scan_state} ) { |
|
6281
|
10
|
|
|
|
|
32
|
$self->scan_id(); |
|
6282
|
|
|
|
|
|
|
} |
|
6283
|
|
|
|
|
|
|
else { |
|
6284
|
7
|
|
|
|
|
25
|
$self->scan_identifier(); |
|
6285
|
|
|
|
|
|
|
} |
|
6286
|
|
|
|
|
|
|
|
|
6287
|
17
|
100
|
|
|
|
49
|
if ($id_scan_state) { |
|
6288
|
|
|
|
|
|
|
|
|
6289
|
|
|
|
|
|
|
# Still scanning ... |
|
6290
|
|
|
|
|
|
|
# Check for side comment between sub and prototype (c061) |
|
6291
|
|
|
|
|
|
|
|
|
6292
|
|
|
|
|
|
|
# done if nothing left to scan on this line |
|
6293
|
1
|
50
|
|
|
|
5
|
last if ( $i > $max_token_index ); |
|
6294
|
|
|
|
|
|
|
|
|
6295
|
1
|
|
|
|
|
4
|
my ( $next_nonblank_token_uu, $i_next ) = |
|
6296
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, |
|
6297
|
|
|
|
|
|
|
$max_token_index ); |
|
6298
|
|
|
|
|
|
|
|
|
6299
|
|
|
|
|
|
|
# done if it was just some trailing space |
|
6300
|
1
|
50
|
|
|
|
4
|
last if ( $i_next > $max_token_index ); |
|
6301
|
|
|
|
|
|
|
|
|
6302
|
|
|
|
|
|
|
# something remains on the line ... must be a side comment |
|
6303
|
1
|
|
|
|
|
6
|
next; |
|
6304
|
|
|
|
|
|
|
} |
|
6305
|
|
|
|
|
|
|
|
|
6306
|
16
|
100
|
100
|
|
|
87
|
next if ( ( $i > 0 ) || $type ); |
|
6307
|
|
|
|
|
|
|
|
|
6308
|
|
|
|
|
|
|
# didn't find any token; start over |
|
6309
|
7
|
|
|
|
|
16
|
$type = $pre_type; |
|
6310
|
7
|
|
|
|
|
12
|
$tok = $pre_tok; |
|
6311
|
|
|
|
|
|
|
} |
|
6312
|
|
|
|
|
|
|
|
|
6313
|
|
|
|
|
|
|
#----------------------------------------------------------- |
|
6314
|
|
|
|
|
|
|
# Combine pre-tokens into digraphs and trigraphs if possible |
|
6315
|
|
|
|
|
|
|
#----------------------------------------------------------- |
|
6316
|
|
|
|
|
|
|
|
|
6317
|
|
|
|
|
|
|
# See if we can make a digraph... |
|
6318
|
|
|
|
|
|
|
# The following tokens are excluded and handled specially: |
|
6319
|
|
|
|
|
|
|
# '/=' is excluded because the / might start a pattern. |
|
6320
|
|
|
|
|
|
|
# 'x=' is excluded since it might be $x=, with $ on previous line |
|
6321
|
|
|
|
|
|
|
# '**' and *= might be typeglobs of punctuation variables |
|
6322
|
|
|
|
|
|
|
# I have allowed tokens starting with <, such as <=, |
|
6323
|
|
|
|
|
|
|
# because I don't think these could be valid angle operators. |
|
6324
|
|
|
|
|
|
|
# test file: storrs4.pl |
|
6325
|
40981
|
100
|
100
|
|
|
97132
|
if ( $can_start_digraph{$tok} |
|
|
|
|
100
|
|
|
|
|
|
6326
|
|
|
|
|
|
|
&& $i < $max_token_index |
|
6327
|
|
|
|
|
|
|
&& $is_digraph{ $tok . $rtokens->[ $i + 1 ] } ) |
|
6328
|
|
|
|
|
|
|
{ |
|
6329
|
|
|
|
|
|
|
|
|
6330
|
3047
|
|
|
|
|
4085
|
my $combine_ok = 1; |
|
6331
|
3047
|
|
|
|
|
5237
|
my $test_tok = $tok . $rtokens->[ $i + 1 ]; |
|
6332
|
|
|
|
|
|
|
|
|
6333
|
|
|
|
|
|
|
# check for special cases which cannot be combined |
|
6334
|
|
|
|
|
|
|
|
|
6335
|
|
|
|
|
|
|
# Smartmatch is being deprecated, but may exist in older |
|
6336
|
|
|
|
|
|
|
# scripts. |
|
6337
|
3047
|
100
|
|
|
|
9021
|
if ( $test_tok eq '~~' ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
6338
|
|
|
|
|
|
|
|
|
6339
|
|
|
|
|
|
|
# Do not combine if a TERM is required |
|
6340
|
111
|
100
|
|
|
|
224
|
if ( $self->operator_expected( $tok, '~', undef ) == TERM ) |
|
6341
|
|
|
|
|
|
|
{ |
|
6342
|
|
|
|
|
|
|
|
|
6343
|
|
|
|
|
|
|
# block types ';' may actually be hash refs, c567 |
|
6344
|
1
|
50
|
|
|
|
3
|
if ( $last_nonblank_type eq '}' ) { |
|
6345
|
1
|
|
|
|
|
3
|
my $blk = $rbrace_type->[ $brace_depth + 1 ]; |
|
6346
|
1
|
50
|
33
|
|
|
6
|
if ( !$blk || $blk ne ';' ) { $combine_ok = 0 } |
|
|
0
|
|
|
|
|
0
|
|
|
6347
|
|
|
|
|
|
|
} |
|
6348
|
|
|
|
|
|
|
else { |
|
6349
|
0
|
|
|
|
|
0
|
$combine_ok = 0; |
|
6350
|
|
|
|
|
|
|
} |
|
6351
|
|
|
|
|
|
|
} |
|
6352
|
|
|
|
|
|
|
} |
|
6353
|
|
|
|
|
|
|
|
|
6354
|
|
|
|
|
|
|
# '//' must be defined_or operator if an operator is expected. |
|
6355
|
|
|
|
|
|
|
# TODO: Code for other ambiguous digraphs (/=, x=, **, *=) |
|
6356
|
|
|
|
|
|
|
# could be migrated here for clarity |
|
6357
|
|
|
|
|
|
|
|
|
6358
|
|
|
|
|
|
|
# Patch for RT#102371, misparsing a // in the following snippet: |
|
6359
|
|
|
|
|
|
|
# state $b //= ccc(); |
|
6360
|
|
|
|
|
|
|
# The solution is to always accept the digraph (or trigraph) |
|
6361
|
|
|
|
|
|
|
# after type 'Z' (possible file handle). The reason is that |
|
6362
|
|
|
|
|
|
|
# sub operator_expected gives TERM expected here, which is |
|
6363
|
|
|
|
|
|
|
# wrong in this case. |
|
6364
|
|
|
|
|
|
|
elsif ( $test_tok eq '//' ) { |
|
6365
|
16
|
50
|
|
|
|
47
|
if ( $last_nonblank_type ne 'Z' ) { |
|
6366
|
|
|
|
|
|
|
|
|
6367
|
|
|
|
|
|
|
# note that here $tok = '/' and the next tok and type |
|
6368
|
|
|
|
|
|
|
# is '/' |
|
6369
|
16
|
|
|
|
|
29
|
my $blank_after_Z; |
|
6370
|
16
|
|
|
|
|
53
|
$expecting = |
|
6371
|
|
|
|
|
|
|
$self->operator_expected( $tok, '/', $blank_after_Z ); |
|
6372
|
|
|
|
|
|
|
|
|
6373
|
|
|
|
|
|
|
# Patched for RT#101547, was |
|
6374
|
|
|
|
|
|
|
# 'unless ($expecting==OPERATOR)' |
|
6375
|
16
|
100
|
|
|
|
43
|
$combine_ok = 0 if ( $expecting == TERM ); |
|
6376
|
|
|
|
|
|
|
} |
|
6377
|
|
|
|
|
|
|
} |
|
6378
|
|
|
|
|
|
|
|
|
6379
|
|
|
|
|
|
|
# Patch for RT #114359: mis-parsing of "print $x ** 0.5; |
|
6380
|
|
|
|
|
|
|
# Accept the digraphs '**' only after type 'Z' |
|
6381
|
|
|
|
|
|
|
# Otherwise postpone the decision. |
|
6382
|
|
|
|
|
|
|
elsif ( $test_tok eq '**' ) { |
|
6383
|
45
|
100
|
|
|
|
147
|
if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 } |
|
|
43
|
|
|
|
|
75
|
|
|
6384
|
|
|
|
|
|
|
} |
|
6385
|
|
|
|
|
|
|
else { |
|
6386
|
|
|
|
|
|
|
## no other special cases |
|
6387
|
|
|
|
|
|
|
} |
|
6388
|
|
|
|
|
|
|
|
|
6389
|
3047
|
100
|
100
|
|
|
15477
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6390
|
|
|
|
|
|
|
|
|
6391
|
|
|
|
|
|
|
# still ok to combine? |
|
6392
|
|
|
|
|
|
|
$combine_ok |
|
6393
|
|
|
|
|
|
|
|
|
6394
|
|
|
|
|
|
|
&& ( $test_tok ne '/=' ) # might be pattern |
|
6395
|
|
|
|
|
|
|
&& ( $test_tok ne 'x=' ) # might be $x |
|
6396
|
|
|
|
|
|
|
&& ( $test_tok ne '*=' ) # typeglob? |
|
6397
|
|
|
|
|
|
|
|
|
6398
|
|
|
|
|
|
|
# Moved above as part of fix for |
|
6399
|
|
|
|
|
|
|
# RT #114359: Missparsing of "print $x ** 0.5; |
|
6400
|
|
|
|
|
|
|
# && ( $test_tok ne '**' ) # typeglob? |
|
6401
|
|
|
|
|
|
|
) |
|
6402
|
|
|
|
|
|
|
{ |
|
6403
|
2994
|
|
|
|
|
3762
|
$tok = $test_tok; |
|
6404
|
2994
|
|
|
|
|
3487
|
$i++; |
|
6405
|
|
|
|
|
|
|
|
|
6406
|
|
|
|
|
|
|
# Now try to assemble trigraphs. Note that all possible |
|
6407
|
|
|
|
|
|
|
# perl trigraphs can be constructed by appending a character |
|
6408
|
|
|
|
|
|
|
# to a digraph. |
|
6409
|
2994
|
|
|
|
|
4407
|
$test_tok = $tok . $rtokens->[ $i + 1 ]; |
|
6410
|
|
|
|
|
|
|
|
|
6411
|
2994
|
100
|
|
|
|
5586
|
if ( $is_trigraph{$test_tok} ) { |
|
6412
|
87
|
|
|
|
|
141
|
$tok = $test_tok; |
|
6413
|
87
|
|
|
|
|
147
|
$i++; |
|
6414
|
|
|
|
|
|
|
} |
|
6415
|
|
|
|
|
|
|
|
|
6416
|
|
|
|
|
|
|
# The only current tetragraph is the double diamond operator |
|
6417
|
|
|
|
|
|
|
# and its first three characters are NOT a trigraph, so |
|
6418
|
|
|
|
|
|
|
# we do can do a special test for it |
|
6419
|
|
|
|
|
|
|
else { |
|
6420
|
2907
|
100
|
|
|
|
5775
|
if ( $test_tok eq '<<>' ) { |
|
6421
|
1
|
|
|
|
|
3
|
$test_tok .= $rtokens->[ $i + 2 ]; |
|
6422
|
1
|
50
|
|
|
|
4
|
if ( $is_tetragraph{$test_tok} ) { |
|
6423
|
1
|
|
|
|
|
2
|
$tok = $test_tok; |
|
6424
|
1
|
|
|
|
|
2
|
$i += 2; |
|
6425
|
|
|
|
|
|
|
} |
|
6426
|
|
|
|
|
|
|
} |
|
6427
|
|
|
|
|
|
|
} |
|
6428
|
|
|
|
|
|
|
} |
|
6429
|
|
|
|
|
|
|
} |
|
6430
|
|
|
|
|
|
|
|
|
6431
|
40981
|
|
|
|
|
44810
|
$type = $tok; |
|
6432
|
40981
|
|
|
|
|
53410
|
$next_tok = $rtokens->[ $i + 1 ]; |
|
6433
|
40981
|
|
|
|
|
50179
|
$next_type = $rtoken_type->[ $i + 1 ]; |
|
6434
|
|
|
|
|
|
|
|
|
6435
|
|
|
|
|
|
|
# expecting an operator here? first try table lookup, then function |
|
6436
|
40981
|
|
|
|
|
55433
|
$expecting = $op_expected_table{$last_nonblank_type}; |
|
6437
|
40981
|
100
|
|
|
|
59396
|
if ( !defined($expecting) ) { |
|
6438
|
11705
|
|
100
|
|
|
20925
|
my $blank_after_Z = $last_nonblank_type eq 'Z' |
|
6439
|
|
|
|
|
|
|
&& ( $i == 0 || $rtoken_type->[ $i - 1 ] eq 'b' ); |
|
6440
|
11705
|
|
|
|
|
24970
|
$expecting = |
|
6441
|
|
|
|
|
|
|
$self->operator_expected( $tok, $next_type, $blank_after_Z ); |
|
6442
|
|
|
|
|
|
|
} |
|
6443
|
|
|
|
|
|
|
|
|
6444
|
40981
|
|
|
|
|
42289
|
DEBUG_TOKENIZE && do { |
|
6445
|
|
|
|
|
|
|
local $LIST_SEPARATOR = ')('; |
|
6446
|
|
|
|
|
|
|
my @debug_list = ( |
|
6447
|
|
|
|
|
|
|
$last_nonblank_token, $tok, |
|
6448
|
|
|
|
|
|
|
$next_tok, $brace_depth, |
|
6449
|
|
|
|
|
|
|
$rbrace_type->[$brace_depth], $paren_depth, |
|
6450
|
|
|
|
|
|
|
$rparen_type->[$paren_depth], |
|
6451
|
|
|
|
|
|
|
); |
|
6452
|
|
|
|
|
|
|
print {*STDOUT} "TOKENIZE:(@debug_list)\n"; |
|
6453
|
|
|
|
|
|
|
}; |
|
6454
|
|
|
|
|
|
|
|
|
6455
|
|
|
|
|
|
|
# The next token is '$tok'. |
|
6456
|
|
|
|
|
|
|
# Now we have to define its '$type' |
|
6457
|
|
|
|
|
|
|
|
|
6458
|
|
|
|
|
|
|
#------------------------ |
|
6459
|
|
|
|
|
|
|
# END NODE 3: a bare word |
|
6460
|
|
|
|
|
|
|
#------------------------ |
|
6461
|
40981
|
100
|
|
|
|
58274
|
if ( $pre_type eq 'w' ) { |
|
6462
|
6616
|
|
|
|
|
16038
|
my $is_last = $self->do_BAREWORD(); |
|
6463
|
6616
|
100
|
|
|
|
11425
|
last if ($is_last); |
|
6464
|
6607
|
|
|
|
|
14421
|
next; |
|
6465
|
|
|
|
|
|
|
} |
|
6466
|
|
|
|
|
|
|
|
|
6467
|
|
|
|
|
|
|
# Turn off attribute list on first non-blank, non-bareword, |
|
6468
|
|
|
|
|
|
|
# and non-comment (added to fix c038) |
|
6469
|
34365
|
|
|
|
|
41814
|
$self->[_in_attribute_list_] = 0; |
|
6470
|
|
|
|
|
|
|
|
|
6471
|
|
|
|
|
|
|
#------------------------------- |
|
6472
|
|
|
|
|
|
|
# END NODE 4: a string of digits |
|
6473
|
|
|
|
|
|
|
#------------------------------- |
|
6474
|
34365
|
100
|
|
|
|
48537
|
if ( $pre_type eq 'd' ) { |
|
6475
|
2508
|
|
|
|
|
6975
|
$self->do_DIGITS(); |
|
6476
|
2508
|
|
|
|
|
4790
|
next; |
|
6477
|
|
|
|
|
|
|
} |
|
6478
|
|
|
|
|
|
|
|
|
6479
|
|
|
|
|
|
|
#------------------------------------------ |
|
6480
|
|
|
|
|
|
|
# END NODE 5: everything else (punctuation) |
|
6481
|
|
|
|
|
|
|
#------------------------------------------ |
|
6482
|
31857
|
|
|
|
|
47517
|
my $code = $tokenization_code->{$tok}; |
|
6483
|
31857
|
100
|
|
|
|
45635
|
if ($code) { |
|
6484
|
29616
|
|
|
|
|
63461
|
$code->($self); |
|
6485
|
29616
|
100
|
|
|
|
44869
|
redo if ($in_quote); |
|
6486
|
|
|
|
|
|
|
} |
|
6487
|
|
|
|
|
|
|
|
|
6488
|
|
|
|
|
|
|
# Check for a non-TERM where a TERM is expected. Note that this |
|
6489
|
|
|
|
|
|
|
# checks all symbols, even those without a $code (update c566) |
|
6490
|
29181
|
100
|
|
|
|
54302
|
if ( $expecting == TERM ) { |
|
6491
|
|
|
|
|
|
|
my $is_not_term = |
|
6492
|
|
|
|
|
|
|
$type eq ';' |
|
6493
|
|
|
|
|
|
|
|| $type eq ',' |
|
6494
|
11678
|
|
100
|
|
|
40806
|
|| $is_binary_operator_type{$type}; |
|
6495
|
11678
|
100
|
|
|
|
26410
|
if ($is_not_term) { |
|
6496
|
277
|
|
|
|
|
879
|
$self->error_if_expecting_TERM(); |
|
6497
|
|
|
|
|
|
|
} |
|
6498
|
|
|
|
|
|
|
} |
|
6499
|
|
|
|
|
|
|
} ## End main tokenizer loop |
|
6500
|
|
|
|
|
|
|
|
|
6501
|
|
|
|
|
|
|
# Store the final token |
|
6502
|
6860
|
100
|
|
|
|
11522
|
if ( $i_tok >= 0 ) { |
|
6503
|
6671
|
|
|
|
|
12048
|
$routput_token_type->[$i_tok] = $type; |
|
6504
|
6671
|
|
|
|
|
10184
|
$routput_block_type->[$i_tok] = $block_type; |
|
6505
|
6671
|
|
|
|
|
10031
|
$routput_type_sequence->[$i_tok] = $type_sequence; |
|
6506
|
6671
|
|
|
|
|
9167
|
$routput_indent_flag->[$i_tok] = $indent_flag; |
|
6507
|
|
|
|
|
|
|
} |
|
6508
|
|
|
|
|
|
|
|
|
6509
|
|
|
|
|
|
|
# Remember last nonblank values |
|
6510
|
6860
|
100
|
100
|
|
|
18870
|
if ( $type ne 'b' && $type ne '#' ) { |
|
6511
|
|
|
|
|
|
|
|
|
6512
|
6349
|
|
|
|
|
7510
|
$last_last_nonblank_token = $last_nonblank_token; |
|
6513
|
6349
|
|
|
|
|
7419
|
$last_last_nonblank_type = $last_nonblank_type; |
|
6514
|
|
|
|
|
|
|
|
|
6515
|
6349
|
|
|
|
|
7726
|
$last_nonblank_prototype = $prototype; |
|
6516
|
6349
|
|
|
|
|
7265
|
$last_nonblank_block_type = $block_type; |
|
6517
|
6349
|
|
|
|
|
7336
|
$last_nonblank_container_type = $container_type; |
|
6518
|
6349
|
|
|
|
|
7440
|
$last_nonblank_type_sequence = $type_sequence; |
|
6519
|
6349
|
|
|
|
|
6990
|
$last_nonblank_token = $tok; |
|
6520
|
6349
|
|
|
|
|
7406
|
$last_nonblank_type = $type; |
|
6521
|
|
|
|
|
|
|
} |
|
6522
|
|
|
|
|
|
|
|
|
6523
|
|
|
|
|
|
|
# reset indentation level if necessary at a sub or package |
|
6524
|
|
|
|
|
|
|
# in an attempt to recover from a nesting error |
|
6525
|
6860
|
50
|
|
|
|
11466
|
if ( $level_in_tokenizer < 0 ) { |
|
6526
|
0
|
0
|
|
|
|
0
|
if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { |
|
6527
|
0
|
|
|
|
|
0
|
reset_indentation_level(0); |
|
6528
|
0
|
|
|
|
|
0
|
$self->brace_warning("resetting level to 0 at $1 $2\n"); |
|
6529
|
|
|
|
|
|
|
} |
|
6530
|
|
|
|
|
|
|
} |
|
6531
|
|
|
|
|
|
|
|
|
6532
|
6860
|
|
|
|
|
9063
|
$self->[_in_quote_] = $in_quote; |
|
6533
|
|
|
|
|
|
|
$self->[_quote_target_] = |
|
6534
|
|
|
|
|
|
|
$in_quote |
|
6535
|
|
|
|
|
|
|
? ( |
|
6536
|
|
|
|
|
|
|
$matching_end_token{$quote_character} |
|
6537
|
6860
|
100
|
|
|
|
12334
|
? $matching_end_token{$quote_character} |
|
|
|
100
|
|
|
|
|
|
|
6538
|
|
|
|
|
|
|
: $quote_character |
|
6539
|
|
|
|
|
|
|
) |
|
6540
|
|
|
|
|
|
|
: EMPTY_STRING; |
|
6541
|
6860
|
|
|
|
|
10782
|
$self->[_rhere_target_list_] = $rhere_target_list; |
|
6542
|
|
|
|
|
|
|
|
|
6543
|
6860
|
|
|
|
|
9698
|
return; |
|
6544
|
|
|
|
|
|
|
} ## end sub tokenizer_main_loop |
|
6545
|
|
|
|
|
|
|
|
|
6546
|
|
|
|
|
|
|
sub tokenizer_wrapup_line { |
|
6547
|
6860
|
|
|
6860
|
0
|
10675
|
my ( $self, $line_of_tokens ) = @_; |
|
6548
|
|
|
|
|
|
|
|
|
6549
|
|
|
|
|
|
|
#--------------------------------------------------------- |
|
6550
|
|
|
|
|
|
|
# Package a line of tokens for shipping back to the caller |
|
6551
|
|
|
|
|
|
|
#--------------------------------------------------------- |
|
6552
|
|
|
|
|
|
|
|
|
6553
|
|
|
|
|
|
|
# Arrays to hold token values for this line: |
|
6554
|
|
|
|
|
|
|
my ( |
|
6555
|
6860
|
|
|
|
|
9899
|
@output_levels, @output_block_type, @output_type_sequence, |
|
6556
|
|
|
|
|
|
|
@output_token_type, @output_tokens, |
|
6557
|
|
|
|
|
|
|
); |
|
6558
|
|
|
|
|
|
|
|
|
6559
|
6860
|
|
|
|
|
14666
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
|
6560
|
|
|
|
|
|
|
|
|
6561
|
|
|
|
|
|
|
# Remember starting nesting block string |
|
6562
|
6860
|
|
|
|
|
9195
|
my $nesting_block_string_0 = $nesting_block_string; |
|
6563
|
|
|
|
|
|
|
|
|
6564
|
|
|
|
|
|
|
#----------------- |
|
6565
|
|
|
|
|
|
|
# Loop over tokens |
|
6566
|
|
|
|
|
|
|
#----------------- |
|
6567
|
|
|
|
|
|
|
# $i is the index of the pretoken which starts this full token |
|
6568
|
6860
|
|
|
|
|
8009
|
foreach my $ii ( @{$routput_token_list} ) { |
|
|
6860
|
|
|
|
|
11601
|
|
|
6569
|
|
|
|
|
|
|
|
|
6570
|
59459
|
|
|
|
|
68915
|
my $type_i = $routput_token_type->[$ii]; |
|
6571
|
|
|
|
|
|
|
|
|
6572
|
|
|
|
|
|
|
#---------------------------------------- |
|
6573
|
|
|
|
|
|
|
# Section 1. Handle a non-sequenced token |
|
6574
|
|
|
|
|
|
|
#---------------------------------------- |
|
6575
|
59459
|
100
|
|
|
|
75378
|
if ( !$routput_type_sequence->[$ii] ) { |
|
6576
|
|
|
|
|
|
|
|
|
6577
|
|
|
|
|
|
|
#------------------------------- |
|
6578
|
|
|
|
|
|
|
# Section 1.1. types ';' and 't' |
|
6579
|
|
|
|
|
|
|
#------------------------------- |
|
6580
|
|
|
|
|
|
|
# - output anonymous 'sub' as keyword (type 'k') |
|
6581
|
|
|
|
|
|
|
# - output __END__, __DATA__, and format as type 'k' instead |
|
6582
|
|
|
|
|
|
|
# of ';' to make html colors correct, etc. |
|
6583
|
48567
|
100
|
|
|
|
82490
|
if ( $is_semicolon_or_t{$type_i} ) { |
|
|
|
50
|
|
|
|
|
|
|
6584
|
3153
|
|
|
|
|
4593
|
my $tok_i = $rtokens->[$ii]; |
|
6585
|
3153
|
100
|
|
|
|
6759
|
if ( $is_END_DATA_format_sub{$tok_i} ) { |
|
6586
|
187
|
|
|
|
|
337
|
$type_i = 'k'; |
|
6587
|
|
|
|
|
|
|
} |
|
6588
|
|
|
|
|
|
|
} |
|
6589
|
|
|
|
|
|
|
|
|
6590
|
|
|
|
|
|
|
#---------------------------------------------- |
|
6591
|
|
|
|
|
|
|
# Section 1.2. Check for an invalid token type. |
|
6592
|
|
|
|
|
|
|
#---------------------------------------------- |
|
6593
|
|
|
|
|
|
|
# This can happen by running perltidy on non-scripts although |
|
6594
|
|
|
|
|
|
|
# it could also be bug introduced by programming change. Perl |
|
6595
|
|
|
|
|
|
|
# silently accepts a 032 (^Z) and takes it as the end |
|
6596
|
|
|
|
|
|
|
elsif ( !$is_valid_token_type{$type_i} ) { |
|
6597
|
0
|
0
|
|
|
|
0
|
if ( !$self->[_in_error_] ) { |
|
6598
|
0
|
|
|
|
|
0
|
my $val = ord($type_i); |
|
6599
|
0
|
|
|
|
|
0
|
$self->warning( |
|
6600
|
|
|
|
|
|
|
"unexpected character decimal $val ($type_i) in script\n" |
|
6601
|
|
|
|
|
|
|
); |
|
6602
|
0
|
|
|
|
|
0
|
$self->[_in_error_] = 1; |
|
6603
|
|
|
|
|
|
|
} |
|
6604
|
|
|
|
|
|
|
} |
|
6605
|
|
|
|
|
|
|
else { |
|
6606
|
|
|
|
|
|
|
# valid token type other than ; and t |
|
6607
|
|
|
|
|
|
|
} |
|
6608
|
|
|
|
|
|
|
|
|
6609
|
|
|
|
|
|
|
#---------------------------------------------------- |
|
6610
|
|
|
|
|
|
|
# Section 1.3. Store values for a non-sequenced token |
|
6611
|
|
|
|
|
|
|
#---------------------------------------------------- |
|
6612
|
48567
|
|
|
|
|
63421
|
push( @output_levels, $level_in_tokenizer ); |
|
6613
|
48567
|
|
|
|
|
59820
|
push( @output_block_type, EMPTY_STRING ); |
|
6614
|
48567
|
|
|
|
|
58213
|
push( @output_type_sequence, EMPTY_STRING ); |
|
6615
|
48567
|
|
|
|
|
74844
|
push( @output_token_type, $type_i ); |
|
6616
|
|
|
|
|
|
|
|
|
6617
|
|
|
|
|
|
|
} |
|
6618
|
|
|
|
|
|
|
|
|
6619
|
|
|
|
|
|
|
#------------------------------------ |
|
6620
|
|
|
|
|
|
|
# Section 2. Handle a sequenced token |
|
6621
|
|
|
|
|
|
|
# One of { [ ( ? : ) ] } |
|
6622
|
|
|
|
|
|
|
#------------------------------------ |
|
6623
|
|
|
|
|
|
|
else { |
|
6624
|
|
|
|
|
|
|
|
|
6625
|
|
|
|
|
|
|
# $level_i is the level we will store. Levels of braces are |
|
6626
|
|
|
|
|
|
|
# set so that the leading braces have a HIGHER level than their |
|
6627
|
|
|
|
|
|
|
# CONTENTS, which is convenient for indentation. |
|
6628
|
10892
|
|
|
|
|
12269
|
my $level_i = $level_in_tokenizer; |
|
6629
|
|
|
|
|
|
|
|
|
6630
|
|
|
|
|
|
|
# $tok_i is the PRE-token. It only equals the token for symbols |
|
6631
|
10892
|
|
|
|
|
13367
|
my $tok_i = $rtokens->[$ii]; |
|
6632
|
|
|
|
|
|
|
|
|
6633
|
|
|
|
|
|
|
# $routput_indent_flag->[$ii] indicates that we need a change |
|
6634
|
|
|
|
|
|
|
# in level at a nested ternary, as follows |
|
6635
|
|
|
|
|
|
|
# 1 => at a nested ternary ? |
|
6636
|
|
|
|
|
|
|
# -1 => at a nested ternary : |
|
6637
|
|
|
|
|
|
|
# 0 => otherwise |
|
6638
|
|
|
|
|
|
|
|
|
6639
|
|
|
|
|
|
|
#-------------------------------------------- |
|
6640
|
|
|
|
|
|
|
# Section 2.1 Handle a level-increasing token |
|
6641
|
|
|
|
|
|
|
#-------------------------------------------- |
|
6642
|
10892
|
100
|
|
|
|
21627
|
if ( $is_opening_or_ternary_type{$type_i} ) { |
|
|
|
50
|
|
|
|
|
|
|
6643
|
|
|
|
|
|
|
|
|
6644
|
5446
|
100
|
|
|
|
8571
|
if ( $type_i eq '?' ) { |
|
6645
|
|
|
|
|
|
|
|
|
6646
|
193
|
100
|
|
|
|
570
|
if ( $routput_indent_flag->[$ii] > 0 ) { |
|
6647
|
8
|
|
|
|
|
15
|
$level_in_tokenizer++; |
|
6648
|
|
|
|
|
|
|
|
|
6649
|
|
|
|
|
|
|
# break BEFORE '?' in a nested ternary |
|
6650
|
8
|
|
|
|
|
11
|
$level_i = $level_in_tokenizer; |
|
6651
|
8
|
|
|
|
|
18
|
$nesting_block_string .= "$nesting_block_flag"; |
|
6652
|
|
|
|
|
|
|
|
|
6653
|
|
|
|
|
|
|
} |
|
6654
|
|
|
|
|
|
|
} |
|
6655
|
|
|
|
|
|
|
else { |
|
6656
|
|
|
|
|
|
|
|
|
6657
|
5253
|
|
|
|
|
6773
|
$nesting_token_string .= $tok_i; |
|
6658
|
|
|
|
|
|
|
|
|
6659
|
5253
|
100
|
100
|
|
|
11342
|
if ( $type_i eq '{' || $type_i eq 'L' ) { |
|
6660
|
|
|
|
|
|
|
|
|
6661
|
4813
|
|
|
|
|
5801
|
$level_in_tokenizer++; |
|
6662
|
|
|
|
|
|
|
|
|
6663
|
4813
|
100
|
|
|
|
7709
|
if ( $routput_block_type->[$ii] ) { |
|
6664
|
1115
|
|
|
|
|
1569
|
$nesting_block_flag = 1; |
|
6665
|
1115
|
|
|
|
|
1668
|
$nesting_block_string .= '1'; |
|
6666
|
|
|
|
|
|
|
} |
|
6667
|
|
|
|
|
|
|
else { |
|
6668
|
3698
|
|
|
|
|
4774
|
$nesting_block_flag = 0; |
|
6669
|
3698
|
|
|
|
|
5321
|
$nesting_block_string .= '0'; |
|
6670
|
|
|
|
|
|
|
} |
|
6671
|
|
|
|
|
|
|
} |
|
6672
|
|
|
|
|
|
|
} |
|
6673
|
|
|
|
|
|
|
} |
|
6674
|
|
|
|
|
|
|
|
|
6675
|
|
|
|
|
|
|
#--------------------------------------------- |
|
6676
|
|
|
|
|
|
|
# Section 2.2. Handle a level-decreasing token |
|
6677
|
|
|
|
|
|
|
#--------------------------------------------- |
|
6678
|
|
|
|
|
|
|
elsif ( $is_closing_or_ternary_type{$type_i} ) { |
|
6679
|
|
|
|
|
|
|
|
|
6680
|
5446
|
100
|
|
|
|
9582
|
if ( $type_i ne ':' ) { |
|
6681
|
5253
|
|
|
|
|
8346
|
my $char = chop $nesting_token_string; |
|
6682
|
5253
|
50
|
|
|
|
11465
|
if ( $char ne $matching_start_token{$tok_i} ) { |
|
6683
|
0
|
|
|
|
|
0
|
$nesting_token_string .= $char . $tok_i; |
|
6684
|
|
|
|
|
|
|
} |
|
6685
|
|
|
|
|
|
|
} |
|
6686
|
|
|
|
|
|
|
|
|
6687
|
5446
|
100
|
100
|
|
|
14059
|
if ( |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6688
|
|
|
|
|
|
|
$type_i eq '}' |
|
6689
|
|
|
|
|
|
|
|| $type_i eq 'R' |
|
6690
|
|
|
|
|
|
|
|
|
6691
|
|
|
|
|
|
|
# only the second and higher ? : have levels |
|
6692
|
|
|
|
|
|
|
|| $type_i eq ':' && $routput_indent_flag->[$ii] < 0 |
|
6693
|
|
|
|
|
|
|
) |
|
6694
|
|
|
|
|
|
|
{ |
|
6695
|
|
|
|
|
|
|
|
|
6696
|
4821
|
|
|
|
|
5970
|
$level_i = --$level_in_tokenizer; |
|
6697
|
|
|
|
|
|
|
|
|
6698
|
4821
|
50
|
|
|
|
7827
|
if ( $level_in_tokenizer < 0 ) { |
|
6699
|
0
|
0
|
|
|
|
0
|
if ( !$self->[_saw_negative_indentation_] ) { |
|
6700
|
0
|
|
|
|
|
0
|
$self->[_saw_negative_indentation_] = 1; |
|
6701
|
0
|
|
|
|
|
0
|
$self->warning( |
|
6702
|
|
|
|
|
|
|
"Starting negative indentation\n"); |
|
6703
|
|
|
|
|
|
|
} |
|
6704
|
|
|
|
|
|
|
} |
|
6705
|
|
|
|
|
|
|
|
|
6706
|
|
|
|
|
|
|
# restore previous level values |
|
6707
|
4821
|
50
|
|
|
|
8149
|
if ( length($nesting_block_string) > 1 ) |
|
6708
|
|
|
|
|
|
|
{ # true for valid script |
|
6709
|
4821
|
|
|
|
|
5791
|
chop $nesting_block_string; |
|
6710
|
4821
|
|
|
|
|
8489
|
$nesting_block_flag = |
|
6711
|
|
|
|
|
|
|
substr( $nesting_block_string, -1 ) eq '1'; |
|
6712
|
|
|
|
|
|
|
} |
|
6713
|
|
|
|
|
|
|
|
|
6714
|
|
|
|
|
|
|
} |
|
6715
|
|
|
|
|
|
|
} |
|
6716
|
|
|
|
|
|
|
|
|
6717
|
|
|
|
|
|
|
#----------------------------------------------------- |
|
6718
|
|
|
|
|
|
|
# Section 2.3. Unexpected sequenced token type - error |
|
6719
|
|
|
|
|
|
|
#----------------------------------------------------- |
|
6720
|
|
|
|
|
|
|
else { |
|
6721
|
|
|
|
|
|
|
|
|
6722
|
|
|
|
|
|
|
# The tokenizer should only be assigning sequence numbers |
|
6723
|
|
|
|
|
|
|
# to types { [ ( ? ) ] } : |
|
6724
|
0
|
|
|
|
|
0
|
DEVEL_MODE && Fault(<<EOM); |
|
6725
|
|
|
|
|
|
|
unexpected sequence number on token type $type_i with pre-tok=$tok_i |
|
6726
|
|
|
|
|
|
|
EOM |
|
6727
|
|
|
|
|
|
|
} |
|
6728
|
|
|
|
|
|
|
|
|
6729
|
|
|
|
|
|
|
#------------------------------------------------ |
|
6730
|
|
|
|
|
|
|
# Section 2.4. Store values for a sequenced token |
|
6731
|
|
|
|
|
|
|
#------------------------------------------------ |
|
6732
|
|
|
|
|
|
|
|
|
6733
|
|
|
|
|
|
|
# The starting nesting block string, which is used in any .LOG |
|
6734
|
|
|
|
|
|
|
# output, should include the first token of the line |
|
6735
|
10892
|
100
|
|
|
|
16503
|
if ( !@output_levels ) { |
|
6736
|
1776
|
|
|
|
|
2674
|
$nesting_block_string_0 = $nesting_block_string; |
|
6737
|
|
|
|
|
|
|
} |
|
6738
|
|
|
|
|
|
|
|
|
6739
|
|
|
|
|
|
|
# Store values for a sequenced token |
|
6740
|
10892
|
|
|
|
|
15435
|
push( @output_levels, $level_i ); |
|
6741
|
10892
|
|
|
|
|
17567
|
push( @output_block_type, $routput_block_type->[$ii] ); |
|
6742
|
10892
|
|
|
|
|
15407
|
push( @output_type_sequence, $routput_type_sequence->[$ii] ); |
|
6743
|
10892
|
|
|
|
|
19292
|
push( @output_token_type, $type_i ); |
|
6744
|
|
|
|
|
|
|
|
|
6745
|
|
|
|
|
|
|
} |
|
6746
|
|
|
|
|
|
|
} ## End loop to over tokens |
|
6747
|
|
|
|
|
|
|
|
|
6748
|
|
|
|
|
|
|
#--------------------- |
|
6749
|
|
|
|
|
|
|
# Post-loop operations |
|
6750
|
|
|
|
|
|
|
#--------------------- |
|
6751
|
|
|
|
|
|
|
|
|
6752
|
6860
|
|
|
|
|
14186
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string_0; |
|
6753
|
|
|
|
|
|
|
|
|
6754
|
|
|
|
|
|
|
# Form and store the tokens |
|
6755
|
6860
|
50
|
|
|
|
11565
|
if (@output_levels) { |
|
6756
|
|
|
|
|
|
|
|
|
6757
|
6860
|
|
|
|
|
7467
|
my $im = shift @{$routput_token_list}; |
|
|
6860
|
|
|
|
|
10777
|
|
|
6758
|
6860
|
|
|
|
|
10229
|
my $offset = $rtoken_map->[$im]; |
|
6759
|
6860
|
|
|
|
|
7674
|
foreach my $ii ( @{$routput_token_list} ) { |
|
|
6860
|
|
|
|
|
9672
|
|
|
6760
|
52599
|
|
|
|
|
55642
|
my $numc = $rtoken_map->[$ii] - $offset; |
|
6761
|
52599
|
|
|
|
|
77440
|
push( @output_tokens, substr( $input_line, $offset, $numc ) ); |
|
6762
|
52599
|
|
|
|
|
52524
|
$offset += $numc; |
|
6763
|
|
|
|
|
|
|
|
|
6764
|
|
|
|
|
|
|
# programming note: it seems most efficient to 'next' out of |
|
6765
|
|
|
|
|
|
|
# a critical loop like this as early as possible. So instead |
|
6766
|
|
|
|
|
|
|
# of 'if ( DEVEL_MODE && $numc < 0 )' we write: |
|
6767
|
52599
|
|
|
|
|
55888
|
next unless (DEVEL_MODE); |
|
6768
|
0
|
0
|
|
|
|
0
|
next if ( $numc > 0 ); |
|
6769
|
|
|
|
|
|
|
|
|
6770
|
|
|
|
|
|
|
# Should not happen unless @{$rtoken_map} is corrupted |
|
6771
|
0
|
|
|
|
|
0
|
Fault("number of characters is '$numc' but should be >0\n"); |
|
6772
|
|
|
|
|
|
|
} |
|
6773
|
|
|
|
|
|
|
|
|
6774
|
|
|
|
|
|
|
# Form and store the final token of this line |
|
6775
|
6860
|
|
|
|
|
9653
|
my $numc = length($input_line) - $offset; |
|
6776
|
6860
|
|
|
|
|
12043
|
push( @output_tokens, substr( $input_line, $offset, $numc ) ); |
|
6777
|
|
|
|
|
|
|
|
|
6778
|
6860
|
|
|
|
|
8315
|
if (DEVEL_MODE) { |
|
6779
|
|
|
|
|
|
|
if ( $numc <= 0 ) { |
|
6780
|
|
|
|
|
|
|
|
|
6781
|
|
|
|
|
|
|
# check '$rtoken_map' and '$routput_token_list' |
|
6782
|
|
|
|
|
|
|
Fault("Number of Characters is '$numc' but should be >0\n"); |
|
6783
|
|
|
|
|
|
|
} |
|
6784
|
|
|
|
|
|
|
|
|
6785
|
|
|
|
|
|
|
# Make sure we didn't gain or lose any characters |
|
6786
|
|
|
|
|
|
|
my $test_line = join EMPTY_STRING, @output_tokens; |
|
6787
|
|
|
|
|
|
|
if ( $test_line ne $input_line ) { |
|
6788
|
|
|
|
|
|
|
my $len_input = length($input_line); |
|
6789
|
|
|
|
|
|
|
my $len_test = length($test_line); |
|
6790
|
|
|
|
|
|
|
|
|
6791
|
|
|
|
|
|
|
# check '$rtoken_map' and '$routput_token_list' |
|
6792
|
|
|
|
|
|
|
Fault(<<EOM); |
|
6793
|
|
|
|
|
|
|
Reconstructed line difers from input; input_length=$len_input test_length=$len_test |
|
6794
|
|
|
|
|
|
|
input:'$input_line' |
|
6795
|
|
|
|
|
|
|
test :'$test_line' |
|
6796
|
|
|
|
|
|
|
EOM |
|
6797
|
|
|
|
|
|
|
} |
|
6798
|
|
|
|
|
|
|
} |
|
6799
|
|
|
|
|
|
|
} |
|
6800
|
|
|
|
|
|
|
|
|
6801
|
|
|
|
|
|
|
# Wrap up this line of tokens for shipping to the Formatter |
|
6802
|
6860
|
|
|
|
|
17468
|
$line_of_tokens->{_rtoken_type} = \@output_token_type; |
|
6803
|
6860
|
|
|
|
|
14048
|
$line_of_tokens->{_rtokens} = \@output_tokens; |
|
6804
|
6860
|
|
|
|
|
13291
|
$line_of_tokens->{_rblock_type} = \@output_block_type; |
|
6805
|
6860
|
|
|
|
|
11445
|
$line_of_tokens->{_rtype_sequence} = \@output_type_sequence; |
|
6806
|
6860
|
|
|
|
|
11259
|
$line_of_tokens->{_rlevels} = \@output_levels; |
|
6807
|
|
|
|
|
|
|
|
|
6808
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
|
6809
|
|
|
|
|
|
|
# Compare input indentation with computed levels at closing braces |
|
6810
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
|
6811
|
|
|
|
|
|
|
# This may provide a useful hint for error location if the file |
|
6812
|
|
|
|
|
|
|
# is not balanced in braces. Closing braces are used because they |
|
6813
|
|
|
|
|
|
|
# have a well-defined indentation and can be processed efficiently. |
|
6814
|
6860
|
100
|
|
|
|
12596
|
if ( $output_tokens[0] eq '}' ) { |
|
6815
|
|
|
|
|
|
|
|
|
6816
|
737
|
|
|
|
|
1218
|
my $blk = $output_block_type[0]; |
|
6817
|
737
|
100
|
100
|
|
|
4171
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
6818
|
|
|
|
|
|
|
( |
|
6819
|
|
|
|
|
|
|
# builtin block types without continuation indentation |
|
6820
|
|
|
|
|
|
|
$is_zero_continuation_block_type{$blk} |
|
6821
|
|
|
|
|
|
|
|
|
6822
|
|
|
|
|
|
|
# or a named sub, but skip sub aliases for efficiency, |
|
6823
|
|
|
|
|
|
|
# since this is just for diagnostic info |
|
6824
|
|
|
|
|
|
|
|| substr( $blk, 0, 4 ) eq 'sub ' |
|
6825
|
|
|
|
|
|
|
) |
|
6826
|
|
|
|
|
|
|
|
|
6827
|
|
|
|
|
|
|
# and we are not in format skipping |
|
6828
|
|
|
|
|
|
|
&& !$self->[_in_format_skipping_] |
|
6829
|
|
|
|
|
|
|
) |
|
6830
|
|
|
|
|
|
|
{ |
|
6831
|
|
|
|
|
|
|
|
|
6832
|
|
|
|
|
|
|
# subtract 1 space for newline in untrimmed line |
|
6833
|
395
|
|
|
|
|
811
|
my $untrimmed_input_line = $line_of_tokens->{_line_text}; |
|
6834
|
395
|
|
|
|
|
886
|
my $space_count = |
|
6835
|
|
|
|
|
|
|
length($untrimmed_input_line) - length($input_line) - 1; |
|
6836
|
|
|
|
|
|
|
|
|
6837
|
|
|
|
|
|
|
# check for tabs |
|
6838
|
395
|
100
|
100
|
|
|
1540
|
if ( $space_count |
|
6839
|
|
|
|
|
|
|
&& ord( substr( $untrimmed_input_line, 0, 1 ) ) == ORD_TAB ) |
|
6840
|
|
|
|
|
|
|
{ |
|
6841
|
15
|
50
|
|
|
|
125
|
if ( $untrimmed_input_line =~ /^(\t+)?(\s+)?/ ) { |
|
6842
|
15
|
50
|
|
|
|
65
|
if ($1) { $space_count += length($1) * $tabsize } |
|
|
15
|
|
|
|
|
38
|
|
|
6843
|
15
|
100
|
|
|
|
49
|
if ($2) { $space_count += length($2) } |
|
|
1
|
|
|
|
|
4
|
|
|
6844
|
|
|
|
|
|
|
} |
|
6845
|
|
|
|
|
|
|
} |
|
6846
|
|
|
|
|
|
|
|
|
6847
|
|
|
|
|
|
|
# '$guess' = the level according to indentation |
|
6848
|
395
|
|
|
|
|
968
|
my $guess = int( $space_count / $rOpts_indent_columns ); |
|
6849
|
|
|
|
|
|
|
|
|
6850
|
|
|
|
|
|
|
# subtract 1 level from guess for --indent-closing-brace |
|
6851
|
395
|
100
|
|
|
|
1001
|
$guess -= 1 if ($rOpts_indent_closing_brace); |
|
6852
|
|
|
|
|
|
|
|
|
6853
|
|
|
|
|
|
|
# subtract 1 from $level for each non-indenting brace level |
|
6854
|
395
|
|
|
|
|
561
|
my $adjust = @{ $self->[_rnon_indenting_brace_stack_] }; |
|
|
395
|
|
|
|
|
692
|
|
|
6855
|
|
|
|
|
|
|
|
|
6856
|
395
|
|
|
|
|
655
|
my $level = $output_levels[0]; |
|
6857
|
|
|
|
|
|
|
|
|
6858
|
|
|
|
|
|
|
# find the difference between expected and indentation guess |
|
6859
|
395
|
|
|
|
|
665
|
my $level_diff = $level - $adjust - $guess; |
|
6860
|
|
|
|
|
|
|
|
|
6861
|
395
|
|
|
|
|
601
|
my $rhash = $self->[_rclosing_brace_indentation_hash_]; |
|
6862
|
|
|
|
|
|
|
|
|
6863
|
|
|
|
|
|
|
# results are only valid if we guess correctly at the |
|
6864
|
|
|
|
|
|
|
# first spaced brace |
|
6865
|
395
|
100
|
100
|
|
|
1503
|
if ( $space_count && !defined( $rhash->{valid} ) ) { |
|
6866
|
79
|
|
|
|
|
199
|
$rhash->{valid} = !$level_diff; |
|
6867
|
|
|
|
|
|
|
} |
|
6868
|
|
|
|
|
|
|
|
|
6869
|
|
|
|
|
|
|
# save the result |
|
6870
|
395
|
|
|
|
|
771
|
my $rhistory_line_number = $rhash->{rhistory_line_number}; |
|
6871
|
395
|
|
|
|
|
707
|
my $rhistory_level_diff = $rhash->{rhistory_level_diff}; |
|
6872
|
395
|
|
|
|
|
718
|
my $rhistory_anchor_point = $rhash->{rhistory_anchor_point}; |
|
6873
|
|
|
|
|
|
|
|
|
6874
|
395
|
100
|
|
|
|
960
|
if ( $rhistory_level_diff->[-1] != $level_diff ) { |
|
6875
|
|
|
|
|
|
|
|
|
6876
|
|
|
|
|
|
|
# Patch for non-indenting-braces: if we guess zero and |
|
6877
|
|
|
|
|
|
|
# match before all non-indenting braces have been found, |
|
6878
|
|
|
|
|
|
|
# it means that we would need negative indentation to |
|
6879
|
|
|
|
|
|
|
# match if/when the brace is found. So we have a problem |
|
6880
|
|
|
|
|
|
|
# from here on. We indicate this with a value 2 instead |
|
6881
|
|
|
|
|
|
|
# of 1 as a signal to stop outputting the table here. |
|
6882
|
55
|
|
|
|
|
95
|
my $anchor = 1; |
|
6883
|
55
|
50
|
66
|
|
|
277
|
if ( $guess == 0 && $adjust > 0 ) { $anchor = 2 } |
|
|
0
|
|
|
|
|
0
|
|
|
6884
|
|
|
|
|
|
|
|
|
6885
|
|
|
|
|
|
|
# add an anchor point |
|
6886
|
55
|
|
|
|
|
89
|
push @{$rhistory_level_diff}, $level_diff; |
|
|
55
|
|
|
|
|
119
|
|
|
6887
|
55
|
|
|
|
|
95
|
push @{$rhistory_line_number}, $input_line_number; |
|
|
55
|
|
|
|
|
120
|
|
|
6888
|
55
|
|
|
|
|
94
|
push @{$rhistory_anchor_point}, $anchor; |
|
|
55
|
|
|
|
|
132
|
|
|
6889
|
|
|
|
|
|
|
} |
|
6890
|
|
|
|
|
|
|
else { |
|
6891
|
|
|
|
|
|
|
|
|
6892
|
|
|
|
|
|
|
# add a movable point following an anchor point |
|
6893
|
340
|
100
|
|
|
|
766
|
if ( $rhistory_anchor_point->[-1] ) { |
|
6894
|
152
|
|
|
|
|
291
|
push @{$rhistory_level_diff}, $level_diff; |
|
|
152
|
|
|
|
|
316
|
|
|
6895
|
152
|
|
|
|
|
276
|
push @{$rhistory_line_number}, $input_line_number; |
|
|
152
|
|
|
|
|
288
|
|
|
6896
|
152
|
|
|
|
|
244
|
push @{$rhistory_anchor_point}, 0; |
|
|
152
|
|
|
|
|
367
|
|
|
6897
|
|
|
|
|
|
|
} |
|
6898
|
|
|
|
|
|
|
|
|
6899
|
|
|
|
|
|
|
# extend a movable point |
|
6900
|
|
|
|
|
|
|
else { |
|
6901
|
188
|
|
|
|
|
443
|
$rhistory_line_number->[-1] = $input_line_number; |
|
6902
|
|
|
|
|
|
|
} |
|
6903
|
|
|
|
|
|
|
} |
|
6904
|
|
|
|
|
|
|
} |
|
6905
|
|
|
|
|
|
|
} |
|
6906
|
|
|
|
|
|
|
|
|
6907
|
6860
|
|
|
|
|
13713
|
return; |
|
6908
|
|
|
|
|
|
|
} ## end sub tokenizer_wrapup_line |
|
6909
|
|
|
|
|
|
|
|
|
6910
|
|
|
|
|
|
|
} ## end tokenize_this_line |
|
6911
|
|
|
|
|
|
|
|
|
6912
|
|
|
|
|
|
|
####################################################################### |
|
6913
|
|
|
|
|
|
|
# Tokenizer routines which assist in identifying token types |
|
6914
|
|
|
|
|
|
|
####################################################################### |
|
6915
|
|
|
|
|
|
|
|
|
6916
|
|
|
|
|
|
|
# Define Global '%op_expected_table' |
|
6917
|
|
|
|
|
|
|
# = hash table of operator expected values based on last nonblank token |
|
6918
|
|
|
|
|
|
|
|
|
6919
|
|
|
|
|
|
|
# exceptions to perl's weird parsing rules after type 'Z' |
|
6920
|
|
|
|
|
|
|
my %is_weird_parsing_rule_exception; |
|
6921
|
|
|
|
|
|
|
|
|
6922
|
|
|
|
|
|
|
my %is_paren_dollar; |
|
6923
|
|
|
|
|
|
|
|
|
6924
|
|
|
|
|
|
|
my %is_n_v; |
|
6925
|
|
|
|
|
|
|
|
|
6926
|
|
|
|
|
|
|
BEGIN { |
|
6927
|
|
|
|
|
|
|
|
|
6928
|
|
|
|
|
|
|
# Always expecting TERM following these types: |
|
6929
|
|
|
|
|
|
|
# note: this is identical to '@value_requestor_type' defined later. |
|
6930
|
|
|
|
|
|
|
# Fix for c250: add new type 'P' for package (expecting VERSION or {} |
|
6931
|
|
|
|
|
|
|
# after package NAMESPACE, so expecting TERM) |
|
6932
|
|
|
|
|
|
|
# Fix for c250: add new type 'S' for sub (not expecting operator) |
|
6933
|
44
|
|
|
44
|
|
867
|
my @q = qw# |
|
6934
|
|
|
|
|
|
|
; ! + x & ? F J - p / Y : % f U ~ A G j L P S * . | ^ < = [ m { > t |
|
6935
|
|
|
|
|
|
|
|| >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /= |
|
6936
|
|
|
|
|
|
|
&= // >> ~. &. |. ^. |
|
6937
|
|
|
|
|
|
|
... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~ |
|
6938
|
|
|
|
|
|
|
#; |
|
6939
|
44
|
|
|
|
|
174
|
push @q, BACKSLASH; |
|
6940
|
44
|
|
|
|
|
96
|
push @q, COMMA; |
|
6941
|
44
|
|
|
|
|
83
|
push @q, '('; # for completeness, not currently a token type |
|
6942
|
44
|
|
|
|
|
94
|
push @q, '->'; # was previously in UNKNOWN |
|
6943
|
44
|
|
|
|
|
2435
|
$op_expected_table{$_} = TERM for @q; |
|
6944
|
|
|
|
|
|
|
|
|
6945
|
|
|
|
|
|
|
# No UNKNOWN table types: |
|
6946
|
|
|
|
|
|
|
# removed '->' for c030, now always TERM |
|
6947
|
|
|
|
|
|
|
# removed 'w' for c392 to allow use of 'function_count' info in the sub |
|
6948
|
|
|
|
|
|
|
|
|
6949
|
|
|
|
|
|
|
# Always expecting OPERATOR ... |
|
6950
|
|
|
|
|
|
|
# 'n' and 'v' are currently excluded because they might be VERSION numbers |
|
6951
|
|
|
|
|
|
|
# 'i' is currently excluded because it might be a package |
|
6952
|
|
|
|
|
|
|
# 'q' is currently excluded because it might be a prototype |
|
6953
|
|
|
|
|
|
|
# Fix for c030: removed '->' from this list: |
|
6954
|
|
|
|
|
|
|
# Fix for c250: added 'i' because new type 'P' was added |
|
6955
|
44
|
|
|
|
|
258
|
@q = qw( -- C h R ++ ] Q <> i ); |
|
6956
|
44
|
|
|
|
|
80
|
push @q, ')'; |
|
6957
|
44
|
|
|
|
|
585
|
$op_expected_table{$_} = OPERATOR for @q; |
|
6958
|
|
|
|
|
|
|
|
|
6959
|
|
|
|
|
|
|
# Fix for git #62: added '*' and '%' |
|
6960
|
44
|
|
|
|
|
122
|
@q = qw( < ? * % ); |
|
6961
|
44
|
|
|
|
|
115
|
$is_weird_parsing_rule_exception{$_} = 1 for @q; |
|
6962
|
|
|
|
|
|
|
|
|
6963
|
44
|
|
|
|
|
85
|
@q = qw<) $>; |
|
6964
|
44
|
|
|
|
|
100
|
$is_paren_dollar{$_} = 1 for @q; |
|
6965
|
|
|
|
|
|
|
|
|
6966
|
44
|
|
|
|
|
83
|
@q = qw( n v ); |
|
6967
|
44
|
|
|
|
|
1377
|
$is_n_v{$_} = 1 for @q; |
|
6968
|
|
|
|
|
|
|
|
|
6969
|
|
|
|
|
|
|
} ## end BEGIN |
|
6970
|
|
|
|
|
|
|
|
|
6971
|
44
|
|
|
44
|
|
288
|
use constant DEBUG_OPERATOR_EXPECTED => 0; |
|
|
44
|
|
|
|
|
67
|
|
|
|
44
|
|
|
|
|
97167
|
|
|
6972
|
|
|
|
|
|
|
|
|
6973
|
|
|
|
|
|
|
sub operator_expected { |
|
6974
|
|
|
|
|
|
|
|
|
6975
|
11847
|
|
|
11847
|
0
|
21236
|
my ( $self, $tok, $next_type, $blank_after_Z ) = @_; |
|
6976
|
|
|
|
|
|
|
|
|
6977
|
|
|
|
|
|
|
# Returns a parameter indicating what types of tokens can occur next |
|
6978
|
|
|
|
|
|
|
|
|
6979
|
|
|
|
|
|
|
# Call format: |
|
6980
|
|
|
|
|
|
|
# $op_expected = |
|
6981
|
|
|
|
|
|
|
# $self->operator_expected( $tok, $next_type, $blank_after_Z ); |
|
6982
|
|
|
|
|
|
|
# where |
|
6983
|
|
|
|
|
|
|
# $tok is the current token |
|
6984
|
|
|
|
|
|
|
# $next_type is the type of the next token (blank or not) |
|
6985
|
|
|
|
|
|
|
# $blank_after_Z = flag for guessing after a type 'Z': |
|
6986
|
|
|
|
|
|
|
# true if $tok follows type 'Z' with intermediate blank |
|
6987
|
|
|
|
|
|
|
# false if $tok follows type 'Z' with no intermediate blank |
|
6988
|
|
|
|
|
|
|
# ignored if $tok does not follow type 'Z' |
|
6989
|
|
|
|
|
|
|
|
|
6990
|
|
|
|
|
|
|
# Many perl symbols have two or more meanings. For example, '<<' |
|
6991
|
|
|
|
|
|
|
# can be a shift operator or a here-doc operator. The |
|
6992
|
|
|
|
|
|
|
# interpretation of these symbols depends on the current state of |
|
6993
|
|
|
|
|
|
|
# the tokenizer, which may either be expecting a term or an |
|
6994
|
|
|
|
|
|
|
# operator. For this example, a << would be a shift if an OPERATOR |
|
6995
|
|
|
|
|
|
|
# is expected, and a here-doc if a TERM is expected. This routine |
|
6996
|
|
|
|
|
|
|
# is called to make this decision for any current token. It returns |
|
6997
|
|
|
|
|
|
|
# one of three possible values: |
|
6998
|
|
|
|
|
|
|
# |
|
6999
|
|
|
|
|
|
|
# OPERATOR - operator expected (or at least, not a term) |
|
7000
|
|
|
|
|
|
|
# UNKNOWN - can't tell |
|
7001
|
|
|
|
|
|
|
# TERM - a term is expected (or at least, not an operator) |
|
7002
|
|
|
|
|
|
|
# |
|
7003
|
|
|
|
|
|
|
# The decision is based on what has been seen so far. This |
|
7004
|
|
|
|
|
|
|
# information is stored in the "$last_nonblank_type" and |
|
7005
|
|
|
|
|
|
|
# "$last_nonblank_token" variables. For example, if the |
|
7006
|
|
|
|
|
|
|
# $last_nonblank_type is '=~', then we are expecting a TERM, whereas |
|
7007
|
|
|
|
|
|
|
# if $last_nonblank_type is 'n' (numeric), we are expecting an |
|
7008
|
|
|
|
|
|
|
# OPERATOR. |
|
7009
|
|
|
|
|
|
|
# |
|
7010
|
|
|
|
|
|
|
# If a UNKNOWN is returned, the calling routine must guess. A major |
|
7011
|
|
|
|
|
|
|
# goal of this tokenizer is to minimize the possibility of returning |
|
7012
|
|
|
|
|
|
|
# UNKNOWN, because a wrong guess can spoil the formatting of a |
|
7013
|
|
|
|
|
|
|
# script. |
|
7014
|
|
|
|
|
|
|
# |
|
7015
|
|
|
|
|
|
|
# Adding NEW_TOKENS: it is critically important that this routine be |
|
7016
|
|
|
|
|
|
|
# updated to allow it to determine if an operator or term is to be |
|
7017
|
|
|
|
|
|
|
# expected after the new token. Doing this simply involves adding |
|
7018
|
|
|
|
|
|
|
# the new token character to one of the regexes in this routine or |
|
7019
|
|
|
|
|
|
|
# to one of the hash lists |
|
7020
|
|
|
|
|
|
|
# that it uses, which are initialized in the BEGIN section. |
|
7021
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, |
|
7022
|
|
|
|
|
|
|
# $statement_type |
|
7023
|
|
|
|
|
|
|
|
|
7024
|
|
|
|
|
|
|
# When possible, token types should be selected such that we can determine |
|
7025
|
|
|
|
|
|
|
# the 'operator_expected' value by a simple hash lookup. If there are |
|
7026
|
|
|
|
|
|
|
# exceptions, that is an indication that a new type is needed. |
|
7027
|
|
|
|
|
|
|
|
|
7028
|
|
|
|
|
|
|
#-------------------------------------------- |
|
7029
|
|
|
|
|
|
|
# Section 1: Table lookup will get most cases |
|
7030
|
|
|
|
|
|
|
#-------------------------------------------- |
|
7031
|
|
|
|
|
|
|
|
|
7032
|
|
|
|
|
|
|
# Many types are can be obtained by a table lookup. This typically handles |
|
7033
|
|
|
|
|
|
|
# more than half of the calls. For speed, the caller may try table lookup |
|
7034
|
|
|
|
|
|
|
# first before calling this sub. |
|
7035
|
11847
|
|
|
|
|
15396
|
my $op_expected = $op_expected_table{$last_nonblank_type}; |
|
7036
|
11847
|
100
|
|
|
|
17770
|
if ( defined($op_expected) ) { |
|
7037
|
|
|
|
|
|
|
DEBUG_OPERATOR_EXPECTED |
|
7038
|
78
|
|
|
|
|
111
|
&& print {*STDOUT} |
|
7039
|
|
|
|
|
|
|
"OPERATOR_EXPECTED: Table Lookup; returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; |
|
7040
|
78
|
|
|
|
|
193
|
return $op_expected; |
|
7041
|
|
|
|
|
|
|
} |
|
7042
|
|
|
|
|
|
|
|
|
7043
|
|
|
|
|
|
|
DEBUG_OPERATOR_EXPECTED |
|
7044
|
11769
|
|
|
|
|
12596
|
&& print {*STDOUT} |
|
7045
|
|
|
|
|
|
|
"OPERATOR_EXPECTED: in hardwired table for last type $last_nonblank_type token $last_nonblank_token\n"; |
|
7046
|
|
|
|
|
|
|
|
|
7047
|
|
|
|
|
|
|
#--------------------------------------------- |
|
7048
|
|
|
|
|
|
|
# Section 2: Handle special cases if necessary |
|
7049
|
|
|
|
|
|
|
#--------------------------------------------- |
|
7050
|
|
|
|
|
|
|
|
|
7051
|
|
|
|
|
|
|
# Types 'k', '}' and 'Z' depend on context |
|
7052
|
|
|
|
|
|
|
# Types 'n', 'v', 'q' also depend on context. |
|
7053
|
|
|
|
|
|
|
|
|
7054
|
|
|
|
|
|
|
# identifier... |
|
7055
|
|
|
|
|
|
|
# Fix for c250: removed coding for type 'i' because 'i' and new type 'P' |
|
7056
|
|
|
|
|
|
|
# are now done by hash table lookup |
|
7057
|
|
|
|
|
|
|
|
|
7058
|
|
|
|
|
|
|
#-------------------- |
|
7059
|
|
|
|
|
|
|
# Section 2A: keyword |
|
7060
|
|
|
|
|
|
|
#-------------------- |
|
7061
|
11769
|
100
|
|
|
|
18699
|
if ( $last_nonblank_type eq 'k' ) { |
|
7062
|
|
|
|
|
|
|
|
|
7063
|
|
|
|
|
|
|
# keywords expecting TERM: |
|
7064
|
3063
|
100
|
|
|
|
7700
|
if ( $expecting_term_token{$last_nonblank_token} ) { |
|
7065
|
|
|
|
|
|
|
|
|
7066
|
|
|
|
|
|
|
# Exceptions from TERM: |
|
7067
|
|
|
|
|
|
|
|
|
7068
|
|
|
|
|
|
|
# // may follow perl functions which may be unary operators |
|
7069
|
|
|
|
|
|
|
# see test file dor.t (defined or); |
|
7070
|
2943
|
100
|
100
|
|
|
6011
|
if ( |
|
|
|
|
100
|
|
|
|
|
|
7071
|
|
|
|
|
|
|
$tok eq '/' |
|
7072
|
|
|
|
|
|
|
&& $next_type eq '/' |
|
7073
|
|
|
|
|
|
|
&& $is_keyword_rejecting_slash_as_pattern_delimiter{ |
|
7074
|
|
|
|
|
|
|
$last_nonblank_token} |
|
7075
|
|
|
|
|
|
|
) |
|
7076
|
|
|
|
|
|
|
{ |
|
7077
|
1
|
|
|
|
|
3
|
return OPERATOR; |
|
7078
|
|
|
|
|
|
|
} |
|
7079
|
|
|
|
|
|
|
|
|
7080
|
|
|
|
|
|
|
# Patch to allow a ? following 'split' to be a deprecated pattern |
|
7081
|
|
|
|
|
|
|
# delimiter. This patch is coordinated with the omission of split |
|
7082
|
|
|
|
|
|
|
# from the list |
|
7083
|
|
|
|
|
|
|
# %is_keyword_rejecting_question_as_pattern_delimiter. This patch |
|
7084
|
|
|
|
|
|
|
# will force perltidy to guess. |
|
7085
|
2942
|
50
|
66
|
|
|
5916
|
if ( $tok eq '?' |
|
7086
|
|
|
|
|
|
|
&& $last_nonblank_token eq 'split' ) |
|
7087
|
|
|
|
|
|
|
{ |
|
7088
|
0
|
|
|
|
|
0
|
return UNKNOWN; |
|
7089
|
|
|
|
|
|
|
} |
|
7090
|
|
|
|
|
|
|
|
|
7091
|
2942
|
|
|
|
|
5593
|
return TERM; |
|
7092
|
|
|
|
|
|
|
} |
|
7093
|
|
|
|
|
|
|
|
|
7094
|
|
|
|
|
|
|
# keywords expecting OPERATOR: |
|
7095
|
120
|
100
|
|
|
|
362
|
if ( $expecting_operator_token{$last_nonblank_token} ) { |
|
7096
|
7
|
|
|
|
|
19
|
return OPERATOR; |
|
7097
|
|
|
|
|
|
|
} |
|
7098
|
|
|
|
|
|
|
|
|
7099
|
113
|
|
|
|
|
299
|
return TERM; |
|
7100
|
|
|
|
|
|
|
|
|
7101
|
|
|
|
|
|
|
} ## end type 'k' |
|
7102
|
|
|
|
|
|
|
|
|
7103
|
|
|
|
|
|
|
#------------------------------------ |
|
7104
|
|
|
|
|
|
|
# Section 2B: Closing container token |
|
7105
|
|
|
|
|
|
|
#------------------------------------ |
|
7106
|
|
|
|
|
|
|
|
|
7107
|
|
|
|
|
|
|
# Note that the actual token for type '}' may also be a ')'. |
|
7108
|
|
|
|
|
|
|
|
|
7109
|
|
|
|
|
|
|
# Also note that $last_nonblank_token is not the token corresponding to |
|
7110
|
|
|
|
|
|
|
# $last_nonblank_type when the type is a closing container. In that |
|
7111
|
|
|
|
|
|
|
# case it is the token before the corresponding opening container token. |
|
7112
|
|
|
|
|
|
|
# So for example, for this snippet |
|
7113
|
|
|
|
|
|
|
# $a = do { BLOCK } / 2; |
|
7114
|
|
|
|
|
|
|
# the $last_nonblank_token is 'do' when $last_nonblank_type eq '}'. |
|
7115
|
|
|
|
|
|
|
|
|
7116
|
8706
|
100
|
|
|
|
15110
|
if ( $last_nonblank_type eq '}' ) { |
|
7117
|
|
|
|
|
|
|
|
|
7118
|
|
|
|
|
|
|
#------------------------------------------- |
|
7119
|
|
|
|
|
|
|
# Section 2B1: Closing structural ')' or ']' |
|
7120
|
|
|
|
|
|
|
#------------------------------------------- |
|
7121
|
4140
|
100
|
100
|
|
|
10520
|
if ( $last_nonblank_token eq ')' || $last_nonblank_token eq ']' ) { |
|
7122
|
2794
|
|
|
|
|
5600
|
return OPERATOR; |
|
7123
|
|
|
|
|
|
|
} |
|
7124
|
|
|
|
|
|
|
|
|
7125
|
|
|
|
|
|
|
#------------------------------------- |
|
7126
|
|
|
|
|
|
|
# Section 2B2: Closing block brace '}' |
|
7127
|
|
|
|
|
|
|
#------------------------------------- |
|
7128
|
1346
|
|
|
|
|
2381
|
my $blk = $rbrace_type->[ $brace_depth + 1 ]; |
|
7129
|
|
|
|
|
|
|
|
|
7130
|
|
|
|
|
|
|
# Non-blocks |
|
7131
|
1346
|
100
|
|
|
|
2576
|
if ( !defined($blk) ) { |
|
7132
|
2
|
|
|
|
|
4
|
return OPERATOR; |
|
7133
|
|
|
|
|
|
|
} |
|
7134
|
|
|
|
|
|
|
|
|
7135
|
|
|
|
|
|
|
# Unidentified block type |
|
7136
|
1344
|
100
|
|
|
|
2921
|
if ( !$blk ) { |
|
7137
|
367
|
|
|
|
|
776
|
return UNKNOWN; |
|
7138
|
|
|
|
|
|
|
} |
|
7139
|
|
|
|
|
|
|
|
|
7140
|
|
|
|
|
|
|
# Blocks followed by a TERM |
|
7141
|
977
|
100
|
100
|
|
|
9842
|
if ( $is_zero_continuation_block_type{$blk} |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
7142
|
|
|
|
|
|
|
|| $is_sort_map_grep{$blk} |
|
7143
|
|
|
|
|
|
|
|| $is_grep_alias{$blk} |
|
7144
|
|
|
|
|
|
|
|| substr( $blk, -1, 1 ) eq ':' && $blk =~ /^\w+:$/ |
|
7145
|
|
|
|
|
|
|
|| substr( $blk, 0, 3 ) eq 'sub' && $blk =~ /^sub\s/ |
|
7146
|
|
|
|
|
|
|
|| substr( $blk, 0, 7 ) eq 'package' && $blk =~ /^package\s/ ) |
|
7147
|
|
|
|
|
|
|
{ |
|
7148
|
635
|
|
|
|
|
1563
|
return TERM; |
|
7149
|
|
|
|
|
|
|
} |
|
7150
|
|
|
|
|
|
|
|
|
7151
|
|
|
|
|
|
|
# Blocks followed by an OPERATOR |
|
7152
|
|
|
|
|
|
|
# do eval sub |
|
7153
|
342
|
100
|
100
|
|
|
1360
|
if ( $is_block_operator{$blk} |
|
7154
|
|
|
|
|
|
|
|| $is_sub{$blk} ) |
|
7155
|
|
|
|
|
|
|
{ |
|
7156
|
274
|
|
|
|
|
667
|
return OPERATOR; |
|
7157
|
|
|
|
|
|
|
} |
|
7158
|
|
|
|
|
|
|
|
|
7159
|
|
|
|
|
|
|
# Any other block type is marked UNKNOWN to be safe (c566). |
|
7160
|
|
|
|
|
|
|
# For example, a block type marked ';' could be a hash ref: |
|
7161
|
|
|
|
|
|
|
# { map { $_ => 'x' } keys %main:: } ~~ \%main::; |
|
7162
|
|
|
|
|
|
|
# The tokenizer would have to analyze the contents to distinguish |
|
7163
|
|
|
|
|
|
|
# between a block closure and a hash ref brace in this case. So mark |
|
7164
|
|
|
|
|
|
|
# this as UNKNOWN and let the lower level routines figure it out. |
|
7165
|
68
|
|
|
|
|
196
|
return UNKNOWN; |
|
7166
|
|
|
|
|
|
|
} |
|
7167
|
|
|
|
|
|
|
|
|
7168
|
|
|
|
|
|
|
#------------------------------- |
|
7169
|
|
|
|
|
|
|
# Section 2C: number or v-string |
|
7170
|
|
|
|
|
|
|
#------------------------------- |
|
7171
|
|
|
|
|
|
|
# An exception is for VERSION numbers a 'use' statement. It has the format |
|
7172
|
|
|
|
|
|
|
# use Module VERSION LIST |
|
7173
|
|
|
|
|
|
|
# We could avoid this exception by writing a special sub to parse 'use' |
|
7174
|
|
|
|
|
|
|
# statements and perhaps mark these numbers with a new type V (for VERSION) |
|
7175
|
4566
|
100
|
|
|
|
9462
|
if ( $is_n_v{$last_nonblank_type} ) { |
|
7176
|
2621
|
100
|
|
|
|
4533
|
if ( $statement_type eq 'use' ) { |
|
7177
|
11
|
|
|
|
|
32
|
return UNKNOWN; |
|
7178
|
|
|
|
|
|
|
} |
|
7179
|
2610
|
|
|
|
|
4756
|
return OPERATOR; |
|
7180
|
|
|
|
|
|
|
} |
|
7181
|
|
|
|
|
|
|
|
|
7182
|
|
|
|
|
|
|
#--------------------- |
|
7183
|
|
|
|
|
|
|
# Section 2D: qw quote |
|
7184
|
|
|
|
|
|
|
#--------------------- |
|
7185
|
|
|
|
|
|
|
# TODO: labeled prototype words would better be given type 'A' or maybe |
|
7186
|
|
|
|
|
|
|
# 'J'; not 'q'; or maybe mark as type 'Y'? |
|
7187
|
1945
|
100
|
|
|
|
3962
|
if ( $last_nonblank_type eq 'q' ) { |
|
7188
|
158
|
50
|
|
|
|
400
|
if ( $last_nonblank_token eq 'prototype' ) { |
|
7189
|
0
|
|
|
|
|
0
|
return TERM; |
|
7190
|
|
|
|
|
|
|
} |
|
7191
|
|
|
|
|
|
|
|
|
7192
|
|
|
|
|
|
|
# update for --use-feature=class (rt145706): |
|
7193
|
|
|
|
|
|
|
# Look for class VERSION after possible attribute, as in |
|
7194
|
|
|
|
|
|
|
# class Example::Subclass : isa(Example::Base) 1.345 { ... } |
|
7195
|
158
|
100
|
|
|
|
396
|
if ( $statement_type =~ /^package\b/ ) { |
|
7196
|
3
|
|
|
|
|
5
|
return TERM; |
|
7197
|
|
|
|
|
|
|
} |
|
7198
|
|
|
|
|
|
|
|
|
7199
|
|
|
|
|
|
|
# everything else |
|
7200
|
155
|
|
|
|
|
323
|
return OPERATOR; |
|
7201
|
|
|
|
|
|
|
} |
|
7202
|
|
|
|
|
|
|
|
|
7203
|
|
|
|
|
|
|
#--------------------- |
|
7204
|
|
|
|
|
|
|
# Section 2E: bareword |
|
7205
|
|
|
|
|
|
|
#--------------------- |
|
7206
|
1787
|
100
|
|
|
|
3607
|
if ( $last_nonblank_type eq 'w' ) { |
|
7207
|
|
|
|
|
|
|
|
|
7208
|
|
|
|
|
|
|
# It is safest to return UNKNOWN if a possible ? pattern delimiter may |
|
7209
|
|
|
|
|
|
|
# follow (git #32, c469) and let the guess algorithm handle it. |
|
7210
|
1738
|
100
|
|
|
|
3193
|
if ( $tok eq '?' ) { return UNKNOWN } |
|
|
7
|
|
|
|
|
18
|
|
|
7211
|
|
|
|
|
|
|
|
|
7212
|
|
|
|
|
|
|
# see if this has been seen in the role of a function taking args |
|
7213
|
1731
|
|
|
|
|
3232
|
my $rinfo = $self->[_rbareword_info_]->{$current_package}; |
|
7214
|
1731
|
100
|
|
|
|
3140
|
if ($rinfo) { |
|
7215
|
1228
|
|
|
|
|
1974
|
$rinfo = $rinfo->{$last_nonblank_token}; |
|
7216
|
1228
|
100
|
|
|
|
2440
|
if ($rinfo) { |
|
7217
|
317
|
|
|
|
|
537
|
my $function_count = $rinfo->{function_count}; |
|
7218
|
317
|
100
|
66
|
|
|
1108
|
if ( $function_count && $function_count > 0 ) { return TERM } |
|
|
135
|
|
|
|
|
363
|
|
|
7219
|
|
|
|
|
|
|
} |
|
7220
|
|
|
|
|
|
|
} |
|
7221
|
1596
|
|
|
|
|
3251
|
return UNKNOWN; |
|
7222
|
|
|
|
|
|
|
} |
|
7223
|
|
|
|
|
|
|
|
|
7224
|
|
|
|
|
|
|
#----------------------------------- |
|
7225
|
|
|
|
|
|
|
# Section 2F: file handle or similar |
|
7226
|
|
|
|
|
|
|
#----------------------------------- |
|
7227
|
49
|
100
|
|
|
|
134
|
if ( $last_nonblank_type eq 'Z' ) { |
|
7228
|
|
|
|
|
|
|
|
|
7229
|
|
|
|
|
|
|
# angle.t |
|
7230
|
45
|
100
|
|
|
|
193
|
if ( $last_nonblank_token =~ /^\w/ ) { |
|
7231
|
2
|
|
|
|
|
5
|
return UNKNOWN; |
|
7232
|
|
|
|
|
|
|
} |
|
7233
|
|
|
|
|
|
|
|
|
7234
|
|
|
|
|
|
|
# Exception to weird parsing rules for 'x(' ... see case b1205: |
|
7235
|
|
|
|
|
|
|
# In something like 'print $vv x(...' the x is an operator; |
|
7236
|
|
|
|
|
|
|
# Likewise in 'print $vv x$ww' the x is an operator (case b1207) |
|
7237
|
|
|
|
|
|
|
# otherwise x follows the weird parsing rules. |
|
7238
|
43
|
50
|
33
|
|
|
154
|
if ( $tok eq 'x' && $next_type =~ /^[\(\$\@\%]$/ ) { |
|
7239
|
0
|
|
|
|
|
0
|
return OPERATOR; |
|
7240
|
|
|
|
|
|
|
} |
|
7241
|
|
|
|
|
|
|
|
|
7242
|
|
|
|
|
|
|
# The 'weird parsing rules' of next section do not work for '<' and '?' |
|
7243
|
|
|
|
|
|
|
# It is best to mark them as unknown. Test case: |
|
7244
|
|
|
|
|
|
|
# print $fh <DATA>; |
|
7245
|
43
|
100
|
|
|
|
132
|
if ( $is_weird_parsing_rule_exception{$tok} ) { |
|
7246
|
4
|
|
|
|
|
8
|
return UNKNOWN; |
|
7247
|
|
|
|
|
|
|
} |
|
7248
|
|
|
|
|
|
|
|
|
7249
|
|
|
|
|
|
|
# For possible file handle like "$a", Perl uses weird parsing rules. |
|
7250
|
|
|
|
|
|
|
# For example: |
|
7251
|
|
|
|
|
|
|
# print $a/2,"/hi"; - division |
|
7252
|
|
|
|
|
|
|
# print $a / 2,"/hi"; - division |
|
7253
|
|
|
|
|
|
|
# print $a/ 2,"/hi"; - division |
|
7254
|
|
|
|
|
|
|
# print $a /2,"/hi"; - pattern (and error)! |
|
7255
|
|
|
|
|
|
|
# Some examples where this logic works okay, for '&','*','+': |
|
7256
|
|
|
|
|
|
|
# print $fh &xsi_protos(@mods); |
|
7257
|
|
|
|
|
|
|
# my $x = new $CompressClass *FH; |
|
7258
|
|
|
|
|
|
|
# print $OUT +( $count % 15 ? ", " : "\n\t" ); |
|
7259
|
39
|
50
|
66
|
|
|
142
|
if ( $blank_after_Z |
|
7260
|
|
|
|
|
|
|
&& $next_type ne 'b' ) |
|
7261
|
|
|
|
|
|
|
{ |
|
7262
|
0
|
|
|
|
|
0
|
return TERM; |
|
7263
|
|
|
|
|
|
|
} |
|
7264
|
|
|
|
|
|
|
|
|
7265
|
|
|
|
|
|
|
# Note that '?' and '<' have been moved above |
|
7266
|
|
|
|
|
|
|
# ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { |
|
7267
|
39
|
100
|
|
|
|
206
|
if ( $tok =~ /^([x\/\+\-\*\%\&\.]|\>\>)$/ ) { |
|
7268
|
|
|
|
|
|
|
|
|
7269
|
|
|
|
|
|
|
# Do not complain in 'use' statements, which have special syntax. |
|
7270
|
|
|
|
|
|
|
# For example, from RT#130344: |
|
7271
|
|
|
|
|
|
|
# use lib $FindBin::Bin . '/lib'; |
|
7272
|
9
|
50
|
|
|
|
20
|
if ( $statement_type ne 'use' ) { |
|
7273
|
9
|
|
|
|
|
29
|
$self->complain( |
|
7274
|
|
|
|
|
|
|
"operator in possible indirect object location not recommended\n" |
|
7275
|
|
|
|
|
|
|
); |
|
7276
|
|
|
|
|
|
|
} |
|
7277
|
9
|
|
|
|
|
19
|
return OPERATOR; |
|
7278
|
|
|
|
|
|
|
} |
|
7279
|
|
|
|
|
|
|
|
|
7280
|
|
|
|
|
|
|
# all other cases |
|
7281
|
|
|
|
|
|
|
|
|
7282
|
30
|
|
|
|
|
81
|
return UNKNOWN; |
|
7283
|
|
|
|
|
|
|
} |
|
7284
|
|
|
|
|
|
|
|
|
7285
|
|
|
|
|
|
|
#-------------------------- |
|
7286
|
|
|
|
|
|
|
# Section 2F: anything else |
|
7287
|
|
|
|
|
|
|
#-------------------------- |
|
7288
|
4
|
|
|
|
|
9
|
return UNKNOWN; |
|
7289
|
|
|
|
|
|
|
|
|
7290
|
|
|
|
|
|
|
} ## end sub operator_expected |
|
7291
|
|
|
|
|
|
|
|
|
7292
|
|
|
|
|
|
|
sub new_statement_ok { |
|
7293
|
|
|
|
|
|
|
|
|
7294
|
|
|
|
|
|
|
# Returns: |
|
7295
|
|
|
|
|
|
|
# true if a new statement can begin here |
|
7296
|
|
|
|
|
|
|
# false otherwise |
|
7297
|
|
|
|
|
|
|
|
|
7298
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, |
|
7299
|
|
|
|
|
|
|
# $brace_depth, $rbrace_type |
|
7300
|
|
|
|
|
|
|
|
|
7301
|
|
|
|
|
|
|
# Uses: |
|
7302
|
|
|
|
|
|
|
# - See if a 'class' statement can occur here |
|
7303
|
|
|
|
|
|
|
# - See if a keyword begins at a new statement; i.e. is an 'if' a |
|
7304
|
|
|
|
|
|
|
# block if or a trailing if? Also see if 'format' starts a statement. |
|
7305
|
|
|
|
|
|
|
# - Decide if a ':' is part of a statement label (not a ternary) |
|
7306
|
|
|
|
|
|
|
|
|
7307
|
|
|
|
|
|
|
# Curly braces are tricky because some small blocks do not get marked as |
|
7308
|
|
|
|
|
|
|
# blocks.. |
|
7309
|
|
|
|
|
|
|
|
|
7310
|
|
|
|
|
|
|
# if it follows an opening curly brace.. |
|
7311
|
545
|
100
|
100
|
545
|
0
|
2305
|
if ( $last_nonblank_token eq '{' ) { |
|
|
|
100
|
|
|
|
|
|
|
7312
|
|
|
|
|
|
|
|
|
7313
|
|
|
|
|
|
|
# The safe thing is to return true in all cases because: |
|
7314
|
|
|
|
|
|
|
# - a ternary ':' cannot occur here |
|
7315
|
|
|
|
|
|
|
# - an 'if' here, for example, cannot be a trailing if |
|
7316
|
|
|
|
|
|
|
# See test case c231 for an example. |
|
7317
|
|
|
|
|
|
|
# This works but could be improved, if necessary, by returning |
|
7318
|
|
|
|
|
|
|
# 'false' at obvious non-blocks. |
|
7319
|
66
|
|
|
|
|
239
|
return 1; |
|
7320
|
|
|
|
|
|
|
} |
|
7321
|
|
|
|
|
|
|
|
|
7322
|
|
|
|
|
|
|
# if it follows a closing code block curly brace.. |
|
7323
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq '}' |
|
7324
|
|
|
|
|
|
|
&& $last_nonblank_type eq $last_nonblank_token ) |
|
7325
|
|
|
|
|
|
|
{ |
|
7326
|
|
|
|
|
|
|
|
|
7327
|
|
|
|
|
|
|
# A new statement can follow certain closing block braces ... |
|
7328
|
|
|
|
|
|
|
# Previously, a true was always returned, and this worked ok. |
|
7329
|
|
|
|
|
|
|
# Update c443: now we return false for certain blocks which must be |
|
7330
|
|
|
|
|
|
|
# followed by a ';'. See comments elsewhere on |
|
7331
|
|
|
|
|
|
|
# '%is_zero_continuation_block_type'. The value of $brace_depth has |
|
7332
|
|
|
|
|
|
|
# also been corrected, it was off by 1. |
|
7333
|
115
|
|
|
|
|
244
|
my $block_type = $rbrace_type->[ $brace_depth + 1 ]; |
|
7334
|
|
|
|
|
|
|
return $block_type |
|
7335
|
115
|
|
66
|
|
|
685
|
&& !$is_sort_map_grep_eval_do_sub{$block_type}; |
|
7336
|
|
|
|
|
|
|
} |
|
7337
|
|
|
|
|
|
|
|
|
7338
|
|
|
|
|
|
|
# otherwise, it is a label if and only if it follows a ';' (real or fake) |
|
7339
|
|
|
|
|
|
|
# or another label |
|
7340
|
|
|
|
|
|
|
else { |
|
7341
|
364
|
|
100
|
|
|
1789
|
return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' ); |
|
7342
|
|
|
|
|
|
|
} |
|
7343
|
|
|
|
|
|
|
} ## end sub new_statement_ok |
|
7344
|
|
|
|
|
|
|
|
|
7345
|
|
|
|
|
|
|
sub code_block_type { |
|
7346
|
|
|
|
|
|
|
|
|
7347
|
1444
|
|
|
1444
|
0
|
3236
|
my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_; |
|
7348
|
|
|
|
|
|
|
|
|
7349
|
|
|
|
|
|
|
# Decide if this is a block of code, and its type. |
|
7350
|
|
|
|
|
|
|
# Must be called only when $type = $token = '{' |
|
7351
|
|
|
|
|
|
|
# The problem is to distinguish between the start of a block of code |
|
7352
|
|
|
|
|
|
|
# and the start of an anonymous hash reference |
|
7353
|
|
|
|
|
|
|
# Returns "" if not code block, otherwise returns 'last_nonblank_token' |
|
7354
|
|
|
|
|
|
|
# to indicate the type of code block. (For example, 'last_nonblank_token' |
|
7355
|
|
|
|
|
|
|
# might be 'if' for an if block, 'else' for an else block, etc). |
|
7356
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, |
|
7357
|
|
|
|
|
|
|
# $last_nonblank_block_type, $brace_depth, $rbrace_type |
|
7358
|
|
|
|
|
|
|
|
|
7359
|
|
|
|
|
|
|
# handle case of multiple '{'s |
|
7360
|
|
|
|
|
|
|
|
|
7361
|
|
|
|
|
|
|
#print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; |
|
7362
|
|
|
|
|
|
|
|
|
7363
|
1444
|
100
|
66
|
|
|
17736
|
if ( $last_nonblank_token eq '{' |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
7364
|
|
|
|
|
|
|
&& $last_nonblank_type eq $last_nonblank_token ) |
|
7365
|
|
|
|
|
|
|
{ |
|
7366
|
|
|
|
|
|
|
|
|
7367
|
|
|
|
|
|
|
# opening brace where a statement may appear is probably |
|
7368
|
|
|
|
|
|
|
# a code block but might be and anonymous hash reference |
|
7369
|
98
|
50
|
|
|
|
274
|
if ( $rbrace_type->[$brace_depth] ) { |
|
7370
|
98
|
|
|
|
|
281
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
|
7371
|
|
|
|
|
|
|
$max_token_index ); |
|
7372
|
|
|
|
|
|
|
} |
|
7373
|
|
|
|
|
|
|
|
|
7374
|
|
|
|
|
|
|
# cannot start a code block within an anonymous hash |
|
7375
|
|
|
|
|
|
|
else { |
|
7376
|
0
|
|
|
|
|
0
|
return EMPTY_STRING; |
|
7377
|
|
|
|
|
|
|
} |
|
7378
|
|
|
|
|
|
|
} |
|
7379
|
|
|
|
|
|
|
|
|
7380
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq ';' ) { |
|
7381
|
|
|
|
|
|
|
|
|
7382
|
|
|
|
|
|
|
# an opening brace where a statement may appear is probably |
|
7383
|
|
|
|
|
|
|
# a code block but might be and anonymous hash reference |
|
7384
|
51
|
|
|
|
|
180
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
|
7385
|
|
|
|
|
|
|
$max_token_index ); |
|
7386
|
|
|
|
|
|
|
} |
|
7387
|
|
|
|
|
|
|
|
|
7388
|
|
|
|
|
|
|
# handle case of '}{' |
|
7389
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq '}' |
|
7390
|
|
|
|
|
|
|
&& $last_nonblank_type eq $last_nonblank_token ) |
|
7391
|
|
|
|
|
|
|
{ |
|
7392
|
|
|
|
|
|
|
|
|
7393
|
|
|
|
|
|
|
# a } { situation ... |
|
7394
|
|
|
|
|
|
|
# could be hash reference after code block..(blktype1.t) |
|
7395
|
10
|
50
|
|
|
|
34
|
if ($last_nonblank_block_type) { |
|
7396
|
10
|
|
|
|
|
37
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
|
7397
|
|
|
|
|
|
|
$max_token_index ); |
|
7398
|
|
|
|
|
|
|
} |
|
7399
|
|
|
|
|
|
|
|
|
7400
|
|
|
|
|
|
|
# must be a block if it follows a closing hash reference |
|
7401
|
|
|
|
|
|
|
else { |
|
7402
|
0
|
|
|
|
|
0
|
return $last_nonblank_token; |
|
7403
|
|
|
|
|
|
|
} |
|
7404
|
|
|
|
|
|
|
} |
|
7405
|
|
|
|
|
|
|
|
|
7406
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
|
7407
|
|
|
|
|
|
|
# NOTE: braces after type characters start code blocks, but for |
|
7408
|
|
|
|
|
|
|
# simplicity these are not identified as such. See also |
|
7409
|
|
|
|
|
|
|
# sub is_non_structural_brace. |
|
7410
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
|
7411
|
|
|
|
|
|
|
|
|
7412
|
|
|
|
|
|
|
## elsif ( $last_nonblank_type eq 't' ) { |
|
7413
|
|
|
|
|
|
|
## return $last_nonblank_token; |
|
7414
|
|
|
|
|
|
|
## } |
|
7415
|
|
|
|
|
|
|
|
|
7416
|
|
|
|
|
|
|
# brace after label: |
|
7417
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'J' ) { |
|
7418
|
34
|
|
|
|
|
95
|
return $last_nonblank_token; |
|
7419
|
|
|
|
|
|
|
} |
|
7420
|
|
|
|
|
|
|
|
|
7421
|
|
|
|
|
|
|
# otherwise, see if a block must follow the previous token (such as 'if'): |
|
7422
|
|
|
|
|
|
|
elsif ($is_code_block_token{$last_nonblank_token} |
|
7423
|
|
|
|
|
|
|
|| $is_grep_alias{$last_nonblank_token} ) |
|
7424
|
|
|
|
|
|
|
{ |
|
7425
|
|
|
|
|
|
|
|
|
7426
|
|
|
|
|
|
|
# Bug Patch: Note that the opening brace after the 'if' in the following |
|
7427
|
|
|
|
|
|
|
# snippet is an anonymous hash ref and not a code block! |
|
7428
|
|
|
|
|
|
|
# print 'hi' if { x => 1, }->{x}; |
|
7429
|
|
|
|
|
|
|
# We can identify this situation because the last nonblank type |
|
7430
|
|
|
|
|
|
|
# will be a keyword (instead of a closing paren) |
|
7431
|
547
|
50
|
33
|
|
|
2321
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
7432
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' |
|
7433
|
|
|
|
|
|
|
&& ( $last_nonblank_token eq 'if' |
|
7434
|
|
|
|
|
|
|
|| $last_nonblank_token eq 'unless' ) |
|
7435
|
|
|
|
|
|
|
) |
|
7436
|
|
|
|
|
|
|
{ |
|
7437
|
0
|
|
|
|
|
0
|
return EMPTY_STRING; |
|
7438
|
|
|
|
|
|
|
} |
|
7439
|
|
|
|
|
|
|
else { |
|
7440
|
547
|
|
|
|
|
1531
|
return $last_nonblank_token; |
|
7441
|
|
|
|
|
|
|
} |
|
7442
|
|
|
|
|
|
|
} |
|
7443
|
|
|
|
|
|
|
|
|
7444
|
|
|
|
|
|
|
# or a sub or package BLOCK |
|
7445
|
|
|
|
|
|
|
# Fixed for c250 to include new package type 'P', and change 'i' to 'S' |
|
7446
|
|
|
|
|
|
|
elsif ( |
|
7447
|
|
|
|
|
|
|
$last_nonblank_type eq 'P' |
|
7448
|
|
|
|
|
|
|
|| $last_nonblank_type eq 'S' |
|
7449
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 't' |
|
7450
|
|
|
|
|
|
|
&& substr( $last_nonblank_token, 0, 3 ) eq 'sub' ) |
|
7451
|
|
|
|
|
|
|
) |
|
7452
|
|
|
|
|
|
|
{ |
|
7453
|
346
|
|
|
|
|
999
|
return $last_nonblank_token; |
|
7454
|
|
|
|
|
|
|
} |
|
7455
|
|
|
|
|
|
|
|
|
7456
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^(sub|package)\b/ ) { |
|
7457
|
0
|
|
|
|
|
0
|
return $statement_type; |
|
7458
|
|
|
|
|
|
|
} |
|
7459
|
|
|
|
|
|
|
|
|
7460
|
|
|
|
|
|
|
# user-defined subs with block parameters (like grep/map/eval) |
|
7461
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'G' ) { |
|
7462
|
0
|
|
|
|
|
0
|
return $last_nonblank_token; |
|
7463
|
|
|
|
|
|
|
} |
|
7464
|
|
|
|
|
|
|
|
|
7465
|
|
|
|
|
|
|
# check bareword |
|
7466
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'w' ) { |
|
7467
|
|
|
|
|
|
|
|
|
7468
|
|
|
|
|
|
|
# check for syntax 'use MODULE LIST' |
|
7469
|
|
|
|
|
|
|
# This fixes b1022 b1025 b1027 b1028 b1029 b1030 b1031 |
|
7470
|
24
|
100
|
|
|
|
92
|
return EMPTY_STRING if ( $statement_type eq 'use' ); |
|
7471
|
|
|
|
|
|
|
|
|
7472
|
23
|
|
|
|
|
94
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
|
7473
|
|
|
|
|
|
|
$max_token_index ); |
|
7474
|
|
|
|
|
|
|
} |
|
7475
|
|
|
|
|
|
|
|
|
7476
|
|
|
|
|
|
|
# Patch for bug # RT #94338 reported by Daniel Trizen |
|
7477
|
|
|
|
|
|
|
# for-loop in a parenthesized block-map triggering an error message: |
|
7478
|
|
|
|
|
|
|
# map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) ); |
|
7479
|
|
|
|
|
|
|
# Check for a code block within a parenthesized function call |
|
7480
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq '(' ) { |
|
7481
|
87
|
|
|
|
|
173
|
my $paren_type = $rparen_type->[$paren_depth]; |
|
7482
|
|
|
|
|
|
|
|
|
7483
|
|
|
|
|
|
|
# /^(map|grep|sort)$/ |
|
7484
|
87
|
100
|
66
|
|
|
395
|
if ( $paren_type && $is_sort_map_grep{$paren_type} ) { |
|
7485
|
|
|
|
|
|
|
|
|
7486
|
|
|
|
|
|
|
# We will mark this as a code block but use type 't' instead |
|
7487
|
|
|
|
|
|
|
# of the name of the containing function. This will allow for |
|
7488
|
|
|
|
|
|
|
# correct parsing but will usually produce better formatting. |
|
7489
|
|
|
|
|
|
|
# Braces with block type 't' are not broken open automatically |
|
7490
|
|
|
|
|
|
|
# in the formatter as are other code block types, and this usually |
|
7491
|
|
|
|
|
|
|
# works best. |
|
7492
|
1
|
|
|
|
|
3
|
return 't'; # (Not $paren_type) |
|
7493
|
|
|
|
|
|
|
} |
|
7494
|
|
|
|
|
|
|
else { |
|
7495
|
86
|
|
|
|
|
213
|
return EMPTY_STRING; |
|
7496
|
|
|
|
|
|
|
} |
|
7497
|
|
|
|
|
|
|
} |
|
7498
|
|
|
|
|
|
|
|
|
7499
|
|
|
|
|
|
|
# handle unknown syntax ') {' |
|
7500
|
|
|
|
|
|
|
# we previously appended a '()' to mark this case |
|
7501
|
|
|
|
|
|
|
elsif ( $last_nonblank_token =~ /\(\)$/ ) { |
|
7502
|
16
|
|
|
|
|
92
|
return $last_nonblank_token; |
|
7503
|
|
|
|
|
|
|
} |
|
7504
|
|
|
|
|
|
|
|
|
7505
|
|
|
|
|
|
|
# anything else must be anonymous hash reference |
|
7506
|
|
|
|
|
|
|
else { |
|
7507
|
231
|
|
|
|
|
555
|
return EMPTY_STRING; |
|
7508
|
|
|
|
|
|
|
} |
|
7509
|
|
|
|
|
|
|
} ## end sub code_block_type |
|
7510
|
|
|
|
|
|
|
|
|
7511
|
|
|
|
|
|
|
sub decide_if_code_block { |
|
7512
|
|
|
|
|
|
|
|
|
7513
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token |
|
7514
|
182
|
|
|
182
|
0
|
416
|
my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_; |
|
7515
|
|
|
|
|
|
|
|
|
7516
|
182
|
|
|
|
|
607
|
my ( $next_nonblank_token, $i_next_uu ) = |
|
7517
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
|
7518
|
|
|
|
|
|
|
|
|
7519
|
|
|
|
|
|
|
# we are at a '{' where a statement may appear. |
|
7520
|
|
|
|
|
|
|
# We must decide if this brace starts an anonymous hash or a code |
|
7521
|
|
|
|
|
|
|
# block. |
|
7522
|
|
|
|
|
|
|
# return "" if anonymous hash, and $last_nonblank_token otherwise |
|
7523
|
|
|
|
|
|
|
|
|
7524
|
|
|
|
|
|
|
# initialize to be code BLOCK |
|
7525
|
182
|
|
|
|
|
341
|
my $code_block_type = $last_nonblank_token; |
|
7526
|
|
|
|
|
|
|
|
|
7527
|
|
|
|
|
|
|
# Check for the common case of an empty anonymous hash reference: |
|
7528
|
|
|
|
|
|
|
# Maybe something like sub { { } } |
|
7529
|
182
|
100
|
|
|
|
448
|
if ( $next_nonblank_token eq '}' ) { |
|
7530
|
5
|
|
|
|
|
8
|
$code_block_type = EMPTY_STRING; |
|
7531
|
|
|
|
|
|
|
} |
|
7532
|
|
|
|
|
|
|
|
|
7533
|
|
|
|
|
|
|
else { |
|
7534
|
|
|
|
|
|
|
|
|
7535
|
|
|
|
|
|
|
# To guess if this '{' is an anonymous hash reference, look ahead |
|
7536
|
|
|
|
|
|
|
# and test as follows: |
|
7537
|
|
|
|
|
|
|
# |
|
7538
|
|
|
|
|
|
|
# it is a hash reference if next come: |
|
7539
|
|
|
|
|
|
|
# - a string or digit followed by a comma or => |
|
7540
|
|
|
|
|
|
|
# - bareword followed by => |
|
7541
|
|
|
|
|
|
|
# otherwise it is a code block |
|
7542
|
|
|
|
|
|
|
# |
|
7543
|
|
|
|
|
|
|
# Examples of anonymous hash ref: |
|
7544
|
|
|
|
|
|
|
# {'aa',}; |
|
7545
|
|
|
|
|
|
|
# {1,2} |
|
7546
|
|
|
|
|
|
|
# |
|
7547
|
|
|
|
|
|
|
# Examples of code blocks: |
|
7548
|
|
|
|
|
|
|
# {1; print "hello\n", 1;} |
|
7549
|
|
|
|
|
|
|
# {$a,1}; |
|
7550
|
|
|
|
|
|
|
|
|
7551
|
|
|
|
|
|
|
# We are only going to look ahead one more (nonblank/comment) line. |
|
7552
|
|
|
|
|
|
|
# Strange formatting could cause a bad guess, but that's unlikely. |
|
7553
|
177
|
|
|
|
|
279
|
my @pre_types; |
|
7554
|
|
|
|
|
|
|
my @pre_tokens; |
|
7555
|
|
|
|
|
|
|
|
|
7556
|
|
|
|
|
|
|
# Ignore the rest of this line if it is a side comment |
|
7557
|
177
|
100
|
|
|
|
450
|
if ( $next_nonblank_token ne '#' ) { |
|
7558
|
152
|
|
|
|
|
471
|
@pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ]; |
|
|
152
|
|
|
|
|
829
|
|
|
7559
|
152
|
|
|
|
|
346
|
@pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ]; |
|
|
152
|
|
|
|
|
923
|
|
|
7560
|
|
|
|
|
|
|
} |
|
7561
|
|
|
|
|
|
|
|
|
7562
|
|
|
|
|
|
|
# Here 20 is arbitrary but generous, and prevents wasting lots of time |
|
7563
|
|
|
|
|
|
|
# in mangled files |
|
7564
|
177
|
|
|
|
|
648
|
my ( $rpre_tokens, $rpre_types ) = |
|
7565
|
|
|
|
|
|
|
$self->peek_ahead_for_n_nonblank_pre_tokens(20); |
|
7566
|
177
|
100
|
66
|
|
|
539
|
if ( defined($rpre_types) && @{$rpre_types} ) { |
|
|
169
|
|
|
|
|
510
|
|
|
7567
|
169
|
|
|
|
|
251
|
push @pre_types, @{$rpre_types}; |
|
|
169
|
|
|
|
|
642
|
|
|
7568
|
169
|
|
|
|
|
297
|
push @pre_tokens, @{$rpre_tokens}; |
|
|
169
|
|
|
|
|
780
|
|
|
7569
|
|
|
|
|
|
|
} |
|
7570
|
|
|
|
|
|
|
|
|
7571
|
|
|
|
|
|
|
# put a sentinel token to simplify stopping the search |
|
7572
|
177
|
|
|
|
|
366
|
push @pre_types, '}'; |
|
7573
|
177
|
|
|
|
|
301
|
push @pre_types, '}'; |
|
7574
|
|
|
|
|
|
|
|
|
7575
|
177
|
|
|
|
|
247
|
my $jbeg = 0; |
|
7576
|
177
|
100
|
|
|
|
469
|
$jbeg = 1 if ( $pre_types[0] eq 'b' ); |
|
7577
|
|
|
|
|
|
|
|
|
7578
|
|
|
|
|
|
|
# first look for one of these |
|
7579
|
|
|
|
|
|
|
# - bareword |
|
7580
|
|
|
|
|
|
|
# - bareword with leading - |
|
7581
|
|
|
|
|
|
|
# - digit |
|
7582
|
|
|
|
|
|
|
# - quoted string |
|
7583
|
177
|
|
|
|
|
294
|
my $j = $jbeg; |
|
7584
|
177
|
100
|
33
|
|
|
1045
|
if ( $pre_types[$j] =~ /^[\'\"]/ ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
7585
|
|
|
|
|
|
|
|
|
7586
|
|
|
|
|
|
|
# find the closing quote; don't worry about escapes |
|
7587
|
1
|
|
|
|
|
3
|
my $quote_mark = $pre_types[$j]; |
|
7588
|
1
|
|
|
|
|
4
|
foreach my $k ( $j + 1 .. @pre_types - 2 ) { |
|
7589
|
1
|
50
|
|
|
|
3
|
if ( $pre_types[$k] eq $quote_mark ) { |
|
7590
|
1
|
|
|
|
|
3
|
$j = $k + 1; |
|
7591
|
1
|
|
|
|
|
2
|
last; |
|
7592
|
|
|
|
|
|
|
} |
|
7593
|
|
|
|
|
|
|
} |
|
7594
|
|
|
|
|
|
|
} |
|
7595
|
|
|
|
|
|
|
elsif ( $pre_types[$j] eq 'd' ) { |
|
7596
|
8
|
|
|
|
|
11
|
$j++; |
|
7597
|
|
|
|
|
|
|
} |
|
7598
|
|
|
|
|
|
|
elsif ( $pre_types[$j] eq 'w' ) { |
|
7599
|
74
|
|
|
|
|
155
|
$j++; |
|
7600
|
|
|
|
|
|
|
} |
|
7601
|
|
|
|
|
|
|
elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { |
|
7602
|
0
|
|
|
|
|
0
|
$j++; |
|
7603
|
|
|
|
|
|
|
} |
|
7604
|
|
|
|
|
|
|
else { |
|
7605
|
|
|
|
|
|
|
# none of the above |
|
7606
|
|
|
|
|
|
|
} |
|
7607
|
177
|
100
|
|
|
|
451
|
if ( $j > $jbeg ) { |
|
7608
|
|
|
|
|
|
|
|
|
7609
|
83
|
100
|
|
|
|
256
|
$j++ if ( $pre_types[$j] eq 'b' ); |
|
7610
|
|
|
|
|
|
|
|
|
7611
|
|
|
|
|
|
|
# Patched for RT #95708 |
|
7612
|
83
|
100
|
33
|
|
|
576
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
7613
|
|
|
|
|
|
|
|
|
7614
|
|
|
|
|
|
|
# it is a comma which is not a pattern delimiter except for qw |
|
7615
|
|
|
|
|
|
|
( |
|
7616
|
|
|
|
|
|
|
$pre_types[$j] eq COMMA |
|
7617
|
|
|
|
|
|
|
&& !$is_q_qq_qx_qr_s_y_tr_m{ $pre_tokens[$jbeg] } |
|
7618
|
|
|
|
|
|
|
) |
|
7619
|
|
|
|
|
|
|
|
|
7620
|
|
|
|
|
|
|
# or a => |
|
7621
|
|
|
|
|
|
|
|| ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) |
|
7622
|
|
|
|
|
|
|
) |
|
7623
|
|
|
|
|
|
|
{ |
|
7624
|
18
|
|
|
|
|
29
|
$code_block_type = EMPTY_STRING; |
|
7625
|
|
|
|
|
|
|
} |
|
7626
|
|
|
|
|
|
|
} |
|
7627
|
|
|
|
|
|
|
|
|
7628
|
177
|
100
|
|
|
|
487
|
if ($code_block_type) { |
|
7629
|
|
|
|
|
|
|
|
|
7630
|
|
|
|
|
|
|
# Patch for cases b1085 b1128: It is uncertain if this is a block. |
|
7631
|
|
|
|
|
|
|
# If this brace follows a bareword, then append a space as a signal |
|
7632
|
|
|
|
|
|
|
# to the formatter that this may not be a block brace. To find the |
|
7633
|
|
|
|
|
|
|
# corresponding code in Formatter.pm search for 'b1085'. |
|
7634
|
|
|
|
|
|
|
# But not for the word 'method': fixes c534; this will cause the |
|
7635
|
|
|
|
|
|
|
# formatter to mark an asub block instead of a sub block. |
|
7636
|
159
|
100
|
66
|
|
|
1324
|
if ( $code_block_type =~ /^\w/ && $code_block_type ne 'method' ) { |
|
7637
|
20
|
|
|
|
|
123
|
$code_block_type .= SPACE; |
|
7638
|
|
|
|
|
|
|
} |
|
7639
|
|
|
|
|
|
|
} |
|
7640
|
|
|
|
|
|
|
} |
|
7641
|
|
|
|
|
|
|
|
|
7642
|
182
|
|
|
|
|
550
|
return $code_block_type; |
|
7643
|
|
|
|
|
|
|
} ## end sub decide_if_code_block |
|
7644
|
|
|
|
|
|
|
|
|
7645
|
|
|
|
|
|
|
sub report_unexpected { |
|
7646
|
|
|
|
|
|
|
|
|
7647
|
|
|
|
|
|
|
# report unexpected token type and show where it is |
|
7648
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
|
7649
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $rcall_hash ) = @_; |
|
7650
|
|
|
|
|
|
|
|
|
7651
|
0
|
|
|
|
|
0
|
my $found = $rcall_hash->{found}; |
|
7652
|
0
|
|
|
|
|
0
|
my $expecting = $rcall_hash->{expecting}; |
|
7653
|
0
|
|
|
|
|
0
|
my $i_tok = $rcall_hash->{i_tok}; |
|
7654
|
0
|
|
|
|
|
0
|
my $last_nonblank_i = $rcall_hash->{last_nonblank_i}; |
|
7655
|
0
|
|
|
|
|
0
|
my $rpretoken_map = $rcall_hash->{rpretoken_map}; |
|
7656
|
0
|
|
|
|
|
0
|
my $rpretoken_type = $rcall_hash->{rpretoken_type}; |
|
7657
|
0
|
|
|
|
|
0
|
my $input_line = $rcall_hash->{input_line}; |
|
7658
|
|
|
|
|
|
|
|
|
7659
|
0
|
0
|
|
|
|
0
|
if ( ++$self->[_unexpected_error_count_] <= MAX_NAG_MESSAGES ) { |
|
7660
|
0
|
|
|
|
|
0
|
my $msg = "found $found where $expecting expected"; |
|
7661
|
0
|
|
|
|
|
0
|
my $pos = $rpretoken_map->[$i_tok]; |
|
7662
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
|
7663
|
0
|
|
|
|
|
0
|
my $input_line_number = $self->[_last_line_number_]; |
|
7664
|
0
|
|
|
|
|
0
|
my ( $offset, $numbered_line, $underline ) = |
|
7665
|
|
|
|
|
|
|
make_numbered_line( $input_line_number, $input_line, $pos ); |
|
7666
|
0
|
|
|
|
|
0
|
$underline = write_on_underline( $underline, $pos - $offset, '^' ); |
|
7667
|
|
|
|
|
|
|
|
|
7668
|
0
|
|
|
|
|
0
|
my $trailer = EMPTY_STRING; |
|
7669
|
0
|
0
|
0
|
|
|
0
|
if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { |
|
7670
|
0
|
|
|
|
|
0
|
my $pos_prev = $rpretoken_map->[$last_nonblank_i]; |
|
7671
|
0
|
|
|
|
|
0
|
my $num; |
|
7672
|
0
|
0
|
|
|
|
0
|
if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) { |
|
7673
|
0
|
|
|
|
|
0
|
$num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev; |
|
7674
|
|
|
|
|
|
|
} |
|
7675
|
|
|
|
|
|
|
else { |
|
7676
|
0
|
|
|
|
|
0
|
$num = $pos - $pos_prev; |
|
7677
|
|
|
|
|
|
|
} |
|
7678
|
0
|
0
|
|
|
|
0
|
if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
7679
|
|
|
|
|
|
|
|
|
7680
|
|
|
|
|
|
|
$underline = |
|
7681
|
0
|
|
|
|
|
0
|
write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); |
|
7682
|
0
|
|
|
|
|
0
|
$trailer = " (previous token underlined)"; |
|
7683
|
|
|
|
|
|
|
} |
|
7684
|
0
|
|
|
|
|
0
|
$underline =~ s/\s+$//; |
|
7685
|
0
|
|
|
|
|
0
|
$self->warning( $numbered_line . "\n" ); |
|
7686
|
0
|
|
|
|
|
0
|
$self->warning( $underline . "\n" ); |
|
7687
|
0
|
|
|
|
|
0
|
$self->warning( $msg . $trailer . "\n" ); |
|
7688
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
|
7689
|
|
|
|
|
|
|
} |
|
7690
|
0
|
|
|
|
|
0
|
return; |
|
7691
|
|
|
|
|
|
|
} ## end sub report_unexpected |
|
7692
|
|
|
|
|
|
|
|
|
7693
|
|
|
|
|
|
|
my %is_sigil_or_paren; |
|
7694
|
|
|
|
|
|
|
my %is_R_closing_sb; |
|
7695
|
|
|
|
|
|
|
|
|
7696
|
|
|
|
|
|
|
BEGIN { |
|
7697
|
|
|
|
|
|
|
|
|
7698
|
44
|
|
|
44
|
|
278
|
my @q = qw< $ & % * @ ) >; |
|
7699
|
44
|
|
|
|
|
298
|
$is_sigil_or_paren{$_} = 1 for @q; |
|
7700
|
|
|
|
|
|
|
|
|
7701
|
44
|
|
|
|
|
114
|
@q = qw( R ] ); |
|
7702
|
44
|
|
|
|
|
71695
|
$is_R_closing_sb{$_} = 1 for @q; |
|
7703
|
|
|
|
|
|
|
} ## end BEGIN |
|
7704
|
|
|
|
|
|
|
|
|
7705
|
|
|
|
|
|
|
sub is_non_structural_brace { |
|
7706
|
|
|
|
|
|
|
|
|
7707
|
|
|
|
|
|
|
# Decide if a brace or bracket is structural or non-structural |
|
7708
|
|
|
|
|
|
|
# by looking at the previous token and type |
|
7709
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token |
|
7710
|
|
|
|
|
|
|
|
|
7711
|
|
|
|
|
|
|
# EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. |
|
7712
|
|
|
|
|
|
|
# Tentatively deactivated because it caused the wrong operator expectation |
|
7713
|
|
|
|
|
|
|
# for this code: |
|
7714
|
|
|
|
|
|
|
# $user = @vars[1] / 100; |
|
7715
|
|
|
|
|
|
|
# Must update sub operator_expected before re-implementing. |
|
7716
|
|
|
|
|
|
|
# if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { |
|
7717
|
|
|
|
|
|
|
# return 0; |
|
7718
|
|
|
|
|
|
|
# } |
|
7719
|
|
|
|
|
|
|
|
|
7720
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
|
7721
|
|
|
|
|
|
|
# NOTE: braces after type characters start code blocks, but for |
|
7722
|
|
|
|
|
|
|
# simplicity these are not identified as such. See also |
|
7723
|
|
|
|
|
|
|
# sub code_block_type |
|
7724
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
|
7725
|
|
|
|
|
|
|
|
|
7726
|
|
|
|
|
|
|
##if ($last_nonblank_type eq 't') {return 0} |
|
7727
|
|
|
|
|
|
|
|
|
7728
|
|
|
|
|
|
|
# otherwise, it is non-structural if it is decorated |
|
7729
|
|
|
|
|
|
|
# by type information. |
|
7730
|
|
|
|
|
|
|
# For example, the '{' here is non-structural: ${xxx} |
|
7731
|
|
|
|
|
|
|
# Removed '::' to fix c074 |
|
7732
|
|
|
|
|
|
|
## $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ |
|
7733
|
|
|
|
|
|
|
return ( |
|
7734
|
|
|
|
|
|
|
## $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->)/ |
|
7735
|
|
|
|
|
|
|
$is_sigil_or_paren{ substr( $last_nonblank_token, 0, 1 ) } |
|
7736
|
|
|
|
|
|
|
|| substr( $last_nonblank_token, 0, 2 ) eq '->' |
|
7737
|
|
|
|
|
|
|
|
|
7738
|
|
|
|
|
|
|
# or if we follow a hash or array closing curly brace or bracket |
|
7739
|
|
|
|
|
|
|
# For example, the second '{' in this is non-structural: $a{'x'}{'y'} |
|
7740
|
|
|
|
|
|
|
# because the first '}' would have been given type 'R' |
|
7741
|
|
|
|
|
|
|
##|| $last_nonblank_type =~ /^([R\]])$/ |
|
7742
|
2822
|
|
66
|
2822
|
0
|
14523
|
|| $is_R_closing_sb{$last_nonblank_type} |
|
7743
|
|
|
|
|
|
|
); |
|
7744
|
|
|
|
|
|
|
} ## end sub is_non_structural_brace |
|
7745
|
|
|
|
|
|
|
|
|
7746
|
|
|
|
|
|
|
####################################################################### |
|
7747
|
|
|
|
|
|
|
# Tokenizer routines for tracking container nesting depths |
|
7748
|
|
|
|
|
|
|
####################################################################### |
|
7749
|
|
|
|
|
|
|
|
|
7750
|
|
|
|
|
|
|
# The following routines keep track of nesting depths of the nesting |
|
7751
|
|
|
|
|
|
|
# types, ( [ { and ?. This is necessary for determining the indentation |
|
7752
|
|
|
|
|
|
|
# level, and also for debugging programs. Not only do they keep track of |
|
7753
|
|
|
|
|
|
|
# nesting depths of the individual brace types, but they check that each |
|
7754
|
|
|
|
|
|
|
# of the other brace types is balanced within matching pairs. For |
|
7755
|
|
|
|
|
|
|
# example, if the program sees this sequence: |
|
7756
|
|
|
|
|
|
|
# |
|
7757
|
|
|
|
|
|
|
# { ( ( ) } |
|
7758
|
|
|
|
|
|
|
# |
|
7759
|
|
|
|
|
|
|
# then it can determine that there is an extra left paren somewhere |
|
7760
|
|
|
|
|
|
|
# between the { and the }. And so on with every other possible |
|
7761
|
|
|
|
|
|
|
# combination of outer and inner brace types. For another |
|
7762
|
|
|
|
|
|
|
# example: |
|
7763
|
|
|
|
|
|
|
# |
|
7764
|
|
|
|
|
|
|
# ( [ ..... ] ] ) |
|
7765
|
|
|
|
|
|
|
# |
|
7766
|
|
|
|
|
|
|
# which has an extra ] within the parens. |
|
7767
|
|
|
|
|
|
|
# |
|
7768
|
|
|
|
|
|
|
# The brace types have indexes 0 .. 3 which are indexes into |
|
7769
|
|
|
|
|
|
|
# the matrices. |
|
7770
|
|
|
|
|
|
|
# |
|
7771
|
|
|
|
|
|
|
# The pair ? : are treated as just another nesting type, with ? acting |
|
7772
|
|
|
|
|
|
|
# as the opening brace and : acting as the closing brace. |
|
7773
|
|
|
|
|
|
|
# |
|
7774
|
|
|
|
|
|
|
# The matrix |
|
7775
|
|
|
|
|
|
|
# |
|
7776
|
|
|
|
|
|
|
# $rdepth_array->[$a][$b][ $rcurrent_depth->[$a] ] = $rcurrent_depth->[$b]; |
|
7777
|
|
|
|
|
|
|
# |
|
7778
|
|
|
|
|
|
|
# saves the nesting depth of brace type $b (where $b is either of the other |
|
7779
|
|
|
|
|
|
|
# nesting types) when brace type $a enters a new depth. When this depth |
|
7780
|
|
|
|
|
|
|
# decreases, a check is made that the current depth of brace types $b is |
|
7781
|
|
|
|
|
|
|
# unchanged, or otherwise there must have been an error. This can |
|
7782
|
|
|
|
|
|
|
# be very useful for localizing errors, particularly when perl runs to |
|
7783
|
|
|
|
|
|
|
# the end of a large file (such as this one) and announces that there |
|
7784
|
|
|
|
|
|
|
# is a problem somewhere. |
|
7785
|
|
|
|
|
|
|
# |
|
7786
|
|
|
|
|
|
|
# A numerical sequence number is maintained for every nesting type, |
|
7787
|
|
|
|
|
|
|
# so that each matching pair can be uniquely identified in a simple |
|
7788
|
|
|
|
|
|
|
# way. |
|
7789
|
|
|
|
|
|
|
|
|
7790
|
|
|
|
|
|
|
sub increase_nesting_depth { |
|
7791
|
5446
|
|
|
5446
|
0
|
9085
|
my ( $self, $aa, $pos ) = @_; |
|
7792
|
|
|
|
|
|
|
|
|
7793
|
|
|
|
|
|
|
# Given: |
|
7794
|
|
|
|
|
|
|
# $aa = integer code of container type, 0-3 |
|
7795
|
|
|
|
|
|
|
# $pos = position of character, for error message |
|
7796
|
|
|
|
|
|
|
|
|
7797
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $rcurrent_depth, |
|
7798
|
|
|
|
|
|
|
# $rcurrent_sequence_number, $rdepth_array, |
|
7799
|
|
|
|
|
|
|
# $rstarting_line_of_current_depth, $statement_type |
|
7800
|
5446
|
|
|
|
|
7951
|
my $cd_aa = ++$rcurrent_depth->[$aa]; |
|
7801
|
5446
|
|
|
|
|
6305
|
$total_depth++; |
|
7802
|
5446
|
|
|
|
|
8998
|
$rtotal_depth->[$aa]->[$cd_aa] = $total_depth; |
|
7803
|
5446
|
|
|
|
|
7428
|
my $input_line_number = $self->[_last_line_number_]; |
|
7804
|
5446
|
|
|
|
|
7376
|
my $input_line = $self->[_line_of_text_]; |
|
7805
|
|
|
|
|
|
|
|
|
7806
|
|
|
|
|
|
|
# Sequence numbers increment by number of items. This keeps |
|
7807
|
|
|
|
|
|
|
# a unique set of numbers but still allows the relative location |
|
7808
|
|
|
|
|
|
|
# of any type to be determined. |
|
7809
|
|
|
|
|
|
|
|
|
7810
|
|
|
|
|
|
|
# make a new unique sequence number |
|
7811
|
5446
|
|
|
|
|
7540
|
my $seqno = $next_sequence_number++; |
|
7812
|
|
|
|
|
|
|
|
|
7813
|
5446
|
|
|
|
|
8693
|
$rcurrent_sequence_number->[$aa]->[$cd_aa] = $seqno; |
|
7814
|
|
|
|
|
|
|
|
|
7815
|
5446
|
|
|
|
|
15655
|
$rstarting_line_of_current_depth->[$aa]->[$cd_aa] = |
|
7816
|
|
|
|
|
|
|
[ $input_line_number, $input_line, $pos ]; |
|
7817
|
|
|
|
|
|
|
|
|
7818
|
5446
|
|
|
|
|
13554
|
for my $bb ( 0 .. @closing_brace_names - 1 ) { |
|
7819
|
21784
|
100
|
|
|
|
30670
|
next if ( $bb == $aa ); |
|
7820
|
16338
|
|
|
|
|
27812
|
$rdepth_array->[$aa]->[$bb]->[$cd_aa] = $rcurrent_depth->[$bb]; |
|
7821
|
|
|
|
|
|
|
} |
|
7822
|
|
|
|
|
|
|
|
|
7823
|
|
|
|
|
|
|
# set a flag for indenting a nested ternary statement |
|
7824
|
5446
|
|
|
|
|
7390
|
my $indent = 0; |
|
7825
|
5446
|
100
|
|
|
|
8998
|
if ( $aa == QUESTION_COLON ) { |
|
7826
|
193
|
|
|
|
|
406
|
$rnested_ternary_flag->[$cd_aa] = 0; |
|
7827
|
193
|
100
|
|
|
|
497
|
if ( $cd_aa > 1 ) { |
|
7828
|
17
|
100
|
|
|
|
58
|
if ( $rnested_ternary_flag->[ $cd_aa - 1 ] == 0 ) { |
|
7829
|
16
|
|
|
|
|
37
|
my $pdepth = $rtotal_depth->[$aa]->[ $cd_aa - 1 ]; |
|
7830
|
16
|
100
|
|
|
|
56
|
if ( $pdepth == $total_depth - 1 ) { |
|
7831
|
8
|
|
|
|
|
13
|
$indent = 1; |
|
7832
|
8
|
|
|
|
|
20
|
$rnested_ternary_flag->[ $cd_aa - 1 ] = -1; |
|
7833
|
|
|
|
|
|
|
} |
|
7834
|
|
|
|
|
|
|
} |
|
7835
|
|
|
|
|
|
|
} |
|
7836
|
|
|
|
|
|
|
} |
|
7837
|
|
|
|
|
|
|
|
|
7838
|
|
|
|
|
|
|
# Fix part #1 for git82: save last token type for propagation of type 'Z' |
|
7839
|
5446
|
|
|
|
|
17219
|
$rnested_statement_type->[$aa]->[$cd_aa] = |
|
7840
|
|
|
|
|
|
|
[ $statement_type, $last_nonblank_type, $last_nonblank_token ]; |
|
7841
|
5446
|
|
|
|
|
7333
|
$statement_type = EMPTY_STRING; |
|
7842
|
5446
|
|
|
|
|
11617
|
return ( $seqno, $indent ); |
|
7843
|
|
|
|
|
|
|
} ## end sub increase_nesting_depth |
|
7844
|
|
|
|
|
|
|
|
|
7845
|
|
|
|
|
|
|
sub is_balanced_closing_container { |
|
7846
|
|
|
|
|
|
|
|
|
7847
|
47
|
|
|
47
|
0
|
93
|
my ($aa) = @_; |
|
7848
|
|
|
|
|
|
|
|
|
7849
|
|
|
|
|
|
|
# Return true if a closing container can go here without error |
|
7850
|
|
|
|
|
|
|
# Return false if not |
|
7851
|
|
|
|
|
|
|
# Given: |
|
7852
|
|
|
|
|
|
|
# $aa = integer code of container type, 0-3 |
|
7853
|
|
|
|
|
|
|
|
|
7854
|
|
|
|
|
|
|
# cannot close if there was no opening |
|
7855
|
47
|
|
|
|
|
65
|
my $cd_aa = $rcurrent_depth->[$aa]; |
|
7856
|
47
|
100
|
|
|
|
150
|
return if ( $cd_aa <= 0 ); |
|
7857
|
|
|
|
|
|
|
|
|
7858
|
|
|
|
|
|
|
# check that any other brace types $bb contained within would be balanced |
|
7859
|
8
|
|
|
|
|
51
|
for my $bb ( 0 .. @closing_brace_names - 1 ) { |
|
7860
|
8
|
50
|
|
|
|
20
|
next if ( $bb == $aa ); |
|
7861
|
|
|
|
|
|
|
return |
|
7862
|
|
|
|
|
|
|
if ( |
|
7863
|
8
|
50
|
|
|
|
39
|
$rdepth_array->[$aa]->[$bb]->[$cd_aa] != $rcurrent_depth->[$bb] ); |
|
7864
|
|
|
|
|
|
|
} |
|
7865
|
|
|
|
|
|
|
|
|
7866
|
|
|
|
|
|
|
# OK, everything will be balanced |
|
7867
|
0
|
|
|
|
|
0
|
return 1; |
|
7868
|
|
|
|
|
|
|
} ## end sub is_balanced_closing_container |
|
7869
|
|
|
|
|
|
|
|
|
7870
|
|
|
|
|
|
|
sub decrease_nesting_depth { |
|
7871
|
|
|
|
|
|
|
|
|
7872
|
5446
|
|
|
5446
|
0
|
8923
|
my ( $self, $aa, $pos ) = @_; |
|
7873
|
|
|
|
|
|
|
|
|
7874
|
|
|
|
|
|
|
# Given: |
|
7875
|
|
|
|
|
|
|
# $aa = integer code of container type, 0-3 |
|
7876
|
|
|
|
|
|
|
# $pos = position of character, for error message |
|
7877
|
|
|
|
|
|
|
|
|
7878
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $rcurrent_depth, |
|
7879
|
|
|
|
|
|
|
# $rcurrent_sequence_number, $rdepth_array, $rstarting_line_of_current_depth |
|
7880
|
|
|
|
|
|
|
# $statement_type |
|
7881
|
5446
|
|
|
|
|
6319
|
my $seqno = 0; |
|
7882
|
5446
|
|
|
|
|
11775
|
my $input_line_number = $self->[_last_line_number_]; |
|
7883
|
5446
|
|
|
|
|
7527
|
my $input_line = $self->[_line_of_text_]; |
|
7884
|
|
|
|
|
|
|
|
|
7885
|
5446
|
|
|
|
|
6191
|
my $outdent = 0; |
|
7886
|
5446
|
|
|
|
|
6360
|
$total_depth--; |
|
7887
|
5446
|
|
|
|
|
7821
|
my $cd_aa = $rcurrent_depth->[$aa]; |
|
7888
|
5446
|
50
|
|
|
|
8957
|
if ( $cd_aa > 0 ) { |
|
7889
|
|
|
|
|
|
|
|
|
7890
|
|
|
|
|
|
|
# set a flag for un-indenting after seeing a nested ternary statement |
|
7891
|
5446
|
|
|
|
|
8249
|
$seqno = $rcurrent_sequence_number->[$aa]->[$cd_aa]; |
|
7892
|
5446
|
100
|
|
|
|
9281
|
if ( $aa == QUESTION_COLON ) { |
|
7893
|
193
|
|
|
|
|
376
|
$outdent = $rnested_ternary_flag->[$cd_aa]; |
|
7894
|
|
|
|
|
|
|
} |
|
7895
|
|
|
|
|
|
|
|
|
7896
|
|
|
|
|
|
|
# Fix part #2 for git82: use saved type for propagation of type 'Z' |
|
7897
|
|
|
|
|
|
|
# through type L-R braces. Perl seems to allow ${bareword} |
|
7898
|
|
|
|
|
|
|
# as an indirect object, but nothing much more complex than that. |
|
7899
|
|
|
|
|
|
|
( $statement_type, my $saved_type, my $saved_token_uu ) = |
|
7900
|
5446
|
|
|
|
|
6181
|
@{ $rnested_statement_type->[$aa]->[ $rcurrent_depth->[$aa] ] }; |
|
|
5446
|
|
|
|
|
13257
|
|
|
7901
|
5446
|
50
|
100
|
|
|
14477
|
if ( $aa == BRACE |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
7902
|
|
|
|
|
|
|
&& $saved_type eq 'Z' |
|
7903
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'w' |
|
7904
|
|
|
|
|
|
|
&& $rbrace_structural_type->[$brace_depth] eq 'L' ) |
|
7905
|
|
|
|
|
|
|
{ |
|
7906
|
1
|
|
|
|
|
2
|
$last_nonblank_type = $saved_type; |
|
7907
|
|
|
|
|
|
|
} |
|
7908
|
|
|
|
|
|
|
|
|
7909
|
|
|
|
|
|
|
# check that any brace types $bb contained within are balanced |
|
7910
|
5446
|
|
|
|
|
12043
|
for my $bb ( 0 .. @closing_brace_names - 1 ) { |
|
7911
|
21784
|
100
|
|
|
|
30593
|
next if ( $bb == $aa ); |
|
7912
|
|
|
|
|
|
|
|
|
7913
|
16338
|
50
|
|
|
|
31569
|
if ( $rdepth_array->[$aa]->[$bb]->[$cd_aa] != |
|
7914
|
|
|
|
|
|
|
$rcurrent_depth->[$bb] ) |
|
7915
|
|
|
|
|
|
|
{ |
|
7916
|
0
|
|
|
|
|
0
|
my $diff = |
|
7917
|
|
|
|
|
|
|
$rcurrent_depth->[$bb] - |
|
7918
|
|
|
|
|
|
|
$rdepth_array->[$aa]->[$bb]->[$cd_aa]; |
|
7919
|
|
|
|
|
|
|
|
|
7920
|
|
|
|
|
|
|
# don't whine too many times |
|
7921
|
0
|
|
|
|
|
0
|
my $saw_brace_error = $self->get_saw_brace_error(); |
|
7922
|
0
|
0
|
0
|
|
|
0
|
if ( |
|
|
|
|
0
|
|
|
|
|
|
7923
|
|
|
|
|
|
|
$saw_brace_error <= MAX_NAG_MESSAGES |
|
7924
|
|
|
|
|
|
|
|
|
7925
|
|
|
|
|
|
|
# if too many closing types have occurred, we probably |
|
7926
|
|
|
|
|
|
|
# already caught this error |
|
7927
|
|
|
|
|
|
|
&& ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) |
|
7928
|
|
|
|
|
|
|
) |
|
7929
|
|
|
|
|
|
|
{ |
|
7930
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
|
7931
|
0
|
|
|
|
|
0
|
my $rsl = $rstarting_line_of_current_depth->[$aa]->[$cd_aa]; |
|
7932
|
0
|
|
|
|
|
0
|
my $sl = $rsl->[0]; |
|
7933
|
0
|
|
|
|
|
0
|
my $rel = [ $input_line_number, $input_line, $pos ]; |
|
7934
|
0
|
|
|
|
|
0
|
my $el = $rel->[0]; |
|
7935
|
0
|
|
|
|
|
0
|
my ($ess); |
|
7936
|
|
|
|
|
|
|
|
|
7937
|
0
|
0
|
0
|
|
|
0
|
if ( $diff == 1 || $diff == -1 ) { |
|
7938
|
0
|
|
|
|
|
0
|
$ess = EMPTY_STRING; |
|
7939
|
|
|
|
|
|
|
} |
|
7940
|
|
|
|
|
|
|
else { |
|
7941
|
0
|
|
|
|
|
0
|
$ess = 's'; |
|
7942
|
|
|
|
|
|
|
} |
|
7943
|
0
|
0
|
|
|
|
0
|
my $bname = |
|
7944
|
|
|
|
|
|
|
( $diff > 0 ) |
|
7945
|
|
|
|
|
|
|
? $opening_brace_names[$bb] |
|
7946
|
|
|
|
|
|
|
: $closing_brace_names[$bb]; |
|
7947
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( @{$rsl}, '^' ); |
|
|
0
|
|
|
|
|
0
|
|
|
7948
|
0
|
|
|
|
|
0
|
my $msg = <<"EOM"; |
|
7949
|
|
|
|
|
|
|
Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el |
|
7950
|
|
|
|
|
|
|
EOM |
|
7951
|
|
|
|
|
|
|
|
|
7952
|
0
|
0
|
|
|
|
0
|
if ( $diff > 0 ) { |
|
7953
|
0
|
|
|
|
|
0
|
my $rml = |
|
7954
|
|
|
|
|
|
|
$rstarting_line_of_current_depth->[$bb] |
|
7955
|
|
|
|
|
|
|
->[ $rcurrent_depth->[$bb] ]; |
|
7956
|
0
|
|
|
|
|
0
|
my $ml = $rml->[0]; |
|
7957
|
0
|
|
|
|
|
0
|
$msg .= |
|
7958
|
|
|
|
|
|
|
" The most recent un-matched $bname is on line $ml\n"; |
|
7959
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( @{$rml}, '^' ); |
|
|
0
|
|
|
|
|
0
|
|
|
7960
|
|
|
|
|
|
|
} |
|
7961
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( @{$rel}, '^' ); |
|
|
0
|
|
|
|
|
0
|
|
|
7962
|
0
|
|
|
|
|
0
|
$self->warning($msg); |
|
7963
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
|
7964
|
|
|
|
|
|
|
} |
|
7965
|
0
|
|
|
|
|
0
|
$self->increment_brace_error(); |
|
7966
|
0
|
0
|
|
|
|
0
|
if ( $bb eq BRACE ) { $self->[_show_indentation_table_] = 1 } |
|
|
0
|
|
|
|
|
0
|
|
|
7967
|
|
|
|
|
|
|
} |
|
7968
|
|
|
|
|
|
|
} |
|
7969
|
5446
|
|
|
|
|
8153
|
$rcurrent_depth->[$aa]--; |
|
7970
|
|
|
|
|
|
|
} |
|
7971
|
|
|
|
|
|
|
else { |
|
7972
|
|
|
|
|
|
|
|
|
7973
|
0
|
|
|
|
|
0
|
my $saw_brace_error = $self->get_saw_brace_error(); |
|
7974
|
0
|
0
|
|
|
|
0
|
if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { |
|
7975
|
0
|
|
|
|
|
0
|
my $msg = <<"EOM"; |
|
7976
|
|
|
|
|
|
|
There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number |
|
7977
|
|
|
|
|
|
|
EOM |
|
7978
|
0
|
|
|
|
|
0
|
$self->indicate_error( $msg, $input_line_number, $input_line, $pos, |
|
7979
|
|
|
|
|
|
|
'^' ); |
|
7980
|
|
|
|
|
|
|
} |
|
7981
|
0
|
|
|
|
|
0
|
$self->increment_brace_error(); |
|
7982
|
0
|
0
|
|
|
|
0
|
if ( $aa eq BRACE ) { $self->[_show_indentation_table_] = 1 } |
|
|
0
|
|
|
|
|
0
|
|
|
7983
|
|
|
|
|
|
|
|
|
7984
|
|
|
|
|
|
|
# keep track of errors in braces alone (ignoring ternary nesting errors) |
|
7985
|
0
|
0
|
|
|
|
0
|
$self->[_true_brace_error_count_]++ |
|
7986
|
|
|
|
|
|
|
if ( $closing_brace_names[$aa] ne "':'" ); |
|
7987
|
|
|
|
|
|
|
} |
|
7988
|
5446
|
|
|
|
|
11244
|
return ( $seqno, $outdent ); |
|
7989
|
|
|
|
|
|
|
} ## end sub decrease_nesting_depth |
|
7990
|
|
|
|
|
|
|
|
|
7991
|
|
|
|
|
|
|
sub check_final_nesting_depths { |
|
7992
|
|
|
|
|
|
|
|
|
7993
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $rcurrent_depth, $rstarting_line_of_current_depth |
|
7994
|
649
|
|
|
649
|
0
|
1110
|
my $self = shift; |
|
7995
|
|
|
|
|
|
|
|
|
7996
|
649
|
|
|
|
|
2239
|
for my $aa ( 0 .. @closing_brace_names - 1 ) { |
|
7997
|
|
|
|
|
|
|
|
|
7998
|
2596
|
|
|
|
|
3415
|
my $cd_aa = $rcurrent_depth->[$aa]; |
|
7999
|
2596
|
50
|
|
|
|
4519
|
if ($cd_aa) { |
|
8000
|
0
|
|
|
|
|
0
|
my $rsl = $rstarting_line_of_current_depth->[$aa]->[$cd_aa]; |
|
8001
|
0
|
|
|
|
|
0
|
my $sl = $rsl->[0]; |
|
8002
|
|
|
|
|
|
|
|
|
8003
|
|
|
|
|
|
|
# Add hint for something like a missing terminal ':' of a ternary |
|
8004
|
0
|
|
|
|
|
0
|
my $hint = EMPTY_STRING; |
|
8005
|
0
|
0
|
|
|
|
0
|
if ( $cd_aa == 1 ) { |
|
8006
|
0
|
|
|
|
|
0
|
$hint = |
|
8007
|
|
|
|
|
|
|
" .. did not find its closing $closing_brace_names[$aa]"; |
|
8008
|
|
|
|
|
|
|
} |
|
8009
|
0
|
|
|
|
|
0
|
my $msg = <<"EOM"; |
|
8010
|
|
|
|
|
|
|
Final nesting depth of $opening_brace_names[$aa]s is $cd_aa |
|
8011
|
|
|
|
|
|
|
The most recent un-matched $opening_brace_names[$aa] is on line $sl$hint |
|
8012
|
|
|
|
|
|
|
EOM |
|
8013
|
0
|
|
|
|
|
0
|
$self->indicate_error( $msg, @{$rsl}, '^' ); |
|
|
0
|
|
|
|
|
0
|
|
|
8014
|
0
|
|
|
|
|
0
|
$self->increment_brace_error(); |
|
8015
|
0
|
0
|
|
|
|
0
|
if ( $aa eq BRACE ) { $self->[_show_indentation_table_] = 1 } |
|
|
0
|
|
|
|
|
0
|
|
|
8016
|
|
|
|
|
|
|
} |
|
8017
|
|
|
|
|
|
|
} |
|
8018
|
649
|
|
|
|
|
1063
|
return; |
|
8019
|
|
|
|
|
|
|
} ## end sub check_final_nesting_depths |
|
8020
|
|
|
|
|
|
|
|
|
8021
|
|
|
|
|
|
|
####################################################################### |
|
8022
|
|
|
|
|
|
|
# Tokenizer routines for looking ahead in input stream |
|
8023
|
|
|
|
|
|
|
####################################################################### |
|
8024
|
|
|
|
|
|
|
|
|
8025
|
|
|
|
|
|
|
sub peek_ahead_for_n_nonblank_pre_tokens { |
|
8026
|
|
|
|
|
|
|
|
|
8027
|
184
|
|
|
184
|
0
|
389
|
my ( $self, $max_pretokens ) = @_; |
|
8028
|
|
|
|
|
|
|
|
|
8029
|
|
|
|
|
|
|
# Given: |
|
8030
|
|
|
|
|
|
|
# $max_pretokens = number of pretokens wanted |
|
8031
|
|
|
|
|
|
|
# Return: |
|
8032
|
|
|
|
|
|
|
# next $max_pretokens pretokens if they exist |
|
8033
|
|
|
|
|
|
|
# undef's if hits eof without seeing any pretokens |
|
8034
|
|
|
|
|
|
|
|
|
8035
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
|
8036
|
184
|
|
|
|
|
281
|
my $line; |
|
8037
|
184
|
|
|
|
|
287
|
my $i = 0; |
|
8038
|
184
|
|
|
|
|
331
|
my ( $rpre_tokens, $rmap, $rpre_types ); |
|
8039
|
|
|
|
|
|
|
|
|
8040
|
184
|
|
|
|
|
562
|
while ( defined( $line = $self->peek_ahead( $i++ ) ) ) { |
|
8041
|
202
|
|
|
|
|
1337
|
$line =~ s/^\s+//; # trim leading blanks |
|
8042
|
202
|
100
|
|
|
|
512
|
next if ( length($line) <= 0 ); # skip blank |
|
8043
|
196
|
100
|
|
|
|
607
|
next if ( $line =~ /^#/ ); # skip comment |
|
8044
|
176
|
|
|
|
|
404
|
( $rpre_tokens, $rmap, $rpre_types ) = |
|
8045
|
|
|
|
|
|
|
pre_tokenize( $line, $max_pretokens ); |
|
8046
|
176
|
|
|
|
|
321
|
last; |
|
8047
|
|
|
|
|
|
|
} ## end while ( defined( $line = ...)) |
|
8048
|
184
|
|
|
|
|
511
|
return ( $rpre_tokens, $rpre_types ); |
|
8049
|
|
|
|
|
|
|
} ## end sub peek_ahead_for_n_nonblank_pre_tokens |
|
8050
|
|
|
|
|
|
|
|
|
8051
|
|
|
|
|
|
|
# look ahead for next non-blank, non-comment line of code |
|
8052
|
|
|
|
|
|
|
sub peek_ahead_for_nonblank_token { |
|
8053
|
|
|
|
|
|
|
|
|
8054
|
139
|
|
|
139
|
0
|
342
|
my ( $self, $rtokens, $max_token_index ) = @_; |
|
8055
|
|
|
|
|
|
|
|
|
8056
|
|
|
|
|
|
|
# Given: |
|
8057
|
|
|
|
|
|
|
# $rtokens = ref to token array |
|
8058
|
|
|
|
|
|
|
# $max_token_index = index of last token in $rtokens |
|
8059
|
|
|
|
|
|
|
# Task: |
|
8060
|
|
|
|
|
|
|
# Update $rtokens with next nonblank token |
|
8061
|
|
|
|
|
|
|
|
|
8062
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
|
8063
|
139
|
|
|
|
|
240
|
my $line; |
|
8064
|
139
|
|
|
|
|
227
|
my $i = 0; |
|
8065
|
|
|
|
|
|
|
|
|
8066
|
139
|
|
|
|
|
537
|
while ( defined( $line = $self->peek_ahead( $i++ ) ) ) { |
|
8067
|
190
|
|
|
|
|
843
|
$line =~ s/^\s+//; # trim leading blanks |
|
8068
|
190
|
100
|
|
|
|
554
|
next if ( length($line) <= 0 ); # skip blank |
|
8069
|
161
|
100
|
|
|
|
526
|
next if ( $line =~ /^#/ ); # skip comment |
|
8070
|
|
|
|
|
|
|
|
|
8071
|
|
|
|
|
|
|
# Updated from 2 to 3 to get trigraphs, added for case b1175 |
|
8072
|
137
|
|
|
|
|
400
|
my ( $rtok, $rmap_uu, $rtype_uu ) = pre_tokenize( $line, 3 ); |
|
8073
|
137
|
|
|
|
|
286
|
my $j = $max_token_index + 1; |
|
8074
|
|
|
|
|
|
|
|
|
8075
|
137
|
|
|
|
|
231
|
foreach my $tok ( @{$rtok} ) { |
|
|
137
|
|
|
|
|
315
|
|
|
8076
|
394
|
100
|
|
|
|
815
|
last if ( $tok =~ "\n" ); |
|
8077
|
351
|
|
|
|
|
717
|
$rtokens->[ ++$j ] = $tok; |
|
8078
|
|
|
|
|
|
|
} |
|
8079
|
137
|
|
|
|
|
468
|
last; |
|
8080
|
|
|
|
|
|
|
} ## end while ( defined( $line = ...)) |
|
8081
|
139
|
|
|
|
|
303
|
return; |
|
8082
|
|
|
|
|
|
|
} ## end sub peek_ahead_for_nonblank_token |
|
8083
|
|
|
|
|
|
|
|
|
8084
|
|
|
|
|
|
|
####################################################################### |
|
8085
|
|
|
|
|
|
|
# Tokenizer guessing routines for ambiguous situations |
|
8086
|
|
|
|
|
|
|
####################################################################### |
|
8087
|
|
|
|
|
|
|
|
|
8088
|
|
|
|
|
|
|
my %is_non_ternary_pretok; |
|
8089
|
|
|
|
|
|
|
|
|
8090
|
|
|
|
|
|
|
BEGIN { |
|
8091
|
|
|
|
|
|
|
|
|
8092
|
|
|
|
|
|
|
# Some pre-tokens which cannot immediately follow a ternary '?' |
|
8093
|
44
|
|
|
44
|
|
295
|
my @q = qw# ; ? : ) } ] = > #; |
|
8094
|
44
|
|
|
|
|
152
|
push @q, COMMA; |
|
8095
|
44
|
|
|
|
|
181
|
%is_non_ternary_pretok = map { $_ => 1 } @q; |
|
|
396
|
|
|
|
|
21911
|
|
|
8096
|
|
|
|
|
|
|
} |
|
8097
|
|
|
|
|
|
|
|
|
8098
|
|
|
|
|
|
|
sub guess_if_pattern_or_conditional { |
|
8099
|
|
|
|
|
|
|
|
|
8100
|
12
|
|
|
12
|
0
|
37
|
my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map_uu, $max_token_index ) |
|
8101
|
|
|
|
|
|
|
= @_; |
|
8102
|
|
|
|
|
|
|
|
|
8103
|
|
|
|
|
|
|
# This routine is called when we have encountered a ? following an |
|
8104
|
|
|
|
|
|
|
# unknown bareword, and we must decide if it starts a pattern or not |
|
8105
|
|
|
|
|
|
|
# Given: |
|
8106
|
|
|
|
|
|
|
# $i - token index of the ? starting possible pattern |
|
8107
|
|
|
|
|
|
|
# $rtokens ... = the token arrays |
|
8108
|
|
|
|
|
|
|
# Return: |
|
8109
|
|
|
|
|
|
|
# $is_pattern = 0 if probably not pattern, =1 if probably a pattern |
|
8110
|
|
|
|
|
|
|
# msg = a warning or diagnostic message |
|
8111
|
|
|
|
|
|
|
|
|
8112
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token |
|
8113
|
|
|
|
|
|
|
|
|
8114
|
12
|
|
|
|
|
22
|
my $is_pattern = 0; |
|
8115
|
12
|
|
|
|
|
31
|
my $msg = |
|
8116
|
|
|
|
|
|
|
"guessing that ? after type='$last_nonblank_type' token='$last_nonblank_token' starts a "; |
|
8117
|
|
|
|
|
|
|
|
|
8118
|
12
|
50
|
|
|
|
65
|
if ( $i >= $max_token_index ) { |
|
8119
|
0
|
|
|
|
|
0
|
$msg .= "conditional (no end to pattern found on the line)\n"; |
|
8120
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
8121
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8122
|
|
|
|
|
|
|
} |
|
8123
|
|
|
|
|
|
|
|
|
8124
|
|
|
|
|
|
|
# See if we can rule out a ternary operator here before proceeding. c547. |
|
8125
|
12
|
|
|
|
|
43
|
my ( $next_nonblank_token, $i_next_uu ) = |
|
8126
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); |
|
8127
|
12
|
50
|
33
|
|
|
103
|
if ( |
|
|
|
|
33
|
|
|
|
|
|
8128
|
|
|
|
|
|
|
!( |
|
8129
|
|
|
|
|
|
|
$is_non_ternary_pretok{$next_nonblank_token} |
|
8130
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 'k' && $last_nonblank_token eq 'split' ) |
|
8131
|
|
|
|
|
|
|
) |
|
8132
|
|
|
|
|
|
|
) |
|
8133
|
|
|
|
|
|
|
{ |
|
8134
|
|
|
|
|
|
|
# A ternary cannot be ruled out here. We will assume this is a ternary |
|
8135
|
|
|
|
|
|
|
# operator since '?' as pattern delimiter is deprecated. |
|
8136
|
12
|
|
|
|
|
21
|
$is_pattern = 0; |
|
8137
|
12
|
|
|
|
|
48
|
$msg .= "conditional (cannot rule it out)"; |
|
8138
|
12
|
|
|
|
|
43
|
return ( $is_pattern, $msg ); |
|
8139
|
|
|
|
|
|
|
} |
|
8140
|
|
|
|
|
|
|
|
|
8141
|
|
|
|
|
|
|
# If we get here, then we either have a ternary with a syntax error or some |
|
8142
|
|
|
|
|
|
|
# ancient code which uses ? as a pattern delimiter. We will only select the |
|
8143
|
|
|
|
|
|
|
# pattern delimiter if we can find its matching closing delimiter. |
|
8144
|
|
|
|
|
|
|
|
|
8145
|
0
|
|
|
|
|
0
|
my $ibeg = $i; |
|
8146
|
0
|
|
|
|
|
0
|
$i = $ibeg + 1; |
|
8147
|
|
|
|
|
|
|
##my $next_token = $rtokens->[$i]; # first token after ? |
|
8148
|
|
|
|
|
|
|
|
|
8149
|
|
|
|
|
|
|
# look for a possible ending ? on this line.. |
|
8150
|
0
|
|
|
|
|
0
|
my $in_quote = 1; |
|
8151
|
0
|
|
|
|
|
0
|
my $quote_depth = 0; |
|
8152
|
0
|
|
|
|
|
0
|
my $quote_character = EMPTY_STRING; |
|
8153
|
0
|
|
|
|
|
0
|
my $quote_pos = 0; |
|
8154
|
0
|
|
|
|
|
0
|
my $quoted_string; |
|
8155
|
|
|
|
|
|
|
( |
|
8156
|
|
|
|
|
|
|
|
|
8157
|
0
|
|
|
|
|
0
|
$i, |
|
8158
|
|
|
|
|
|
|
$in_quote, |
|
8159
|
|
|
|
|
|
|
$quote_character, |
|
8160
|
|
|
|
|
|
|
$quote_pos, |
|
8161
|
|
|
|
|
|
|
$quote_depth, |
|
8162
|
|
|
|
|
|
|
$quoted_string, |
|
8163
|
|
|
|
|
|
|
|
|
8164
|
|
|
|
|
|
|
) = $self->follow_quoted_string( |
|
8165
|
|
|
|
|
|
|
|
|
8166
|
|
|
|
|
|
|
$ibeg, |
|
8167
|
|
|
|
|
|
|
$in_quote, |
|
8168
|
|
|
|
|
|
|
$rtokens, |
|
8169
|
|
|
|
|
|
|
$rtoken_type, |
|
8170
|
|
|
|
|
|
|
$quote_character, |
|
8171
|
|
|
|
|
|
|
$quote_pos, |
|
8172
|
|
|
|
|
|
|
$quote_depth, |
|
8173
|
|
|
|
|
|
|
$max_token_index, |
|
8174
|
|
|
|
|
|
|
|
|
8175
|
|
|
|
|
|
|
); |
|
8176
|
|
|
|
|
|
|
|
|
8177
|
0
|
0
|
|
|
|
0
|
if ($in_quote) { |
|
8178
|
|
|
|
|
|
|
|
|
8179
|
|
|
|
|
|
|
# we didn't find an ending ? on this line, |
|
8180
|
|
|
|
|
|
|
# so we bias towards conditional |
|
8181
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
8182
|
0
|
|
|
|
|
0
|
$msg .= "conditional (no ending ? on this line)\n"; |
|
8183
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8184
|
|
|
|
|
|
|
} |
|
8185
|
|
|
|
|
|
|
|
|
8186
|
|
|
|
|
|
|
# we found an ending ?, so we bias towards a pattern |
|
8187
|
|
|
|
|
|
|
|
|
8188
|
|
|
|
|
|
|
# Watch out for an ending ? in quotes, like this |
|
8189
|
|
|
|
|
|
|
# my $case_flag = File::Spec->case_tolerant ? '(?i)' : ''; |
|
8190
|
0
|
|
|
|
|
0
|
my $s_quote = 0; |
|
8191
|
0
|
|
|
|
|
0
|
my $d_quote = 0; |
|
8192
|
0
|
|
|
|
|
0
|
my $colons = 0; |
|
8193
|
0
|
|
|
|
|
0
|
foreach my $ii ( $ibeg + 1 .. $i - 1 ) { |
|
8194
|
0
|
|
|
|
|
0
|
my $tok = $rtokens->[$ii]; |
|
8195
|
0
|
0
|
|
|
|
0
|
if ( $tok eq ":" ) { $colons++ } |
|
|
0
|
|
|
|
|
0
|
|
|
8196
|
0
|
0
|
|
|
|
0
|
if ( $tok eq "'" ) { $s_quote++ } |
|
|
0
|
|
|
|
|
0
|
|
|
8197
|
0
|
0
|
|
|
|
0
|
if ( $tok eq '"' ) { $d_quote++ } |
|
|
0
|
|
|
|
|
0
|
|
|
8198
|
|
|
|
|
|
|
} |
|
8199
|
0
|
0
|
0
|
|
|
0
|
if ( $s_quote % 2 || $d_quote % 2 || $colons ) { |
|
|
|
|
0
|
|
|
|
|
|
8200
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
8201
|
0
|
|
|
|
|
0
|
$msg .= "conditional: found ending ? but unbalanced quote chars\n"; |
|
8202
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8203
|
|
|
|
|
|
|
} |
|
8204
|
0
|
0
|
|
|
|
0
|
if ( $self->pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { |
|
8205
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
|
8206
|
0
|
|
|
|
|
0
|
$msg .= "pattern (found ending ? and pattern expected)\n"; |
|
8207
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8208
|
|
|
|
|
|
|
} |
|
8209
|
|
|
|
|
|
|
|
|
8210
|
|
|
|
|
|
|
# NOTE: An ultimate decision could be made on version, since ? is a ternary |
|
8211
|
|
|
|
|
|
|
# after version 5.22. But we may be formatting an ancient script with a |
|
8212
|
|
|
|
|
|
|
# newer perl, and it might run on an older perl, so we cannot be certain. |
|
8213
|
|
|
|
|
|
|
# if ($] >=5.022) {$is_pattern=0} else { ... not sure |
|
8214
|
|
|
|
|
|
|
|
|
8215
|
0
|
|
|
|
|
0
|
$msg .= "conditional (but uncertain)\n"; |
|
8216
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8217
|
|
|
|
|
|
|
} ## end sub guess_if_pattern_or_conditional |
|
8218
|
|
|
|
|
|
|
|
|
8219
|
|
|
|
|
|
|
my %is_known_constant; |
|
8220
|
|
|
|
|
|
|
my %is_known_function; |
|
8221
|
|
|
|
|
|
|
|
|
8222
|
|
|
|
|
|
|
BEGIN { |
|
8223
|
|
|
|
|
|
|
|
|
8224
|
|
|
|
|
|
|
# Constants like 'pi' in Trig.pm are common |
|
8225
|
44
|
|
|
44
|
|
244
|
my @q = qw( pi pi2 pi4 pip2 pip4 ); |
|
8226
|
44
|
|
|
|
|
353
|
$is_known_constant{$_} = 1 for @q; |
|
8227
|
|
|
|
|
|
|
|
|
8228
|
|
|
|
|
|
|
# parenless calls of 'ok' are common |
|
8229
|
44
|
|
|
|
|
108
|
@q = qw( ok ); |
|
8230
|
44
|
|
|
|
|
76040
|
$is_known_function{$_} = 1 for @q; |
|
8231
|
|
|
|
|
|
|
} ## end BEGIN |
|
8232
|
|
|
|
|
|
|
|
|
8233
|
|
|
|
|
|
|
sub guess_if_pattern_or_division { |
|
8234
|
|
|
|
|
|
|
|
|
8235
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map, $max_token_index ) = |
|
8236
|
|
|
|
|
|
|
@_; |
|
8237
|
|
|
|
|
|
|
|
|
8238
|
|
|
|
|
|
|
# This routine is called when we have encountered a / following an |
|
8239
|
|
|
|
|
|
|
# unknown bareword, and we must decide if it starts a pattern or is a |
|
8240
|
|
|
|
|
|
|
# division. |
|
8241
|
|
|
|
|
|
|
# Given: |
|
8242
|
|
|
|
|
|
|
# $i - token index of the / starting possible pattern |
|
8243
|
|
|
|
|
|
|
# $rtokens ... = the token arrays |
|
8244
|
|
|
|
|
|
|
# Return: |
|
8245
|
|
|
|
|
|
|
# $is_pattern = 0 if probably division, =1 if probably a pattern |
|
8246
|
|
|
|
|
|
|
# msg = a warning or diagnostic message |
|
8247
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token |
|
8248
|
0
|
|
|
|
|
0
|
my $msg = "guessing that / after '$last_nonblank_token' starts a "; |
|
8249
|
0
|
|
|
|
|
0
|
my $ibeg = $i; |
|
8250
|
0
|
|
|
|
|
0
|
my $is_pattern = 0; |
|
8251
|
|
|
|
|
|
|
|
|
8252
|
0
|
|
|
|
|
0
|
my $divide_possible = |
|
8253
|
|
|
|
|
|
|
$self->is_possible_numerator( $i, $rtokens, $max_token_index ); |
|
8254
|
|
|
|
|
|
|
|
|
8255
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible < 0 ) { |
|
8256
|
0
|
|
|
|
|
0
|
$msg .= "pattern (division not possible here)\n"; |
|
8257
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
|
8258
|
0
|
|
|
|
|
0
|
$self->saw_bareword_function($last_nonblank_token); |
|
8259
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8260
|
|
|
|
|
|
|
} |
|
8261
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible == 4 ) { |
|
8262
|
0
|
|
|
|
|
0
|
$msg .= "division (pattern not possible here)\n"; |
|
8263
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
8264
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8265
|
|
|
|
|
|
|
} |
|
8266
|
|
|
|
|
|
|
|
|
8267
|
|
|
|
|
|
|
# anything left on line? |
|
8268
|
0
|
0
|
|
|
|
0
|
if ( $i >= $max_token_index ) { |
|
8269
|
0
|
|
|
|
|
0
|
$msg .= "division (line ends with this /)\n"; |
|
8270
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
8271
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8272
|
|
|
|
|
|
|
} |
|
8273
|
|
|
|
|
|
|
|
|
8274
|
|
|
|
|
|
|
# quick check for no pattern-ending slash on this line |
|
8275
|
0
|
|
|
|
|
0
|
my $pos_beg = $rtoken_map->[$ibeg]; |
|
8276
|
0
|
|
|
|
|
0
|
my $input_line = $self->[_line_of_text_]; |
|
8277
|
0
|
0
|
|
|
|
0
|
if ( index( $input_line, '/', $pos_beg + 1 ) < 0 ) { |
|
8278
|
0
|
|
|
|
|
0
|
$msg .= "division (no ending / on this line)\n"; |
|
8279
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
8280
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8281
|
|
|
|
|
|
|
} |
|
8282
|
|
|
|
|
|
|
|
|
8283
|
|
|
|
|
|
|
# Setup spacing rule before we change $i below.. |
|
8284
|
0
|
|
|
|
|
0
|
$i = $ibeg + 1; |
|
8285
|
0
|
|
|
|
|
0
|
my $next_token = $rtokens->[$i]; # first token after slash |
|
8286
|
|
|
|
|
|
|
|
|
8287
|
|
|
|
|
|
|
# There are four possible spacings around the first slash: |
|
8288
|
|
|
|
|
|
|
# |
|
8289
|
|
|
|
|
|
|
# return pi/two;#/; -/- |
|
8290
|
|
|
|
|
|
|
# return pi/ two;#/; -/+ |
|
8291
|
|
|
|
|
|
|
# return pi / two;#/; +/+ |
|
8292
|
|
|
|
|
|
|
# return pi /two;#/; +/- <-- possible pattern |
|
8293
|
|
|
|
|
|
|
# |
|
8294
|
|
|
|
|
|
|
# Spacing rule: a space before the slash but not after the slash |
|
8295
|
|
|
|
|
|
|
# usually indicates a pattern. We can use this to break ties. |
|
8296
|
|
|
|
|
|
|
# Note: perl seems to take a newline as a space in this rule (c243) |
|
8297
|
0
|
|
0
|
|
|
0
|
my $space_before = $i < 2 || $rtokens->[ $i - 2 ] =~ m/^\s/; |
|
8298
|
0
|
|
|
|
|
0
|
my $space_after = $next_token =~ m/^\s/; |
|
8299
|
0
|
|
0
|
|
|
0
|
my $is_pattern_by_spacing = $space_before && !$space_after; |
|
8300
|
|
|
|
|
|
|
|
|
8301
|
|
|
|
|
|
|
# Make an accurate search for a possible terminating / on this line.. |
|
8302
|
0
|
|
|
|
|
0
|
my $in_quote = 1; |
|
8303
|
0
|
|
|
|
|
0
|
my $quote_depth = 0; |
|
8304
|
0
|
|
|
|
|
0
|
my $quote_character = EMPTY_STRING; |
|
8305
|
0
|
|
|
|
|
0
|
my $quote_pos = 0; |
|
8306
|
0
|
|
|
|
|
0
|
my $quoted_string; |
|
8307
|
|
|
|
|
|
|
( |
|
8308
|
|
|
|
|
|
|
|
|
8309
|
0
|
|
|
|
|
0
|
$i, |
|
8310
|
|
|
|
|
|
|
$in_quote, |
|
8311
|
|
|
|
|
|
|
$quote_character, |
|
8312
|
|
|
|
|
|
|
$quote_pos, |
|
8313
|
|
|
|
|
|
|
$quote_depth, |
|
8314
|
|
|
|
|
|
|
$quoted_string, |
|
8315
|
|
|
|
|
|
|
) |
|
8316
|
|
|
|
|
|
|
= $self->follow_quoted_string( |
|
8317
|
|
|
|
|
|
|
|
|
8318
|
|
|
|
|
|
|
$ibeg, |
|
8319
|
|
|
|
|
|
|
$in_quote, |
|
8320
|
|
|
|
|
|
|
$rtokens, |
|
8321
|
|
|
|
|
|
|
$rtoken_type, |
|
8322
|
|
|
|
|
|
|
$quote_character, |
|
8323
|
|
|
|
|
|
|
$quote_pos, |
|
8324
|
|
|
|
|
|
|
$quote_depth, |
|
8325
|
|
|
|
|
|
|
$max_token_index, |
|
8326
|
|
|
|
|
|
|
); |
|
8327
|
|
|
|
|
|
|
|
|
8328
|
|
|
|
|
|
|
# if we didn't find an ending / on this line .. |
|
8329
|
0
|
0
|
|
|
|
0
|
if ($in_quote) { |
|
8330
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
8331
|
0
|
|
|
|
|
0
|
$msg .= "division (no ending / on this line)\n"; |
|
8332
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8333
|
|
|
|
|
|
|
} |
|
8334
|
|
|
|
|
|
|
|
|
8335
|
|
|
|
|
|
|
# we found an ending /, see if it might terminate a pattern |
|
8336
|
0
|
|
|
|
|
0
|
my $pattern_expected = |
|
8337
|
|
|
|
|
|
|
$self->pattern_expected( $i, $rtokens, $max_token_index ); |
|
8338
|
|
|
|
|
|
|
|
|
8339
|
0
|
0
|
|
|
|
0
|
if ( $pattern_expected < 0 ) { |
|
8340
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
8341
|
0
|
|
|
|
|
0
|
$msg .= "division (pattern not possible)\n"; |
|
8342
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8343
|
|
|
|
|
|
|
} |
|
8344
|
|
|
|
|
|
|
|
|
8345
|
|
|
|
|
|
|
# Both pattern and divide can work here... |
|
8346
|
|
|
|
|
|
|
# Check for known constants in the numerator, like 'pi' |
|
8347
|
0
|
0
|
|
|
|
0
|
if ( $is_known_constant{$last_nonblank_token} ) { |
|
8348
|
0
|
|
|
|
|
0
|
$msg .= |
|
8349
|
|
|
|
|
|
|
"division (pattern works too but saw known constant '$last_nonblank_token')\n"; |
|
8350
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
8351
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8352
|
|
|
|
|
|
|
} |
|
8353
|
|
|
|
|
|
|
|
|
8354
|
|
|
|
|
|
|
# Check for known functions like 'ok' |
|
8355
|
0
|
0
|
|
|
|
0
|
if ( $is_known_function{$last_nonblank_token} ) { |
|
8356
|
0
|
|
|
|
|
0
|
$msg .= "pattern (division works too but saw '$last_nonblank_token')\n"; |
|
8357
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
|
8358
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8359
|
|
|
|
|
|
|
} |
|
8360
|
|
|
|
|
|
|
|
|
8361
|
|
|
|
|
|
|
# If one rule is more probable, use it |
|
8362
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible > $pattern_expected ) { |
|
8363
|
0
|
|
|
|
|
0
|
$msg .= "division (more likely based on following tokens)\n"; |
|
8364
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
8365
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8366
|
|
|
|
|
|
|
} |
|
8367
|
|
|
|
|
|
|
|
|
8368
|
|
|
|
|
|
|
# finally, we have to use the spacing rule |
|
8369
|
0
|
0
|
|
|
|
0
|
if ($is_pattern_by_spacing) { |
|
8370
|
0
|
|
|
|
|
0
|
$msg .= "pattern (guess on spacing, but division possible too)\n"; |
|
8371
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
|
8372
|
|
|
|
|
|
|
} |
|
8373
|
|
|
|
|
|
|
else { |
|
8374
|
0
|
|
|
|
|
0
|
$msg .= "division (guess on spacing, but pattern is possible too)\n"; |
|
8375
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
8376
|
|
|
|
|
|
|
} |
|
8377
|
|
|
|
|
|
|
|
|
8378
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
|
8379
|
|
|
|
|
|
|
} ## end sub guess_if_pattern_or_division |
|
8380
|
|
|
|
|
|
|
|
|
8381
|
|
|
|
|
|
|
sub guess_if_here_doc { |
|
8382
|
|
|
|
|
|
|
|
|
8383
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $next_token ) = @_; |
|
8384
|
|
|
|
|
|
|
|
|
8385
|
|
|
|
|
|
|
# Try to resolve here-doc vs. shift by looking ahead for |
|
8386
|
|
|
|
|
|
|
# non-code or the end token (currently only looks for end token) |
|
8387
|
|
|
|
|
|
|
|
|
8388
|
|
|
|
|
|
|
# Given: |
|
8389
|
|
|
|
|
|
|
# $next_token = the next token after '<<' |
|
8390
|
|
|
|
|
|
|
|
|
8391
|
|
|
|
|
|
|
# Return: |
|
8392
|
|
|
|
|
|
|
# 1 if it is probably a here doc |
|
8393
|
|
|
|
|
|
|
# 0 if not |
|
8394
|
|
|
|
|
|
|
|
|
8395
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $current_package $ris_constant, |
|
8396
|
|
|
|
|
|
|
|
|
8397
|
|
|
|
|
|
|
# This is how many lines we will search for a target as part of the |
|
8398
|
|
|
|
|
|
|
# guessing strategy. There is probably little reason to change it. |
|
8399
|
0
|
|
|
|
|
0
|
my $HERE_DOC_WINDOW = 40; |
|
8400
|
|
|
|
|
|
|
|
|
8401
|
0
|
|
|
|
|
0
|
my $here_doc_expected = 0; |
|
8402
|
0
|
|
|
|
|
0
|
my $line; |
|
8403
|
0
|
|
|
|
|
0
|
my $k = 0; |
|
8404
|
0
|
|
|
|
|
0
|
my $msg = "checking <<"; |
|
8405
|
|
|
|
|
|
|
|
|
8406
|
0
|
|
|
|
|
0
|
while ( defined( $line = $self->peek_ahead( $k++ ) ) ) { |
|
8407
|
0
|
|
|
|
|
0
|
chomp $line; |
|
8408
|
0
|
0
|
|
|
|
0
|
if ( $line eq $next_token ) { |
|
8409
|
0
|
|
|
|
|
0
|
$msg .= " -- found target $next_token ahead $k lines\n"; |
|
8410
|
0
|
|
|
|
|
0
|
$here_doc_expected = 1; # got it |
|
8411
|
0
|
|
|
|
|
0
|
last; |
|
8412
|
|
|
|
|
|
|
} |
|
8413
|
0
|
0
|
|
|
|
0
|
last if ( $k >= $HERE_DOC_WINDOW ); |
|
8414
|
|
|
|
|
|
|
} ## end while ( defined( $line = ...)) |
|
8415
|
|
|
|
|
|
|
|
|
8416
|
0
|
0
|
|
|
|
0
|
if ( !$here_doc_expected ) { |
|
8417
|
|
|
|
|
|
|
|
|
8418
|
0
|
0
|
|
|
|
0
|
if ( !defined($line) ) { |
|
8419
|
0
|
|
|
|
|
0
|
$here_doc_expected = -1; # hit eof without seeing target |
|
8420
|
0
|
|
|
|
|
0
|
$msg .= " -- must be shift; target $next_token not in file\n"; |
|
8421
|
|
|
|
|
|
|
} |
|
8422
|
|
|
|
|
|
|
else { # still unsure..taking a wild guess |
|
8423
|
|
|
|
|
|
|
|
|
8424
|
0
|
0
|
|
|
|
0
|
if ( !$ris_constant->{$current_package}->{$next_token} ) { |
|
8425
|
0
|
|
|
|
|
0
|
$here_doc_expected = 1; |
|
8426
|
0
|
|
|
|
|
0
|
$msg .= |
|
8427
|
|
|
|
|
|
|
" -- guessing it's a here-doc ($next_token not a constant)\n"; |
|
8428
|
|
|
|
|
|
|
} |
|
8429
|
|
|
|
|
|
|
else { |
|
8430
|
0
|
|
|
|
|
0
|
$msg .= |
|
8431
|
|
|
|
|
|
|
" -- guessing it's a shift ($next_token is a constant)\n"; |
|
8432
|
|
|
|
|
|
|
} |
|
8433
|
0
|
|
|
|
|
0
|
if (DEBUG_GUESS_MODE) { |
|
8434
|
|
|
|
|
|
|
$self->warning("DEBUG_GUESS_MODE message:\n$msg\n"); |
|
8435
|
|
|
|
|
|
|
} |
|
8436
|
|
|
|
|
|
|
} |
|
8437
|
|
|
|
|
|
|
} |
|
8438
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry($msg); |
|
8439
|
0
|
|
|
|
|
0
|
return $here_doc_expected; |
|
8440
|
|
|
|
|
|
|
} ## end sub guess_if_here_doc |
|
8441
|
|
|
|
|
|
|
|
|
8442
|
|
|
|
|
|
|
####################################################################### |
|
8443
|
|
|
|
|
|
|
# Tokenizer Routines for scanning identifiers and related items |
|
8444
|
|
|
|
|
|
|
####################################################################### |
|
8445
|
|
|
|
|
|
|
|
|
8446
|
|
|
|
|
|
|
sub scan_bare_identifier_do { |
|
8447
|
|
|
|
|
|
|
|
|
8448
|
|
|
|
|
|
|
my ( |
|
8449
|
|
|
|
|
|
|
|
|
8450
|
1862
|
|
|
1862
|
0
|
4844
|
$self, |
|
8451
|
|
|
|
|
|
|
|
|
8452
|
|
|
|
|
|
|
$input_line, |
|
8453
|
|
|
|
|
|
|
$i, |
|
8454
|
|
|
|
|
|
|
$tok, |
|
8455
|
|
|
|
|
|
|
$type, |
|
8456
|
|
|
|
|
|
|
$prototype, |
|
8457
|
|
|
|
|
|
|
$rtoken_map, |
|
8458
|
|
|
|
|
|
|
$max_token_index, |
|
8459
|
|
|
|
|
|
|
|
|
8460
|
|
|
|
|
|
|
) = @_; |
|
8461
|
|
|
|
|
|
|
|
|
8462
|
|
|
|
|
|
|
# This routine is called to scan a token starting with an alphanumeric |
|
8463
|
|
|
|
|
|
|
# variable or package separator, :: or '. |
|
8464
|
|
|
|
|
|
|
|
|
8465
|
|
|
|
|
|
|
# Given: |
|
8466
|
|
|
|
|
|
|
# current scan state variables |
|
8467
|
|
|
|
|
|
|
|
|
8468
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, |
|
8469
|
|
|
|
|
|
|
# $last_nonblank_type, $rparen_type, $paren_depth |
|
8470
|
|
|
|
|
|
|
|
|
8471
|
1862
|
|
|
|
|
2713
|
my $package = undef; |
|
8472
|
|
|
|
|
|
|
|
|
8473
|
1862
|
|
|
|
|
2475
|
my $i_beg = $i; |
|
8474
|
|
|
|
|
|
|
|
|
8475
|
|
|
|
|
|
|
# we have to back up one pretoken at a :: since each : is one pretoken |
|
8476
|
1862
|
100
|
|
|
|
3909
|
if ( $tok eq '::' ) { $i_beg-- } |
|
|
9
|
|
|
|
|
10
|
|
|
8477
|
1862
|
|
|
|
|
2769
|
my $pos_beg = $rtoken_map->[$i_beg]; |
|
8478
|
1862
|
|
|
|
|
5602
|
pos($input_line) = $pos_beg; |
|
8479
|
|
|
|
|
|
|
|
|
8480
|
|
|
|
|
|
|
# Examples: |
|
8481
|
|
|
|
|
|
|
# A::B::C |
|
8482
|
|
|
|
|
|
|
# A:: |
|
8483
|
|
|
|
|
|
|
# ::A |
|
8484
|
|
|
|
|
|
|
# A'B |
|
8485
|
1862
|
50
|
|
|
|
11714
|
if ( |
|
8486
|
|
|
|
|
|
|
$input_line =~ m{ |
|
8487
|
|
|
|
|
|
|
\G\s* # start at pos |
|
8488
|
|
|
|
|
|
|
( (?:\w*(?:'|::))* ) # $1 = maybe package name like A:: A::B:: or A' |
|
8489
|
|
|
|
|
|
|
(\w+)? # $2 = maybe followed by sub name |
|
8490
|
|
|
|
|
|
|
}gcx |
|
8491
|
|
|
|
|
|
|
) |
|
8492
|
|
|
|
|
|
|
{ |
|
8493
|
1862
|
|
|
|
|
2974
|
my $pos = pos($input_line); |
|
8494
|
1862
|
|
|
|
|
2743
|
my $numc = $pos - $pos_beg; |
|
8495
|
1862
|
|
|
|
|
3602
|
$tok = substr( $input_line, $pos_beg, $numc ); |
|
8496
|
|
|
|
|
|
|
|
|
8497
|
|
|
|
|
|
|
# type 'w' includes anything without leading type info |
|
8498
|
|
|
|
|
|
|
# ($,%,@,*) including something like abc::def::ghi |
|
8499
|
1862
|
|
|
|
|
2627
|
$type = 'w'; |
|
8500
|
|
|
|
|
|
|
|
|
8501
|
1862
|
|
|
|
|
2617
|
my $sub_name = EMPTY_STRING; |
|
8502
|
1862
|
100
|
|
|
|
4470
|
if ( defined($2) ) { $sub_name = $2; } |
|
|
1857
|
|
|
|
|
3191
|
|
|
8503
|
1862
|
100
|
66
|
|
|
6790
|
if ( defined($1) && length($1) ) { |
|
8504
|
280
|
|
|
|
|
513
|
$package = $1; |
|
8505
|
|
|
|
|
|
|
|
|
8506
|
|
|
|
|
|
|
# patch: check for package call A::B::C-> |
|
8507
|
|
|
|
|
|
|
# in this case, C is part of the package name |
|
8508
|
280
|
100
|
|
|
|
681
|
if ($sub_name) { |
|
8509
|
275
|
100
|
|
|
|
1075
|
if ( $input_line =~ m{ \G\s*(?:->) }gcx ) { |
|
8510
|
117
|
|
|
|
|
235
|
$package .= $sub_name; |
|
8511
|
117
|
|
|
|
|
222
|
$sub_name = EMPTY_STRING; |
|
8512
|
|
|
|
|
|
|
} |
|
8513
|
275
|
|
|
|
|
615
|
pos($input_line) = $pos; |
|
8514
|
|
|
|
|
|
|
} |
|
8515
|
|
|
|
|
|
|
|
|
8516
|
|
|
|
|
|
|
# patch: don't allow isolated package name which just ends |
|
8517
|
|
|
|
|
|
|
# in the old style package separator (single quote). Example: |
|
8518
|
|
|
|
|
|
|
# use CGI':all'; |
|
8519
|
280
|
50
|
66
|
|
|
1122
|
if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { |
|
8520
|
0
|
|
|
|
|
0
|
$pos--; |
|
8521
|
|
|
|
|
|
|
} |
|
8522
|
|
|
|
|
|
|
|
|
8523
|
280
|
|
|
|
|
595
|
$package =~ s/\'/::/g; |
|
8524
|
280
|
100
|
|
|
|
678
|
if ( $package =~ /^\:/ ) { $package = 'main' . $package } |
|
|
9
|
|
|
|
|
15
|
|
|
8525
|
280
|
|
|
|
|
903
|
$package =~ s/::$//; |
|
8526
|
|
|
|
|
|
|
} |
|
8527
|
|
|
|
|
|
|
else { |
|
8528
|
1582
|
|
|
|
|
2358
|
$package = $current_package; |
|
8529
|
|
|
|
|
|
|
|
|
8530
|
|
|
|
|
|
|
# patched for c043, part 1: keyword does not follow '->' |
|
8531
|
1582
|
50
|
66
|
|
|
4708
|
if ( $is_keyword{$tok} && $last_nonblank_type ne '->' ) { |
|
8532
|
0
|
|
|
|
|
0
|
$type = 'k'; |
|
8533
|
|
|
|
|
|
|
} |
|
8534
|
|
|
|
|
|
|
} |
|
8535
|
|
|
|
|
|
|
|
|
8536
|
|
|
|
|
|
|
# if it is a bareword.. patched for c043, part 2: not following '->' |
|
8537
|
1862
|
100
|
66
|
|
|
6342
|
if ( $type eq 'w' && $last_nonblank_type ne '->' ) { |
|
8538
|
|
|
|
|
|
|
|
|
8539
|
|
|
|
|
|
|
# check for v-string with leading 'v' type character |
|
8540
|
|
|
|
|
|
|
# (This seems to have precedence over filehandle, type 'Y') |
|
8541
|
1076
|
100
|
100
|
|
|
14320
|
if ( substr( $tok, 0, 1 ) eq 'v' && $tok =~ /^v\d[_\d]*$/ ) { |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
8542
|
|
|
|
|
|
|
|
|
8543
|
|
|
|
|
|
|
# we only have the first part - something like 'v101' - |
|
8544
|
|
|
|
|
|
|
# look for more |
|
8545
|
2
|
50
|
|
|
|
10
|
if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { |
|
8546
|
2
|
|
|
|
|
4
|
$pos = pos($input_line); |
|
8547
|
2
|
|
|
|
|
6
|
$numc = $pos - $pos_beg; |
|
8548
|
2
|
|
|
|
|
5
|
$tok = substr( $input_line, $pos_beg, $numc ); |
|
8549
|
|
|
|
|
|
|
} |
|
8550
|
2
|
|
|
|
|
6
|
$type = 'v'; |
|
8551
|
2
|
|
|
|
|
14
|
$self->report_v_string($tok); |
|
8552
|
|
|
|
|
|
|
} |
|
8553
|
|
|
|
|
|
|
|
|
8554
|
|
|
|
|
|
|
# bareword after sort has implied empty prototype; for example: |
|
8555
|
|
|
|
|
|
|
# @sorted = sort numerically ( 53, 29, 11, 32, 7 ); |
|
8556
|
|
|
|
|
|
|
# This has priority over whatever the user has specified. |
|
8557
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq 'sort' |
|
8558
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'k' ) |
|
8559
|
|
|
|
|
|
|
{ |
|
8560
|
1
|
|
|
|
|
2
|
$type = 'Z'; |
|
8561
|
|
|
|
|
|
|
} |
|
8562
|
|
|
|
|
|
|
|
|
8563
|
|
|
|
|
|
|
# issue c382: this elsif statement moved from above because |
|
8564
|
|
|
|
|
|
|
# previous check for type 'Z' after sort has priority. |
|
8565
|
|
|
|
|
|
|
elsif ( $ris_constant->{$package}->{$sub_name} ) { |
|
8566
|
12
|
|
|
|
|
25
|
$type = 'C'; |
|
8567
|
|
|
|
|
|
|
} |
|
8568
|
|
|
|
|
|
|
|
|
8569
|
|
|
|
|
|
|
# Note: strangely, perl does not seem to really let you create |
|
8570
|
|
|
|
|
|
|
# functions which act like eval and do, in the sense that eval |
|
8571
|
|
|
|
|
|
|
# and do may have operators following the final }, but any operators |
|
8572
|
|
|
|
|
|
|
# that you create with prototype (&) apparently do not allow |
|
8573
|
|
|
|
|
|
|
# trailing operators, only terms. This seems strange. |
|
8574
|
|
|
|
|
|
|
# If this ever changes, here is the update |
|
8575
|
|
|
|
|
|
|
# to make perltidy behave accordingly: |
|
8576
|
|
|
|
|
|
|
|
|
8577
|
|
|
|
|
|
|
# elsif ( $ris_block_function->{$package}{$tok} ) { |
|
8578
|
|
|
|
|
|
|
# $tok='eval'; # patch to do braces like eval - doesn't work |
|
8579
|
|
|
|
|
|
|
# $type = 'k'; |
|
8580
|
|
|
|
|
|
|
#} |
|
8581
|
|
|
|
|
|
|
# TODO: This could become a separate type to allow for different |
|
8582
|
|
|
|
|
|
|
# future behavior: |
|
8583
|
|
|
|
|
|
|
elsif ( $ris_block_function->{$package}->{$sub_name} ) { |
|
8584
|
0
|
|
|
|
|
0
|
$type = 'G'; |
|
8585
|
|
|
|
|
|
|
} |
|
8586
|
|
|
|
|
|
|
elsif ( $ris_block_list_function->{$package}->{$sub_name} ) { |
|
8587
|
0
|
|
|
|
|
0
|
$type = 'G'; |
|
8588
|
|
|
|
|
|
|
} |
|
8589
|
|
|
|
|
|
|
elsif ( $ris_user_function->{$package}->{$sub_name} ) { |
|
8590
|
6
|
|
|
|
|
16
|
$type = 'U'; |
|
8591
|
6
|
|
|
|
|
17
|
$prototype = $ruser_function_prototype->{$package}->{$sub_name}; |
|
8592
|
|
|
|
|
|
|
} |
|
8593
|
|
|
|
|
|
|
|
|
8594
|
|
|
|
|
|
|
# check for indirect object |
|
8595
|
|
|
|
|
|
|
elsif ( |
|
8596
|
|
|
|
|
|
|
|
|
8597
|
|
|
|
|
|
|
# added 2001-03-27: must not be followed immediately by '(' |
|
8598
|
|
|
|
|
|
|
# see fhandle.t |
|
8599
|
|
|
|
|
|
|
( $input_line !~ m/\G\(/gc ) |
|
8600
|
|
|
|
|
|
|
|
|
8601
|
|
|
|
|
|
|
# and |
|
8602
|
|
|
|
|
|
|
&& ( |
|
8603
|
|
|
|
|
|
|
|
|
8604
|
|
|
|
|
|
|
# preceded by keyword like 'print', 'printf' and friends |
|
8605
|
|
|
|
|
|
|
$is_indirect_object_taker{$last_nonblank_token} |
|
8606
|
|
|
|
|
|
|
|
|
8607
|
|
|
|
|
|
|
# or preceded by something like 'print(' or 'printf(' |
|
8608
|
|
|
|
|
|
|
|| ( |
|
8609
|
|
|
|
|
|
|
( $last_nonblank_token eq '(' ) |
|
8610
|
|
|
|
|
|
|
&& $is_indirect_object_taker{ |
|
8611
|
|
|
|
|
|
|
$rparen_type->[$paren_depth] |
|
8612
|
|
|
|
|
|
|
} |
|
8613
|
|
|
|
|
|
|
|
|
8614
|
|
|
|
|
|
|
) |
|
8615
|
|
|
|
|
|
|
) |
|
8616
|
|
|
|
|
|
|
) |
|
8617
|
|
|
|
|
|
|
{ |
|
8618
|
|
|
|
|
|
|
|
|
8619
|
|
|
|
|
|
|
# may not be indirect object unless followed by a space; |
|
8620
|
|
|
|
|
|
|
# updated 2021-01-16 to consider newline to be a space. |
|
8621
|
|
|
|
|
|
|
# updated for case b990 to look for either ';' or space |
|
8622
|
4
|
50
|
33
|
|
|
39
|
if ( pos($input_line) == length($input_line) |
|
8623
|
|
|
|
|
|
|
|| $input_line =~ m/\G[;\s]/gc ) |
|
8624
|
|
|
|
|
|
|
{ |
|
8625
|
4
|
|
|
|
|
8
|
$type = 'Y'; |
|
8626
|
|
|
|
|
|
|
|
|
8627
|
|
|
|
|
|
|
# Abandon Hope ... |
|
8628
|
|
|
|
|
|
|
# Perl's indirect object notation is a very bad |
|
8629
|
|
|
|
|
|
|
# thing and can cause subtle bugs, especially for |
|
8630
|
|
|
|
|
|
|
# beginning programmers. And I haven't even been |
|
8631
|
|
|
|
|
|
|
# able to figure out a sane warning scheme which |
|
8632
|
|
|
|
|
|
|
# doesn't get in the way of good scripts. |
|
8633
|
|
|
|
|
|
|
|
|
8634
|
|
|
|
|
|
|
# Complain if a filehandle has any lower case |
|
8635
|
|
|
|
|
|
|
# letters. This is suggested good practice. |
|
8636
|
|
|
|
|
|
|
# Use 'sub_name' because something like |
|
8637
|
|
|
|
|
|
|
# main::MYHANDLE is ok for filehandle |
|
8638
|
4
|
100
|
|
|
|
16
|
if ( $sub_name =~ /[a-z]/ ) { |
|
8639
|
|
|
|
|
|
|
|
|
8640
|
|
|
|
|
|
|
# could be bug caused by older perltidy if |
|
8641
|
|
|
|
|
|
|
# followed by '(' |
|
8642
|
1
|
50
|
|
|
|
20
|
if ( $input_line =~ m/\G\s*\(/gc ) { |
|
8643
|
1
|
|
|
|
|
6
|
$self->complain( |
|
8644
|
|
|
|
|
|
|
"Caution: unknown word '$tok' in indirect object slot\n" |
|
8645
|
|
|
|
|
|
|
); |
|
8646
|
|
|
|
|
|
|
} |
|
8647
|
|
|
|
|
|
|
} |
|
8648
|
|
|
|
|
|
|
} |
|
8649
|
|
|
|
|
|
|
|
|
8650
|
|
|
|
|
|
|
# bareword not followed by a space -- may not be filehandle |
|
8651
|
|
|
|
|
|
|
# (may be function call defined in a 'use' statement) |
|
8652
|
|
|
|
|
|
|
else { |
|
8653
|
0
|
|
|
|
|
0
|
$type = 'Z'; |
|
8654
|
|
|
|
|
|
|
} |
|
8655
|
|
|
|
|
|
|
} |
|
8656
|
|
|
|
|
|
|
|
|
8657
|
|
|
|
|
|
|
# none of the above special types |
|
8658
|
|
|
|
|
|
|
else { |
|
8659
|
|
|
|
|
|
|
} |
|
8660
|
|
|
|
|
|
|
} |
|
8661
|
|
|
|
|
|
|
|
|
8662
|
|
|
|
|
|
|
# Now we must convert back from character position |
|
8663
|
|
|
|
|
|
|
# to pre_token index. |
|
8664
|
|
|
|
|
|
|
# I don't think an error flag can occur here ..but who knows |
|
8665
|
1862
|
|
|
|
|
2537
|
my $error; |
|
8666
|
1862
|
|
|
|
|
4679
|
( $i, $error ) = |
|
8667
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
|
8668
|
1862
|
50
|
|
|
|
4058
|
if ($error) { |
|
8669
|
0
|
|
|
|
|
0
|
$self->warning( |
|
8670
|
|
|
|
|
|
|
"scan_bare_identifier: Possibly invalid tokenization\n"); |
|
8671
|
|
|
|
|
|
|
} |
|
8672
|
|
|
|
|
|
|
} |
|
8673
|
|
|
|
|
|
|
|
|
8674
|
|
|
|
|
|
|
# no match but line not blank - could be syntax error |
|
8675
|
|
|
|
|
|
|
# perl will take '::' alone without complaint |
|
8676
|
|
|
|
|
|
|
else { |
|
8677
|
0
|
|
|
|
|
0
|
$type = 'w'; |
|
8678
|
|
|
|
|
|
|
|
|
8679
|
|
|
|
|
|
|
# change this warning to log message if it becomes annoying |
|
8680
|
0
|
|
|
|
|
0
|
$self->warning("didn't find identifier after leading ::\n"); |
|
8681
|
|
|
|
|
|
|
} |
|
8682
|
1862
|
|
|
|
|
7171
|
return ( $i, $tok, $type, $prototype ); |
|
8683
|
|
|
|
|
|
|
} ## end sub scan_bare_identifier_do |
|
8684
|
|
|
|
|
|
|
|
|
8685
|
|
|
|
|
|
|
sub scan_id_do { |
|
8686
|
|
|
|
|
|
|
|
|
8687
|
|
|
|
|
|
|
my ( |
|
8688
|
|
|
|
|
|
|
|
|
8689
|
405
|
|
|
405
|
0
|
1245
|
$self, |
|
8690
|
|
|
|
|
|
|
|
|
8691
|
|
|
|
|
|
|
$input_line, |
|
8692
|
|
|
|
|
|
|
$i, |
|
8693
|
|
|
|
|
|
|
$tok, |
|
8694
|
|
|
|
|
|
|
$rtokens, |
|
8695
|
|
|
|
|
|
|
$rtoken_map, |
|
8696
|
|
|
|
|
|
|
$id_scan_state, |
|
8697
|
|
|
|
|
|
|
$max_token_index, |
|
8698
|
|
|
|
|
|
|
|
|
8699
|
|
|
|
|
|
|
) = @_; |
|
8700
|
|
|
|
|
|
|
|
|
8701
|
|
|
|
|
|
|
# Scan identifier following a type token. |
|
8702
|
|
|
|
|
|
|
# Given: |
|
8703
|
|
|
|
|
|
|
# current scan state variables |
|
8704
|
|
|
|
|
|
|
|
|
8705
|
|
|
|
|
|
|
# This is the new scanner and may eventually replace scan_identifier. |
|
8706
|
|
|
|
|
|
|
# Only type 'sub' and 'package' are implemented. |
|
8707
|
|
|
|
|
|
|
# Token types $ * % @ & -> are not yet implemented. |
|
8708
|
|
|
|
|
|
|
# |
|
8709
|
|
|
|
|
|
|
# The type of call depends on $id_scan_state: $id_scan_state = '' |
|
8710
|
|
|
|
|
|
|
# for starting call, in which case $tok must be the token defining |
|
8711
|
|
|
|
|
|
|
# the type. |
|
8712
|
|
|
|
|
|
|
# |
|
8713
|
|
|
|
|
|
|
# If the type token is the last nonblank token on the line, a value |
|
8714
|
|
|
|
|
|
|
# of $id_scan_state = $tok is returned, indicating that further |
|
8715
|
|
|
|
|
|
|
# calls must be made to get the identifier. If the type token is |
|
8716
|
|
|
|
|
|
|
# not the last nonblank token on the line, the identifier is |
|
8717
|
|
|
|
|
|
|
# scanned and handled and a value of '' is returned. |
|
8718
|
|
|
|
|
|
|
|
|
8719
|
44
|
|
|
44
|
|
380
|
use constant DEBUG_NSCAN => 0; |
|
|
44
|
|
|
|
|
99
|
|
|
|
44
|
|
|
|
|
57837
|
|
|
8720
|
405
|
|
|
|
|
658
|
my $type = EMPTY_STRING; |
|
8721
|
405
|
|
|
|
|
560
|
my $i_beg; |
|
8722
|
|
|
|
|
|
|
|
|
8723
|
|
|
|
|
|
|
#print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; |
|
8724
|
|
|
|
|
|
|
#my ($a,$b,$c) = caller; |
|
8725
|
|
|
|
|
|
|
#print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; |
|
8726
|
|
|
|
|
|
|
|
|
8727
|
|
|
|
|
|
|
# on re-entry, start scanning at first token on the line |
|
8728
|
405
|
100
|
|
|
|
801
|
if ($id_scan_state) { |
|
8729
|
10
|
|
|
|
|
19
|
$i_beg = $i; |
|
8730
|
10
|
|
|
|
|
19
|
$type = EMPTY_STRING; |
|
8731
|
|
|
|
|
|
|
} |
|
8732
|
|
|
|
|
|
|
|
|
8733
|
|
|
|
|
|
|
# on initial entry, start scanning just after type token |
|
8734
|
|
|
|
|
|
|
else { |
|
8735
|
395
|
|
|
|
|
588
|
$i_beg = $i + 1; |
|
8736
|
395
|
|
|
|
|
528
|
$id_scan_state = $tok; |
|
8737
|
395
|
|
|
|
|
668
|
$type = 't'; |
|
8738
|
|
|
|
|
|
|
} |
|
8739
|
|
|
|
|
|
|
|
|
8740
|
|
|
|
|
|
|
# find $i_beg = index of next nonblank token, |
|
8741
|
|
|
|
|
|
|
# and handle empty lines |
|
8742
|
405
|
|
|
|
|
600
|
my $blank_line = 0; |
|
8743
|
405
|
|
|
|
|
586
|
my $is_lexical_method = 0; |
|
8744
|
405
|
|
|
|
|
741
|
my $next_nonblank_token = $rtokens->[$i_beg]; |
|
8745
|
405
|
100
|
|
|
|
937
|
if ( $i_beg > $max_token_index ) { |
|
8746
|
2
|
|
|
|
|
4
|
$blank_line = 1; |
|
8747
|
|
|
|
|
|
|
} |
|
8748
|
|
|
|
|
|
|
else { |
|
8749
|
|
|
|
|
|
|
|
|
8750
|
|
|
|
|
|
|
# only a '#' immediately after a '$' is not a comment |
|
8751
|
403
|
50
|
|
|
|
1024
|
if ( $next_nonblank_token eq '#' ) { |
|
8752
|
0
|
0
|
|
|
|
0
|
if ( $tok ne '$' ) { |
|
8753
|
0
|
|
|
|
|
0
|
$blank_line = 1; |
|
8754
|
|
|
|
|
|
|
} |
|
8755
|
|
|
|
|
|
|
} |
|
8756
|
|
|
|
|
|
|
|
|
8757
|
403
|
100
|
|
|
|
1576
|
if ( $next_nonblank_token =~ /^\s/ ) { |
|
8758
|
383
|
|
|
|
|
1136
|
( $next_nonblank_token, $i_beg ) = |
|
8759
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i_beg, $rtokens, |
|
8760
|
|
|
|
|
|
|
$max_token_index ); |
|
8761
|
383
|
100
|
|
|
|
1897
|
if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { |
|
8762
|
4
|
|
|
|
|
13
|
$blank_line = 1; |
|
8763
|
|
|
|
|
|
|
} |
|
8764
|
|
|
|
|
|
|
} |
|
8765
|
|
|
|
|
|
|
|
|
8766
|
|
|
|
|
|
|
# Patch for Object::Pad lexical method like 'method $var {': |
|
8767
|
|
|
|
|
|
|
# Skip past a '$' |
|
8768
|
403
|
100
|
100
|
|
|
1991
|
if ( !$blank_line |
|
|
|
|
66
|
|
|
|
|
|
8769
|
|
|
|
|
|
|
&& $next_nonblank_token eq '$' |
|
8770
|
|
|
|
|
|
|
&& $id_scan_state eq 'method' ) |
|
8771
|
|
|
|
|
|
|
{ |
|
8772
|
2
|
|
|
|
|
6
|
( $next_nonblank_token, $i_beg ) = |
|
8773
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i_beg, $rtokens, |
|
8774
|
|
|
|
|
|
|
$max_token_index ); |
|
8775
|
2
|
|
|
|
|
4
|
$is_lexical_method = 1; |
|
8776
|
|
|
|
|
|
|
} |
|
8777
|
|
|
|
|
|
|
} |
|
8778
|
|
|
|
|
|
|
|
|
8779
|
|
|
|
|
|
|
# handle non-blank line; identifier, if any, must follow |
|
8780
|
405
|
100
|
|
|
|
1028
|
if ( !$blank_line ) { |
|
8781
|
|
|
|
|
|
|
|
|
8782
|
399
|
100
|
|
|
|
970
|
if ( $is_sub{$id_scan_state} ) { |
|
|
|
50
|
|
|
|
|
|
|
8783
|
349
|
|
|
|
|
3770
|
( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub( |
|
8784
|
|
|
|
|
|
|
{ |
|
8785
|
|
|
|
|
|
|
input_line => $input_line, |
|
8786
|
|
|
|
|
|
|
i => $i, |
|
8787
|
|
|
|
|
|
|
i_beg => $i_beg, |
|
8788
|
|
|
|
|
|
|
tok => $tok, |
|
8789
|
|
|
|
|
|
|
type => $type, |
|
8790
|
|
|
|
|
|
|
rtokens => $rtokens, |
|
8791
|
|
|
|
|
|
|
rtoken_map => $rtoken_map, |
|
8792
|
|
|
|
|
|
|
id_scan_state => $id_scan_state, |
|
8793
|
|
|
|
|
|
|
max_token_index => $max_token_index, |
|
8794
|
|
|
|
|
|
|
is_lexical_method => $is_lexical_method, |
|
8795
|
|
|
|
|
|
|
} |
|
8796
|
|
|
|
|
|
|
); |
|
8797
|
|
|
|
|
|
|
} |
|
8798
|
|
|
|
|
|
|
|
|
8799
|
|
|
|
|
|
|
elsif ( $is_package{$id_scan_state} ) { |
|
8800
|
50
|
|
|
|
|
447
|
( $i, $tok, $type ) = $self->do_scan_package( |
|
8801
|
|
|
|
|
|
|
{ |
|
8802
|
|
|
|
|
|
|
input_line => $input_line, |
|
8803
|
|
|
|
|
|
|
i => $i, |
|
8804
|
|
|
|
|
|
|
i_beg => $i_beg, |
|
8805
|
|
|
|
|
|
|
tok => $tok, |
|
8806
|
|
|
|
|
|
|
type => $type, |
|
8807
|
|
|
|
|
|
|
rtokens => $rtokens, |
|
8808
|
|
|
|
|
|
|
rtoken_map => $rtoken_map, |
|
8809
|
|
|
|
|
|
|
max_token_index => $max_token_index, |
|
8810
|
|
|
|
|
|
|
} |
|
8811
|
|
|
|
|
|
|
); |
|
8812
|
50
|
|
|
|
|
193
|
$id_scan_state = EMPTY_STRING; |
|
8813
|
|
|
|
|
|
|
} |
|
8814
|
|
|
|
|
|
|
|
|
8815
|
|
|
|
|
|
|
else { |
|
8816
|
0
|
|
|
|
|
0
|
$self->warning("invalid token in scan_id: $tok\n"); |
|
8817
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
|
8818
|
|
|
|
|
|
|
} |
|
8819
|
|
|
|
|
|
|
} |
|
8820
|
|
|
|
|
|
|
|
|
8821
|
405
|
50
|
33
|
|
|
2111
|
if ( $id_scan_state && ( !defined($type) || !$type ) ) { |
|
|
|
|
66
|
|
|
|
|
|
8822
|
|
|
|
|
|
|
|
|
8823
|
|
|
|
|
|
|
# shouldn't happen: |
|
8824
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
|
8825
|
|
|
|
|
|
|
Fault(<<EOM); |
|
8826
|
|
|
|
|
|
|
Program bug in scan_id: undefined type but scan_state=$id_scan_state |
|
8827
|
|
|
|
|
|
|
EOM |
|
8828
|
|
|
|
|
|
|
} |
|
8829
|
|
|
|
|
|
|
$self->warning( |
|
8830
|
0
|
|
|
|
|
0
|
"Possible program bug in sub scan_id: undefined type but scan_state=$id_scan_state\n" |
|
8831
|
|
|
|
|
|
|
); |
|
8832
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
|
8833
|
|
|
|
|
|
|
} |
|
8834
|
|
|
|
|
|
|
|
|
8835
|
405
|
|
|
|
|
530
|
DEBUG_NSCAN && do { |
|
8836
|
|
|
|
|
|
|
print {*STDOUT} |
|
8837
|
|
|
|
|
|
|
"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; |
|
8838
|
|
|
|
|
|
|
}; |
|
8839
|
405
|
|
|
|
|
1513
|
return ( $i, $tok, $type, $id_scan_state ); |
|
8840
|
|
|
|
|
|
|
} ## end sub scan_id_do |
|
8841
|
|
|
|
|
|
|
|
|
8842
|
|
|
|
|
|
|
sub check_prototype { |
|
8843
|
173
|
|
|
173
|
0
|
386
|
my ( $proto, $package, $subname ) = @_; |
|
8844
|
|
|
|
|
|
|
|
|
8845
|
|
|
|
|
|
|
# Classify a sub based on its prototype |
|
8846
|
173
|
50
|
|
|
|
402
|
return if ( !defined($package) ); |
|
8847
|
173
|
50
|
|
|
|
424
|
return if ( !defined($subname) ); |
|
8848
|
173
|
100
|
|
|
|
533
|
if ( defined($proto) ) { |
|
8849
|
34
|
|
|
|
|
127
|
$proto =~ s/^\s*\(\s*//; |
|
8850
|
34
|
|
|
|
|
111
|
$proto =~ s/\s*\)$//; |
|
8851
|
34
|
100
|
|
|
|
90
|
if ($proto) { |
|
8852
|
5
|
|
|
|
|
18
|
$ris_user_function->{$package}->{$subname} = 1; |
|
8853
|
5
|
|
|
|
|
17
|
$ruser_function_prototype->{$package}->{$subname} = "($proto)"; |
|
8854
|
|
|
|
|
|
|
|
|
8855
|
|
|
|
|
|
|
# prototypes containing '&' must be treated specially.. |
|
8856
|
5
|
100
|
|
|
|
18
|
if ( $proto =~ /\&/ ) { |
|
8857
|
|
|
|
|
|
|
|
|
8858
|
|
|
|
|
|
|
# right curly braces of prototypes ending in |
|
8859
|
|
|
|
|
|
|
# '&' may be followed by an operator |
|
8860
|
1
|
50
|
|
|
|
2
|
if ( $proto =~ /\&$/ ) { |
|
8861
|
0
|
|
|
|
|
0
|
$ris_block_function->{$package}->{$subname} = 1; |
|
8862
|
|
|
|
|
|
|
} |
|
8863
|
|
|
|
|
|
|
|
|
8864
|
|
|
|
|
|
|
# right curly braces of prototypes NOT ending in |
|
8865
|
|
|
|
|
|
|
# '&' may NOT be followed by an operator |
|
8866
|
|
|
|
|
|
|
else { |
|
8867
|
1
|
|
|
|
|
3
|
$ris_block_list_function->{$package}->{$subname} = 1; |
|
8868
|
|
|
|
|
|
|
} |
|
8869
|
|
|
|
|
|
|
} |
|
8870
|
|
|
|
|
|
|
} |
|
8871
|
|
|
|
|
|
|
else { |
|
8872
|
29
|
|
|
|
|
69
|
$ris_constant->{$package}->{$subname} = 1; |
|
8873
|
|
|
|
|
|
|
} |
|
8874
|
|
|
|
|
|
|
} |
|
8875
|
|
|
|
|
|
|
else { |
|
8876
|
139
|
|
|
|
|
369
|
$ris_user_function->{$package}->{$subname} = 1; |
|
8877
|
|
|
|
|
|
|
} |
|
8878
|
173
|
|
|
|
|
334
|
return; |
|
8879
|
|
|
|
|
|
|
} ## end sub check_prototype |
|
8880
|
|
|
|
|
|
|
|
|
8881
|
|
|
|
|
|
|
sub do_scan_package { |
|
8882
|
|
|
|
|
|
|
|
|
8883
|
50
|
|
|
50
|
0
|
110
|
my ( $self, $rcall_hash ) = @_; |
|
8884
|
|
|
|
|
|
|
|
|
8885
|
|
|
|
|
|
|
# Parse a package name. |
|
8886
|
|
|
|
|
|
|
|
|
8887
|
50
|
|
|
|
|
100
|
my $input_line = $rcall_hash->{input_line}; |
|
8888
|
50
|
|
|
|
|
82
|
my $i = $rcall_hash->{i}; |
|
8889
|
50
|
|
|
|
|
75
|
my $i_beg = $rcall_hash->{i_beg}; |
|
8890
|
50
|
|
|
|
|
90
|
my $tok = $rcall_hash->{tok}; |
|
8891
|
50
|
|
|
|
|
79
|
my $type = $rcall_hash->{type}; |
|
8892
|
50
|
|
|
|
|
70
|
my $rtokens = $rcall_hash->{rtokens}; |
|
8893
|
50
|
|
|
|
|
84
|
my $rtoken_map = $rcall_hash->{rtoken_map}; |
|
8894
|
50
|
|
|
|
|
62
|
my $max_token_index = $rcall_hash->{max_token_index}; |
|
8895
|
|
|
|
|
|
|
|
|
8896
|
|
|
|
|
|
|
# This is called with $i_beg equal to the index of the first nonblank |
|
8897
|
|
|
|
|
|
|
# token following a 'package' token. |
|
8898
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $current_package, |
|
8899
|
|
|
|
|
|
|
|
|
8900
|
|
|
|
|
|
|
# package NAMESPACE |
|
8901
|
|
|
|
|
|
|
# package NAMESPACE VERSION |
|
8902
|
|
|
|
|
|
|
# package NAMESPACE BLOCK |
|
8903
|
|
|
|
|
|
|
# package NAMESPACE VERSION BLOCK |
|
8904
|
|
|
|
|
|
|
# |
|
8905
|
|
|
|
|
|
|
# If VERSION is provided, package sets the $VERSION variable in the given |
|
8906
|
|
|
|
|
|
|
# namespace to a version object with the VERSION provided. VERSION must be |
|
8907
|
|
|
|
|
|
|
# a "strict" style version number as defined by the version module: a |
|
8908
|
|
|
|
|
|
|
# positive decimal number (integer or decimal-fraction) without |
|
8909
|
|
|
|
|
|
|
# exponentiation or else a dotted-decimal v-string with a leading 'v' |
|
8910
|
|
|
|
|
|
|
# character and at least three components. |
|
8911
|
|
|
|
|
|
|
# reference http://perldoc.perl.org/functions/package.html |
|
8912
|
|
|
|
|
|
|
|
|
8913
|
50
|
|
|
|
|
80
|
my $package = undef; |
|
8914
|
50
|
|
|
|
|
92
|
my $pos_beg = $rtoken_map->[$i_beg]; |
|
8915
|
50
|
|
|
|
|
163
|
pos($input_line) = $pos_beg; |
|
8916
|
|
|
|
|
|
|
|
|
8917
|
|
|
|
|
|
|
# handle non-blank line; package name, if any, must follow |
|
8918
|
50
|
50
|
|
|
|
278
|
if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w*)/gc ) { |
|
8919
|
50
|
|
|
|
|
110
|
$package = $1; |
|
8920
|
50
|
50
|
33
|
|
|
243
|
$package = ( defined($1) && $1 ) ? $1 : 'main'; |
|
8921
|
50
|
|
|
|
|
114
|
$package =~ s/\'/::/g; |
|
8922
|
50
|
50
|
|
|
|
134
|
if ( $package =~ /^\:/ ) { $package = 'main' . $package } |
|
|
0
|
|
|
|
|
0
|
|
|
8923
|
50
|
|
|
|
|
86
|
$package =~ s/::$//; |
|
8924
|
50
|
|
|
|
|
78
|
my $pos = pos($input_line); |
|
8925
|
50
|
|
|
|
|
83
|
my $numc = $pos - $pos_beg; |
|
8926
|
50
|
|
|
|
|
122
|
$tok = 'package ' . substr( $input_line, $pos_beg, $numc ); |
|
8927
|
50
|
|
|
|
|
70
|
$type = 'P'; # Fix for c250, previously 'i' |
|
8928
|
|
|
|
|
|
|
|
|
8929
|
|
|
|
|
|
|
# Now we must convert back from character position |
|
8930
|
|
|
|
|
|
|
# to pre_token index. |
|
8931
|
|
|
|
|
|
|
# I don't think an error flag can occur here ..but ? |
|
8932
|
50
|
|
|
|
|
67
|
my $error; |
|
8933
|
50
|
|
|
|
|
182
|
( $i, $error ) = |
|
8934
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
|
8935
|
50
|
50
|
|
|
|
175
|
if ($error) { $self->warning("Possibly invalid package\n") } |
|
|
0
|
|
|
|
|
0
|
|
|
8936
|
50
|
|
|
|
|
79
|
$current_package = $package; |
|
8937
|
|
|
|
|
|
|
|
|
8938
|
|
|
|
|
|
|
# we should now have package NAMESPACE |
|
8939
|
|
|
|
|
|
|
# now expecting VERSION, BLOCK, or ; to follow ... |
|
8940
|
|
|
|
|
|
|
# package NAMESPACE VERSION |
|
8941
|
|
|
|
|
|
|
# package NAMESPACE BLOCK |
|
8942
|
|
|
|
|
|
|
# package NAMESPACE VERSION BLOCK |
|
8943
|
50
|
|
|
|
|
167
|
my ( $next_nonblank_token, $i_next_uu ) = |
|
8944
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
|
8945
|
|
|
|
|
|
|
|
|
8946
|
|
|
|
|
|
|
# check that something recognizable follows, but do not parse. |
|
8947
|
|
|
|
|
|
|
# A VERSION number will be parsed later as a number or v-string in the |
|
8948
|
|
|
|
|
|
|
# normal way. What is important is to set the statement type if |
|
8949
|
|
|
|
|
|
|
# everything looks okay so that the operator_expected() routine |
|
8950
|
|
|
|
|
|
|
# knows that the number is in a package statement. |
|
8951
|
|
|
|
|
|
|
# Examples of valid primitive tokens that might follow are: |
|
8952
|
|
|
|
|
|
|
# 1235 . ; { } v3 v |
|
8953
|
|
|
|
|
|
|
# FIX: added a '#' since a side comment may also follow |
|
8954
|
|
|
|
|
|
|
# Added ':' for class attributes (for --use-feature=class, rt145706) |
|
8955
|
50
|
50
|
|
|
|
244
|
if ( $next_nonblank_token =~ /^([v\.\d;\{\}\#\:])|v\d|\d+$/ ) { |
|
8956
|
50
|
|
|
|
|
118
|
$statement_type = $tok; |
|
8957
|
|
|
|
|
|
|
} |
|
8958
|
|
|
|
|
|
|
else { |
|
8959
|
0
|
|
|
|
|
0
|
$self->warning( |
|
8960
|
|
|
|
|
|
|
"Unexpected '$next_nonblank_token' after package name '$tok'\n" |
|
8961
|
|
|
|
|
|
|
); |
|
8962
|
|
|
|
|
|
|
} |
|
8963
|
|
|
|
|
|
|
} |
|
8964
|
|
|
|
|
|
|
|
|
8965
|
|
|
|
|
|
|
# no match but line not blank -- |
|
8966
|
|
|
|
|
|
|
# could be a label with name package, like package: , for example. |
|
8967
|
|
|
|
|
|
|
else { |
|
8968
|
0
|
|
|
|
|
0
|
$type = 'k'; |
|
8969
|
|
|
|
|
|
|
} |
|
8970
|
|
|
|
|
|
|
|
|
8971
|
50
|
|
|
|
|
188
|
return ( $i, $tok, $type ); |
|
8972
|
|
|
|
|
|
|
} ## end sub do_scan_package |
|
8973
|
|
|
|
|
|
|
|
|
8974
|
|
|
|
|
|
|
{ ## begin closure for sub scan_complex_identifier |
|
8975
|
|
|
|
|
|
|
|
|
8976
|
44
|
|
|
44
|
|
358
|
use constant DEBUG_SCAN_ID => 0; |
|
|
44
|
|
|
|
|
80
|
|
|
|
44
|
|
|
|
|
5339
|
|
|
8977
|
|
|
|
|
|
|
|
|
8978
|
|
|
|
|
|
|
# Constant hash: |
|
8979
|
|
|
|
|
|
|
my %is_special_variable_char; |
|
8980
|
|
|
|
|
|
|
|
|
8981
|
|
|
|
|
|
|
BEGIN { |
|
8982
|
|
|
|
|
|
|
|
|
8983
|
|
|
|
|
|
|
# These are the only characters which can (currently) form special |
|
8984
|
|
|
|
|
|
|
# variables, like $^W: (issue c066). |
|
8985
|
44
|
|
|
44
|
|
297
|
my @q = qw{ |
|
8986
|
|
|
|
|
|
|
? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ ] ^ _ |
|
8987
|
|
|
|
|
|
|
}; |
|
8988
|
44
|
|
|
|
|
110
|
push @q, BACKSLASH; |
|
8989
|
44
|
|
|
|
|
149469
|
$is_special_variable_char{$_} = 1 for @q; |
|
8990
|
|
|
|
|
|
|
} ## end BEGIN |
|
8991
|
|
|
|
|
|
|
|
|
8992
|
|
|
|
|
|
|
# These are the possible states for this scanner: |
|
8993
|
|
|
|
|
|
|
my $scan_state_SIGIL = '$'; |
|
8994
|
|
|
|
|
|
|
my $scan_state_ALPHA = 'A'; |
|
8995
|
|
|
|
|
|
|
my $scan_state_COLON = ':'; |
|
8996
|
|
|
|
|
|
|
my $scan_state_LPAREN = '('; |
|
8997
|
|
|
|
|
|
|
my $scan_state_RPAREN = ')'; |
|
8998
|
|
|
|
|
|
|
my $scan_state_AMPERSAND = '&'; |
|
8999
|
|
|
|
|
|
|
my $scan_state_SPLIT = '^'; |
|
9000
|
|
|
|
|
|
|
|
|
9001
|
|
|
|
|
|
|
# Only these non-blank states may be returned to caller: |
|
9002
|
|
|
|
|
|
|
my %is_returnable_scan_state = ( |
|
9003
|
|
|
|
|
|
|
$scan_state_SIGIL => 1, |
|
9004
|
|
|
|
|
|
|
$scan_state_AMPERSAND => 1, |
|
9005
|
|
|
|
|
|
|
); |
|
9006
|
|
|
|
|
|
|
|
|
9007
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: |
|
9008
|
|
|
|
|
|
|
# $context, $last_nonblank_token, $last_nonblank_type |
|
9009
|
|
|
|
|
|
|
|
|
9010
|
|
|
|
|
|
|
#----------- |
|
9011
|
|
|
|
|
|
|
# call args: |
|
9012
|
|
|
|
|
|
|
#----------- |
|
9013
|
|
|
|
|
|
|
my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, |
|
9014
|
|
|
|
|
|
|
$expecting, $container_type ); |
|
9015
|
|
|
|
|
|
|
|
|
9016
|
|
|
|
|
|
|
#------------------------------------------- |
|
9017
|
|
|
|
|
|
|
# my variables, re-initialized on each call: |
|
9018
|
|
|
|
|
|
|
#------------------------------------------- |
|
9019
|
|
|
|
|
|
|
my $i_begin; # starting index $i |
|
9020
|
|
|
|
|
|
|
my $type; # returned identifier type |
|
9021
|
|
|
|
|
|
|
my $tok_begin; # starting token |
|
9022
|
|
|
|
|
|
|
my $tok; # returned token |
|
9023
|
|
|
|
|
|
|
my $id_scan_state_begin; # starting scan state |
|
9024
|
|
|
|
|
|
|
my $identifier_begin; # starting identifier |
|
9025
|
|
|
|
|
|
|
my $i_save; # a last good index, in case of error |
|
9026
|
|
|
|
|
|
|
my $message; # hold error message for log file |
|
9027
|
|
|
|
|
|
|
my $tok_is_blank; |
|
9028
|
|
|
|
|
|
|
my $last_tok_is_blank; |
|
9029
|
|
|
|
|
|
|
my $in_prototype_or_signature; |
|
9030
|
|
|
|
|
|
|
my $saw_alpha; |
|
9031
|
|
|
|
|
|
|
my $saw_type; |
|
9032
|
|
|
|
|
|
|
my $allow_tick; |
|
9033
|
|
|
|
|
|
|
|
|
9034
|
|
|
|
|
|
|
sub initialize_my_scan_id_vars { |
|
9035
|
|
|
|
|
|
|
|
|
9036
|
|
|
|
|
|
|
# Initialize all 'my' vars on entry |
|
9037
|
551
|
|
|
551
|
0
|
892
|
$i_begin = $i; |
|
9038
|
551
|
|
|
|
|
834
|
$type = EMPTY_STRING; |
|
9039
|
551
|
|
|
|
|
908
|
$tok_begin = $rtokens->[$i_begin]; |
|
9040
|
551
|
|
|
|
|
754
|
$tok = $tok_begin; |
|
9041
|
551
|
50
|
|
|
|
1352
|
if ( $tok_begin eq ':' ) { $tok_begin = '::' } |
|
|
0
|
|
|
|
|
0
|
|
|
9042
|
551
|
|
|
|
|
777
|
$id_scan_state_begin = $id_scan_state; |
|
9043
|
551
|
|
|
|
|
761
|
$identifier_begin = $identifier; |
|
9044
|
551
|
|
|
|
|
746
|
$i_save = undef; |
|
9045
|
|
|
|
|
|
|
|
|
9046
|
551
|
|
|
|
|
784
|
$message = EMPTY_STRING; |
|
9047
|
551
|
|
|
|
|
706
|
$tok_is_blank = undef; # a flag to speed things up |
|
9048
|
551
|
|
|
|
|
660
|
$last_tok_is_blank = undef; |
|
9049
|
|
|
|
|
|
|
|
|
9050
|
551
|
|
100
|
|
|
1570
|
$in_prototype_or_signature = |
|
9051
|
|
|
|
|
|
|
$container_type && $container_type =~ /^sub\b/; |
|
9052
|
|
|
|
|
|
|
|
|
9053
|
|
|
|
|
|
|
# these flags will be used to help figure out the type: |
|
9054
|
551
|
|
|
|
|
711
|
$saw_alpha = undef; |
|
9055
|
551
|
|
|
|
|
763
|
$saw_type = undef; |
|
9056
|
|
|
|
|
|
|
|
|
9057
|
|
|
|
|
|
|
# allow old package separator (') except in 'use' statement |
|
9058
|
551
|
|
|
|
|
890
|
$allow_tick = ( $last_nonblank_token ne 'use' ); |
|
9059
|
551
|
|
|
|
|
810
|
return; |
|
9060
|
|
|
|
|
|
|
} ## end sub initialize_my_scan_id_vars |
|
9061
|
|
|
|
|
|
|
|
|
9062
|
|
|
|
|
|
|
#---------------------------------- |
|
9063
|
|
|
|
|
|
|
# Routines for handling scan states |
|
9064
|
|
|
|
|
|
|
#---------------------------------- |
|
9065
|
|
|
|
|
|
|
sub do_id_scan_state_dollar { |
|
9066
|
|
|
|
|
|
|
|
|
9067
|
609
|
|
|
609
|
0
|
824
|
my $self = shift; |
|
9068
|
|
|
|
|
|
|
|
|
9069
|
|
|
|
|
|
|
# We saw a sigil, now looking to start a variable name |
|
9070
|
609
|
100
|
66
|
|
|
3855
|
if ( $tok eq '$' ) { |
|
|
|
100
|
33
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
9071
|
|
|
|
|
|
|
|
|
9072
|
59
|
|
|
|
|
126
|
$identifier .= $tok; |
|
9073
|
|
|
|
|
|
|
|
|
9074
|
|
|
|
|
|
|
# we've got a punctuation variable if end of line (punct.t) |
|
9075
|
59
|
50
|
|
|
|
170
|
if ( $i == $max_token_index ) { |
|
9076
|
0
|
|
|
|
|
0
|
$type = 'i'; |
|
9077
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
|
9078
|
|
|
|
|
|
|
} |
|
9079
|
|
|
|
|
|
|
} |
|
9080
|
|
|
|
|
|
|
elsif ( $tok =~ /^\w/ ) { # alphanumeric .. |
|
9081
|
310
|
|
|
|
|
432
|
$saw_alpha = 1; |
|
9082
|
310
|
|
|
|
|
451
|
$identifier .= $tok; |
|
9083
|
|
|
|
|
|
|
|
|
9084
|
|
|
|
|
|
|
# now need :: except for special digit vars like '$1' (c208) |
|
9085
|
310
|
100
|
|
|
|
890
|
$id_scan_state = $tok =~ /^\d/ ? EMPTY_STRING : $scan_state_COLON; |
|
9086
|
|
|
|
|
|
|
} |
|
9087
|
|
|
|
|
|
|
elsif ( $tok eq '::' ) { |
|
9088
|
16
|
|
|
|
|
42
|
$id_scan_state = $scan_state_ALPHA; |
|
9089
|
16
|
|
|
|
|
37
|
$identifier .= $tok; |
|
9090
|
|
|
|
|
|
|
} |
|
9091
|
|
|
|
|
|
|
|
|
9092
|
|
|
|
|
|
|
# POSTDEFREF ->@ ->% ->& ->* |
|
9093
|
|
|
|
|
|
|
elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) { |
|
9094
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9095
|
|
|
|
|
|
|
} |
|
9096
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. |
|
9097
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
|
9098
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; # now need :: |
|
9099
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9100
|
|
|
|
|
|
|
|
|
9101
|
|
|
|
|
|
|
# Perl will accept leading digits in identifiers, |
|
9102
|
|
|
|
|
|
|
# although they may not always produce useful results. |
|
9103
|
|
|
|
|
|
|
# Something like $main::0 is ok. But this also works: |
|
9104
|
|
|
|
|
|
|
# |
|
9105
|
|
|
|
|
|
|
# sub howdy::123::bubba{ print "bubba $54321!\n" } |
|
9106
|
|
|
|
|
|
|
# howdy::123::bubba(); |
|
9107
|
|
|
|
|
|
|
# |
|
9108
|
|
|
|
|
|
|
} |
|
9109
|
|
|
|
|
|
|
elsif ( $tok eq '#' ) { |
|
9110
|
|
|
|
|
|
|
|
|
9111
|
100
|
|
|
|
|
177
|
my $is_punct_var = $identifier eq '$$'; |
|
9112
|
|
|
|
|
|
|
|
|
9113
|
|
|
|
|
|
|
# side comment or identifier? |
|
9114
|
100
|
100
|
66
|
|
|
946
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
9115
|
|
|
|
|
|
|
|
|
9116
|
|
|
|
|
|
|
# A '#' starts a comment if it follows a space. For example, |
|
9117
|
|
|
|
|
|
|
# the following is equivalent to $ans=40. |
|
9118
|
|
|
|
|
|
|
# my $ # |
|
9119
|
|
|
|
|
|
|
# ans = 40; |
|
9120
|
|
|
|
|
|
|
!$last_tok_is_blank |
|
9121
|
|
|
|
|
|
|
|
|
9122
|
|
|
|
|
|
|
# a # inside a prototype or signature can only start a |
|
9123
|
|
|
|
|
|
|
# comment |
|
9124
|
|
|
|
|
|
|
&& !$in_prototype_or_signature |
|
9125
|
|
|
|
|
|
|
|
|
9126
|
|
|
|
|
|
|
# these are valid punctuation vars: *# %# @# $# |
|
9127
|
|
|
|
|
|
|
# May also be '$#array' or POSTDEFREF ->$# |
|
9128
|
|
|
|
|
|
|
&& ( $identifier =~ /^[\%\@\$\*]$/ |
|
9129
|
|
|
|
|
|
|
|| $identifier =~ /\$$/ ) |
|
9130
|
|
|
|
|
|
|
|
|
9131
|
|
|
|
|
|
|
# but a '#' after '$$' is a side comment; see c147 |
|
9132
|
|
|
|
|
|
|
&& !$is_punct_var |
|
9133
|
|
|
|
|
|
|
|
|
9134
|
|
|
|
|
|
|
) |
|
9135
|
|
|
|
|
|
|
{ |
|
9136
|
96
|
|
|
|
|
228
|
$identifier .= $tok; # keep same state, a $ could follow |
|
9137
|
|
|
|
|
|
|
} |
|
9138
|
|
|
|
|
|
|
else { |
|
9139
|
|
|
|
|
|
|
|
|
9140
|
|
|
|
|
|
|
# otherwise it is a side comment |
|
9141
|
4
|
50
|
|
|
|
16
|
if ( $identifier eq '->' ) { } |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
9142
|
0
|
|
|
|
|
0
|
elsif ($is_punct_var) { $type = 'i' } |
|
9143
|
4
|
|
|
|
|
7
|
elsif ( $id_scan_state eq $scan_state_SIGIL ) { $type = 't' } |
|
9144
|
0
|
|
|
|
|
0
|
else { $type = 'i' } |
|
9145
|
4
|
|
|
|
|
6
|
$i = $i_save; |
|
9146
|
4
|
|
|
|
|
6
|
$id_scan_state = EMPTY_STRING; |
|
9147
|
|
|
|
|
|
|
} |
|
9148
|
|
|
|
|
|
|
} |
|
9149
|
|
|
|
|
|
|
|
|
9150
|
|
|
|
|
|
|
elsif ( $tok eq '{' ) { |
|
9151
|
|
|
|
|
|
|
|
|
9152
|
|
|
|
|
|
|
# check for something like ${#} or ${?}, where ? is a special char |
|
9153
|
47
|
100
|
100
|
|
|
520
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
9154
|
|
|
|
|
|
|
( |
|
9155
|
|
|
|
|
|
|
$identifier eq '$' |
|
9156
|
|
|
|
|
|
|
|| $identifier eq '@' |
|
9157
|
|
|
|
|
|
|
|| $identifier eq '$#' |
|
9158
|
|
|
|
|
|
|
) |
|
9159
|
|
|
|
|
|
|
&& $i + 2 <= $max_token_index |
|
9160
|
|
|
|
|
|
|
&& $rtokens->[ $i + 2 ] eq '}' |
|
9161
|
|
|
|
|
|
|
&& $rtokens->[ $i + 1 ] !~ /[\s\w]/ |
|
9162
|
|
|
|
|
|
|
) |
|
9163
|
|
|
|
|
|
|
{ |
|
9164
|
1
|
|
|
|
|
3
|
my $next2 = $rtokens->[ $i + 2 ]; |
|
9165
|
1
|
|
|
|
|
2
|
my $next1 = $rtokens->[ $i + 1 ]; |
|
9166
|
1
|
|
|
|
|
4
|
$identifier .= $tok . $next1 . $next2; |
|
9167
|
1
|
|
|
|
|
1
|
$i += 2; |
|
9168
|
1
|
|
|
|
|
2
|
$id_scan_state = EMPTY_STRING; |
|
9169
|
|
|
|
|
|
|
} |
|
9170
|
|
|
|
|
|
|
else { |
|
9171
|
|
|
|
|
|
|
|
|
9172
|
|
|
|
|
|
|
# skip something like ${xxx} or ->{ |
|
9173
|
46
|
|
|
|
|
74
|
$id_scan_state = EMPTY_STRING; |
|
9174
|
|
|
|
|
|
|
|
|
9175
|
|
|
|
|
|
|
# if this is the first token of a line, any tokens for this |
|
9176
|
|
|
|
|
|
|
# identifier have already been accumulated |
|
9177
|
46
|
100
|
66
|
|
|
165
|
if ( $identifier eq '$' || $i == 0 ) { |
|
9178
|
31
|
|
|
|
|
85
|
$identifier = EMPTY_STRING; |
|
9179
|
|
|
|
|
|
|
} |
|
9180
|
46
|
|
|
|
|
77
|
$i = $i_save; |
|
9181
|
|
|
|
|
|
|
} |
|
9182
|
|
|
|
|
|
|
} |
|
9183
|
|
|
|
|
|
|
|
|
9184
|
|
|
|
|
|
|
# space ok after leading $ % * & @ |
|
9185
|
|
|
|
|
|
|
elsif ( $tok =~ /^\s*$/ ) { |
|
9186
|
|
|
|
|
|
|
|
|
9187
|
20
|
|
|
|
|
40
|
$tok_is_blank = 1; |
|
9188
|
|
|
|
|
|
|
|
|
9189
|
|
|
|
|
|
|
# note: an id with a leading '&' does not actually come this way |
|
9190
|
20
|
50
|
|
|
|
72
|
if ( $identifier =~ /^[\$\%\*\&\@]/ ) { |
|
|
|
0
|
|
|
|
|
|
|
9191
|
|
|
|
|
|
|
|
|
9192
|
20
|
100
|
|
|
|
53
|
if ( length($identifier) > 1 ) { |
|
9193
|
8
|
|
|
|
|
17
|
$id_scan_state = EMPTY_STRING; |
|
9194
|
8
|
|
|
|
|
13
|
$i = $i_save; |
|
9195
|
8
|
|
|
|
|
19
|
$type = 'i'; # probably punctuation variable |
|
9196
|
|
|
|
|
|
|
} |
|
9197
|
|
|
|
|
|
|
else { |
|
9198
|
|
|
|
|
|
|
|
|
9199
|
|
|
|
|
|
|
# fix c139: trim line-ending type 't' |
|
9200
|
12
|
100
|
|
|
|
53
|
if ( $i == $max_token_index ) { |
|
|
|
100
|
|
|
|
|
|
|
9201
|
1
|
|
|
|
|
2
|
$i = $i_save; |
|
9202
|
1
|
|
|
|
|
2
|
$type = 't'; |
|
9203
|
|
|
|
|
|
|
} |
|
9204
|
|
|
|
|
|
|
|
|
9205
|
|
|
|
|
|
|
# spaces after $'s are common, and space after @ |
|
9206
|
|
|
|
|
|
|
# is harmless, so only complain about space |
|
9207
|
|
|
|
|
|
|
# after other type characters. Space after $ and |
|
9208
|
|
|
|
|
|
|
# @ will be removed in formatting. Report space |
|
9209
|
|
|
|
|
|
|
# after % and * because they might indicate a |
|
9210
|
|
|
|
|
|
|
# parsing error. In other words '% ' might be a |
|
9211
|
|
|
|
|
|
|
# modulo operator. Delete this warning if it |
|
9212
|
|
|
|
|
|
|
# gets annoying. |
|
9213
|
|
|
|
|
|
|
elsif ( $identifier !~ /^[\@\$]$/ ) { |
|
9214
|
1
|
|
|
|
|
2
|
$message = |
|
9215
|
|
|
|
|
|
|
"Space in identifier, following $identifier\n"; |
|
9216
|
|
|
|
|
|
|
} |
|
9217
|
|
|
|
|
|
|
else { |
|
9218
|
|
|
|
|
|
|
# silently accept space after '$' and '@' sigils |
|
9219
|
|
|
|
|
|
|
} |
|
9220
|
|
|
|
|
|
|
} |
|
9221
|
|
|
|
|
|
|
} |
|
9222
|
|
|
|
|
|
|
|
|
9223
|
|
|
|
|
|
|
elsif ( $identifier eq '->' ) { |
|
9224
|
|
|
|
|
|
|
|
|
9225
|
|
|
|
|
|
|
# space after '->' is ok except at line end .. |
|
9226
|
|
|
|
|
|
|
# so trim line-ending in type '->' (fixes c139) |
|
9227
|
0
|
0
|
|
|
|
0
|
if ( $i == $max_token_index ) { |
|
9228
|
0
|
|
|
|
|
0
|
$i = $i_save; |
|
9229
|
0
|
|
|
|
|
0
|
$type = '->'; |
|
9230
|
|
|
|
|
|
|
} |
|
9231
|
|
|
|
|
|
|
} |
|
9232
|
|
|
|
|
|
|
|
|
9233
|
|
|
|
|
|
|
# stop at space after something other than -> or sigil |
|
9234
|
|
|
|
|
|
|
# Example of what can arrive here: |
|
9235
|
|
|
|
|
|
|
# eval { $MyClass->$$ }; |
|
9236
|
|
|
|
|
|
|
else { |
|
9237
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
|
9238
|
0
|
|
|
|
|
0
|
$i = $i_save; |
|
9239
|
0
|
|
|
|
|
0
|
$type = 'i'; |
|
9240
|
|
|
|
|
|
|
} |
|
9241
|
|
|
|
|
|
|
} |
|
9242
|
|
|
|
|
|
|
elsif ( $tok eq '^' ) { |
|
9243
|
|
|
|
|
|
|
|
|
9244
|
|
|
|
|
|
|
# check for some special variables like $^ $^W |
|
9245
|
11
|
50
|
|
|
|
36
|
if ( $identifier =~ /^[\$\*\@\%]$/ ) { |
|
9246
|
11
|
|
|
|
|
26
|
$identifier .= $tok; |
|
9247
|
11
|
|
|
|
|
16
|
$type = 'i'; |
|
9248
|
|
|
|
|
|
|
|
|
9249
|
|
|
|
|
|
|
# There may be one more character, not a space, after the ^ |
|
9250
|
11
|
|
|
|
|
17
|
my $next1 = $rtokens->[ $i + 1 ]; |
|
9251
|
11
|
|
|
|
|
22
|
my $chr = substr( $next1, 0, 1 ); |
|
9252
|
11
|
100
|
|
|
|
31
|
if ( $is_special_variable_char{$chr} ) { |
|
9253
|
|
|
|
|
|
|
|
|
9254
|
|
|
|
|
|
|
# It is something like $^W |
|
9255
|
|
|
|
|
|
|
# Test case (c066) : $^Oeq'linux' |
|
9256
|
9
|
|
|
|
|
13
|
$i++; |
|
9257
|
9
|
|
|
|
|
15
|
$identifier .= $next1; |
|
9258
|
|
|
|
|
|
|
|
|
9259
|
|
|
|
|
|
|
# If pretoken $next1 is more than one character long, |
|
9260
|
|
|
|
|
|
|
# set a flag indicating that it needs to be split. |
|
9261
|
9
|
100
|
|
|
|
24
|
$id_scan_state = |
|
9262
|
|
|
|
|
|
|
( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING; |
|
9263
|
|
|
|
|
|
|
} |
|
9264
|
|
|
|
|
|
|
else { |
|
9265
|
|
|
|
|
|
|
|
|
9266
|
|
|
|
|
|
|
# it is just $^ |
|
9267
|
|
|
|
|
|
|
# Simple test case (c065): '$aa=$^if($bb)'; |
|
9268
|
2
|
|
|
|
|
3
|
$id_scan_state = EMPTY_STRING; |
|
9269
|
|
|
|
|
|
|
} |
|
9270
|
|
|
|
|
|
|
} |
|
9271
|
|
|
|
|
|
|
else { |
|
9272
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
|
9273
|
0
|
|
|
|
|
0
|
$i = $i_save; |
|
9274
|
|
|
|
|
|
|
} |
|
9275
|
|
|
|
|
|
|
} |
|
9276
|
|
|
|
|
|
|
else { # something else |
|
9277
|
|
|
|
|
|
|
|
|
9278
|
46
|
100
|
66
|
|
|
376
|
if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) { |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9279
|
|
|
|
|
|
|
|
|
9280
|
|
|
|
|
|
|
# We might be in an extrusion of |
|
9281
|
|
|
|
|
|
|
# sub foo2 ( $first, $, $third ) { |
|
9282
|
|
|
|
|
|
|
# looking at a line starting with a comma, like |
|
9283
|
|
|
|
|
|
|
# $ |
|
9284
|
|
|
|
|
|
|
# , |
|
9285
|
|
|
|
|
|
|
# in this case the comma ends the signature variable |
|
9286
|
|
|
|
|
|
|
# '$' which will have been previously marked type 't' |
|
9287
|
|
|
|
|
|
|
# rather than 'i'. |
|
9288
|
3
|
100
|
|
|
|
8
|
if ( $i == $i_begin ) { |
|
9289
|
1
|
|
|
|
|
3
|
$identifier = EMPTY_STRING; |
|
9290
|
1
|
|
|
|
|
1
|
$type = EMPTY_STRING; |
|
9291
|
|
|
|
|
|
|
} |
|
9292
|
|
|
|
|
|
|
|
|
9293
|
|
|
|
|
|
|
# at a # we have to mark as type 't' because more may |
|
9294
|
|
|
|
|
|
|
# follow, otherwise, in a signature we can let '$' be an |
|
9295
|
|
|
|
|
|
|
# identifier here for better formatting. |
|
9296
|
|
|
|
|
|
|
# See 'mangle4.in' for a test case. |
|
9297
|
|
|
|
|
|
|
else { |
|
9298
|
2
|
|
|
|
|
3
|
$type = 'i'; |
|
9299
|
2
|
50
|
33
|
|
|
10
|
if ( $id_scan_state eq $scan_state_SIGIL && $tok eq '#' ) { |
|
9300
|
0
|
|
|
|
|
0
|
$type = 't'; |
|
9301
|
|
|
|
|
|
|
} |
|
9302
|
2
|
|
|
|
|
2
|
$i = $i_save; |
|
9303
|
|
|
|
|
|
|
} |
|
9304
|
3
|
|
|
|
|
6
|
$id_scan_state = EMPTY_STRING; |
|
9305
|
|
|
|
|
|
|
} |
|
9306
|
|
|
|
|
|
|
|
|
9307
|
|
|
|
|
|
|
# check for various punctuation variables |
|
9308
|
|
|
|
|
|
|
elsif ( $identifier =~ /^[\$\*\@\%]$/ ) { |
|
9309
|
35
|
|
|
|
|
79
|
$identifier .= $tok; |
|
9310
|
|
|
|
|
|
|
} |
|
9311
|
|
|
|
|
|
|
|
|
9312
|
|
|
|
|
|
|
# POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#* |
|
9313
|
|
|
|
|
|
|
elsif ($tok eq '*' |
|
9314
|
|
|
|
|
|
|
&& $identifier =~ /\-\>([\@\%\$\*\&]|\$\#)$/ ) |
|
9315
|
|
|
|
|
|
|
{ |
|
9316
|
6
|
|
|
|
|
9
|
$identifier .= $tok; |
|
9317
|
|
|
|
|
|
|
} |
|
9318
|
|
|
|
|
|
|
|
|
9319
|
|
|
|
|
|
|
elsif ( $identifier eq '$#' ) { |
|
9320
|
|
|
|
|
|
|
|
|
9321
|
2
|
50
|
|
|
|
9
|
if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } |
|
|
0
|
50
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
9322
|
|
|
|
|
|
|
|
|
9323
|
|
|
|
|
|
|
# perl seems to allow just these: $#: $#- $#+ |
|
9324
|
|
|
|
|
|
|
elsif ( $tok =~ /^[\:\-\+]$/ ) { |
|
9325
|
0
|
|
|
|
|
0
|
$type = 'i'; |
|
9326
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9327
|
|
|
|
|
|
|
} |
|
9328
|
|
|
|
|
|
|
else { |
|
9329
|
2
|
|
|
|
|
4
|
$i = $i_save; |
|
9330
|
2
|
|
|
|
|
6
|
$self->write_logfile_entry( |
|
9331
|
|
|
|
|
|
|
'Use of $# is deprecated' . "\n" ); |
|
9332
|
|
|
|
|
|
|
} |
|
9333
|
|
|
|
|
|
|
} |
|
9334
|
|
|
|
|
|
|
elsif ( $identifier eq '$$' ) { |
|
9335
|
|
|
|
|
|
|
|
|
9336
|
|
|
|
|
|
|
# perl does not allow references to punctuation |
|
9337
|
|
|
|
|
|
|
# variables without braces. For example, this |
|
9338
|
|
|
|
|
|
|
# won't work: |
|
9339
|
|
|
|
|
|
|
# $:=\4; |
|
9340
|
|
|
|
|
|
|
# $a = $$:; |
|
9341
|
|
|
|
|
|
|
# You would have to use |
|
9342
|
|
|
|
|
|
|
# $a = ${$:}; |
|
9343
|
|
|
|
|
|
|
|
|
9344
|
|
|
|
|
|
|
# '$$' alone is punctuation variable for PID |
|
9345
|
0
|
|
|
|
|
0
|
$i = $i_save; |
|
9346
|
0
|
0
|
|
|
|
0
|
if ( $tok eq '{' ) { $type = 't' } |
|
|
0
|
|
|
|
|
0
|
|
|
9347
|
0
|
|
|
|
|
0
|
else { $type = 'i' } |
|
9348
|
|
|
|
|
|
|
} |
|
9349
|
|
|
|
|
|
|
elsif ( $identifier eq '->' ) { |
|
9350
|
0
|
|
|
|
|
0
|
$i = $i_save; |
|
9351
|
|
|
|
|
|
|
} |
|
9352
|
|
|
|
|
|
|
else { |
|
9353
|
0
|
|
|
|
|
0
|
$i = $i_save; |
|
9354
|
0
|
0
|
|
|
|
0
|
if ( length($identifier) == 1 ) { |
|
9355
|
0
|
|
|
|
|
0
|
$identifier = EMPTY_STRING; |
|
9356
|
|
|
|
|
|
|
} |
|
9357
|
|
|
|
|
|
|
} |
|
9358
|
46
|
|
|
|
|
76
|
$id_scan_state = EMPTY_STRING; |
|
9359
|
|
|
|
|
|
|
} |
|
9360
|
609
|
|
|
|
|
897
|
return; |
|
9361
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_dollar |
|
9362
|
|
|
|
|
|
|
|
|
9363
|
|
|
|
|
|
|
sub do_id_scan_state_alpha { |
|
9364
|
|
|
|
|
|
|
|
|
9365
|
119
|
|
|
119
|
0
|
182
|
my $self = shift; |
|
9366
|
|
|
|
|
|
|
|
|
9367
|
|
|
|
|
|
|
# looking for alphanumeric after :: |
|
9368
|
119
|
|
|
|
|
293
|
$tok_is_blank = $tok =~ /^\s*$/; |
|
9369
|
|
|
|
|
|
|
|
|
9370
|
119
|
100
|
33
|
|
|
369
|
if ( $tok =~ /^\w/ ) { # found it |
|
|
|
50
|
66
|
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
9371
|
106
|
|
|
|
|
161
|
$identifier .= $tok; |
|
9372
|
106
|
|
|
|
|
173
|
$id_scan_state = $scan_state_COLON; # now need :: |
|
9373
|
106
|
|
|
|
|
177
|
$saw_alpha = 1; |
|
9374
|
|
|
|
|
|
|
} |
|
9375
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { |
|
9376
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9377
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; # now need :: |
|
9378
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
|
9379
|
|
|
|
|
|
|
} |
|
9380
|
|
|
|
|
|
|
elsif ( $tok_is_blank && $identifier =~ /^sub / ) { |
|
9381
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_LPAREN; |
|
9382
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9383
|
|
|
|
|
|
|
} |
|
9384
|
|
|
|
|
|
|
elsif ( $tok eq '(' && $identifier =~ /^sub / ) { |
|
9385
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_RPAREN; |
|
9386
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9387
|
|
|
|
|
|
|
} |
|
9388
|
|
|
|
|
|
|
else { |
|
9389
|
13
|
|
|
|
|
21
|
$id_scan_state = EMPTY_STRING; |
|
9390
|
13
|
|
|
|
|
14
|
$i = $i_save; |
|
9391
|
|
|
|
|
|
|
} |
|
9392
|
119
|
|
|
|
|
176
|
return; |
|
9393
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_alpha |
|
9394
|
|
|
|
|
|
|
|
|
9395
|
|
|
|
|
|
|
sub do_id_scan_state_colon { |
|
9396
|
|
|
|
|
|
|
|
|
9397
|
470
|
|
|
470
|
0
|
659
|
my $self = shift; |
|
9398
|
|
|
|
|
|
|
|
|
9399
|
|
|
|
|
|
|
# looking for possible :: after alphanumeric |
|
9400
|
|
|
|
|
|
|
|
|
9401
|
470
|
|
|
|
|
1402
|
$tok_is_blank = $tok =~ /^\s*$/; |
|
9402
|
|
|
|
|
|
|
|
|
9403
|
470
|
100
|
66
|
|
|
3078
|
if ( $tok eq '::' ) { # got it |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
9404
|
103
|
|
|
|
|
167
|
$identifier .= $tok; |
|
9405
|
103
|
|
|
|
|
142
|
$id_scan_state = $scan_state_ALPHA; # now require alpha |
|
9406
|
|
|
|
|
|
|
} |
|
9407
|
|
|
|
|
|
|
elsif ( $tok =~ /^\w/ ) { # more alphanumeric is ok here |
|
9408
|
20
|
|
|
|
|
39
|
$identifier .= $tok; |
|
9409
|
20
|
|
|
|
|
33
|
$id_scan_state = $scan_state_COLON; # now need :: |
|
9410
|
20
|
|
|
|
|
39
|
$saw_alpha = 1; |
|
9411
|
|
|
|
|
|
|
} |
|
9412
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { # tick |
|
9413
|
|
|
|
|
|
|
|
|
9414
|
12
|
50
|
|
|
|
35
|
if ( $is_keyword{$identifier} ) { |
|
9415
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; # that's all |
|
9416
|
0
|
|
|
|
|
0
|
$i = $i_save; |
|
9417
|
|
|
|
|
|
|
} |
|
9418
|
|
|
|
|
|
|
else { |
|
9419
|
12
|
|
|
|
|
47
|
$identifier .= $tok; |
|
9420
|
|
|
|
|
|
|
} |
|
9421
|
|
|
|
|
|
|
} |
|
9422
|
|
|
|
|
|
|
elsif ( $tok_is_blank && $identifier =~ /^sub / ) { |
|
9423
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_LPAREN; |
|
9424
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9425
|
|
|
|
|
|
|
} |
|
9426
|
|
|
|
|
|
|
elsif ( $tok eq '(' && $identifier =~ /^sub / ) { |
|
9427
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_RPAREN; |
|
9428
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9429
|
|
|
|
|
|
|
} |
|
9430
|
|
|
|
|
|
|
else { |
|
9431
|
335
|
|
|
|
|
488
|
$id_scan_state = EMPTY_STRING; # that's all |
|
9432
|
335
|
|
|
|
|
469
|
$i = $i_save; |
|
9433
|
|
|
|
|
|
|
} |
|
9434
|
470
|
|
|
|
|
659
|
return; |
|
9435
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_colon |
|
9436
|
|
|
|
|
|
|
|
|
9437
|
|
|
|
|
|
|
sub do_id_scan_state_left_paren { |
|
9438
|
|
|
|
|
|
|
|
|
9439
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
9440
|
|
|
|
|
|
|
|
|
9441
|
|
|
|
|
|
|
# looking for possible '(' of a prototype |
|
9442
|
|
|
|
|
|
|
|
|
9443
|
0
|
0
|
|
|
|
0
|
if ( $tok eq '(' ) { # got it |
|
|
|
0
|
|
|
|
|
|
|
9444
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9445
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_RPAREN; # now find the end of it |
|
9446
|
|
|
|
|
|
|
} |
|
9447
|
|
|
|
|
|
|
elsif ( $tok =~ /^\s*$/ ) { # blank - keep going |
|
9448
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9449
|
0
|
|
|
|
|
0
|
$tok_is_blank = 1; |
|
9450
|
|
|
|
|
|
|
} |
|
9451
|
|
|
|
|
|
|
else { |
|
9452
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; # that's all - no prototype |
|
9453
|
0
|
|
|
|
|
0
|
$i = $i_save; |
|
9454
|
|
|
|
|
|
|
} |
|
9455
|
0
|
|
|
|
|
0
|
return; |
|
9456
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_left_paren |
|
9457
|
|
|
|
|
|
|
|
|
9458
|
|
|
|
|
|
|
sub do_id_scan_state_right_paren { |
|
9459
|
|
|
|
|
|
|
|
|
9460
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
9461
|
|
|
|
|
|
|
|
|
9462
|
|
|
|
|
|
|
# looking for a ')' of prototype to close a '(' |
|
9463
|
|
|
|
|
|
|
|
|
9464
|
0
|
|
|
|
|
0
|
$tok_is_blank = $tok =~ /^\s*$/; |
|
9465
|
|
|
|
|
|
|
|
|
9466
|
0
|
0
|
|
|
|
0
|
if ( $tok eq ')' ) { # got it |
|
|
|
0
|
|
|
|
|
|
|
9467
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9468
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; # all done |
|
9469
|
|
|
|
|
|
|
} |
|
9470
|
|
|
|
|
|
|
elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { |
|
9471
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9472
|
|
|
|
|
|
|
} |
|
9473
|
|
|
|
|
|
|
else { # probable error in script, but keep going |
|
9474
|
0
|
|
|
|
|
0
|
$self->warning( |
|
9475
|
|
|
|
|
|
|
"Unexpected '$tok' while seeking end of prototype\n"); |
|
9476
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9477
|
|
|
|
|
|
|
} |
|
9478
|
0
|
|
|
|
|
0
|
return; |
|
9479
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_right_paren |
|
9480
|
|
|
|
|
|
|
|
|
9481
|
|
|
|
|
|
|
sub do_id_scan_state_ampersand { |
|
9482
|
|
|
|
|
|
|
|
|
9483
|
104
|
|
|
104
|
0
|
142
|
my $self = shift; |
|
9484
|
|
|
|
|
|
|
|
|
9485
|
|
|
|
|
|
|
# Starting sub call after seeing an '&' |
|
9486
|
104
|
100
|
33
|
|
|
620
|
if ( $tok =~ /^[\$\w]/ ) { # alphanumeric .. |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9487
|
87
|
|
|
|
|
152
|
$id_scan_state = $scan_state_COLON; # now need :: |
|
9488
|
87
|
|
|
|
|
119
|
$saw_alpha = 1; |
|
9489
|
87
|
|
|
|
|
131
|
$identifier .= $tok; |
|
9490
|
|
|
|
|
|
|
} |
|
9491
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. |
|
9492
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; # now need :: |
|
9493
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
|
9494
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9495
|
|
|
|
|
|
|
} |
|
9496
|
|
|
|
|
|
|
elsif ( $tok =~ /^\s*$/ ) { # allow space |
|
9497
|
2
|
|
|
|
|
3
|
$tok_is_blank = 1; |
|
9498
|
|
|
|
|
|
|
|
|
9499
|
|
|
|
|
|
|
# fix c139: trim line-ending type 't' |
|
9500
|
2
|
50
|
33
|
|
|
9
|
if ( length($identifier) == 1 && $i == $max_token_index ) { |
|
9501
|
2
|
|
|
|
|
4
|
$i = $i_save; |
|
9502
|
2
|
|
|
|
|
3
|
$type = 't'; |
|
9503
|
|
|
|
|
|
|
} |
|
9504
|
|
|
|
|
|
|
} |
|
9505
|
|
|
|
|
|
|
elsif ( $tok eq '::' ) { # leading :: |
|
9506
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_ALPHA; # accept alpha next |
|
9507
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9508
|
|
|
|
|
|
|
} |
|
9509
|
|
|
|
|
|
|
elsif ( $tok eq '{' ) { |
|
9510
|
15
|
50
|
33
|
|
|
72
|
if ( $identifier eq '&' || $i == 0 ) { |
|
9511
|
15
|
|
|
|
|
27
|
$identifier = EMPTY_STRING; |
|
9512
|
|
|
|
|
|
|
} |
|
9513
|
15
|
|
|
|
|
25
|
$i = $i_save; |
|
9514
|
15
|
|
|
|
|
25
|
$id_scan_state = EMPTY_STRING; |
|
9515
|
|
|
|
|
|
|
} |
|
9516
|
|
|
|
|
|
|
elsif ( $tok eq '^' ) { |
|
9517
|
0
|
0
|
|
|
|
0
|
if ( $identifier eq '&' ) { |
|
9518
|
|
|
|
|
|
|
|
|
9519
|
|
|
|
|
|
|
# Special variable (c066) |
|
9520
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9521
|
0
|
|
|
|
|
0
|
$type = 'i'; |
|
9522
|
|
|
|
|
|
|
|
|
9523
|
|
|
|
|
|
|
# To be a special $^ variable, there may be one more character, |
|
9524
|
|
|
|
|
|
|
# not a space, after the ^ |
|
9525
|
0
|
|
|
|
|
0
|
my $next1 = $rtokens->[ $i + 1 ]; |
|
9526
|
0
|
|
|
|
|
0
|
my $chr = substr( $next1, 0, 1 ); |
|
9527
|
0
|
0
|
|
|
|
0
|
if ( $is_special_variable_char{$chr} ) { |
|
9528
|
|
|
|
|
|
|
|
|
9529
|
|
|
|
|
|
|
# It is something like &^O |
|
9530
|
0
|
|
|
|
|
0
|
$i++; |
|
9531
|
0
|
|
|
|
|
0
|
$identifier .= $next1; |
|
9532
|
|
|
|
|
|
|
|
|
9533
|
|
|
|
|
|
|
# If pretoken $next1 is more than one character long, |
|
9534
|
|
|
|
|
|
|
# set a flag indicating that it needs to be split. |
|
9535
|
0
|
0
|
|
|
|
0
|
$id_scan_state = |
|
9536
|
|
|
|
|
|
|
( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING; |
|
9537
|
|
|
|
|
|
|
} |
|
9538
|
|
|
|
|
|
|
else { |
|
9539
|
|
|
|
|
|
|
|
|
9540
|
|
|
|
|
|
|
# It is &^. This is parsed by perl as a call to sub '^', |
|
9541
|
|
|
|
|
|
|
# even though it would be difficult to create a sub '^'. |
|
9542
|
|
|
|
|
|
|
# So we mark it as an identifier (c068). |
|
9543
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
|
9544
|
|
|
|
|
|
|
} |
|
9545
|
|
|
|
|
|
|
} |
|
9546
|
|
|
|
|
|
|
else { |
|
9547
|
0
|
|
|
|
|
0
|
$identifier = EMPTY_STRING; |
|
9548
|
0
|
|
|
|
|
0
|
$i = $i_save; |
|
9549
|
|
|
|
|
|
|
} |
|
9550
|
|
|
|
|
|
|
} |
|
9551
|
|
|
|
|
|
|
else { |
|
9552
|
|
|
|
|
|
|
|
|
9553
|
|
|
|
|
|
|
# punctuation variable? |
|
9554
|
|
|
|
|
|
|
# testfile: cunningham4.pl |
|
9555
|
|
|
|
|
|
|
# |
|
9556
|
|
|
|
|
|
|
# We have to be careful here. If we are in an unknown state, |
|
9557
|
|
|
|
|
|
|
# we will reject the punctuation variable. In the following |
|
9558
|
|
|
|
|
|
|
# example the '&' is a binary operator but we are in an unknown |
|
9559
|
|
|
|
|
|
|
# state because there is no sigil on 'Prima', so we don't |
|
9560
|
|
|
|
|
|
|
# know what it is. But it is a bad guess that |
|
9561
|
|
|
|
|
|
|
# '&~' is a function variable. |
|
9562
|
|
|
|
|
|
|
# $self->{text}->{colorMap}->[ |
|
9563
|
|
|
|
|
|
|
# Prima::PodView::COLOR_CODE_FOREGROUND |
|
9564
|
|
|
|
|
|
|
# & ~tb::COLOR_INDEX ] = |
|
9565
|
|
|
|
|
|
|
# $sec->{ColorCode} |
|
9566
|
|
|
|
|
|
|
|
|
9567
|
|
|
|
|
|
|
# Fix for case c033: a '#' here starts a side comment |
|
9568
|
0
|
0
|
0
|
|
|
0
|
if ( $identifier eq '&' && $expecting && $tok ne '#' ) { |
|
|
|
|
0
|
|
|
|
|
|
9569
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
|
9570
|
|
|
|
|
|
|
} |
|
9571
|
|
|
|
|
|
|
else { |
|
9572
|
0
|
|
|
|
|
0
|
$identifier = EMPTY_STRING; |
|
9573
|
0
|
|
|
|
|
0
|
$i = $i_save; |
|
9574
|
0
|
|
|
|
|
0
|
$type = '&'; |
|
9575
|
|
|
|
|
|
|
} |
|
9576
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
|
9577
|
|
|
|
|
|
|
} |
|
9578
|
104
|
|
|
|
|
150
|
return; |
|
9579
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_ampersand |
|
9580
|
|
|
|
|
|
|
|
|
9581
|
|
|
|
|
|
|
#------------------- |
|
9582
|
|
|
|
|
|
|
# hash of scanner subs |
|
9583
|
|
|
|
|
|
|
#------------------- |
|
9584
|
|
|
|
|
|
|
my $scan_identifier_code = { |
|
9585
|
|
|
|
|
|
|
$scan_state_SIGIL => \&do_id_scan_state_dollar, |
|
9586
|
|
|
|
|
|
|
$scan_state_ALPHA => \&do_id_scan_state_alpha, |
|
9587
|
|
|
|
|
|
|
$scan_state_COLON => \&do_id_scan_state_colon, |
|
9588
|
|
|
|
|
|
|
$scan_state_LPAREN => \&do_id_scan_state_left_paren, |
|
9589
|
|
|
|
|
|
|
$scan_state_RPAREN => \&do_id_scan_state_right_paren, |
|
9590
|
|
|
|
|
|
|
$scan_state_AMPERSAND => \&do_id_scan_state_ampersand, |
|
9591
|
|
|
|
|
|
|
}; |
|
9592
|
|
|
|
|
|
|
|
|
9593
|
|
|
|
|
|
|
sub scan_complex_identifier { |
|
9594
|
|
|
|
|
|
|
|
|
9595
|
|
|
|
|
|
|
( |
|
9596
|
551
|
|
|
551
|
0
|
1673
|
my $self, |
|
9597
|
|
|
|
|
|
|
|
|
9598
|
|
|
|
|
|
|
$i, |
|
9599
|
|
|
|
|
|
|
$id_scan_state, |
|
9600
|
|
|
|
|
|
|
$identifier, |
|
9601
|
|
|
|
|
|
|
$rtokens, |
|
9602
|
|
|
|
|
|
|
$max_token_index, |
|
9603
|
|
|
|
|
|
|
$expecting, |
|
9604
|
|
|
|
|
|
|
$container_type, |
|
9605
|
|
|
|
|
|
|
|
|
9606
|
|
|
|
|
|
|
) = @_; |
|
9607
|
|
|
|
|
|
|
|
|
9608
|
|
|
|
|
|
|
# This routine assembles tokens into identifiers. It maintains a |
|
9609
|
|
|
|
|
|
|
# scan state, id_scan_state. It updates id_scan_state based upon |
|
9610
|
|
|
|
|
|
|
# current id_scan_state and token, and returns an updated |
|
9611
|
|
|
|
|
|
|
# id_scan_state and the next index after the identifier. |
|
9612
|
|
|
|
|
|
|
|
|
9613
|
|
|
|
|
|
|
# This routine now serves a backup for sub scan_simple_identifier |
|
9614
|
|
|
|
|
|
|
# which handles most identifiers. |
|
9615
|
|
|
|
|
|
|
|
|
9616
|
|
|
|
|
|
|
# Note that $self must be a 'my' variable and not be a closure |
|
9617
|
|
|
|
|
|
|
# variables like the other args. Otherwise it will not get |
|
9618
|
|
|
|
|
|
|
# deleted by a DESTROY call at the end of a file. Then an |
|
9619
|
|
|
|
|
|
|
# attempt to create multiple tokenizers can occur when multiple |
|
9620
|
|
|
|
|
|
|
# files are processed, causing an error. |
|
9621
|
|
|
|
|
|
|
|
|
9622
|
|
|
|
|
|
|
# return flag telling caller to split the pretoken |
|
9623
|
551
|
|
|
|
|
2570
|
my $split_pretoken_flag; |
|
9624
|
|
|
|
|
|
|
|
|
9625
|
|
|
|
|
|
|
#------------------- |
|
9626
|
|
|
|
|
|
|
# Initialize my vars |
|
9627
|
|
|
|
|
|
|
#------------------- |
|
9628
|
|
|
|
|
|
|
|
|
9629
|
551
|
|
|
|
|
1601
|
initialize_my_scan_id_vars(); |
|
9630
|
|
|
|
|
|
|
|
|
9631
|
|
|
|
|
|
|
#-------------------------------------------------------- |
|
9632
|
|
|
|
|
|
|
# get started by defining a type and a state if necessary |
|
9633
|
|
|
|
|
|
|
#-------------------------------------------------------- |
|
9634
|
|
|
|
|
|
|
|
|
9635
|
551
|
100
|
|
|
|
1156
|
if ( !$id_scan_state ) { |
|
9636
|
544
|
|
|
|
|
722
|
$context = UNKNOWN_CONTEXT; |
|
9637
|
|
|
|
|
|
|
|
|
9638
|
|
|
|
|
|
|
# fixup for digraph |
|
9639
|
544
|
50
|
|
|
|
1112
|
if ( $tok eq '>' ) { |
|
9640
|
0
|
|
|
|
|
0
|
$tok = '->'; |
|
9641
|
0
|
|
|
|
|
0
|
$tok_begin = $tok; |
|
9642
|
|
|
|
|
|
|
} |
|
9643
|
544
|
|
|
|
|
796
|
$identifier = $tok; |
|
9644
|
|
|
|
|
|
|
|
|
9645
|
544
|
100
|
100
|
|
|
2580
|
if ( $last_nonblank_token eq '->' ) { |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9646
|
8
|
|
|
|
|
13
|
$identifier = '->' . $identifier; |
|
9647
|
8
|
|
|
|
|
13
|
$id_scan_state = $scan_state_SIGIL; |
|
9648
|
|
|
|
|
|
|
} |
|
9649
|
|
|
|
|
|
|
elsif ( $tok eq '$' || $tok eq '*' ) { |
|
9650
|
355
|
|
|
|
|
645
|
$id_scan_state = $scan_state_SIGIL; |
|
9651
|
355
|
|
|
|
|
541
|
$context = SCALAR_CONTEXT; |
|
9652
|
|
|
|
|
|
|
} |
|
9653
|
|
|
|
|
|
|
elsif ( $tok eq '%' || $tok eq '@' ) { |
|
9654
|
79
|
|
|
|
|
124
|
$id_scan_state = $scan_state_SIGIL; |
|
9655
|
79
|
|
|
|
|
129
|
$context = LIST_CONTEXT; |
|
9656
|
|
|
|
|
|
|
} |
|
9657
|
|
|
|
|
|
|
elsif ( $tok eq '&' ) { |
|
9658
|
102
|
|
|
|
|
166
|
$id_scan_state = $scan_state_AMPERSAND; |
|
9659
|
|
|
|
|
|
|
} |
|
9660
|
|
|
|
|
|
|
elsif ( $tok eq 'sub' or $tok eq 'package' ) { |
|
9661
|
0
|
|
|
|
|
0
|
$saw_alpha = 0; # 'sub' is considered type info here |
|
9662
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_SIGIL; |
|
9663
|
0
|
|
|
|
|
0
|
$identifier .= |
|
9664
|
|
|
|
|
|
|
SPACE; # need a space to separate sub from sub name |
|
9665
|
|
|
|
|
|
|
} |
|
9666
|
|
|
|
|
|
|
elsif ( $tok eq '::' ) { |
|
9667
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_ALPHA; |
|
9668
|
|
|
|
|
|
|
} |
|
9669
|
|
|
|
|
|
|
elsif ( $tok =~ /^\w/ ) { |
|
9670
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; |
|
9671
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
|
9672
|
|
|
|
|
|
|
} |
|
9673
|
|
|
|
|
|
|
elsif ( $tok eq '->' ) { |
|
9674
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_SIGIL; |
|
9675
|
|
|
|
|
|
|
} |
|
9676
|
|
|
|
|
|
|
else { |
|
9677
|
|
|
|
|
|
|
|
|
9678
|
|
|
|
|
|
|
# shouldn't happen: bad call parameter |
|
9679
|
0
|
|
|
|
|
0
|
my $msg = |
|
9680
|
|
|
|
|
|
|
"Program bug detected: scan_complex_identifier received bad starting token = '$tok'\n"; |
|
9681
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { Fault($msg) } |
|
9682
|
0
|
0
|
|
|
|
0
|
if ( !$self->[_in_error_] ) { |
|
9683
|
0
|
|
|
|
|
0
|
$self->warning($msg); |
|
9684
|
0
|
|
|
|
|
0
|
$self->[_in_error_] = 1; |
|
9685
|
|
|
|
|
|
|
} |
|
9686
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
|
9687
|
|
|
|
|
|
|
|
|
9688
|
|
|
|
|
|
|
# emergency return |
|
9689
|
0
|
|
|
|
|
0
|
goto RETURN; |
|
9690
|
|
|
|
|
|
|
} |
|
9691
|
544
|
|
|
|
|
818
|
$saw_type = !$saw_alpha; |
|
9692
|
|
|
|
|
|
|
} |
|
9693
|
|
|
|
|
|
|
else { |
|
9694
|
7
|
|
|
|
|
18
|
$i--; |
|
9695
|
7
|
|
|
|
|
19
|
$saw_alpha = ( $tok =~ /^\w/ ); |
|
9696
|
7
|
|
|
|
|
19
|
$saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); |
|
9697
|
|
|
|
|
|
|
|
|
9698
|
|
|
|
|
|
|
# check for a valid starting state |
|
9699
|
7
|
|
|
|
|
10
|
if ( DEVEL_MODE && !$is_returnable_scan_state{$id_scan_state} ) { |
|
9700
|
|
|
|
|
|
|
Fault(<<EOM); |
|
9701
|
|
|
|
|
|
|
Unexpected starting scan state in sub scan_complex_identifier: '$id_scan_state' |
|
9702
|
|
|
|
|
|
|
EOM |
|
9703
|
|
|
|
|
|
|
} |
|
9704
|
|
|
|
|
|
|
} |
|
9705
|
|
|
|
|
|
|
|
|
9706
|
|
|
|
|
|
|
#------------------------------ |
|
9707
|
|
|
|
|
|
|
# loop to gather the identifier |
|
9708
|
|
|
|
|
|
|
#------------------------------ |
|
9709
|
|
|
|
|
|
|
|
|
9710
|
551
|
|
|
|
|
794
|
$i_save = $i; |
|
9711
|
|
|
|
|
|
|
|
|
9712
|
551
|
|
100
|
|
|
2043
|
while ( $i < $max_token_index && $id_scan_state ) { |
|
9713
|
|
|
|
|
|
|
|
|
9714
|
|
|
|
|
|
|
# Be sure we have code to handle this state before we proceed |
|
9715
|
1305
|
|
|
|
|
2182
|
my $code = $scan_identifier_code->{$id_scan_state}; |
|
9716
|
1305
|
100
|
|
|
|
2232
|
if ( !$code ) { |
|
9717
|
|
|
|
|
|
|
|
|
9718
|
3
|
50
|
|
|
|
7
|
if ( $id_scan_state eq $scan_state_SPLIT ) { |
|
9719
|
|
|
|
|
|
|
## OK: this is the signal to exit and split the pretoken |
|
9720
|
|
|
|
|
|
|
} |
|
9721
|
|
|
|
|
|
|
|
|
9722
|
|
|
|
|
|
|
# unknown state - should not happen |
|
9723
|
|
|
|
|
|
|
else { |
|
9724
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
|
9725
|
|
|
|
|
|
|
Fault(<<EOM); |
|
9726
|
|
|
|
|
|
|
Unknown scan state in sub scan_complex_identifier: '$id_scan_state' |
|
9727
|
|
|
|
|
|
|
Scan state at sub entry was '$id_scan_state_begin' |
|
9728
|
|
|
|
|
|
|
EOM |
|
9729
|
|
|
|
|
|
|
} |
|
9730
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
|
9731
|
0
|
|
|
|
|
0
|
$i = $i_save; |
|
9732
|
|
|
|
|
|
|
} |
|
9733
|
3
|
|
|
|
|
5
|
last; |
|
9734
|
|
|
|
|
|
|
} |
|
9735
|
|
|
|
|
|
|
|
|
9736
|
|
|
|
|
|
|
# Remember the starting index for progress check below |
|
9737
|
1302
|
|
|
|
|
1452
|
my $i_start_loop = $i; |
|
9738
|
|
|
|
|
|
|
|
|
9739
|
1302
|
|
|
|
|
1573
|
$last_tok_is_blank = $tok_is_blank; |
|
9740
|
1302
|
100
|
|
|
|
1905
|
if ($tok_is_blank) { $tok_is_blank = undef } |
|
|
11
|
|
|
|
|
20
|
|
|
9741
|
1291
|
|
|
|
|
1467
|
else { $i_save = $i } |
|
9742
|
|
|
|
|
|
|
|
|
9743
|
1302
|
|
|
|
|
1829
|
$tok = $rtokens->[ ++$i ]; |
|
9744
|
|
|
|
|
|
|
|
|
9745
|
|
|
|
|
|
|
# patch to make digraph :: if necessary |
|
9746
|
1302
|
100
|
100
|
|
|
2670
|
if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) { |
|
9747
|
119
|
|
|
|
|
161
|
$tok = '::'; |
|
9748
|
119
|
|
|
|
|
177
|
$i++; |
|
9749
|
|
|
|
|
|
|
} |
|
9750
|
|
|
|
|
|
|
|
|
9751
|
1302
|
|
|
|
|
2678
|
$code->($self); |
|
9752
|
|
|
|
|
|
|
|
|
9753
|
|
|
|
|
|
|
# check for forward progress: a decrease in the index $i |
|
9754
|
|
|
|
|
|
|
# implies that scanning has finished |
|
9755
|
1302
|
100
|
|
|
|
3467
|
last if ( $i <= $i_start_loop ); |
|
9756
|
|
|
|
|
|
|
|
|
9757
|
|
|
|
|
|
|
} ## end while ( $i < $max_token_index...) |
|
9758
|
|
|
|
|
|
|
|
|
9759
|
|
|
|
|
|
|
#------------- |
|
9760
|
|
|
|
|
|
|
# Check result |
|
9761
|
|
|
|
|
|
|
#------------- |
|
9762
|
|
|
|
|
|
|
|
|
9763
|
|
|
|
|
|
|
# Be sure a valid state is returned |
|
9764
|
551
|
100
|
|
|
|
1102
|
if ($id_scan_state) { |
|
9765
|
|
|
|
|
|
|
|
|
9766
|
24
|
100
|
|
|
|
112
|
if ( !$is_returnable_scan_state{$id_scan_state} ) { |
|
9767
|
|
|
|
|
|
|
|
|
9768
|
17
|
100
|
|
|
|
54
|
if ( $id_scan_state eq $scan_state_SPLIT ) { |
|
9769
|
3
|
|
|
|
|
5
|
$split_pretoken_flag = 1; |
|
9770
|
|
|
|
|
|
|
} |
|
9771
|
|
|
|
|
|
|
|
|
9772
|
17
|
50
|
|
|
|
53
|
if ( $id_scan_state eq $scan_state_RPAREN ) { |
|
9773
|
0
|
|
|
|
|
0
|
$self->warning( |
|
9774
|
|
|
|
|
|
|
"Hit end of line while seeking ) to end prototype\n"); |
|
9775
|
|
|
|
|
|
|
} |
|
9776
|
|
|
|
|
|
|
|
|
9777
|
17
|
|
|
|
|
33
|
$id_scan_state = EMPTY_STRING; |
|
9778
|
|
|
|
|
|
|
} |
|
9779
|
|
|
|
|
|
|
|
|
9780
|
|
|
|
|
|
|
# Patch: the deprecated variable $# does not combine with anything |
|
9781
|
|
|
|
|
|
|
# on the next line. |
|
9782
|
24
|
50
|
|
|
|
65
|
if ( $identifier eq '$#' ) { $id_scan_state = EMPTY_STRING } |
|
|
0
|
|
|
|
|
0
|
|
|
9783
|
|
|
|
|
|
|
} |
|
9784
|
|
|
|
|
|
|
|
|
9785
|
|
|
|
|
|
|
# Be sure the token index is valid |
|
9786
|
551
|
50
|
|
|
|
1206
|
if ( $i < 0 ) { $i = 0 } |
|
|
0
|
|
|
|
|
0
|
|
|
9787
|
|
|
|
|
|
|
|
|
9788
|
|
|
|
|
|
|
# Be sure a token type is defined |
|
9789
|
551
|
100
|
|
|
|
1182
|
if ( !$type ) { |
|
9790
|
|
|
|
|
|
|
|
|
9791
|
523
|
100
|
|
|
|
1015
|
if ($saw_type) { |
|
|
|
100
|
|
|
|
|
|
|
9792
|
|
|
|
|
|
|
|
|
9793
|
517
|
100
|
66
|
|
|
2035
|
if ($saw_alpha) { |
|
|
|
50
|
100
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
9794
|
|
|
|
|
|
|
|
|
9795
|
|
|
|
|
|
|
# The type without the -> should be the same as with the -> so |
|
9796
|
|
|
|
|
|
|
# that if they get separated we get the same bond strengths, |
|
9797
|
|
|
|
|
|
|
# etc. See b1234 |
|
9798
|
404
|
50
|
66
|
|
|
1372
|
if ( $identifier =~ /^->/ |
|
|
|
|
33
|
|
|
|
|
|
9799
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'w' |
|
9800
|
|
|
|
|
|
|
&& substr( $identifier, 2, 1 ) =~ /^\w/ ) |
|
9801
|
|
|
|
|
|
|
{ |
|
9802
|
0
|
|
|
|
|
0
|
$type = 'w'; |
|
9803
|
|
|
|
|
|
|
} |
|
9804
|
404
|
|
|
|
|
776
|
else { $type = 'i' } |
|
9805
|
|
|
|
|
|
|
} |
|
9806
|
|
|
|
|
|
|
elsif ( $identifier eq '->' ) { |
|
9807
|
0
|
|
|
|
|
0
|
$type = '->'; |
|
9808
|
|
|
|
|
|
|
} |
|
9809
|
|
|
|
|
|
|
elsif ( |
|
9810
|
|
|
|
|
|
|
( length($identifier) > 1 ) |
|
9811
|
|
|
|
|
|
|
|
|
9812
|
|
|
|
|
|
|
# In something like '@$=' we have an identifier '@$' |
|
9813
|
|
|
|
|
|
|
# In something like '$${' we have type '$$' (and only |
|
9814
|
|
|
|
|
|
|
# part of an identifier) |
|
9815
|
|
|
|
|
|
|
&& !( $identifier =~ /\$$/ && $tok eq '{' ) |
|
9816
|
|
|
|
|
|
|
&& $identifier ne 'sub ' |
|
9817
|
|
|
|
|
|
|
&& $identifier ne 'package ' |
|
9818
|
|
|
|
|
|
|
) |
|
9819
|
|
|
|
|
|
|
{ |
|
9820
|
53
|
|
|
|
|
105
|
$type = 'i'; |
|
9821
|
|
|
|
|
|
|
} |
|
9822
|
60
|
|
|
|
|
114
|
else { $type = 't' } |
|
9823
|
|
|
|
|
|
|
} |
|
9824
|
|
|
|
|
|
|
elsif ($saw_alpha) { |
|
9825
|
|
|
|
|
|
|
|
|
9826
|
|
|
|
|
|
|
# type 'w' includes anything without leading type info |
|
9827
|
|
|
|
|
|
|
# ($,%,@,*) including something like abc::def::ghi |
|
9828
|
5
|
|
|
|
|
8
|
$type = 'w'; |
|
9829
|
|
|
|
|
|
|
|
|
9830
|
|
|
|
|
|
|
# Fix for b1337, if restarting scan after line break between |
|
9831
|
|
|
|
|
|
|
# '->' or sigil and identifier name, use type 'i' |
|
9832
|
5
|
50
|
33
|
|
|
25
|
if ( $id_scan_state_begin |
|
9833
|
|
|
|
|
|
|
&& $identifier =~ /^([\$\%\@\*\&]|->)/ ) |
|
9834
|
|
|
|
|
|
|
{ |
|
9835
|
5
|
|
|
|
|
6
|
$type = 'i'; |
|
9836
|
|
|
|
|
|
|
} |
|
9837
|
|
|
|
|
|
|
} |
|
9838
|
|
|
|
|
|
|
else { |
|
9839
|
1
|
|
|
|
|
2
|
$type = EMPTY_STRING; |
|
9840
|
|
|
|
|
|
|
} # this can happen on a restart |
|
9841
|
|
|
|
|
|
|
} |
|
9842
|
|
|
|
|
|
|
|
|
9843
|
|
|
|
|
|
|
# See if we formed an identifier... |
|
9844
|
551
|
100
|
|
|
|
1089
|
if ($identifier) { |
|
9845
|
504
|
|
|
|
|
744
|
$tok = $identifier; |
|
9846
|
504
|
100
|
|
|
|
1101
|
if ($message) { $self->write_logfile_entry($message) } |
|
|
1
|
|
|
|
|
6
|
|
|
9847
|
|
|
|
|
|
|
} |
|
9848
|
|
|
|
|
|
|
|
|
9849
|
|
|
|
|
|
|
# did not find an identifier, back up |
|
9850
|
|
|
|
|
|
|
else { |
|
9851
|
47
|
|
|
|
|
78
|
$tok = $tok_begin; |
|
9852
|
47
|
|
|
|
|
76
|
$i = $i_begin; |
|
9853
|
|
|
|
|
|
|
} |
|
9854
|
|
|
|
|
|
|
|
|
9855
|
|
|
|
|
|
|
RETURN: |
|
9856
|
|
|
|
|
|
|
|
|
9857
|
551
|
|
|
|
|
665
|
DEBUG_SCAN_ID && do { |
|
9858
|
|
|
|
|
|
|
my ( $a, $b, $c ) = caller(); |
|
9859
|
|
|
|
|
|
|
print {*STDOUT} |
|
9860
|
|
|
|
|
|
|
"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; |
|
9861
|
|
|
|
|
|
|
print {*STDOUT} |
|
9862
|
|
|
|
|
|
|
"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; |
|
9863
|
|
|
|
|
|
|
}; |
|
9864
|
|
|
|
|
|
|
|
|
9865
|
|
|
|
|
|
|
return ( |
|
9866
|
|
|
|
|
|
|
|
|
9867
|
551
|
|
|
|
|
2324
|
$i, |
|
9868
|
|
|
|
|
|
|
$tok, |
|
9869
|
|
|
|
|
|
|
$type, |
|
9870
|
|
|
|
|
|
|
$id_scan_state, |
|
9871
|
|
|
|
|
|
|
$identifier, |
|
9872
|
|
|
|
|
|
|
$split_pretoken_flag, |
|
9873
|
|
|
|
|
|
|
); |
|
9874
|
|
|
|
|
|
|
} ## end sub scan_complex_identifier |
|
9875
|
|
|
|
|
|
|
} ## end closure for sub scan_complex_identifier |
|
9876
|
|
|
|
|
|
|
|
|
9877
|
|
|
|
|
|
|
{ ## closure for sub do_scan_sub |
|
9878
|
|
|
|
|
|
|
|
|
9879
|
|
|
|
|
|
|
# saved package and subnames in case prototype is on separate line |
|
9880
|
|
|
|
|
|
|
my ( $package_saved, $subname_saved ); |
|
9881
|
|
|
|
|
|
|
|
|
9882
|
|
|
|
|
|
|
# initialize subname each time a new 'sub' keyword is encountered |
|
9883
|
|
|
|
|
|
|
sub initialize_subname { |
|
9884
|
345
|
|
|
345
|
0
|
613
|
$package_saved = EMPTY_STRING; |
|
9885
|
345
|
|
|
|
|
529
|
$subname_saved = EMPTY_STRING; |
|
9886
|
345
|
|
|
|
|
520
|
return; |
|
9887
|
|
|
|
|
|
|
} |
|
9888
|
|
|
|
|
|
|
|
|
9889
|
|
|
|
|
|
|
use constant { |
|
9890
|
44
|
|
|
|
|
98728
|
SUB_CALL => 1, |
|
9891
|
|
|
|
|
|
|
PAREN_CALL => 2, |
|
9892
|
|
|
|
|
|
|
PROTOTYPE_CALL => 3, |
|
9893
|
44
|
|
|
44
|
|
349
|
}; |
|
|
44
|
|
|
|
|
75
|
|
|
9894
|
|
|
|
|
|
|
|
|
9895
|
|
|
|
|
|
|
sub do_scan_sub { |
|
9896
|
|
|
|
|
|
|
|
|
9897
|
351
|
|
|
351
|
0
|
723
|
my ( $self, $rcall_hash ) = @_; |
|
9898
|
|
|
|
|
|
|
|
|
9899
|
|
|
|
|
|
|
# Parse a sub name and prototype. |
|
9900
|
|
|
|
|
|
|
|
|
9901
|
351
|
|
|
|
|
744
|
my $input_line = $rcall_hash->{input_line}; |
|
9902
|
351
|
|
|
|
|
615
|
my $i = $rcall_hash->{i}; |
|
9903
|
351
|
|
|
|
|
589
|
my $i_beg = $rcall_hash->{i_beg}; |
|
9904
|
351
|
|
|
|
|
615
|
my $tok = $rcall_hash->{tok}; |
|
9905
|
351
|
|
|
|
|
587
|
my $type = $rcall_hash->{type}; |
|
9906
|
351
|
|
|
|
|
622
|
my $rtokens = $rcall_hash->{rtokens}; |
|
9907
|
351
|
|
|
|
|
561
|
my $rtoken_map = $rcall_hash->{rtoken_map}; |
|
9908
|
351
|
|
|
|
|
567
|
my $id_scan_state = $rcall_hash->{id_scan_state}; |
|
9909
|
351
|
|
|
|
|
537
|
my $max_token_index = $rcall_hash->{max_token_index}; |
|
9910
|
|
|
|
|
|
|
|
|
9911
|
351
|
100
|
|
|
|
772
|
my $id_prefix = $rcall_hash->{is_lexical_method} ? '$' : EMPTY_STRING; |
|
9912
|
|
|
|
|
|
|
|
|
9913
|
|
|
|
|
|
|
# At present there are three basic CALL TYPES which are |
|
9914
|
|
|
|
|
|
|
# distinguished by the starting value of '$tok': |
|
9915
|
|
|
|
|
|
|
# 1. $tok='sub', id_scan_state='sub' |
|
9916
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of the first nonblank |
|
9917
|
|
|
|
|
|
|
# token following a 'sub' token. |
|
9918
|
|
|
|
|
|
|
# 2. $tok='(', id_scan_state='sub', |
|
9919
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of a '(' which may |
|
9920
|
|
|
|
|
|
|
# start a prototype. |
|
9921
|
|
|
|
|
|
|
# 3. $tok='prototype', id_scan_state='prototype' |
|
9922
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of a '(' which is |
|
9923
|
|
|
|
|
|
|
# preceded by ': prototype' and has $id_scan_state eq 'prototype' |
|
9924
|
|
|
|
|
|
|
|
|
9925
|
|
|
|
|
|
|
# Examples: |
|
9926
|
|
|
|
|
|
|
|
|
9927
|
|
|
|
|
|
|
# A single type 1 call will get both the sub and prototype |
|
9928
|
|
|
|
|
|
|
# sub foo1 ( $$ ) { } |
|
9929
|
|
|
|
|
|
|
# ^ |
|
9930
|
|
|
|
|
|
|
|
|
9931
|
|
|
|
|
|
|
# The subname will be obtained with a 'sub' call |
|
9932
|
|
|
|
|
|
|
# The prototype on line 2 will be obtained with a '(' call |
|
9933
|
|
|
|
|
|
|
# sub foo1 |
|
9934
|
|
|
|
|
|
|
# ^ <---call type 1 |
|
9935
|
|
|
|
|
|
|
# ( $$ ) { } |
|
9936
|
|
|
|
|
|
|
# ^ <---call type 2 |
|
9937
|
|
|
|
|
|
|
|
|
9938
|
|
|
|
|
|
|
# The subname will be obtained with a 'sub' call |
|
9939
|
|
|
|
|
|
|
# The prototype will be obtained with a 'prototype' call |
|
9940
|
|
|
|
|
|
|
# sub foo1 ( $x, $y ) : prototype ( $$ ) { } |
|
9941
|
|
|
|
|
|
|
# ^ <---type 1 ^ <---type 3 |
|
9942
|
|
|
|
|
|
|
|
|
9943
|
|
|
|
|
|
|
# TODO: add future error checks to be sure we have a valid |
|
9944
|
|
|
|
|
|
|
# sub name. For example, 'sub &doit' is wrong. Also, be sure |
|
9945
|
|
|
|
|
|
|
# a name is given if and only if a non-anonymous sub is |
|
9946
|
|
|
|
|
|
|
# appropriate. |
|
9947
|
|
|
|
|
|
|
# USES GLOBAL VARS: $current_package, $last_nonblank_token, |
|
9948
|
|
|
|
|
|
|
# $rsaw_function_definition, |
|
9949
|
|
|
|
|
|
|
# $statement_type |
|
9950
|
|
|
|
|
|
|
|
|
9951
|
351
|
|
|
|
|
500
|
my $i_entry = $i; |
|
9952
|
|
|
|
|
|
|
|
|
9953
|
|
|
|
|
|
|
# Determine the CALL TYPE |
|
9954
|
|
|
|
|
|
|
# 1=sub |
|
9955
|
|
|
|
|
|
|
# 2=( |
|
9956
|
|
|
|
|
|
|
# 3=prototype |
|
9957
|
351
|
100
|
|
|
|
1065
|
my $call_type = |
|
|
|
100
|
|
|
|
|
|
|
9958
|
|
|
|
|
|
|
$tok eq 'prototype' ? PROTOTYPE_CALL |
|
9959
|
|
|
|
|
|
|
: $tok eq '(' ? PAREN_CALL |
|
9960
|
|
|
|
|
|
|
: SUB_CALL; |
|
9961
|
|
|
|
|
|
|
|
|
9962
|
351
|
|
|
|
|
576
|
$id_scan_state = EMPTY_STRING; # normally we get everything in one call |
|
9963
|
351
|
|
|
|
|
520
|
my $subname = $subname_saved; |
|
9964
|
351
|
|
|
|
|
562
|
my $package = $package_saved; |
|
9965
|
351
|
|
|
|
|
556
|
my $proto = undef; |
|
9966
|
351
|
|
|
|
|
486
|
my $attrs = undef; |
|
9967
|
351
|
|
|
|
|
463
|
my $match; |
|
9968
|
|
|
|
|
|
|
|
|
9969
|
351
|
|
|
|
|
560
|
my $pos_beg = $rtoken_map->[$i_beg]; |
|
9970
|
351
|
|
|
|
|
1125
|
pos($input_line) = $pos_beg; |
|
9971
|
|
|
|
|
|
|
|
|
9972
|
|
|
|
|
|
|
# Look for the sub NAME if this is a SUB call |
|
9973
|
351
|
100
|
100
|
|
|
2695
|
if ( |
|
9974
|
|
|
|
|
|
|
$call_type == SUB_CALL |
|
9975
|
|
|
|
|
|
|
&& $input_line =~ m{\G\s* |
|
9976
|
|
|
|
|
|
|
((?:\w*(?:'|::))*) # package - something that ends in :: or ' |
|
9977
|
|
|
|
|
|
|
(\w+) # NAME - required |
|
9978
|
|
|
|
|
|
|
}gcx |
|
9979
|
|
|
|
|
|
|
) |
|
9980
|
|
|
|
|
|
|
{ |
|
9981
|
158
|
|
|
|
|
283
|
$match = 1; |
|
9982
|
158
|
|
|
|
|
339
|
$subname = $2; |
|
9983
|
|
|
|
|
|
|
my $is_lexical_sub = $last_nonblank_type eq 'k' |
|
9984
|
158
|
|
33
|
|
|
495
|
&& $is_my_our_state{$last_nonblank_token}; |
|
9985
|
158
|
0
|
33
|
|
|
1924
|
if ( $is_lexical_sub && $1 ) { |
|
9986
|
0
|
|
|
|
|
0
|
$self->warning( |
|
9987
|
|
|
|
|
|
|
"'$last_nonblank_token' sub $subname cannot be in package '$1'\n" |
|
9988
|
|
|
|
|
|
|
); |
|
9989
|
0
|
|
|
|
|
0
|
$is_lexical_sub = 0; |
|
9990
|
|
|
|
|
|
|
} |
|
9991
|
|
|
|
|
|
|
|
|
9992
|
158
|
50
|
|
|
|
383
|
if ($is_lexical_sub) { |
|
9993
|
|
|
|
|
|
|
|
|
9994
|
|
|
|
|
|
|
# Lexical subs use the containing block sequence number as a |
|
9995
|
|
|
|
|
|
|
# package name. |
|
9996
|
0
|
|
|
|
|
0
|
my $seqno = |
|
9997
|
|
|
|
|
|
|
$rcurrent_sequence_number->[BRACE] |
|
9998
|
|
|
|
|
|
|
->[ $rcurrent_depth->[BRACE] ]; |
|
9999
|
0
|
0
|
|
|
|
0
|
$seqno = SEQ_ROOT if ( !defined($seqno) ); |
|
10000
|
0
|
|
|
|
|
0
|
$package = $seqno; |
|
10001
|
|
|
|
|
|
|
|
|
10002
|
|
|
|
|
|
|
# The value will eventually be the sequence number of the |
|
10003
|
|
|
|
|
|
|
# opening curly brace of the definition (if any). We use -1 |
|
10004
|
|
|
|
|
|
|
# until we find it. |
|
10005
|
0
|
|
|
|
|
0
|
$ris_lexical_sub->{$subname}->{$package} = -1; |
|
10006
|
|
|
|
|
|
|
|
|
10007
|
|
|
|
|
|
|
# Set a special signal to tell sub do_LEFT_CURLY_BRACKET to |
|
10008
|
|
|
|
|
|
|
# update this value if the next opening sub block brace is for |
|
10009
|
|
|
|
|
|
|
# this sub. The reason we need this value is to avoid applying |
|
10010
|
|
|
|
|
|
|
# this new sub in its own definition block. Note that '911' is |
|
10011
|
|
|
|
|
|
|
# not a possible sub name. Search for '911' for related code. |
|
10012
|
0
|
|
|
|
|
0
|
$ris_lexical_sub->{911} = [ $subname, $package ]; |
|
10013
|
|
|
|
|
|
|
|
|
10014
|
|
|
|
|
|
|
# Complain if lexical sub name hides a quote operator |
|
10015
|
0
|
0
|
|
|
|
0
|
if ( $is_q_qq_qw_qx_qr_s_y_tr_m{$subname} ) { |
|
10016
|
0
|
|
|
|
|
0
|
$self->complain( |
|
10017
|
|
|
|
|
|
|
"'my' sub '$subname' matches a builtin quote operator\n" |
|
10018
|
|
|
|
|
|
|
); |
|
10019
|
|
|
|
|
|
|
## OLD CODING, before improved handling of lexical subs: |
|
10020
|
|
|
|
|
|
|
## This may end badly, it is safest to avoid formatting. |
|
10021
|
|
|
|
|
|
|
## For an example, see perl527/lexsub.t (issue c203) |
|
10022
|
|
|
|
|
|
|
## $self->[_do_not_format_] = 1; |
|
10023
|
|
|
|
|
|
|
} |
|
10024
|
|
|
|
|
|
|
} |
|
10025
|
|
|
|
|
|
|
else { |
|
10026
|
158
|
100
|
66
|
|
|
827
|
$package = ( defined($1) && $1 ) ? $1 : $current_package; |
|
10027
|
158
|
|
|
|
|
390
|
$package =~ s/\'/::/g; |
|
10028
|
158
|
50
|
|
|
|
440
|
if ( $package =~ /^\:/ ) { $package = 'main' . $package } |
|
|
0
|
|
|
|
|
0
|
|
|
10029
|
158
|
|
|
|
|
349
|
$package =~ s/::$//; |
|
10030
|
|
|
|
|
|
|
} |
|
10031
|
|
|
|
|
|
|
|
|
10032
|
158
|
|
|
|
|
280
|
my $pos = pos($input_line); |
|
10033
|
158
|
|
|
|
|
271
|
my $numc = $pos - $pos_beg; |
|
10034
|
158
|
|
|
|
|
393
|
$tok = 'sub ' . $id_prefix . substr( $input_line, $pos_beg, $numc ); |
|
10035
|
158
|
|
|
|
|
248
|
$type = 'S'; ## Fix for c250, was 'i'; |
|
10036
|
|
|
|
|
|
|
|
|
10037
|
|
|
|
|
|
|
# remember the sub name in case another call is needed to |
|
10038
|
|
|
|
|
|
|
# get the prototype |
|
10039
|
158
|
|
|
|
|
244
|
$package_saved = $package; |
|
10040
|
158
|
|
|
|
|
296
|
$subname_saved = $subname; |
|
10041
|
|
|
|
|
|
|
} |
|
10042
|
|
|
|
|
|
|
|
|
10043
|
|
|
|
|
|
|
# Now look for PROTO ATTRS for all call types |
|
10044
|
|
|
|
|
|
|
# Look for prototype/attributes which are usually on the same |
|
10045
|
|
|
|
|
|
|
# line as the sub name but which might be on a separate line. |
|
10046
|
|
|
|
|
|
|
# For example, we might have an anonymous sub with attributes, |
|
10047
|
|
|
|
|
|
|
# or a prototype on a separate line from its sub name |
|
10048
|
|
|
|
|
|
|
|
|
10049
|
|
|
|
|
|
|
# NOTE: We only want to parse PROTOTYPES here. If we see anything that |
|
10050
|
|
|
|
|
|
|
# does not look like a prototype, we assume it is a SIGNATURE and we |
|
10051
|
|
|
|
|
|
|
# will stop and let the standard tokenizer handle it. In |
|
10052
|
|
|
|
|
|
|
# particular, we stop if we see any nested parens, braces, or commas. |
|
10053
|
|
|
|
|
|
|
# Also note, a valid prototype cannot contain any alphabetic character |
|
10054
|
|
|
|
|
|
|
# -- see https://perldoc.perl.org/perlsub |
|
10055
|
|
|
|
|
|
|
# But it appears that an underscore is valid in a prototype, so the |
|
10056
|
|
|
|
|
|
|
# regex below uses [A-Za-z] rather than \w |
|
10057
|
|
|
|
|
|
|
# This is the old regex which has been replaced: |
|
10058
|
|
|
|
|
|
|
# $input_line =~ m/\G(\s*\([^\)\(\}\{\,#]*\))? # PROTO |
|
10059
|
|
|
|
|
|
|
# Added '=' for issue c362 |
|
10060
|
351
|
|
|
|
|
1020
|
my $saw_opening_paren = $input_line =~ /\G\s*\(/; |
|
10061
|
351
|
100
|
100
|
|
|
3222
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
10062
|
|
|
|
|
|
|
$input_line =~ m{\G(\s*\([^\)\(\}\{\,#A-Za-z=]*\))? # PROTO |
|
10063
|
|
|
|
|
|
|
(\s*:)? # ATTRS leading ':' |
|
10064
|
|
|
|
|
|
|
}gcx |
|
10065
|
|
|
|
|
|
|
&& ( $1 || $2 ) |
|
10066
|
|
|
|
|
|
|
) |
|
10067
|
|
|
|
|
|
|
{ |
|
10068
|
45
|
|
|
|
|
84
|
$proto = $1; |
|
10069
|
45
|
|
|
|
|
77
|
$attrs = $2; |
|
10070
|
|
|
|
|
|
|
|
|
10071
|
|
|
|
|
|
|
# Append the prototype to the starting token if it is 'sub' or |
|
10072
|
|
|
|
|
|
|
# 'prototype'. This is not necessary but for compatibility with |
|
10073
|
|
|
|
|
|
|
# previous versions when the -csc flag is used: |
|
10074
|
45
|
100
|
100
|
|
|
245
|
if ( $proto && ( $match || $call_type == PROTOTYPE_CALL ) ) { |
|
|
|
100
|
100
|
|
|
|
|
|
10075
|
24
|
|
|
|
|
39
|
$tok .= $proto; |
|
10076
|
|
|
|
|
|
|
} |
|
10077
|
|
|
|
|
|
|
|
|
10078
|
|
|
|
|
|
|
# If we just entered the sub at an opening paren on this call, not |
|
10079
|
|
|
|
|
|
|
# a following :prototype, label it with the previous token. This is |
|
10080
|
|
|
|
|
|
|
# necessary to propagate the sub name to its opening block. |
|
10081
|
|
|
|
|
|
|
elsif ( $call_type == PAREN_CALL ) { |
|
10082
|
2
|
|
|
|
|
4
|
$tok = $last_nonblank_token; |
|
10083
|
|
|
|
|
|
|
} |
|
10084
|
|
|
|
|
|
|
else { |
|
10085
|
|
|
|
|
|
|
} |
|
10086
|
|
|
|
|
|
|
|
|
10087
|
45
|
|
100
|
|
|
139
|
$match ||= 1; |
|
10088
|
|
|
|
|
|
|
|
|
10089
|
|
|
|
|
|
|
# Patch part #1 to fixes cases b994 and b1053: |
|
10090
|
|
|
|
|
|
|
# Mark an anonymous sub keyword without prototype as type 'k', i.e. |
|
10091
|
|
|
|
|
|
|
# 'sub : lvalue { ...' |
|
10092
|
45
|
|
|
|
|
67
|
$type = 'S'; ## C250, was 'i'; |
|
10093
|
45
|
100
|
100
|
|
|
229
|
if ( $tok eq 'sub' && !$proto ) { $type = 'k' } |
|
|
2
|
|
|
|
|
5
|
|
|
10094
|
|
|
|
|
|
|
} |
|
10095
|
|
|
|
|
|
|
|
|
10096
|
351
|
100
|
|
|
|
770
|
if ($match) { |
|
10097
|
|
|
|
|
|
|
|
|
10098
|
|
|
|
|
|
|
# ATTRS: if there are attributes, back up and let the ':' be |
|
10099
|
|
|
|
|
|
|
# found later by the scanner. |
|
10100
|
173
|
|
|
|
|
285
|
my $pos = pos($input_line); |
|
10101
|
173
|
100
|
|
|
|
432
|
if ($attrs) { |
|
10102
|
15
|
|
|
|
|
27
|
$pos -= length($attrs); |
|
10103
|
|
|
|
|
|
|
} |
|
10104
|
|
|
|
|
|
|
|
|
10105
|
173
|
|
|
|
|
335
|
my $next_nonblank_token = $tok; |
|
10106
|
|
|
|
|
|
|
|
|
10107
|
|
|
|
|
|
|
# catch case of line with leading ATTR ':' after anonymous sub |
|
10108
|
173
|
100
|
100
|
|
|
548
|
if ( $pos == $pos_beg && $tok eq ':' ) { |
|
10109
|
1
|
|
|
|
|
2
|
$type = 'A'; |
|
10110
|
1
|
|
|
|
|
3
|
$self->[_in_attribute_list_] = 1; |
|
10111
|
|
|
|
|
|
|
} |
|
10112
|
|
|
|
|
|
|
|
|
10113
|
|
|
|
|
|
|
# Otherwise, if we found a match we must convert back from |
|
10114
|
|
|
|
|
|
|
# string position to the pre_token index for continued parsing. |
|
10115
|
|
|
|
|
|
|
else { |
|
10116
|
|
|
|
|
|
|
|
|
10117
|
|
|
|
|
|
|
# I don't think an error flag can occur here ..but ? |
|
10118
|
172
|
|
|
|
|
266
|
my $error; |
|
10119
|
172
|
|
|
|
|
569
|
( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, |
|
10120
|
|
|
|
|
|
|
$max_token_index ); |
|
10121
|
172
|
50
|
|
|
|
438
|
if ($error) { $self->warning("Possibly invalid sub\n") } |
|
|
0
|
|
|
|
|
0
|
|
|
10122
|
|
|
|
|
|
|
|
|
10123
|
|
|
|
|
|
|
# Patch part #2 to fixes cases b994 and b1053: |
|
10124
|
|
|
|
|
|
|
# Do not let spaces be part of the token of an anonymous sub |
|
10125
|
|
|
|
|
|
|
# keyword which we marked as type 'k' above...i.e. for |
|
10126
|
|
|
|
|
|
|
# something like: |
|
10127
|
|
|
|
|
|
|
# 'sub : lvalue { ...' |
|
10128
|
|
|
|
|
|
|
# Back up and let it be parsed as a blank |
|
10129
|
172
|
50
|
66
|
|
|
600
|
if ( $type eq 'k' |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
10130
|
|
|
|
|
|
|
&& $attrs |
|
10131
|
|
|
|
|
|
|
&& $i > $i_entry |
|
10132
|
|
|
|
|
|
|
&& substr( $rtokens->[$i], 0, 1 ) =~ m/\s/ ) |
|
10133
|
|
|
|
|
|
|
{ |
|
10134
|
2
|
|
|
|
|
6
|
$i--; |
|
10135
|
|
|
|
|
|
|
} |
|
10136
|
|
|
|
|
|
|
|
|
10137
|
|
|
|
|
|
|
# check for multiple definitions of a sub |
|
10138
|
172
|
|
|
|
|
370
|
( $next_nonblank_token, my $i_next_uu ) = |
|
10139
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, |
|
10140
|
|
|
|
|
|
|
$max_token_index ); |
|
10141
|
|
|
|
|
|
|
} |
|
10142
|
|
|
|
|
|
|
|
|
10143
|
173
|
100
|
|
|
|
724
|
if ( $next_nonblank_token =~ /^(\s*|#)$/ ) |
|
10144
|
|
|
|
|
|
|
{ # skip blank or side comment |
|
10145
|
7
|
|
|
|
|
38
|
my ( $rpre_tokens, $rpre_types_uu ) = |
|
10146
|
|
|
|
|
|
|
$self->peek_ahead_for_n_nonblank_pre_tokens(1); |
|
10147
|
7
|
50
|
33
|
|
|
29
|
if ( defined($rpre_tokens) && @{$rpre_tokens} ) { |
|
|
7
|
|
|
|
|
27
|
|
|
10148
|
7
|
|
|
|
|
19
|
$next_nonblank_token = $rpre_tokens->[0]; |
|
10149
|
|
|
|
|
|
|
} |
|
10150
|
|
|
|
|
|
|
else { |
|
10151
|
0
|
|
|
|
|
0
|
$next_nonblank_token = '}'; |
|
10152
|
|
|
|
|
|
|
} |
|
10153
|
|
|
|
|
|
|
} |
|
10154
|
|
|
|
|
|
|
|
|
10155
|
|
|
|
|
|
|
# See what's next... |
|
10156
|
173
|
100
|
|
|
|
700
|
if ( $next_nonblank_token eq '{' ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10157
|
135
|
100
|
|
|
|
346
|
if ($subname) { |
|
10158
|
|
|
|
|
|
|
|
|
10159
|
|
|
|
|
|
|
# Check for multiple definitions of a sub, but |
|
10160
|
|
|
|
|
|
|
# it is ok to have multiple sub BEGIN, etc, |
|
10161
|
|
|
|
|
|
|
# so we do not complain if name is all caps |
|
10162
|
125
|
50
|
33
|
|
|
635
|
if ( $rsaw_function_definition->{$subname}->{$package} |
|
10163
|
|
|
|
|
|
|
&& $subname !~ /^[A-Z]+$/ ) |
|
10164
|
|
|
|
|
|
|
{ |
|
10165
|
|
|
|
|
|
|
my $lno = |
|
10166
|
0
|
|
|
|
|
0
|
$rsaw_function_definition->{$subname}->{$package}; |
|
10167
|
0
|
0
|
|
|
|
0
|
if ( $package =~ /^\d/ ) { |
|
10168
|
0
|
|
|
|
|
0
|
$self->warning( |
|
10169
|
|
|
|
|
|
|
"already saw definition of lexical 'sub $subname' at line $lno\n" |
|
10170
|
|
|
|
|
|
|
); |
|
10171
|
|
|
|
|
|
|
|
|
10172
|
|
|
|
|
|
|
} |
|
10173
|
|
|
|
|
|
|
else { |
|
10174
|
0
|
|
|
|
|
0
|
if ( !DEVEL_MODE ) { |
|
10175
|
0
|
|
|
|
|
0
|
$self->warning( |
|
10176
|
|
|
|
|
|
|
"already saw definition of 'sub $subname' in package '$package' at line $lno\n" |
|
10177
|
|
|
|
|
|
|
); |
|
10178
|
|
|
|
|
|
|
} |
|
10179
|
|
|
|
|
|
|
} |
|
10180
|
|
|
|
|
|
|
} |
|
10181
|
125
|
|
|
|
|
383
|
$rsaw_function_definition->{$subname}->{$package} = |
|
10182
|
|
|
|
|
|
|
$self->[_last_line_number_]; |
|
10183
|
|
|
|
|
|
|
} |
|
10184
|
|
|
|
|
|
|
} |
|
10185
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq ';' ) { |
|
10186
|
|
|
|
|
|
|
} |
|
10187
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq '}' ) { |
|
10188
|
|
|
|
|
|
|
} |
|
10189
|
|
|
|
|
|
|
|
|
10190
|
|
|
|
|
|
|
# ATTRS - if an attribute list follows, remember the name |
|
10191
|
|
|
|
|
|
|
# of the sub so the next opening brace can be labeled. |
|
10192
|
|
|
|
|
|
|
# Setting 'statement_type' causes any ':'s to introduce |
|
10193
|
|
|
|
|
|
|
# attributes. |
|
10194
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq ':' ) { |
|
10195
|
16
|
100
|
|
|
|
48
|
if ( $call_type == SUB_CALL ) { |
|
10196
|
14
|
100
|
|
|
|
66
|
$statement_type = |
|
10197
|
|
|
|
|
|
|
substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub'; |
|
10198
|
|
|
|
|
|
|
} |
|
10199
|
|
|
|
|
|
|
} |
|
10200
|
|
|
|
|
|
|
|
|
10201
|
|
|
|
|
|
|
# if we stopped before an open paren ... |
|
10202
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq '(' ) { |
|
10203
|
|
|
|
|
|
|
|
|
10204
|
|
|
|
|
|
|
# If we DID NOT see this paren above then it must be on the |
|
10205
|
|
|
|
|
|
|
# next line so we will set a flag to come back here and see if |
|
10206
|
|
|
|
|
|
|
# it is a PROTOTYPE |
|
10207
|
|
|
|
|
|
|
|
|
10208
|
|
|
|
|
|
|
# Otherwise, we assume it is a SIGNATURE rather than a |
|
10209
|
|
|
|
|
|
|
# PROTOTYPE and let the normal tokenizer handle it as a list |
|
10210
|
21
|
100
|
|
|
|
91
|
if ( !$saw_opening_paren ) { |
|
10211
|
4
|
|
|
|
|
8
|
$id_scan_state = 'sub'; # we must come back to get proto |
|
10212
|
|
|
|
|
|
|
} |
|
10213
|
21
|
50
|
|
|
|
64
|
if ( $call_type == SUB_CALL ) { |
|
10214
|
21
|
50
|
|
|
|
73
|
$statement_type = |
|
10215
|
|
|
|
|
|
|
substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub'; |
|
10216
|
|
|
|
|
|
|
} |
|
10217
|
|
|
|
|
|
|
} |
|
10218
|
|
|
|
|
|
|
|
|
10219
|
|
|
|
|
|
|
# something else.. |
|
10220
|
|
|
|
|
|
|
elsif ($next_nonblank_token) { |
|
10221
|
|
|
|
|
|
|
|
|
10222
|
0
|
0
|
0
|
|
|
0
|
if ( $rcall_hash->{tok} eq 'method' && $call_type == SUB_CALL ) |
|
10223
|
|
|
|
|
|
|
{ |
|
10224
|
|
|
|
|
|
|
# For a method call, silently ignore this error (rt145706) |
|
10225
|
|
|
|
|
|
|
# to avoid needless warnings. Example which can produce it: |
|
10226
|
|
|
|
|
|
|
# test(method Pack (), "method"); |
|
10227
|
|
|
|
|
|
|
|
|
10228
|
|
|
|
|
|
|
# TODO: scan for use feature 'class' and: |
|
10229
|
|
|
|
|
|
|
# - if we saw 'use feature 'class' then issue the warning. |
|
10230
|
|
|
|
|
|
|
# - if we did not see use feature 'class' then issue the |
|
10231
|
|
|
|
|
|
|
# warning and suggest turning off --use-feature=class |
|
10232
|
|
|
|
|
|
|
} |
|
10233
|
|
|
|
|
|
|
else { |
|
10234
|
0
|
0
|
|
|
|
0
|
$subname = EMPTY_STRING unless ( defined($subname) ); |
|
10235
|
0
|
|
|
|
|
0
|
$self->warning( |
|
10236
|
|
|
|
|
|
|
"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" |
|
10237
|
|
|
|
|
|
|
); |
|
10238
|
|
|
|
|
|
|
} |
|
10239
|
|
|
|
|
|
|
} |
|
10240
|
|
|
|
|
|
|
|
|
10241
|
|
|
|
|
|
|
# EOF technically ok |
|
10242
|
|
|
|
|
|
|
else { |
|
10243
|
|
|
|
|
|
|
} |
|
10244
|
|
|
|
|
|
|
|
|
10245
|
173
|
|
|
|
|
585
|
check_prototype( $proto, $package, $subname ); |
|
10246
|
|
|
|
|
|
|
} |
|
10247
|
|
|
|
|
|
|
|
|
10248
|
|
|
|
|
|
|
# no match to either sub name or prototype, but line not blank |
|
10249
|
|
|
|
|
|
|
else { |
|
10250
|
|
|
|
|
|
|
|
|
10251
|
|
|
|
|
|
|
} |
|
10252
|
351
|
|
|
|
|
1792
|
return ( $i, $tok, $type, $id_scan_state ); |
|
10253
|
|
|
|
|
|
|
} ## end sub do_scan_sub |
|
10254
|
|
|
|
|
|
|
} |
|
10255
|
|
|
|
|
|
|
|
|
10256
|
|
|
|
|
|
|
######################################################################### |
|
10257
|
|
|
|
|
|
|
# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS |
|
10258
|
|
|
|
|
|
|
######################################################################### |
|
10259
|
|
|
|
|
|
|
|
|
10260
|
|
|
|
|
|
|
sub find_next_nonblank_token { |
|
10261
|
458
|
|
|
458
|
0
|
1183
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
|
10262
|
|
|
|
|
|
|
|
|
10263
|
|
|
|
|
|
|
# Returns the next nonblank token after the token at index $i |
|
10264
|
|
|
|
|
|
|
# To skip past a side comment, and any subsequent block comments |
|
10265
|
|
|
|
|
|
|
# and blank lines, call with i=$max_token_index |
|
10266
|
|
|
|
|
|
|
|
|
10267
|
|
|
|
|
|
|
# Skip any ending blank (fix c258). It would be cleaner if caller passed |
|
10268
|
|
|
|
|
|
|
# $rtoken_map, so we could check for type 'b', and avoid a regex test, but |
|
10269
|
|
|
|
|
|
|
# benchmarking shows that this test does not take significant time. So |
|
10270
|
|
|
|
|
|
|
# that would be a nice update but not essential. Also note that ending |
|
10271
|
|
|
|
|
|
|
# blanks will not occur for text previously processed by perltidy. |
|
10272
|
458
|
100
|
100
|
|
|
1832
|
if ( $i == $max_token_index - 1 |
|
10273
|
|
|
|
|
|
|
&& $rtokens->[$max_token_index] =~ /^\s+$/ ) |
|
10274
|
|
|
|
|
|
|
{ |
|
10275
|
9
|
|
|
|
|
22
|
$i++; |
|
10276
|
|
|
|
|
|
|
} |
|
10277
|
|
|
|
|
|
|
|
|
10278
|
458
|
100
|
|
|
|
1095
|
if ( $i >= $max_token_index ) { |
|
10279
|
141
|
100
|
|
|
|
459
|
if ( !peeked_ahead() ) { |
|
10280
|
139
|
|
|
|
|
380
|
peeked_ahead(1); |
|
10281
|
139
|
|
|
|
|
540
|
$self->peek_ahead_for_nonblank_token( $rtokens, $max_token_index ); |
|
10282
|
|
|
|
|
|
|
} |
|
10283
|
|
|
|
|
|
|
} |
|
10284
|
|
|
|
|
|
|
|
|
10285
|
458
|
|
|
|
|
910
|
my $next_nonblank_token = $rtokens->[ ++$i ]; |
|
10286
|
|
|
|
|
|
|
|
|
10287
|
|
|
|
|
|
|
# Any more tokens? |
|
10288
|
458
|
50
|
33
|
|
|
1852
|
return ( SPACE, $i ) |
|
10289
|
|
|
|
|
|
|
if ( !defined($next_nonblank_token) || !length($next_nonblank_token) ); |
|
10290
|
|
|
|
|
|
|
|
|
10291
|
|
|
|
|
|
|
# Skip over whitespace |
|
10292
|
458
|
|
|
|
|
936
|
my $ord = ord( substr( $next_nonblank_token, 0, 1 ) ); |
|
10293
|
458
|
0
|
66
|
|
|
2523
|
if ( |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
10294
|
|
|
|
|
|
|
|
|
10295
|
|
|
|
|
|
|
( $ord <= ORD_PRINTABLE_MIN || $ord >= ORD_PRINTABLE_MAX ) |
|
10296
|
|
|
|
|
|
|
|
|
10297
|
|
|
|
|
|
|
# Quick test for ascii space or tab |
|
10298
|
|
|
|
|
|
|
&& ( |
|
10299
|
|
|
|
|
|
|
( $ord == ORD_SPACE || $ord == ORD_TAB ) |
|
10300
|
|
|
|
|
|
|
|
|
10301
|
|
|
|
|
|
|
# Slow test to for something else identified as whitespace |
|
10302
|
|
|
|
|
|
|
|| $next_nonblank_token =~ /^\s+$/ |
|
10303
|
|
|
|
|
|
|
) |
|
10304
|
|
|
|
|
|
|
) |
|
10305
|
|
|
|
|
|
|
{ |
|
10306
|
301
|
|
|
|
|
563
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
|
10307
|
301
|
50
|
|
|
|
747
|
return ( SPACE, $i ) unless ( defined($next_nonblank_token) ); |
|
10308
|
|
|
|
|
|
|
} |
|
10309
|
|
|
|
|
|
|
|
|
10310
|
|
|
|
|
|
|
# We should be at a nonblank now |
|
10311
|
458
|
|
|
|
|
1422
|
return ( $next_nonblank_token, $i ); |
|
10312
|
|
|
|
|
|
|
|
|
10313
|
|
|
|
|
|
|
} ## end sub find_next_nonblank_token |
|
10314
|
|
|
|
|
|
|
|
|
10315
|
|
|
|
|
|
|
sub find_next_noncomment_token { |
|
10316
|
104
|
|
|
104
|
0
|
258
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
|
10317
|
|
|
|
|
|
|
|
|
10318
|
|
|
|
|
|
|
# Given the current character position, look ahead past any comments |
|
10319
|
|
|
|
|
|
|
# and blank lines and return the next token, including digraphs and |
|
10320
|
|
|
|
|
|
|
# trigraphs. |
|
10321
|
|
|
|
|
|
|
|
|
10322
|
104
|
|
|
|
|
379
|
my ( $next_nonblank_token, $i_next ) = |
|
10323
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
|
10324
|
|
|
|
|
|
|
|
|
10325
|
|
|
|
|
|
|
# skip past any side comment |
|
10326
|
104
|
50
|
|
|
|
318
|
if ( $next_nonblank_token eq '#' ) { |
|
10327
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
|
10328
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i_next, $rtokens, |
|
10329
|
|
|
|
|
|
|
$max_token_index ); |
|
10330
|
|
|
|
|
|
|
} |
|
10331
|
|
|
|
|
|
|
|
|
10332
|
|
|
|
|
|
|
# check for a digraph |
|
10333
|
104
|
50
|
33
|
|
|
701
|
if ( $next_nonblank_token |
|
|
|
|
33
|
|
|
|
|
|
10334
|
|
|
|
|
|
|
&& $next_nonblank_token ne SPACE |
|
10335
|
|
|
|
|
|
|
&& defined( $rtokens->[ $i_next + 1 ] ) ) |
|
10336
|
|
|
|
|
|
|
{ |
|
10337
|
104
|
|
|
|
|
237
|
my $test2 = $next_nonblank_token . $rtokens->[ $i_next + 1 ]; |
|
10338
|
104
|
100
|
|
|
|
320
|
if ( $is_digraph{$test2} ) { |
|
10339
|
15
|
|
|
|
|
24
|
$next_nonblank_token = $test2; |
|
10340
|
15
|
|
|
|
|
22
|
$i_next = $i_next + 1; |
|
10341
|
|
|
|
|
|
|
|
|
10342
|
|
|
|
|
|
|
# check for a trigraph |
|
10343
|
15
|
50
|
|
|
|
48
|
if ( defined( $rtokens->[ $i_next + 1 ] ) ) { |
|
10344
|
15
|
|
|
|
|
32
|
my $test3 = $next_nonblank_token . $rtokens->[ $i_next + 1 ]; |
|
10345
|
15
|
50
|
|
|
|
46
|
if ( $is_trigraph{$test3} ) { |
|
10346
|
0
|
|
|
|
|
0
|
$next_nonblank_token = $test3; |
|
10347
|
0
|
|
|
|
|
0
|
$i_next = $i_next + 1; |
|
10348
|
|
|
|
|
|
|
} |
|
10349
|
|
|
|
|
|
|
} |
|
10350
|
|
|
|
|
|
|
} |
|
10351
|
|
|
|
|
|
|
} |
|
10352
|
|
|
|
|
|
|
|
|
10353
|
104
|
|
|
|
|
257
|
return ( $next_nonblank_token, $i_next ); |
|
10354
|
|
|
|
|
|
|
} ## end sub find_next_noncomment_token |
|
10355
|
|
|
|
|
|
|
|
|
10356
|
|
|
|
|
|
|
sub is_possible_numerator { |
|
10357
|
|
|
|
|
|
|
|
|
10358
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
|
10359
|
|
|
|
|
|
|
|
|
10360
|
|
|
|
|
|
|
# Look at the next non-comment character and decide if it could be a |
|
10361
|
|
|
|
|
|
|
# numerator. Returns the following code: |
|
10362
|
|
|
|
|
|
|
# -1 - division not possible |
|
10363
|
|
|
|
|
|
|
# 0 - can't tell if division possible |
|
10364
|
|
|
|
|
|
|
# 1 - division possible |
|
10365
|
|
|
|
|
|
|
# 2 - division probable: number follows |
|
10366
|
|
|
|
|
|
|
# 3 - division very probable: number and one of ; ] } follow |
|
10367
|
|
|
|
|
|
|
# 4 - is division, not pattern: number and ) follow |
|
10368
|
|
|
|
|
|
|
|
|
10369
|
0
|
|
|
|
|
0
|
my $divide_possible_code = 0; |
|
10370
|
|
|
|
|
|
|
|
|
10371
|
0
|
|
|
|
|
0
|
my $next_token = $rtokens->[ $i + 1 ]; |
|
10372
|
0
|
0
|
|
|
|
0
|
if ( $next_token eq '=' ) { $i++; } # handle /= |
|
|
0
|
|
|
|
|
0
|
|
|
10373
|
0
|
|
|
|
|
0
|
my ( $next_nonblank_token, $i_next ) = |
|
10374
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
|
10375
|
|
|
|
|
|
|
|
|
10376
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token eq '#' ) { |
|
10377
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
|
10378
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, $rtokens, |
|
10379
|
|
|
|
|
|
|
$max_token_index ); |
|
10380
|
|
|
|
|
|
|
} |
|
10381
|
|
|
|
|
|
|
|
|
10382
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token =~ / [ \( \$ \w \. \@ ] /x ) { |
|
|
|
0
|
|
|
|
|
|
|
10383
|
0
|
|
|
|
|
0
|
$divide_possible_code = 1; |
|
10384
|
|
|
|
|
|
|
|
|
10385
|
|
|
|
|
|
|
# look ahead one more token for some common patterns, such as |
|
10386
|
|
|
|
|
|
|
# pi/2) pi/2; pi/2} |
|
10387
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token =~ /^\d/ ) { |
|
10388
|
0
|
|
|
|
|
0
|
my ( $next_next_nonblank_token, $i_next_next_uu ) = |
|
10389
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i_next, $rtokens, |
|
10390
|
|
|
|
|
|
|
$max_token_index ); |
|
10391
|
0
|
0
|
0
|
|
|
0
|
if ( $next_next_nonblank_token eq ')' ) { |
|
|
|
0
|
0
|
|
|
|
|
|
10392
|
0
|
|
|
|
|
0
|
$divide_possible_code = 4; |
|
10393
|
|
|
|
|
|
|
} |
|
10394
|
|
|
|
|
|
|
elsif ($next_next_nonblank_token eq ';' |
|
10395
|
|
|
|
|
|
|
|| $next_next_nonblank_token eq ']' |
|
10396
|
|
|
|
|
|
|
|| $next_next_nonblank_token eq '}' ) |
|
10397
|
|
|
|
|
|
|
{ |
|
10398
|
0
|
|
|
|
|
0
|
$divide_possible_code = 3; |
|
10399
|
|
|
|
|
|
|
} |
|
10400
|
|
|
|
|
|
|
else { |
|
10401
|
0
|
|
|
|
|
0
|
$divide_possible_code = 2; |
|
10402
|
|
|
|
|
|
|
} |
|
10403
|
|
|
|
|
|
|
} |
|
10404
|
|
|
|
|
|
|
} |
|
10405
|
|
|
|
|
|
|
elsif ( $next_nonblank_token =~ /^\s*$/ ) { |
|
10406
|
0
|
|
|
|
|
0
|
$divide_possible_code = 0; |
|
10407
|
|
|
|
|
|
|
} |
|
10408
|
|
|
|
|
|
|
else { |
|
10409
|
0
|
|
|
|
|
0
|
$divide_possible_code = -1; |
|
10410
|
|
|
|
|
|
|
} |
|
10411
|
|
|
|
|
|
|
|
|
10412
|
0
|
|
|
|
|
0
|
return $divide_possible_code; |
|
10413
|
|
|
|
|
|
|
} ## end sub is_possible_numerator |
|
10414
|
|
|
|
|
|
|
|
|
10415
|
|
|
|
|
|
|
{ ## closure for sub pattern_expected |
|
10416
|
|
|
|
|
|
|
my %pattern_test; |
|
10417
|
|
|
|
|
|
|
|
|
10418
|
|
|
|
|
|
|
BEGIN { |
|
10419
|
|
|
|
|
|
|
|
|
10420
|
|
|
|
|
|
|
# List of tokens which may follow a pattern. Note that we will not |
|
10421
|
|
|
|
|
|
|
# have formed digraphs at this point, so we will see '&' instead of |
|
10422
|
|
|
|
|
|
|
# '&&' and '|' instead of '||' |
|
10423
|
|
|
|
|
|
|
|
|
10424
|
|
|
|
|
|
|
# /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ |
|
10425
|
44
|
|
|
44
|
|
285
|
my @q = qw( & && | || ? : + - * and or while if unless ); |
|
10426
|
44
|
|
|
|
|
207
|
push @q, ')', '}', ']', '>', COMMA, ';'; |
|
10427
|
44
|
|
|
|
|
118924
|
$pattern_test{$_} = 1 for @q; |
|
10428
|
|
|
|
|
|
|
} ## end BEGIN |
|
10429
|
|
|
|
|
|
|
|
|
10430
|
|
|
|
|
|
|
sub pattern_expected { |
|
10431
|
|
|
|
|
|
|
|
|
10432
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
|
10433
|
|
|
|
|
|
|
|
|
10434
|
|
|
|
|
|
|
# This a filter for a possible pattern. |
|
10435
|
|
|
|
|
|
|
# It looks at the token after a possible pattern and tries to |
|
10436
|
|
|
|
|
|
|
# determine if that token could end a pattern. |
|
10437
|
|
|
|
|
|
|
# returns - |
|
10438
|
|
|
|
|
|
|
# 1 - yes |
|
10439
|
|
|
|
|
|
|
# 0 - can't tell |
|
10440
|
|
|
|
|
|
|
# -1 - no |
|
10441
|
0
|
|
|
|
|
0
|
my $is_pattern = 0; |
|
10442
|
|
|
|
|
|
|
|
|
10443
|
0
|
|
|
|
|
0
|
my $next_token = $rtokens->[ $i + 1 ]; |
|
10444
|
|
|
|
|
|
|
|
|
10445
|
|
|
|
|
|
|
# skip a possible quote modifier |
|
10446
|
0
|
|
|
|
|
0
|
my $possible_modifiers = $quote_modifiers{'m'}; |
|
10447
|
0
|
0
|
|
|
|
0
|
if ( $next_token =~ /^$possible_modifiers/ ) { |
|
10448
|
0
|
|
|
|
|
0
|
$i++; |
|
10449
|
|
|
|
|
|
|
} |
|
10450
|
|
|
|
|
|
|
|
|
10451
|
0
|
|
|
|
|
0
|
my ( $next_nonblank_token, $i_next_uu ) = |
|
10452
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
|
10453
|
|
|
|
|
|
|
|
|
10454
|
0
|
0
|
|
|
|
0
|
if ( $pattern_test{$next_nonblank_token} ) { |
|
10455
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
|
10456
|
|
|
|
|
|
|
} |
|
10457
|
|
|
|
|
|
|
else { |
|
10458
|
|
|
|
|
|
|
|
|
10459
|
|
|
|
|
|
|
# Added '#' to fix issue c044 |
|
10460
|
0
|
0
|
0
|
|
|
0
|
if ( $next_nonblank_token =~ /^\s*$/ |
|
10461
|
|
|
|
|
|
|
|| $next_nonblank_token eq '#' ) |
|
10462
|
|
|
|
|
|
|
{ |
|
10463
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
|
10464
|
|
|
|
|
|
|
} |
|
10465
|
|
|
|
|
|
|
else { |
|
10466
|
0
|
|
|
|
|
0
|
$is_pattern = -1; |
|
10467
|
|
|
|
|
|
|
} |
|
10468
|
|
|
|
|
|
|
} |
|
10469
|
0
|
|
|
|
|
0
|
return $is_pattern; |
|
10470
|
|
|
|
|
|
|
} ## end sub pattern_expected |
|
10471
|
|
|
|
|
|
|
} |
|
10472
|
|
|
|
|
|
|
|
|
10473
|
|
|
|
|
|
|
sub find_next_nonblank_token_on_this_line { |
|
10474
|
579
|
|
|
579
|
0
|
1060
|
my ( $i, $rtokens, $max_token_index ) = @_; |
|
10475
|
579
|
|
|
|
|
820
|
my $next_nonblank_token; |
|
10476
|
|
|
|
|
|
|
|
|
10477
|
579
|
100
|
|
|
|
1118
|
if ( $i < $max_token_index ) { |
|
10478
|
571
|
|
|
|
|
948
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
|
10479
|
|
|
|
|
|
|
|
|
10480
|
571
|
100
|
|
|
|
2275
|
if ( $next_nonblank_token =~ /^\s*$/ ) { |
|
10481
|
|
|
|
|
|
|
|
|
10482
|
164
|
100
|
|
|
|
415
|
if ( $i < $max_token_index ) { |
|
10483
|
162
|
|
|
|
|
358
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
|
10484
|
|
|
|
|
|
|
} |
|
10485
|
|
|
|
|
|
|
} |
|
10486
|
|
|
|
|
|
|
} |
|
10487
|
|
|
|
|
|
|
else { |
|
10488
|
8
|
|
|
|
|
19
|
$next_nonblank_token = EMPTY_STRING; |
|
10489
|
|
|
|
|
|
|
} |
|
10490
|
579
|
|
|
|
|
1538
|
return ( $next_nonblank_token, $i ); |
|
10491
|
|
|
|
|
|
|
} ## end sub find_next_nonblank_token_on_this_line |
|
10492
|
|
|
|
|
|
|
|
|
10493
|
|
|
|
|
|
|
sub find_angle_operator_termination { |
|
10494
|
|
|
|
|
|
|
|
|
10495
|
8
|
|
|
8
|
0
|
35
|
my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) |
|
10496
|
|
|
|
|
|
|
= @_; |
|
10497
|
|
|
|
|
|
|
|
|
10498
|
|
|
|
|
|
|
# We are looking at a '<' and want to know if it is an angle operator. |
|
10499
|
|
|
|
|
|
|
# Return: |
|
10500
|
|
|
|
|
|
|
# $i = pretoken index of ending '>' if found, current $i otherwise |
|
10501
|
|
|
|
|
|
|
# $type = 'Q' if found, '>' otherwise |
|
10502
|
|
|
|
|
|
|
|
|
10503
|
8
|
|
|
|
|
14
|
my $i = $i_beg; |
|
10504
|
8
|
|
|
|
|
16
|
my $type = '<'; |
|
10505
|
8
|
|
|
|
|
39
|
pos($input_line) = 1 + $rtoken_map->[$i]; |
|
10506
|
|
|
|
|
|
|
|
|
10507
|
|
|
|
|
|
|
# The token sequence '><' implies a markup language |
|
10508
|
8
|
50
|
|
|
|
27
|
if ( $last_nonblank_token eq '>' ) { |
|
10509
|
0
|
|
|
|
|
0
|
$self->[_html_tag_count_]++; |
|
10510
|
|
|
|
|
|
|
} |
|
10511
|
|
|
|
|
|
|
|
|
10512
|
8
|
|
|
|
|
14
|
my $filter; |
|
10513
|
|
|
|
|
|
|
|
|
10514
|
8
|
|
|
|
|
16
|
my $expecting_TERM = $expecting == TERM; |
|
10515
|
|
|
|
|
|
|
|
|
10516
|
|
|
|
|
|
|
# we just have to find the next '>' if a term is expected |
|
10517
|
8
|
100
|
|
|
|
23
|
if ($expecting_TERM) { $filter = '[\>]' } |
|
|
6
|
50
|
|
|
|
13
|
|
|
10518
|
|
|
|
|
|
|
|
|
10519
|
|
|
|
|
|
|
# we have to guess if we don't know what is expected |
|
10520
|
2
|
|
|
|
|
3
|
elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } |
|
10521
|
|
|
|
|
|
|
|
|
10522
|
|
|
|
|
|
|
# shouldn't happen - we shouldn't be here if operator is expected |
|
10523
|
|
|
|
|
|
|
else { |
|
10524
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
|
10525
|
|
|
|
|
|
|
Fault(<<EOM); |
|
10526
|
|
|
|
|
|
|
Bad call to find_angle_operator_termination |
|
10527
|
|
|
|
|
|
|
EOM |
|
10528
|
|
|
|
|
|
|
} |
|
10529
|
0
|
|
|
|
|
0
|
return ( $i, $type ); |
|
10530
|
|
|
|
|
|
|
} |
|
10531
|
|
|
|
|
|
|
|
|
10532
|
|
|
|
|
|
|
# To illustrate what we might be looking at, in case we are |
|
10533
|
|
|
|
|
|
|
# guessing, here are some examples of valid angle operators |
|
10534
|
|
|
|
|
|
|
# (or file globs): |
|
10535
|
|
|
|
|
|
|
# <tmp_imp/*> |
|
10536
|
|
|
|
|
|
|
# <FH> |
|
10537
|
|
|
|
|
|
|
# <$fh> |
|
10538
|
|
|
|
|
|
|
# <*.c *.h> |
|
10539
|
|
|
|
|
|
|
# <_> |
|
10540
|
|
|
|
|
|
|
# <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t) |
|
10541
|
|
|
|
|
|
|
# <${PREFIX}*img*.$IMAGE_TYPE> |
|
10542
|
|
|
|
|
|
|
# <img*.$IMAGE_TYPE> |
|
10543
|
|
|
|
|
|
|
# <Timg*.$IMAGE_TYPE> |
|
10544
|
|
|
|
|
|
|
# <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> |
|
10545
|
|
|
|
|
|
|
# |
|
10546
|
|
|
|
|
|
|
# Here are some examples of lines which do not have angle operators: |
|
10547
|
|
|
|
|
|
|
# return unless $self->[2]++ < $#{$self->[1]}; |
|
10548
|
|
|
|
|
|
|
# < 2 || @$t > |
|
10549
|
|
|
|
|
|
|
# |
|
10550
|
|
|
|
|
|
|
# the following line from dlister.pl caused trouble: |
|
10551
|
|
|
|
|
|
|
# print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; |
|
10552
|
|
|
|
|
|
|
# |
|
10553
|
|
|
|
|
|
|
# If the '<' starts an angle operator, it must end on this line and |
|
10554
|
|
|
|
|
|
|
# it must not have certain characters like ';' and '=' in it. I use |
|
10555
|
|
|
|
|
|
|
# this to limit the testing. This filter should be improved if |
|
10556
|
|
|
|
|
|
|
# possible. |
|
10557
|
|
|
|
|
|
|
|
|
10558
|
8
|
50
|
|
|
|
190
|
if ( $input_line =~ /($filter)/g ) { |
|
10559
|
|
|
|
|
|
|
|
|
10560
|
8
|
50
|
|
|
|
35
|
if ( $1 eq '>' ) { |
|
10561
|
|
|
|
|
|
|
|
|
10562
|
|
|
|
|
|
|
# We MAY have found an angle operator termination if we get |
|
10563
|
|
|
|
|
|
|
# here, but we need to do more to be sure we haven't been |
|
10564
|
|
|
|
|
|
|
# fooled. |
|
10565
|
8
|
|
|
|
|
15
|
my $pos = pos($input_line); |
|
10566
|
|
|
|
|
|
|
|
|
10567
|
8
|
|
|
|
|
16
|
my $pos_beg = $rtoken_map->[$i]; |
|
10568
|
8
|
|
|
|
|
25
|
my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); |
|
10569
|
|
|
|
|
|
|
|
|
10570
|
|
|
|
|
|
|
# Test for '<' after possible filehandle, issue c103 |
|
10571
|
|
|
|
|
|
|
# print $fh <>; # syntax error |
|
10572
|
|
|
|
|
|
|
# print $fh <DATA>; # ok |
|
10573
|
|
|
|
|
|
|
# print $fh < DATA>; # syntax error at '>' |
|
10574
|
|
|
|
|
|
|
# print STDERR < DATA>; # ok, prints word 'DATA' |
|
10575
|
|
|
|
|
|
|
# print BLABLA <DATA>; # ok; does nothing unless BLABLA is defined |
|
10576
|
8
|
100
|
|
|
|
21
|
if ( $last_nonblank_type eq 'Z' ) { |
|
10577
|
|
|
|
|
|
|
|
|
10578
|
|
|
|
|
|
|
# $str includes brackets; something like '<DATA>' |
|
10579
|
1
|
0
|
33
|
|
|
10
|
if ( substr( $last_nonblank_token, 0, 1 ) !~ /[A-Za-z_]/ |
|
10580
|
|
|
|
|
|
|
&& substr( $str, 1, 1 ) !~ /[A-Za-z_]/ ) |
|
10581
|
|
|
|
|
|
|
{ |
|
10582
|
0
|
|
|
|
|
0
|
return ( $i, $type ); |
|
10583
|
|
|
|
|
|
|
} |
|
10584
|
|
|
|
|
|
|
} |
|
10585
|
|
|
|
|
|
|
|
|
10586
|
|
|
|
|
|
|
# Reject if the closing '>' follows a '-' as in: |
|
10587
|
|
|
|
|
|
|
# if ( VERSION < 5.009 && $op-> name eq 'assign' ) { } |
|
10588
|
8
|
100
|
|
|
|
25
|
if ( $expecting eq UNKNOWN ) { |
|
10589
|
2
|
|
|
|
|
3
|
my $check = substr( $input_line, $pos - 2, 1 ); |
|
10590
|
2
|
100
|
|
|
|
4
|
if ( $check eq '-' ) { |
|
10591
|
1
|
|
|
|
|
4
|
return ( $i, $type ); |
|
10592
|
|
|
|
|
|
|
} |
|
10593
|
|
|
|
|
|
|
} |
|
10594
|
|
|
|
|
|
|
|
|
10595
|
|
|
|
|
|
|
######################################debug##### |
|
10596
|
|
|
|
|
|
|
#$self->write_diagnostics( "ANGLE? :$str\n"); |
|
10597
|
|
|
|
|
|
|
#print "ANGLE: found $1 at pos=$pos str=$str check=$check\n"; |
|
10598
|
|
|
|
|
|
|
######################################debug##### |
|
10599
|
7
|
|
|
|
|
14
|
$type = 'Q'; |
|
10600
|
7
|
|
|
|
|
12
|
my $error; |
|
10601
|
7
|
|
|
|
|
22
|
( $i, $error ) = |
|
10602
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
|
10603
|
|
|
|
|
|
|
|
|
10604
|
|
|
|
|
|
|
# It may be possible that a quote ends midway in a pretoken. |
|
10605
|
|
|
|
|
|
|
# If this happens, it may be necessary to split the pretoken. |
|
10606
|
7
|
50
|
|
|
|
24
|
if ($error) { |
|
10607
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
|
10608
|
|
|
|
|
|
|
Fault(<<EOM); |
|
10609
|
|
|
|
|
|
|
unexpected error condition returned by inverse_pretoken_map |
|
10610
|
|
|
|
|
|
|
EOM |
|
10611
|
|
|
|
|
|
|
} |
|
10612
|
|
|
|
|
|
|
$self->warning( |
|
10613
|
0
|
|
|
|
|
0
|
"Possible tokenization error..please check this line\n"); |
|
10614
|
|
|
|
|
|
|
} |
|
10615
|
|
|
|
|
|
|
|
|
10616
|
|
|
|
|
|
|
# Check for accidental formatting of a markup language doc... |
|
10617
|
|
|
|
|
|
|
# Formatting will be skipped if we set _html_tag_count_ and |
|
10618
|
|
|
|
|
|
|
# also set a warning of any kind. |
|
10619
|
7
|
|
|
|
|
11
|
my $is_html_tag; |
|
10620
|
7
|
|
33
|
|
|
23
|
my $is_first_string = |
|
10621
|
|
|
|
|
|
|
$i_beg == 0 && $self->[_last_line_number_] == 1; |
|
10622
|
|
|
|
|
|
|
|
|
10623
|
|
|
|
|
|
|
# html comment '<!...' of any type |
|
10624
|
7
|
50
|
33
|
|
|
158
|
if ( $str =~ /^<\s*!/ ) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
10625
|
0
|
|
|
|
|
0
|
$is_html_tag = 1; |
|
10626
|
0
|
0
|
|
|
|
0
|
if ($is_first_string) { |
|
10627
|
0
|
|
|
|
|
0
|
$self->warning( |
|
10628
|
|
|
|
|
|
|
"looks like a markup language, continuing error checks\n" |
|
10629
|
|
|
|
|
|
|
); |
|
10630
|
|
|
|
|
|
|
} |
|
10631
|
|
|
|
|
|
|
} |
|
10632
|
|
|
|
|
|
|
|
|
10633
|
|
|
|
|
|
|
# html end tag, something like </h1> |
|
10634
|
|
|
|
|
|
|
elsif ( $str =~ /^<\s*\/\w+\s*>$/ ) { |
|
10635
|
0
|
|
|
|
|
0
|
$is_html_tag = 1; |
|
10636
|
|
|
|
|
|
|
} |
|
10637
|
|
|
|
|
|
|
|
|
10638
|
|
|
|
|
|
|
# xml prolog? |
|
10639
|
|
|
|
|
|
|
elsif ( $str =~ /^<\?xml\s.*\?>$/i && $is_first_string ) { |
|
10640
|
0
|
|
|
|
|
0
|
$is_html_tag = 1; |
|
10641
|
0
|
|
|
|
|
0
|
$self->warning( |
|
10642
|
|
|
|
|
|
|
"looks like a markup language, continuing error checks\n"); |
|
10643
|
|
|
|
|
|
|
} |
|
10644
|
|
|
|
|
|
|
else { |
|
10645
|
|
|
|
|
|
|
## doesn't look like a markup tag |
|
10646
|
|
|
|
|
|
|
} |
|
10647
|
|
|
|
|
|
|
|
|
10648
|
7
|
50
|
|
|
|
22
|
if ($is_html_tag) { |
|
10649
|
0
|
|
|
|
|
0
|
$self->[_html_tag_count_]++; |
|
10650
|
|
|
|
|
|
|
} |
|
10651
|
|
|
|
|
|
|
|
|
10652
|
|
|
|
|
|
|
# count blanks on inside of brackets |
|
10653
|
7
|
|
|
|
|
15
|
my $blank_count = 0; |
|
10654
|
7
|
100
|
|
|
|
31
|
$blank_count++ if ( $str =~ /<\s+/ ); |
|
10655
|
7
|
100
|
|
|
|
30
|
$blank_count++ if ( $str =~ /\s+>/ ); |
|
10656
|
|
|
|
|
|
|
|
|
10657
|
|
|
|
|
|
|
# Now let's see where we stand.... |
|
10658
|
|
|
|
|
|
|
# OK if math op not possible |
|
10659
|
7
|
100
|
|
|
|
26
|
if ($expecting_TERM) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10660
|
|
|
|
|
|
|
} |
|
10661
|
|
|
|
|
|
|
|
|
10662
|
|
|
|
|
|
|
elsif ($is_html_tag) { |
|
10663
|
|
|
|
|
|
|
} |
|
10664
|
|
|
|
|
|
|
|
|
10665
|
|
|
|
|
|
|
# OK if there are no more than 2 non-blank pre-tokens inside |
|
10666
|
|
|
|
|
|
|
# (not possible to write 2 token math between < and >) |
|
10667
|
|
|
|
|
|
|
# This catches most common cases |
|
10668
|
|
|
|
|
|
|
elsif ( $i <= $i_beg + 3 + $blank_count ) { |
|
10669
|
|
|
|
|
|
|
|
|
10670
|
|
|
|
|
|
|
# No longer any need to document this common case |
|
10671
|
|
|
|
|
|
|
## $self->write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); |
|
10672
|
|
|
|
|
|
|
} |
|
10673
|
|
|
|
|
|
|
|
|
10674
|
|
|
|
|
|
|
# OK if there is some kind of identifier inside |
|
10675
|
|
|
|
|
|
|
# print $fh <tvg::INPUT>; |
|
10676
|
|
|
|
|
|
|
elsif ( $str =~ /^<\s*\$?(\w|::|\s)+\s*>$/ ) { |
|
10677
|
0
|
|
|
|
|
0
|
$self->write_diagnostics("ANGLE (contains identifier): $str\n"); |
|
10678
|
|
|
|
|
|
|
} |
|
10679
|
|
|
|
|
|
|
|
|
10680
|
|
|
|
|
|
|
# Not sure.. |
|
10681
|
|
|
|
|
|
|
else { |
|
10682
|
|
|
|
|
|
|
|
|
10683
|
|
|
|
|
|
|
# Let's try a Brace Test: any braces inside must balance |
|
10684
|
0
|
|
|
|
|
0
|
my $br = $str =~ tr/\{/{/ - $str =~ tr/\}/}/; |
|
10685
|
0
|
|
|
|
|
0
|
my $sb = $str =~ tr/\[/[/ - $str =~ tr/\]/]/; |
|
10686
|
0
|
|
|
|
|
0
|
my $pr = $str =~ tr/\(/(/ - $str =~ tr/\)/)/; |
|
10687
|
|
|
|
|
|
|
|
|
10688
|
|
|
|
|
|
|
# if braces do not balance - not angle operator |
|
10689
|
0
|
0
|
0
|
|
|
0
|
if ( $br || $sb || $pr ) { |
|
|
|
|
0
|
|
|
|
|
|
10690
|
0
|
|
|
|
|
0
|
$i = $i_beg; |
|
10691
|
0
|
|
|
|
|
0
|
$type = '<'; |
|
10692
|
0
|
|
|
|
|
0
|
$self->write_diagnostics( |
|
10693
|
|
|
|
|
|
|
"NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); |
|
10694
|
|
|
|
|
|
|
} |
|
10695
|
|
|
|
|
|
|
|
|
10696
|
|
|
|
|
|
|
# we should keep doing more checks here...to be continued |
|
10697
|
|
|
|
|
|
|
# Tentatively accepting this as a valid angle operator. |
|
10698
|
|
|
|
|
|
|
# There are lots more things that can be checked. |
|
10699
|
|
|
|
|
|
|
else { |
|
10700
|
0
|
|
|
|
|
0
|
$self->write_diagnostics( |
|
10701
|
|
|
|
|
|
|
"ANGLE-Guessing yes: $str expecting=$expecting\n"); |
|
10702
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
|
10703
|
|
|
|
|
|
|
"Guessing angle operator here: $str\n"); |
|
10704
|
|
|
|
|
|
|
} |
|
10705
|
|
|
|
|
|
|
} |
|
10706
|
|
|
|
|
|
|
} |
|
10707
|
|
|
|
|
|
|
|
|
10708
|
|
|
|
|
|
|
# didn't find ending > |
|
10709
|
|
|
|
|
|
|
else { |
|
10710
|
0
|
0
|
|
|
|
0
|
if ($expecting_TERM) { |
|
10711
|
0
|
|
|
|
|
0
|
$self->warning("No ending > for angle operator\n"); |
|
10712
|
|
|
|
|
|
|
} |
|
10713
|
|
|
|
|
|
|
} |
|
10714
|
|
|
|
|
|
|
} |
|
10715
|
7
|
|
|
|
|
27
|
return ( $i, $type ); |
|
10716
|
|
|
|
|
|
|
} ## end sub find_angle_operator_termination |
|
10717
|
|
|
|
|
|
|
|
|
10718
|
|
|
|
|
|
|
sub scan_number_do { |
|
10719
|
|
|
|
|
|
|
|
|
10720
|
683
|
|
|
683
|
0
|
1488
|
my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = |
|
10721
|
|
|
|
|
|
|
@_; |
|
10722
|
|
|
|
|
|
|
|
|
10723
|
|
|
|
|
|
|
# Scan a number in any of the formats that Perl accepts |
|
10724
|
|
|
|
|
|
|
# Underbars (_) are allowed in decimal numbers. |
|
10725
|
|
|
|
|
|
|
# Given: |
|
10726
|
|
|
|
|
|
|
# $input_line - the string to scan |
|
10727
|
|
|
|
|
|
|
# $i - pre_token index to start scanning |
|
10728
|
|
|
|
|
|
|
# $rtoken_map - reference to the pre_token map giving starting |
|
10729
|
|
|
|
|
|
|
# character position in $input_line of token $i |
|
10730
|
|
|
|
|
|
|
# Return: |
|
10731
|
|
|
|
|
|
|
# $i - last pre_token index of the number just scanned |
|
10732
|
|
|
|
|
|
|
# $type - the token type ('v' or 'n') |
|
10733
|
|
|
|
|
|
|
# number - the number (characters); or undef if not a number |
|
10734
|
|
|
|
|
|
|
|
|
10735
|
683
|
|
|
|
|
1087
|
my $pos_beg = $rtoken_map->[$i]; |
|
10736
|
683
|
|
|
|
|
848
|
my $pos; |
|
10737
|
|
|
|
|
|
|
##my $i_begin = $i; |
|
10738
|
683
|
|
|
|
|
999
|
my $number = undef; |
|
10739
|
683
|
|
|
|
|
864
|
my $type = $input_type; |
|
10740
|
|
|
|
|
|
|
|
|
10741
|
683
|
|
|
|
|
1363
|
my $first_char = substr( $input_line, $pos_beg, 1 ); |
|
10742
|
|
|
|
|
|
|
|
|
10743
|
|
|
|
|
|
|
# Look for bad starting characters; Shouldn't happen.. |
|
10744
|
683
|
50
|
|
|
|
2841
|
if ( $first_char !~ /[\d\.\+\-Ee]/ ) { |
|
10745
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
|
10746
|
|
|
|
|
|
|
Fault(<<EOM); |
|
10747
|
|
|
|
|
|
|
Program bug - scan_number given bad first character = '$first_char' |
|
10748
|
|
|
|
|
|
|
EOM |
|
10749
|
|
|
|
|
|
|
} |
|
10750
|
0
|
|
|
|
|
0
|
return ( $i, $type, $number ); |
|
10751
|
|
|
|
|
|
|
} |
|
10752
|
|
|
|
|
|
|
|
|
10753
|
|
|
|
|
|
|
# handle v-string without leading 'v' character ('Two Dot' rule) |
|
10754
|
|
|
|
|
|
|
# (vstring.t) |
|
10755
|
|
|
|
|
|
|
# Here is the format prior to including underscores: |
|
10756
|
|
|
|
|
|
|
## if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { |
|
10757
|
683
|
|
|
|
|
1828
|
pos($input_line) = $pos_beg; |
|
10758
|
683
|
50
|
|
|
|
2996
|
if ( $input_line =~ /\G((\d[_\d]*)?\.[\d_]+(\.[\d_]+)+)/g ) { |
|
10759
|
0
|
|
|
|
|
0
|
$pos = pos($input_line); |
|
10760
|
0
|
|
|
|
|
0
|
my $numc = $pos - $pos_beg; |
|
10761
|
0
|
|
|
|
|
0
|
$number = substr( $input_line, $pos_beg, $numc ); |
|
10762
|
0
|
|
|
|
|
0
|
$type = 'v'; |
|
10763
|
|
|
|
|
|
|
} |
|
10764
|
|
|
|
|
|
|
|
|
10765
|
|
|
|
|
|
|
# handle octal, hex, binary |
|
10766
|
683
|
50
|
|
|
|
1408
|
if ( !defined($number) ) { |
|
10767
|
683
|
|
|
|
|
1129
|
pos($input_line) = $pos_beg; |
|
10768
|
|
|
|
|
|
|
|
|
10769
|
|
|
|
|
|
|
# Perl 5.22 added floating point literals, like '0x0.b17217f7d1cf78p0' |
|
10770
|
|
|
|
|
|
|
# For reference, the format prior to hex floating point is: |
|
10771
|
|
|
|
|
|
|
# /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g ) |
|
10772
|
|
|
|
|
|
|
# (hex) (octal) (binary) |
|
10773
|
683
|
100
|
|
|
|
2157
|
if ( |
|
10774
|
|
|
|
|
|
|
$input_line =~ m{ |
|
10775
|
|
|
|
|
|
|
|
|
10776
|
|
|
|
|
|
|
\G[+-]?0( # leading [signed] 0 |
|
10777
|
|
|
|
|
|
|
|
|
10778
|
|
|
|
|
|
|
# a hex float, i.e. '0x0.b17217f7d1cf78p0' |
|
10779
|
|
|
|
|
|
|
([xX][0-9a-fA-F_]* # X and optional leading digits |
|
10780
|
|
|
|
|
|
|
(\.([0-9a-fA-F][0-9a-fA-F_]*)?)? # optional decimal and fraction |
|
10781
|
|
|
|
|
|
|
[Pp][+-]?[0-9a-fA-F] # REQUIRED exponent with digit |
|
10782
|
|
|
|
|
|
|
[0-9a-fA-F_]*) # optional Additional exponent digits |
|
10783
|
|
|
|
|
|
|
|
|
10784
|
|
|
|
|
|
|
# or hex integer |
|
10785
|
|
|
|
|
|
|
|([xX][0-9a-fA-F_]+) |
|
10786
|
|
|
|
|
|
|
|
|
10787
|
|
|
|
|
|
|
# or octal fraction |
|
10788
|
|
|
|
|
|
|
|([oO]?[0-7_]+ # string of octal digits |
|
10789
|
|
|
|
|
|
|
(\.([0-7][0-7_]*)?)? # optional decimal and fraction |
|
10790
|
|
|
|
|
|
|
[Pp][+-]?[0-7] # REQUIRED exponent, no underscore |
|
10791
|
|
|
|
|
|
|
[0-7_]*) # Additional exponent digits with underscores |
|
10792
|
|
|
|
|
|
|
|
|
10793
|
|
|
|
|
|
|
# or octal integer |
|
10794
|
|
|
|
|
|
|
|([oO]?[0-7_]+) # string of octal digits |
|
10795
|
|
|
|
|
|
|
|
|
10796
|
|
|
|
|
|
|
# or a binary float |
|
10797
|
|
|
|
|
|
|
|([bB][01_]* # 'b' with string of binary digits |
|
10798
|
|
|
|
|
|
|
(\.([01][01_]*)?)? # optional decimal and fraction |
|
10799
|
|
|
|
|
|
|
[Pp][+-]?[01] # Required exponent indicator, no underscore |
|
10800
|
|
|
|
|
|
|
[01_]*) # additional exponent bits |
|
10801
|
|
|
|
|
|
|
|
|
10802
|
|
|
|
|
|
|
# or binary integer |
|
10803
|
|
|
|
|
|
|
|([bB][01_]+) # 'b' with string of binary digits |
|
10804
|
|
|
|
|
|
|
|
|
10805
|
|
|
|
|
|
|
)}gx |
|
10806
|
|
|
|
|
|
|
) |
|
10807
|
|
|
|
|
|
|
{ |
|
10808
|
72
|
|
|
|
|
103
|
$pos = pos($input_line); |
|
10809
|
72
|
|
|
|
|
133
|
my $numc = $pos - $pos_beg; |
|
10810
|
72
|
|
|
|
|
114
|
$number = substr( $input_line, $pos_beg, $numc ); |
|
10811
|
72
|
|
|
|
|
115
|
$type = 'n'; |
|
10812
|
|
|
|
|
|
|
} |
|
10813
|
|
|
|
|
|
|
} |
|
10814
|
|
|
|
|
|
|
|
|
10815
|
|
|
|
|
|
|
# handle decimal |
|
10816
|
683
|
100
|
|
|
|
1301
|
if ( !defined($number) ) { |
|
10817
|
611
|
|
|
|
|
944
|
pos($input_line) = $pos_beg; |
|
10818
|
|
|
|
|
|
|
|
|
10819
|
611
|
50
|
|
|
|
2570
|
if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { |
|
10820
|
611
|
|
|
|
|
876
|
$pos = pos($input_line); |
|
10821
|
|
|
|
|
|
|
|
|
10822
|
|
|
|
|
|
|
# watch out for things like 0..40 which would give 0. by this; |
|
10823
|
611
|
100
|
100
|
|
|
1869
|
if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) |
|
10824
|
|
|
|
|
|
|
&& ( substr( $input_line, $pos, 1 ) eq '.' ) ) |
|
10825
|
|
|
|
|
|
|
{ |
|
10826
|
38
|
|
|
|
|
50
|
$pos--; |
|
10827
|
|
|
|
|
|
|
} |
|
10828
|
611
|
|
|
|
|
798
|
my $numc = $pos - $pos_beg; |
|
10829
|
611
|
|
|
|
|
931
|
$number = substr( $input_line, $pos_beg, $numc ); |
|
10830
|
611
|
|
|
|
|
914
|
$type = 'n'; |
|
10831
|
|
|
|
|
|
|
} |
|
10832
|
|
|
|
|
|
|
} |
|
10833
|
|
|
|
|
|
|
|
|
10834
|
|
|
|
|
|
|
# filter out non-numbers like e + - . e2 .e3 +e6 |
|
10835
|
|
|
|
|
|
|
# the rule: at least one digit, and any 'e' must be preceded by a digit |
|
10836
|
683
|
100
|
66
|
|
|
3027
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
10837
|
|
|
|
|
|
|
$number !~ /\d/ # no digits |
|
10838
|
|
|
|
|
|
|
|| ( $number =~ /^(.*)[eE]/ |
|
10839
|
|
|
|
|
|
|
&& $1 !~ /\d/ ) # or no digits before the 'e' |
|
10840
|
|
|
|
|
|
|
) |
|
10841
|
|
|
|
|
|
|
{ |
|
10842
|
304
|
|
|
|
|
467
|
$number = undef; |
|
10843
|
304
|
|
|
|
|
399
|
$type = $input_type; |
|
10844
|
304
|
|
|
|
|
1081
|
return ( $i, $type, $number ); |
|
10845
|
|
|
|
|
|
|
} |
|
10846
|
|
|
|
|
|
|
|
|
10847
|
|
|
|
|
|
|
# Found a number; now we must convert back from character position |
|
10848
|
|
|
|
|
|
|
# to pre_token index. An error here implies user syntax error. |
|
10849
|
|
|
|
|
|
|
# An example would be an invalid octal number like '009'. |
|
10850
|
379
|
|
|
|
|
519
|
my $error; |
|
10851
|
379
|
|
|
|
|
754
|
( $i, $error ) = |
|
10852
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
|
10853
|
379
|
50
|
|
|
|
709
|
if ($error) { $self->warning("Possibly invalid number\n") } |
|
|
0
|
|
|
|
|
0
|
|
|
10854
|
|
|
|
|
|
|
|
|
10855
|
379
|
|
|
|
|
1327
|
return ( $i, $type, $number ); |
|
10856
|
|
|
|
|
|
|
} ## end sub scan_number_do |
|
10857
|
|
|
|
|
|
|
|
|
10858
|
|
|
|
|
|
|
sub inverse_pretoken_map { |
|
10859
|
|
|
|
|
|
|
|
|
10860
|
|
|
|
|
|
|
# Starting with the current pre_token index $i, scan forward until |
|
10861
|
|
|
|
|
|
|
# finding the index of the next pre_token whose position is $pos. |
|
10862
|
2470
|
|
|
2470
|
0
|
4736
|
my ( $i, $pos, $rtoken_map, $max_token_index ) = @_; |
|
10863
|
2470
|
|
|
|
|
3437
|
my $error = 0; |
|
10864
|
|
|
|
|
|
|
|
|
10865
|
2470
|
|
|
|
|
5200
|
while ( ++$i <= $max_token_index ) { |
|
10866
|
|
|
|
|
|
|
|
|
10867
|
4631
|
100
|
|
|
|
8686
|
if ( $pos <= $rtoken_map->[$i] ) { |
|
10868
|
|
|
|
|
|
|
|
|
10869
|
|
|
|
|
|
|
# Let the calling routine handle errors in which we do not |
|
10870
|
|
|
|
|
|
|
# land on a pre-token boundary. It can happen by running |
|
10871
|
|
|
|
|
|
|
# perltidy on some non-perl scripts, for example. |
|
10872
|
2426
|
50
|
|
|
|
4695
|
if ( $pos < $rtoken_map->[$i] ) { $error = 1 } |
|
|
0
|
|
|
|
|
0
|
|
|
10873
|
2426
|
|
|
|
|
3097
|
$i--; |
|
10874
|
2426
|
|
|
|
|
3651
|
last; |
|
10875
|
|
|
|
|
|
|
} |
|
10876
|
|
|
|
|
|
|
} ## end while ( ++$i <= $max_token_index) |
|
10877
|
2470
|
|
|
|
|
5173
|
return ( $i, $error ); |
|
10878
|
|
|
|
|
|
|
} ## end sub inverse_pretoken_map |
|
10879
|
|
|
|
|
|
|
|
|
10880
|
|
|
|
|
|
|
sub find_here_doc { |
|
10881
|
|
|
|
|
|
|
|
|
10882
|
|
|
|
|
|
|
my ( |
|
10883
|
|
|
|
|
|
|
|
|
10884
|
9
|
|
|
9
|
0
|
25
|
$self, |
|
10885
|
|
|
|
|
|
|
|
|
10886
|
|
|
|
|
|
|
$expecting, |
|
10887
|
|
|
|
|
|
|
$i, |
|
10888
|
|
|
|
|
|
|
$rtokens, |
|
10889
|
|
|
|
|
|
|
$rtoken_type, |
|
10890
|
|
|
|
|
|
|
$rtoken_map_uu, |
|
10891
|
|
|
|
|
|
|
$max_token_index, |
|
10892
|
|
|
|
|
|
|
|
|
10893
|
|
|
|
|
|
|
) = @_; |
|
10894
|
|
|
|
|
|
|
|
|
10895
|
|
|
|
|
|
|
# Find the target of a here document, if any |
|
10896
|
|
|
|
|
|
|
# Given: |
|
10897
|
|
|
|
|
|
|
# $i - token index of the second < of << |
|
10898
|
|
|
|
|
|
|
# ($i must be less than the last token index if this is called) |
|
10899
|
|
|
|
|
|
|
# Return: |
|
10900
|
|
|
|
|
|
|
# $found_target = 0 didn't find target; =1 found target |
|
10901
|
|
|
|
|
|
|
# HERE_TARGET - the target string (may be empty string) |
|
10902
|
|
|
|
|
|
|
# $i - unchanged if not here doc, |
|
10903
|
|
|
|
|
|
|
# or index of the last token of the here target |
|
10904
|
|
|
|
|
|
|
# $saw_error - flag noting unbalanced quote on here target |
|
10905
|
9
|
|
|
|
|
18
|
my $ibeg = $i; |
|
10906
|
9
|
|
|
|
|
18
|
my $found_target = 0; |
|
10907
|
9
|
|
|
|
|
19
|
my $here_doc_target = EMPTY_STRING; |
|
10908
|
9
|
|
|
|
|
18
|
my $here_quote_character = EMPTY_STRING; |
|
10909
|
9
|
|
|
|
|
14
|
my $saw_error = 0; |
|
10910
|
9
|
|
|
|
|
20
|
my ( $next_nonblank_token, $i_next_nonblank, $next_token ); |
|
10911
|
9
|
|
|
|
|
21
|
$next_token = $rtokens->[ $i + 1 ]; |
|
10912
|
|
|
|
|
|
|
|
|
10913
|
|
|
|
|
|
|
# perl allows a backslash before the target string (heredoc.t) |
|
10914
|
9
|
|
|
|
|
18
|
my $backslash = 0; |
|
10915
|
9
|
50
|
|
|
|
33
|
if ( $next_token eq BACKSLASH ) { |
|
10916
|
0
|
|
|
|
|
0
|
$backslash = 1; |
|
10917
|
0
|
|
|
|
|
0
|
$next_token = $rtokens->[ $i + 2 ]; |
|
10918
|
|
|
|
|
|
|
} |
|
10919
|
|
|
|
|
|
|
|
|
10920
|
9
|
|
|
|
|
38
|
( $next_nonblank_token, $i_next_nonblank ) = |
|
10921
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); |
|
10922
|
|
|
|
|
|
|
|
|
10923
|
9
|
100
|
33
|
|
|
57
|
if ( $next_nonblank_token =~ /[\'\"\`]/ ) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
10924
|
|
|
|
|
|
|
|
|
10925
|
6
|
|
|
|
|
12
|
my $in_quote = 1; |
|
10926
|
6
|
|
|
|
|
10
|
my $quote_depth = 0; |
|
10927
|
6
|
|
|
|
|
12
|
my $quote_pos = 0; |
|
10928
|
6
|
|
|
|
|
7
|
my $quoted_string; |
|
10929
|
|
|
|
|
|
|
|
|
10930
|
|
|
|
|
|
|
( |
|
10931
|
|
|
|
|
|
|
|
|
10932
|
6
|
|
|
|
|
26
|
$i, |
|
10933
|
|
|
|
|
|
|
$in_quote, |
|
10934
|
|
|
|
|
|
|
$here_quote_character, |
|
10935
|
|
|
|
|
|
|
$quote_pos, |
|
10936
|
|
|
|
|
|
|
$quote_depth, |
|
10937
|
|
|
|
|
|
|
$quoted_string, |
|
10938
|
|
|
|
|
|
|
) |
|
10939
|
|
|
|
|
|
|
= $self->follow_quoted_string( |
|
10940
|
|
|
|
|
|
|
|
|
10941
|
|
|
|
|
|
|
$i_next_nonblank, |
|
10942
|
|
|
|
|
|
|
$in_quote, |
|
10943
|
|
|
|
|
|
|
$rtokens, |
|
10944
|
|
|
|
|
|
|
$rtoken_type, |
|
10945
|
|
|
|
|
|
|
$here_quote_character, |
|
10946
|
|
|
|
|
|
|
$quote_pos, |
|
10947
|
|
|
|
|
|
|
$quote_depth, |
|
10948
|
|
|
|
|
|
|
$max_token_index, |
|
10949
|
|
|
|
|
|
|
); |
|
10950
|
|
|
|
|
|
|
|
|
10951
|
6
|
50
|
|
|
|
17
|
if ($in_quote) { # didn't find end of quote, so no target found |
|
10952
|
0
|
|
|
|
|
0
|
$i = $ibeg; |
|
10953
|
0
|
0
|
|
|
|
0
|
if ( $expecting == TERM ) { |
|
10954
|
0
|
|
|
|
|
0
|
$self->warning( |
|
10955
|
|
|
|
|
|
|
"Did not find here-doc string terminator ($here_quote_character) before end of line \n" |
|
10956
|
|
|
|
|
|
|
); |
|
10957
|
0
|
|
|
|
|
0
|
$saw_error = 1; |
|
10958
|
|
|
|
|
|
|
} |
|
10959
|
|
|
|
|
|
|
} |
|
10960
|
|
|
|
|
|
|
else { # found ending quote |
|
10961
|
6
|
|
|
|
|
11
|
$found_target = 1; |
|
10962
|
|
|
|
|
|
|
|
|
10963
|
6
|
|
|
|
|
11
|
my $tokj; |
|
10964
|
6
|
|
|
|
|
22
|
foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) { |
|
10965
|
6
|
|
|
|
|
13
|
$tokj = $rtokens->[$j]; |
|
10966
|
|
|
|
|
|
|
|
|
10967
|
|
|
|
|
|
|
# we have to remove any backslash before the quote character |
|
10968
|
|
|
|
|
|
|
# so that the here-doc-target exactly matches this string |
|
10969
|
|
|
|
|
|
|
next |
|
10970
|
6
|
0
|
33
|
|
|
24
|
if ( $tokj eq BACKSLASH |
|
|
|
|
33
|
|
|
|
|
|
10971
|
|
|
|
|
|
|
&& $j < $i - 1 |
|
10972
|
|
|
|
|
|
|
&& $rtokens->[ $j + 1 ] eq $here_quote_character ); |
|
10973
|
6
|
|
|
|
|
16
|
$here_doc_target .= $tokj; |
|
10974
|
|
|
|
|
|
|
} |
|
10975
|
|
|
|
|
|
|
} |
|
10976
|
|
|
|
|
|
|
} |
|
10977
|
|
|
|
|
|
|
|
|
10978
|
|
|
|
|
|
|
elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { |
|
10979
|
0
|
|
|
|
|
0
|
$found_target = 1; |
|
10980
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
|
10981
|
|
|
|
|
|
|
"found blank here-target after <<; suggest using \"\"\n"); |
|
10982
|
0
|
|
|
|
|
0
|
$i = $ibeg; |
|
10983
|
|
|
|
|
|
|
} |
|
10984
|
|
|
|
|
|
|
elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << |
|
10985
|
|
|
|
|
|
|
|
|
10986
|
3
|
|
|
|
|
7
|
my $here_doc_expected; |
|
10987
|
3
|
50
|
|
|
|
25
|
if ( $expecting == UNKNOWN ) { |
|
10988
|
0
|
|
|
|
|
0
|
$here_doc_expected = $self->guess_if_here_doc($next_token); |
|
10989
|
|
|
|
|
|
|
} |
|
10990
|
|
|
|
|
|
|
else { |
|
10991
|
3
|
|
|
|
|
8
|
$here_doc_expected = 1; |
|
10992
|
|
|
|
|
|
|
} |
|
10993
|
|
|
|
|
|
|
|
|
10994
|
3
|
50
|
|
|
|
10
|
if ($here_doc_expected) { |
|
10995
|
3
|
|
|
|
|
6
|
$found_target = 1; |
|
10996
|
3
|
|
|
|
|
5
|
$here_doc_target = $next_token; |
|
10997
|
3
|
|
|
|
|
6
|
$i = $ibeg + 1; |
|
10998
|
|
|
|
|
|
|
} |
|
10999
|
|
|
|
|
|
|
|
|
11000
|
|
|
|
|
|
|
} |
|
11001
|
|
|
|
|
|
|
else { |
|
11002
|
|
|
|
|
|
|
|
|
11003
|
0
|
0
|
|
|
|
0
|
if ( $expecting == TERM ) { |
|
11004
|
0
|
|
|
|
|
0
|
$found_target = 1; |
|
11005
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("Note: bare here-doc operator <<\n"); |
|
11006
|
|
|
|
|
|
|
} |
|
11007
|
|
|
|
|
|
|
else { |
|
11008
|
0
|
|
|
|
|
0
|
$i = $ibeg; |
|
11009
|
|
|
|
|
|
|
} |
|
11010
|
|
|
|
|
|
|
} |
|
11011
|
|
|
|
|
|
|
|
|
11012
|
|
|
|
|
|
|
# patch to neglect any prepended backslash |
|
11013
|
9
|
50
|
33
|
|
|
50
|
if ( $found_target && $backslash ) { $i++ } |
|
|
0
|
|
|
|
|
0
|
|
|
11014
|
|
|
|
|
|
|
|
|
11015
|
9
|
|
|
|
|
48
|
return ( $found_target, $here_doc_target, $here_quote_character, $i, |
|
11016
|
|
|
|
|
|
|
$saw_error ); |
|
11017
|
|
|
|
|
|
|
} ## end sub find_here_doc |
|
11018
|
|
|
|
|
|
|
|
|
11019
|
|
|
|
|
|
|
sub do_quote { |
|
11020
|
|
|
|
|
|
|
|
|
11021
|
|
|
|
|
|
|
my ( |
|
11022
|
|
|
|
|
|
|
|
|
11023
|
3170
|
|
|
3170
|
0
|
8218
|
$self, |
|
11024
|
|
|
|
|
|
|
|
|
11025
|
|
|
|
|
|
|
$i, |
|
11026
|
|
|
|
|
|
|
$in_quote, |
|
11027
|
|
|
|
|
|
|
$quote_character, |
|
11028
|
|
|
|
|
|
|
$quote_pos, |
|
11029
|
|
|
|
|
|
|
$quote_depth, |
|
11030
|
|
|
|
|
|
|
$quoted_string_1, |
|
11031
|
|
|
|
|
|
|
$quoted_string_2, |
|
11032
|
|
|
|
|
|
|
$rtokens, |
|
11033
|
|
|
|
|
|
|
$rtoken_type, |
|
11034
|
|
|
|
|
|
|
$rtoken_map_uu, |
|
11035
|
|
|
|
|
|
|
$max_token_index, |
|
11036
|
|
|
|
|
|
|
|
|
11037
|
|
|
|
|
|
|
) = @_; |
|
11038
|
|
|
|
|
|
|
|
|
11039
|
|
|
|
|
|
|
# Follow (or continue following) quoted string(s) |
|
11040
|
|
|
|
|
|
|
# $in_quote = return code: |
|
11041
|
|
|
|
|
|
|
# 0 - ok, found end |
|
11042
|
|
|
|
|
|
|
# 1 - still must find end of quote whose target is $quote_character |
|
11043
|
|
|
|
|
|
|
# 2 - still looking for end of first of two quotes |
|
11044
|
|
|
|
|
|
|
# |
|
11045
|
|
|
|
|
|
|
# Returns updated strings: |
|
11046
|
|
|
|
|
|
|
# $quoted_string_1 = quoted string seen while in_quote=1 |
|
11047
|
|
|
|
|
|
|
# $quoted_string_2 = quoted string seen while in_quote=2 |
|
11048
|
|
|
|
|
|
|
|
|
11049
|
3170
|
|
|
|
|
4037
|
my $quoted_string; |
|
11050
|
3170
|
100
|
|
|
|
5638
|
if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow |
|
11051
|
31
|
|
|
|
|
67
|
my $ibeg = $i; |
|
11052
|
|
|
|
|
|
|
( |
|
11053
|
|
|
|
|
|
|
|
|
11054
|
31
|
|
|
|
|
109
|
$i, |
|
11055
|
|
|
|
|
|
|
$in_quote, |
|
11056
|
|
|
|
|
|
|
$quote_character, |
|
11057
|
|
|
|
|
|
|
$quote_pos, |
|
11058
|
|
|
|
|
|
|
$quote_depth, |
|
11059
|
|
|
|
|
|
|
$quoted_string, |
|
11060
|
|
|
|
|
|
|
) |
|
11061
|
|
|
|
|
|
|
= $self->follow_quoted_string( |
|
11062
|
|
|
|
|
|
|
|
|
11063
|
|
|
|
|
|
|
$ibeg, |
|
11064
|
|
|
|
|
|
|
$in_quote, |
|
11065
|
|
|
|
|
|
|
$rtokens, |
|
11066
|
|
|
|
|
|
|
$rtoken_type, |
|
11067
|
|
|
|
|
|
|
$quote_character, |
|
11068
|
|
|
|
|
|
|
$quote_pos, |
|
11069
|
|
|
|
|
|
|
$quote_depth, |
|
11070
|
|
|
|
|
|
|
$max_token_index, |
|
11071
|
|
|
|
|
|
|
); |
|
11072
|
31
|
|
|
|
|
76
|
$quoted_string_2 .= $quoted_string; |
|
11073
|
31
|
50
|
|
|
|
76
|
if ( $in_quote == 1 ) { |
|
11074
|
31
|
100
|
|
|
|
132
|
if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } |
|
|
1
|
|
|
|
|
1
|
|
|
11075
|
31
|
|
|
|
|
65
|
$quote_character = EMPTY_STRING; |
|
11076
|
|
|
|
|
|
|
} |
|
11077
|
|
|
|
|
|
|
else { |
|
11078
|
0
|
|
|
|
|
0
|
$quoted_string_2 .= "\n"; |
|
11079
|
|
|
|
|
|
|
} |
|
11080
|
|
|
|
|
|
|
} |
|
11081
|
|
|
|
|
|
|
|
|
11082
|
3170
|
50
|
|
|
|
5541
|
if ( $in_quote == 1 ) { # one (more) quote to follow |
|
11083
|
3170
|
|
|
|
|
3833
|
my $ibeg = $i; |
|
11084
|
|
|
|
|
|
|
( |
|
11085
|
|
|
|
|
|
|
|
|
11086
|
3170
|
|
|
|
|
7816
|
$i, |
|
11087
|
|
|
|
|
|
|
$in_quote, |
|
11088
|
|
|
|
|
|
|
$quote_character, |
|
11089
|
|
|
|
|
|
|
$quote_pos, |
|
11090
|
|
|
|
|
|
|
$quote_depth, |
|
11091
|
|
|
|
|
|
|
$quoted_string, |
|
11092
|
|
|
|
|
|
|
) |
|
11093
|
|
|
|
|
|
|
= $self->follow_quoted_string( |
|
11094
|
|
|
|
|
|
|
|
|
11095
|
|
|
|
|
|
|
$ibeg, |
|
11096
|
|
|
|
|
|
|
$in_quote, |
|
11097
|
|
|
|
|
|
|
$rtokens, |
|
11098
|
|
|
|
|
|
|
$rtoken_type, |
|
11099
|
|
|
|
|
|
|
$quote_character, |
|
11100
|
|
|
|
|
|
|
$quote_pos, |
|
11101
|
|
|
|
|
|
|
$quote_depth, |
|
11102
|
|
|
|
|
|
|
$max_token_index, |
|
11103
|
|
|
|
|
|
|
); |
|
11104
|
3170
|
|
|
|
|
5680
|
$quoted_string_1 .= $quoted_string; |
|
11105
|
3170
|
100
|
|
|
|
5609
|
if ( $in_quote == 1 ) { |
|
11106
|
244
|
|
|
|
|
388
|
$quoted_string_1 .= "\n"; |
|
11107
|
|
|
|
|
|
|
} |
|
11108
|
|
|
|
|
|
|
} |
|
11109
|
|
|
|
|
|
|
return ( |
|
11110
|
|
|
|
|
|
|
|
|
11111
|
3170
|
|
|
|
|
9611
|
$i, |
|
11112
|
|
|
|
|
|
|
$in_quote, |
|
11113
|
|
|
|
|
|
|
$quote_character, |
|
11114
|
|
|
|
|
|
|
$quote_pos, |
|
11115
|
|
|
|
|
|
|
$quote_depth, |
|
11116
|
|
|
|
|
|
|
$quoted_string_1, |
|
11117
|
|
|
|
|
|
|
$quoted_string_2, |
|
11118
|
|
|
|
|
|
|
|
|
11119
|
|
|
|
|
|
|
); |
|
11120
|
|
|
|
|
|
|
} ## end sub do_quote |
|
11121
|
|
|
|
|
|
|
|
|
11122
|
44
|
|
|
44
|
|
367
|
use constant DEBUG_FIND_INTERPOLATED_HERE_TARGETS => 0; |
|
|
44
|
|
|
|
|
87
|
|
|
|
44
|
|
|
|
|
27873
|
|
|
11123
|
|
|
|
|
|
|
|
|
11124
|
|
|
|
|
|
|
sub find_interpolated_here_targets { |
|
11125
|
2
|
|
|
2
|
0
|
4
|
my ( $self, $quoted_string, $len_starting_lines ) = @_; |
|
11126
|
|
|
|
|
|
|
|
|
11127
|
|
|
|
|
|
|
# Search for here targets in a quoted string |
|
11128
|
|
|
|
|
|
|
# Given: |
|
11129
|
|
|
|
|
|
|
# $quoted_string = the complete string of an interpolated quote |
|
11130
|
|
|
|
|
|
|
# $len_starting_lines = number of characters of the first n-1 lines |
|
11131
|
|
|
|
|
|
|
# (=0 if this is a single-line quote) |
|
11132
|
|
|
|
|
|
|
# Task: |
|
11133
|
|
|
|
|
|
|
# Find and return a list of all here targets on the last line; |
|
11134
|
|
|
|
|
|
|
# i.e., if here target is index ii, we only return the |
|
11135
|
|
|
|
|
|
|
# target if $rmap->[$ii]>=$len_starting_lines |
|
11136
|
|
|
|
|
|
|
|
|
11137
|
|
|
|
|
|
|
# The items returned are the format needed for @{$rhere_target_list}; |
|
11138
|
|
|
|
|
|
|
# [ $here_doc_target, $here_quote_character ] |
|
11139
|
|
|
|
|
|
|
# there can be multiple here targets. |
|
11140
|
|
|
|
|
|
|
|
|
11141
|
2
|
|
|
|
|
3
|
my $rht; |
|
11142
|
|
|
|
|
|
|
|
|
11143
|
|
|
|
|
|
|
# Break the entire quote into pre-tokens, even if multi-line, because we |
|
11144
|
|
|
|
|
|
|
# have to determine which parts are in single quotes |
|
11145
|
2
|
|
|
|
|
5
|
my ( $rtokens, $rmap, $rtoken_type ) = pre_tokenize($quoted_string); |
|
11146
|
2
|
|
|
|
|
4
|
my $max_ii = @{$rtokens} - 1; |
|
|
2
|
|
|
|
|
4
|
|
|
11147
|
|
|
|
|
|
|
|
|
11148
|
|
|
|
|
|
|
# Depth of braces controlled by a sigil |
|
11149
|
2
|
|
|
|
|
3
|
my $code_depth = 0; |
|
11150
|
|
|
|
|
|
|
|
|
11151
|
|
|
|
|
|
|
# Loop over pre-tokens |
|
11152
|
2
|
|
|
|
|
4
|
my $ii = -1; |
|
11153
|
2
|
|
|
|
|
7
|
while ( ++$ii <= $max_ii ) { |
|
11154
|
21
|
|
|
|
|
24
|
my $token = $rtokens->[$ii]; |
|
11155
|
21
|
|
|
|
|
19
|
if (DEBUG_FIND_INTERPOLATED_HERE_TARGETS) { |
|
11156
|
|
|
|
|
|
|
print "i=$ii tok=$token block=$code_depth\n"; |
|
11157
|
|
|
|
|
|
|
} |
|
11158
|
|
|
|
|
|
|
|
|
11159
|
21
|
100
|
|
|
|
28
|
if ( $token eq BACKSLASH ) { |
|
11160
|
4
|
50
|
|
|
|
9
|
if ( !$code_depth ) { $ii++ } |
|
|
0
|
|
|
|
|
0
|
|
|
11161
|
4
|
|
|
|
|
5
|
next; |
|
11162
|
|
|
|
|
|
|
} |
|
11163
|
|
|
|
|
|
|
|
|
11164
|
|
|
|
|
|
|
# Look for start of interpolation code block, '${', '@{', '$var{', etc |
|
11165
|
17
|
100
|
|
|
|
22
|
if ( !$code_depth ) { |
|
11166
|
4
|
50
|
33
|
|
|
10
|
if ( $token eq '$' || $token eq '@' ) { |
|
11167
|
|
|
|
|
|
|
|
|
11168
|
4
|
100
|
66
|
|
|
19
|
$ii++ |
|
11169
|
|
|
|
|
|
|
if ( $ii < $max_ii && $rtoken_type->[ $ii + 1 ] eq 'b' ); |
|
11170
|
|
|
|
|
|
|
|
|
11171
|
4
|
|
33
|
|
|
23
|
while ( |
|
|
|
|
33
|
|
|
|
|
|
11172
|
|
|
|
|
|
|
$ii < $max_ii |
|
11173
|
|
|
|
|
|
|
&& ( $rtoken_type->[ $ii + 1 ] eq 'w' |
|
11174
|
|
|
|
|
|
|
|| $rtoken_type->[ $ii + 1 ] eq '::' ) |
|
11175
|
|
|
|
|
|
|
) |
|
11176
|
|
|
|
|
|
|
{ |
|
11177
|
0
|
|
|
|
|
0
|
$ii++; |
|
11178
|
|
|
|
|
|
|
} ## end while ( $ii < $max_ii && ...) |
|
11179
|
|
|
|
|
|
|
|
|
11180
|
4
|
50
|
33
|
|
|
13
|
$ii++ |
|
11181
|
|
|
|
|
|
|
if ( $ii < $max_ii && $rtoken_type->[ $ii + 1 ] eq 'b' ); |
|
11182
|
|
|
|
|
|
|
|
|
11183
|
4
|
50
|
33
|
|
|
18
|
if ( $ii < $max_ii && $rtokens->[ $ii + 1 ] eq '{' ) { |
|
11184
|
4
|
|
|
|
|
5
|
$ii++; |
|
11185
|
4
|
|
|
|
|
6
|
$code_depth++; |
|
11186
|
|
|
|
|
|
|
} |
|
11187
|
|
|
|
|
|
|
} |
|
11188
|
4
|
|
|
|
|
6
|
next; |
|
11189
|
|
|
|
|
|
|
} |
|
11190
|
|
|
|
|
|
|
|
|
11191
|
|
|
|
|
|
|
# Continue interpolating while $code_depth > 0.. |
|
11192
|
13
|
50
|
|
|
|
20
|
if ( $token eq '{' ) { |
|
11193
|
0
|
|
|
|
|
0
|
$code_depth++; |
|
11194
|
0
|
|
|
|
|
0
|
next; |
|
11195
|
|
|
|
|
|
|
} |
|
11196
|
13
|
100
|
|
|
|
18
|
if ( $token eq '}' ) { |
|
11197
|
4
|
|
|
|
|
4
|
$code_depth--; |
|
11198
|
4
|
|
|
|
|
7
|
next; |
|
11199
|
|
|
|
|
|
|
} |
|
11200
|
|
|
|
|
|
|
|
|
11201
|
|
|
|
|
|
|
# Look for '<<' |
|
11202
|
9
|
50
|
66
|
|
|
29
|
if ( $token ne '<' |
|
|
|
|
66
|
|
|
|
|
|
11203
|
|
|
|
|
|
|
|| $ii >= $max_ii - 1 |
|
11204
|
|
|
|
|
|
|
|| $rtokens->[ $ii + 1 ] ne '<' ) |
|
11205
|
|
|
|
|
|
|
{ |
|
11206
|
5
|
|
|
|
|
7
|
next; |
|
11207
|
|
|
|
|
|
|
} |
|
11208
|
|
|
|
|
|
|
|
|
11209
|
|
|
|
|
|
|
# Remember the location of the first '<' so it can be modified |
|
11210
|
4
|
|
|
|
|
5
|
my $ii_left_shift = $ii; |
|
11211
|
|
|
|
|
|
|
|
|
11212
|
4
|
|
|
|
|
5
|
$ii++; |
|
11213
|
|
|
|
|
|
|
|
|
11214
|
|
|
|
|
|
|
# or '<<~' |
|
11215
|
4
|
50
|
33
|
|
|
14
|
if ( $rtoken_type->[ $ii + 1 ] eq '~' && $ii < $max_ii - 2 ) { |
|
11216
|
0
|
|
|
|
|
0
|
$ii++; |
|
11217
|
|
|
|
|
|
|
} |
|
11218
|
|
|
|
|
|
|
|
|
11219
|
|
|
|
|
|
|
# blanks ok before targets in quotes |
|
11220
|
4
|
|
|
|
|
6
|
my $saw_blank; |
|
11221
|
4
|
100
|
66
|
|
|
13
|
if ( $rtoken_type->[ $ii + 1 ] eq 'b' && $ii < $max_ii - 2 ) { |
|
11222
|
2
|
|
|
|
|
3
|
$saw_blank = 1; |
|
11223
|
2
|
|
|
|
|
3
|
$ii++; |
|
11224
|
|
|
|
|
|
|
} |
|
11225
|
|
|
|
|
|
|
|
|
11226
|
4
|
|
|
|
|
5
|
my $next_type = $rtoken_type->[ $ii + 1 ]; |
|
11227
|
|
|
|
|
|
|
|
|
11228
|
|
|
|
|
|
|
# Look for unquoted targets like "${ \<<END1 }" |
|
11229
|
4
|
100
|
66
|
|
|
14
|
if ( $next_type eq 'w' ) { |
|
|
|
50
|
33
|
|
|
|
|
|
11230
|
2
|
50
|
|
|
|
5
|
if ($saw_blank) { |
|
11231
|
|
|
|
|
|
|
## error: blank target is deprecated |
|
11232
|
|
|
|
|
|
|
} |
|
11233
|
|
|
|
|
|
|
else { |
|
11234
|
2
|
|
|
|
|
4
|
$ii++; |
|
11235
|
2
|
50
|
|
|
|
7
|
if ( $rmap->[$ii] >= $len_starting_lines ) { |
|
11236
|
2
|
|
|
|
|
3
|
push @{$rht}, [ $rtokens->[$ii], EMPTY_STRING ]; |
|
|
2
|
|
|
|
|
7
|
|
|
11237
|
|
|
|
|
|
|
|
|
11238
|
|
|
|
|
|
|
# Modify the string so the target is not found again |
|
11239
|
2
|
|
|
|
|
4
|
my $pos_left_shift = $rmap->[$ii_left_shift]; |
|
11240
|
2
|
|
|
|
|
6
|
substr( $quoted_string, $pos_left_shift, 1, SPACE ); |
|
11241
|
2
|
|
|
|
|
5
|
substr( $quoted_string, $pos_left_shift + 1, 1, SPACE ); |
|
11242
|
|
|
|
|
|
|
} |
|
11243
|
|
|
|
|
|
|
} |
|
11244
|
|
|
|
|
|
|
} |
|
11245
|
|
|
|
|
|
|
|
|
11246
|
|
|
|
|
|
|
# Look for quoted targets like "${ \<< 'END1' }${ \<<\"END2\" }"; |
|
11247
|
|
|
|
|
|
|
elsif ( $next_type eq "'" || $next_type eq '"' || $next_type eq '`' ) { |
|
11248
|
2
|
|
|
|
|
3
|
my $quote_char = $next_type; |
|
11249
|
2
|
|
|
|
|
4
|
$ii++; |
|
11250
|
2
|
|
|
|
|
2
|
my $here_target = EMPTY_STRING; |
|
11251
|
2
|
|
66
|
|
|
8
|
while ( ++$ii <= $max_ii && $rtokens->[$ii] ne $quote_char ) { |
|
11252
|
|
|
|
|
|
|
next |
|
11253
|
2
|
50
|
66
|
|
|
8
|
if ( $quote_char ne "'" && $rtokens->[$ii] eq BACKSLASH ); |
|
11254
|
2
|
|
|
|
|
48
|
$here_target .= $rtokens->[$ii]; |
|
11255
|
|
|
|
|
|
|
} |
|
11256
|
2
|
50
|
|
|
|
11
|
if ( $rmap->[$ii] >= $len_starting_lines ) { |
|
11257
|
2
|
|
|
|
|
3
|
push @{$rht}, [ $here_target, $quote_char ]; |
|
|
2
|
|
|
|
|
10
|
|
|
11258
|
|
|
|
|
|
|
} |
|
11259
|
|
|
|
|
|
|
} |
|
11260
|
|
|
|
|
|
|
else { |
|
11261
|
|
|
|
|
|
|
## no here target found |
|
11262
|
|
|
|
|
|
|
} |
|
11263
|
4
|
|
|
|
|
10
|
next; |
|
11264
|
|
|
|
|
|
|
} ## end while ( ++$ii <= $max_ii ) |
|
11265
|
2
|
|
|
|
|
14
|
return ( $rht, $quoted_string ); |
|
11266
|
|
|
|
|
|
|
} ## end sub find_interpolated_here_targets |
|
11267
|
|
|
|
|
|
|
|
|
11268
|
|
|
|
|
|
|
# Some possible non-word quote delimiters, for preliminary checking |
|
11269
|
|
|
|
|
|
|
my %is_punct_char; |
|
11270
|
|
|
|
|
|
|
|
|
11271
|
|
|
|
|
|
|
BEGIN { |
|
11272
|
|
|
|
|
|
|
|
|
11273
|
44
|
|
|
44
|
|
339
|
my @q = qw# / " ' { } ( ) [ ] < > ; + - * | % ! x ~ = ? : . ^ & #; |
|
11274
|
44
|
|
|
|
|
138
|
push @q, '#'; |
|
11275
|
44
|
|
|
|
|
114
|
push @q, COMMA; |
|
11276
|
44
|
|
|
|
|
131088
|
$is_punct_char{$_} = 1 for @q; |
|
11277
|
|
|
|
|
|
|
} |
|
11278
|
|
|
|
|
|
|
|
|
11279
|
|
|
|
|
|
|
sub follow_quoted_string { |
|
11280
|
|
|
|
|
|
|
|
|
11281
|
|
|
|
|
|
|
my ( |
|
11282
|
|
|
|
|
|
|
|
|
11283
|
3207
|
|
|
3207
|
0
|
6516
|
$self, |
|
11284
|
|
|
|
|
|
|
|
|
11285
|
|
|
|
|
|
|
$i_beg, |
|
11286
|
|
|
|
|
|
|
$in_quote, |
|
11287
|
|
|
|
|
|
|
$rtokens, |
|
11288
|
|
|
|
|
|
|
$rtoken_type, |
|
11289
|
|
|
|
|
|
|
$beginning_tok, |
|
11290
|
|
|
|
|
|
|
$quote_pos, |
|
11291
|
|
|
|
|
|
|
$quote_depth, |
|
11292
|
|
|
|
|
|
|
$max_token_index, |
|
11293
|
|
|
|
|
|
|
|
|
11294
|
|
|
|
|
|
|
) = @_; |
|
11295
|
|
|
|
|
|
|
|
|
11296
|
|
|
|
|
|
|
# Scan for a specific token, skipping escaped characters. |
|
11297
|
|
|
|
|
|
|
# If the quote character is blank, use the first non-blank character. |
|
11298
|
|
|
|
|
|
|
# Given: |
|
11299
|
|
|
|
|
|
|
# $rtokens = reference to the array of tokens |
|
11300
|
|
|
|
|
|
|
# $i = the token index of the first character to search |
|
11301
|
|
|
|
|
|
|
# $in_quote = number of quoted strings being followed |
|
11302
|
|
|
|
|
|
|
# $beginning_tok = the starting quote character |
|
11303
|
|
|
|
|
|
|
# $quote_pos = index to check next for alphanumeric delimiter |
|
11304
|
|
|
|
|
|
|
# Return: |
|
11305
|
|
|
|
|
|
|
# $i = the token index of the ending quote character |
|
11306
|
|
|
|
|
|
|
# $in_quote = decremented if found end, unchanged if not |
|
11307
|
|
|
|
|
|
|
# $beginning_tok = the starting quote character |
|
11308
|
|
|
|
|
|
|
# $quote_pos = index to check next for alphanumeric delimiter |
|
11309
|
|
|
|
|
|
|
# $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. |
|
11310
|
|
|
|
|
|
|
# $quoted_string = the text of the quote (without quotation tokens) |
|
11311
|
3207
|
|
|
|
|
4126
|
my ( $tok, $end_tok ); |
|
11312
|
3207
|
|
|
|
|
4104
|
my $i = $i_beg - 1; |
|
11313
|
3207
|
|
|
|
|
3997
|
my $quoted_string = EMPTY_STRING; |
|
11314
|
|
|
|
|
|
|
|
|
11315
|
3207
|
|
|
|
|
3789
|
0 && do { |
|
11316
|
|
|
|
|
|
|
print {*STDOUT} |
|
11317
|
|
|
|
|
|
|
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; |
|
11318
|
|
|
|
|
|
|
}; |
|
11319
|
|
|
|
|
|
|
|
|
11320
|
|
|
|
|
|
|
# for a non-blank token, get the corresponding end token |
|
11321
|
3207
|
100
|
33
|
|
|
11897
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
11322
|
|
|
|
|
|
|
$is_punct_char{$beginning_tok} |
|
11323
|
|
|
|
|
|
|
|| ( length($beginning_tok) |
|
11324
|
|
|
|
|
|
|
&& $beginning_tok !~ /^\s+$/ ) |
|
11325
|
|
|
|
|
|
|
) |
|
11326
|
|
|
|
|
|
|
{ |
|
11327
|
|
|
|
|
|
|
$end_tok = |
|
11328
|
|
|
|
|
|
|
$matching_end_token{$beginning_tok} |
|
11329
|
244
|
100
|
|
|
|
637
|
? $matching_end_token{$beginning_tok} |
|
11330
|
|
|
|
|
|
|
: $beginning_tok; |
|
11331
|
|
|
|
|
|
|
} |
|
11332
|
|
|
|
|
|
|
|
|
11333
|
|
|
|
|
|
|
# for a blank token, find and use the first non-blank one |
|
11334
|
|
|
|
|
|
|
else { |
|
11335
|
2963
|
100
|
|
|
|
5130
|
my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr> |
|
11336
|
|
|
|
|
|
|
|
|
11337
|
2963
|
|
|
|
|
5439
|
while ( $i < $max_token_index ) { |
|
11338
|
2965
|
|
|
|
|
4585
|
$tok = $rtokens->[ ++$i ]; |
|
11339
|
|
|
|
|
|
|
|
|
11340
|
2965
|
100
|
|
|
|
5371
|
if ( $rtoken_type->[$i] ne 'b' ) { |
|
11341
|
|
|
|
|
|
|
|
|
11342
|
2963
|
50
|
66
|
|
|
6324
|
if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { |
|
11343
|
0
|
|
|
|
|
0
|
$i = $max_token_index; |
|
11344
|
|
|
|
|
|
|
} |
|
11345
|
|
|
|
|
|
|
else { |
|
11346
|
|
|
|
|
|
|
|
|
11347
|
2963
|
100
|
|
|
|
5032
|
if ( length($tok) > 1 ) { |
|
11348
|
1
|
50
|
|
|
|
4
|
if ( $quote_pos <= 0 ) { $quote_pos = 1 } |
|
|
1
|
|
|
|
|
1
|
|
|
11349
|
1
|
|
|
|
|
3
|
$beginning_tok = substr( $tok, $quote_pos - 1, 1 ); |
|
11350
|
|
|
|
|
|
|
} |
|
11351
|
|
|
|
|
|
|
else { |
|
11352
|
2962
|
|
|
|
|
3531
|
$beginning_tok = $tok; |
|
11353
|
2962
|
|
|
|
|
3642
|
$quote_pos = 0; |
|
11354
|
|
|
|
|
|
|
} |
|
11355
|
|
|
|
|
|
|
$end_tok = |
|
11356
|
|
|
|
|
|
|
$matching_end_token{$beginning_tok} |
|
11357
|
2963
|
100
|
|
|
|
5670
|
? $matching_end_token{$beginning_tok} |
|
11358
|
|
|
|
|
|
|
: $beginning_tok; |
|
11359
|
2963
|
|
|
|
|
3382
|
$quote_depth = 1; |
|
11360
|
2963
|
|
|
|
|
4290
|
last; |
|
11361
|
|
|
|
|
|
|
} |
|
11362
|
|
|
|
|
|
|
} |
|
11363
|
|
|
|
|
|
|
else { |
|
11364
|
2
|
|
|
|
|
4
|
$allow_quote_comments = 1; |
|
11365
|
|
|
|
|
|
|
} |
|
11366
|
|
|
|
|
|
|
} ## end while ( $i < $max_token_index) |
|
11367
|
|
|
|
|
|
|
} |
|
11368
|
|
|
|
|
|
|
|
|
11369
|
|
|
|
|
|
|
# There are two different loops which search for the ending quote |
|
11370
|
|
|
|
|
|
|
# character. In the rare case of an alphanumeric quote delimiter, we |
|
11371
|
|
|
|
|
|
|
# have to look through alphanumeric tokens character-by-character, since |
|
11372
|
|
|
|
|
|
|
# the pre-tokenization process combines multiple alphanumeric |
|
11373
|
|
|
|
|
|
|
# characters, whereas for a non-alphanumeric delimiter, only tokens of |
|
11374
|
|
|
|
|
|
|
# length 1 can match. |
|
11375
|
|
|
|
|
|
|
|
|
11376
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
|
11377
|
|
|
|
|
|
|
# Case 1 (rare): loop for case of alphanumeric quote delimiter.. |
|
11378
|
|
|
|
|
|
|
# "quote_pos" is the position the current word to begin searching |
|
11379
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
|
11380
|
3207
|
100
|
100
|
|
|
7275
|
if ( !$is_punct_char{$beginning_tok} && $beginning_tok =~ /\w/ ) { |
|
11381
|
|
|
|
|
|
|
|
|
11382
|
|
|
|
|
|
|
# Note this because it is not recommended practice except |
|
11383
|
|
|
|
|
|
|
# for obfuscated perl contests |
|
11384
|
1
|
50
|
|
|
|
3
|
if ( $in_quote == 1 ) { |
|
11385
|
1
|
|
|
|
|
4
|
$self->write_logfile_entry( |
|
11386
|
|
|
|
|
|
|
"Note: alphanumeric quote delimiter ($beginning_tok) \n"); |
|
11387
|
|
|
|
|
|
|
} |
|
11388
|
|
|
|
|
|
|
|
|
11389
|
|
|
|
|
|
|
# Note: changed < to <= here to fix c109. Relying on extra end blanks. |
|
11390
|
1
|
|
|
|
|
3
|
while ( $i <= $max_token_index ) { |
|
11391
|
|
|
|
|
|
|
|
|
11392
|
4
|
100
|
66
|
|
|
11
|
if ( $quote_pos == 0 || ( $i < 0 ) ) { |
|
11393
|
3
|
|
|
|
|
4
|
$tok = $rtokens->[ ++$i ]; |
|
11394
|
|
|
|
|
|
|
|
|
11395
|
3
|
100
|
|
|
|
7
|
if ( $tok eq BACKSLASH ) { |
|
11396
|
|
|
|
|
|
|
|
|
11397
|
|
|
|
|
|
|
# retain backslash unless it hides the end token |
|
11398
|
1
|
50
|
|
|
|
4
|
$quoted_string .= $tok |
|
11399
|
|
|
|
|
|
|
unless ( $rtokens->[ $i + 1 ] eq $end_tok ); |
|
11400
|
1
|
|
|
|
|
3
|
$quote_pos++; |
|
11401
|
1
|
50
|
|
|
|
4
|
last if ( $i >= $max_token_index ); |
|
11402
|
1
|
|
|
|
|
2
|
$tok = $rtokens->[ ++$i ]; |
|
11403
|
|
|
|
|
|
|
} |
|
11404
|
|
|
|
|
|
|
} |
|
11405
|
4
|
|
|
|
|
6
|
my $old_pos = $quote_pos; |
|
11406
|
|
|
|
|
|
|
|
|
11407
|
4
|
|
|
|
|
6
|
$quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); |
|
11408
|
|
|
|
|
|
|
|
|
11409
|
4
|
100
|
|
|
|
6
|
if ( $quote_pos > 0 ) { |
|
11410
|
|
|
|
|
|
|
|
|
11411
|
1
|
|
|
|
|
2
|
$quoted_string .= |
|
11412
|
|
|
|
|
|
|
substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); |
|
11413
|
|
|
|
|
|
|
|
|
11414
|
|
|
|
|
|
|
# NOTE: any quote modifiers will be at the end of '$tok'. If we |
|
11415
|
|
|
|
|
|
|
# wanted to check them, this is the place to get them. But |
|
11416
|
|
|
|
|
|
|
# this quote form is rarely used in practice, so it isn't |
|
11417
|
|
|
|
|
|
|
# worthwhile. |
|
11418
|
|
|
|
|
|
|
|
|
11419
|
1
|
|
|
|
|
2
|
$quote_depth--; |
|
11420
|
|
|
|
|
|
|
|
|
11421
|
1
|
50
|
|
|
|
2
|
if ( $quote_depth == 0 ) { |
|
11422
|
1
|
|
|
|
|
2
|
$in_quote--; |
|
11423
|
1
|
|
|
|
|
2
|
last; |
|
11424
|
|
|
|
|
|
|
} |
|
11425
|
|
|
|
|
|
|
} |
|
11426
|
|
|
|
|
|
|
else { |
|
11427
|
3
|
50
|
|
|
|
6
|
if ( $old_pos <= length($tok) ) { |
|
11428
|
3
|
|
|
|
|
6
|
$quoted_string .= substr( $tok, $old_pos ); |
|
11429
|
|
|
|
|
|
|
} |
|
11430
|
|
|
|
|
|
|
} |
|
11431
|
|
|
|
|
|
|
} ## end while ( $i <= $max_token_index) |
|
11432
|
|
|
|
|
|
|
} |
|
11433
|
|
|
|
|
|
|
|
|
11434
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
|
11435
|
|
|
|
|
|
|
# Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. |
|
11436
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
|
11437
|
|
|
|
|
|
|
else { |
|
11438
|
|
|
|
|
|
|
|
|
11439
|
3206
|
|
|
|
|
5856
|
while ( $i < $max_token_index ) { |
|
11440
|
12368
|
|
|
|
|
14515
|
$tok = $rtokens->[ ++$i ]; |
|
11441
|
|
|
|
|
|
|
|
|
11442
|
12368
|
100
|
|
|
|
21230
|
if ( $tok eq $end_tok ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
11443
|
2963
|
|
|
|
|
3414
|
$quote_depth--; |
|
11444
|
|
|
|
|
|
|
|
|
11445
|
2963
|
100
|
|
|
|
4819
|
if ( $quote_depth == 0 ) { |
|
11446
|
2962
|
|
|
|
|
3338
|
$in_quote--; |
|
11447
|
2962
|
|
|
|
|
3715
|
last; |
|
11448
|
|
|
|
|
|
|
} |
|
11449
|
|
|
|
|
|
|
} |
|
11450
|
|
|
|
|
|
|
elsif ( $tok eq $beginning_tok ) { |
|
11451
|
1
|
|
|
|
|
2
|
$quote_depth++; |
|
11452
|
|
|
|
|
|
|
} |
|
11453
|
|
|
|
|
|
|
elsif ( $tok eq BACKSLASH ) { |
|
11454
|
|
|
|
|
|
|
|
|
11455
|
|
|
|
|
|
|
# retain backslash unless it hides the beginning or end token |
|
11456
|
451
|
|
|
|
|
684
|
$tok = $rtokens->[ ++$i ]; |
|
11457
|
451
|
100
|
100
|
|
|
1808
|
$quoted_string .= BACKSLASH |
|
11458
|
|
|
|
|
|
|
if ( $tok ne $end_tok && $tok ne $beginning_tok ); |
|
11459
|
|
|
|
|
|
|
} |
|
11460
|
|
|
|
|
|
|
else { |
|
11461
|
|
|
|
|
|
|
## nothing special |
|
11462
|
|
|
|
|
|
|
} |
|
11463
|
9406
|
|
|
|
|
14101
|
$quoted_string .= $tok; |
|
11464
|
|
|
|
|
|
|
} ## end while ( $i < $max_token_index) |
|
11465
|
|
|
|
|
|
|
} |
|
11466
|
3207
|
100
|
|
|
|
5431
|
if ( $i > $max_token_index ) { $i = $max_token_index } |
|
|
10
|
|
|
|
|
12
|
|
|
11467
|
|
|
|
|
|
|
return ( |
|
11468
|
|
|
|
|
|
|
|
|
11469
|
3207
|
|
|
|
|
11335
|
$i, |
|
11470
|
|
|
|
|
|
|
$in_quote, |
|
11471
|
|
|
|
|
|
|
$beginning_tok, |
|
11472
|
|
|
|
|
|
|
$quote_pos, |
|
11473
|
|
|
|
|
|
|
$quote_depth, |
|
11474
|
|
|
|
|
|
|
$quoted_string, |
|
11475
|
|
|
|
|
|
|
|
|
11476
|
|
|
|
|
|
|
); |
|
11477
|
|
|
|
|
|
|
} ## end sub follow_quoted_string |
|
11478
|
|
|
|
|
|
|
|
|
11479
|
|
|
|
|
|
|
sub indicate_error { |
|
11480
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg, $line_number, $input_line, $pos, $caret ) = @_; |
|
11481
|
|
|
|
|
|
|
|
|
11482
|
|
|
|
|
|
|
# write input line and line with carat's showing where error was detected |
|
11483
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
|
11484
|
0
|
|
|
|
|
0
|
$self->warning($msg); |
|
11485
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( $line_number, $input_line, $pos, |
|
11486
|
|
|
|
|
|
|
$caret ); |
|
11487
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
|
11488
|
0
|
|
|
|
|
0
|
return; |
|
11489
|
|
|
|
|
|
|
} ## end sub indicate_error |
|
11490
|
|
|
|
|
|
|
|
|
11491
|
|
|
|
|
|
|
sub write_error_indicator_pair { |
|
11492
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $line_number, $input_line, $pos, $caret ) = @_; |
|
11493
|
0
|
|
|
|
|
0
|
my ( $offset, $numbered_line, $underline ) = |
|
11494
|
|
|
|
|
|
|
make_numbered_line( $line_number, $input_line, $pos ); |
|
11495
|
0
|
|
|
|
|
0
|
$underline = write_on_underline( $underline, $pos - $offset, $caret ); |
|
11496
|
0
|
|
|
|
|
0
|
$self->warning( $numbered_line . "\n" ); |
|
11497
|
0
|
|
|
|
|
0
|
$underline =~ s/\s+$//; |
|
11498
|
0
|
|
|
|
|
0
|
$self->warning( $underline . "\n" ); |
|
11499
|
0
|
|
|
|
|
0
|
return; |
|
11500
|
|
|
|
|
|
|
} ## end sub write_error_indicator_pair |
|
11501
|
|
|
|
|
|
|
|
|
11502
|
|
|
|
|
|
|
sub make_numbered_line { |
|
11503
|
|
|
|
|
|
|
|
|
11504
|
0
|
|
|
0
|
0
|
0
|
my ( $lineno, $str, $pos ) = @_; |
|
11505
|
|
|
|
|
|
|
|
|
11506
|
|
|
|
|
|
|
# Given: |
|
11507
|
|
|
|
|
|
|
# $lineno=line number |
|
11508
|
|
|
|
|
|
|
# $str = an input line |
|
11509
|
|
|
|
|
|
|
# $pos = character position of interest |
|
11510
|
|
|
|
|
|
|
# Create a string not longer than 80 characters of the form: |
|
11511
|
|
|
|
|
|
|
# $lineno: sub_string |
|
11512
|
|
|
|
|
|
|
# such that the sub_string of $str contains the position of interest |
|
11513
|
|
|
|
|
|
|
# |
|
11514
|
|
|
|
|
|
|
# Here is an example of what we want, in this case we add trailing |
|
11515
|
|
|
|
|
|
|
# '...' because the line is long. |
|
11516
|
|
|
|
|
|
|
# |
|
11517
|
|
|
|
|
|
|
# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... |
|
11518
|
|
|
|
|
|
|
# |
|
11519
|
|
|
|
|
|
|
# Here is another example, this time in which we used leading '...' |
|
11520
|
|
|
|
|
|
|
# because of excessive length: |
|
11521
|
|
|
|
|
|
|
# |
|
11522
|
|
|
|
|
|
|
# 2: ... er of the World Wide Web Consortium's |
|
11523
|
|
|
|
|
|
|
# |
|
11524
|
|
|
|
|
|
|
# input parameters are: |
|
11525
|
|
|
|
|
|
|
# $lineno = line number |
|
11526
|
|
|
|
|
|
|
# $str = the text of the line |
|
11527
|
|
|
|
|
|
|
# $pos = position of interest (the error) : 0 = first character |
|
11528
|
|
|
|
|
|
|
# |
|
11529
|
|
|
|
|
|
|
# We return : |
|
11530
|
|
|
|
|
|
|
# - $offset = an offset which corrects the position in case we only |
|
11531
|
|
|
|
|
|
|
# display part of a line, such that $pos-$offset is the effective |
|
11532
|
|
|
|
|
|
|
# position from the start of the displayed line. |
|
11533
|
|
|
|
|
|
|
# - $numbered_line = the numbered line as above, |
|
11534
|
|
|
|
|
|
|
# - $underline = a blank 'underline' which is all spaces with the same |
|
11535
|
|
|
|
|
|
|
# number of characters as the numbered line. |
|
11536
|
|
|
|
|
|
|
|
|
11537
|
0
|
0
|
|
|
|
0
|
my $offset = ( $pos < 60 ) ? 0 : $pos - 40; |
|
11538
|
0
|
|
|
|
|
0
|
my $excess = length($str) - $offset - 68; |
|
11539
|
0
|
0
|
|
|
|
0
|
my $numc = ( $excess > 0 ) ? 68 : undef; |
|
11540
|
|
|
|
|
|
|
|
|
11541
|
0
|
0
|
|
|
|
0
|
if ( defined($numc) ) { |
|
11542
|
0
|
0
|
|
|
|
0
|
if ( $offset == 0 ) { |
|
11543
|
0
|
|
|
|
|
0
|
$str = substr( $str, $offset, $numc - 4 ) . " ..."; |
|
11544
|
|
|
|
|
|
|
} |
|
11545
|
|
|
|
|
|
|
else { |
|
11546
|
0
|
|
|
|
|
0
|
$str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; |
|
11547
|
|
|
|
|
|
|
} |
|
11548
|
|
|
|
|
|
|
} |
|
11549
|
|
|
|
|
|
|
else { |
|
11550
|
|
|
|
|
|
|
|
|
11551
|
0
|
0
|
|
|
|
0
|
if ( $offset == 0 ) { |
|
11552
|
|
|
|
|
|
|
} |
|
11553
|
|
|
|
|
|
|
else { |
|
11554
|
0
|
|
|
|
|
0
|
$str = "... " . substr( $str, $offset + 4 ); |
|
11555
|
|
|
|
|
|
|
} |
|
11556
|
|
|
|
|
|
|
} |
|
11557
|
|
|
|
|
|
|
|
|
11558
|
0
|
|
|
|
|
0
|
my $numbered_line = sprintf( "%d: ", $lineno ); |
|
11559
|
0
|
|
|
|
|
0
|
$offset -= length($numbered_line); |
|
11560
|
0
|
|
|
|
|
0
|
$numbered_line .= $str; |
|
11561
|
0
|
|
|
|
|
0
|
my $underline = SPACE x length($numbered_line); |
|
11562
|
0
|
|
|
|
|
0
|
return ( $offset, $numbered_line, $underline ); |
|
11563
|
|
|
|
|
|
|
} ## end sub make_numbered_line |
|
11564
|
|
|
|
|
|
|
|
|
11565
|
|
|
|
|
|
|
sub write_on_underline { |
|
11566
|
|
|
|
|
|
|
|
|
11567
|
0
|
|
|
0
|
0
|
0
|
my ( $underline, $pos, $pos_chr ) = @_; |
|
11568
|
|
|
|
|
|
|
|
|
11569
|
|
|
|
|
|
|
# The "underline" is a string that shows where an error is; it starts |
|
11570
|
|
|
|
|
|
|
# out as a string of blanks with the same length as the numbered line of |
|
11571
|
|
|
|
|
|
|
# code above it, and we have to add marking to show where an error is. |
|
11572
|
|
|
|
|
|
|
# In the example below, we want to write the string '--^' just below |
|
11573
|
|
|
|
|
|
|
# the line of bad code: |
|
11574
|
|
|
|
|
|
|
# |
|
11575
|
|
|
|
|
|
|
# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... |
|
11576
|
|
|
|
|
|
|
# ---^ |
|
11577
|
|
|
|
|
|
|
# We are given the current underline string, plus a position and a |
|
11578
|
|
|
|
|
|
|
# string to write on it. |
|
11579
|
|
|
|
|
|
|
# |
|
11580
|
|
|
|
|
|
|
# In the above example, there will be 2 calls to do this: |
|
11581
|
|
|
|
|
|
|
# First call: $pos=19, pos_chr=^ |
|
11582
|
|
|
|
|
|
|
# Second call: $pos=16, pos_chr=--- |
|
11583
|
|
|
|
|
|
|
# |
|
11584
|
|
|
|
|
|
|
# This is a trivial thing to do with substr, but there is some |
|
11585
|
|
|
|
|
|
|
# checking to do. |
|
11586
|
|
|
|
|
|
|
|
|
11587
|
|
|
|
|
|
|
# check for error..shouldn't happen |
|
11588
|
0
|
0
|
0
|
|
|
0
|
if ( $pos < 0 || $pos > length($underline) ) { |
|
11589
|
0
|
|
|
|
|
0
|
return $underline; |
|
11590
|
|
|
|
|
|
|
} |
|
11591
|
0
|
|
|
|
|
0
|
my $excess = length($pos_chr) + $pos - length($underline); |
|
11592
|
0
|
0
|
|
|
|
0
|
if ( $excess > 0 ) { |
|
11593
|
0
|
|
|
|
|
0
|
$pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); |
|
11594
|
|
|
|
|
|
|
} |
|
11595
|
0
|
|
|
|
|
0
|
substr( $underline, $pos, length($pos_chr), $pos_chr ); |
|
11596
|
0
|
|
|
|
|
0
|
return ($underline); |
|
11597
|
|
|
|
|
|
|
} ## end sub write_on_underline |
|
11598
|
|
|
|
|
|
|
|
|
11599
|
|
|
|
|
|
|
sub pre_tokenize { |
|
11600
|
|
|
|
|
|
|
|
|
11601
|
7175
|
|
|
7175
|
0
|
12383
|
my ( $str, ($max_tokens_wanted) ) = @_; |
|
11602
|
|
|
|
|
|
|
|
|
11603
|
|
|
|
|
|
|
# Input parameters: |
|
11604
|
|
|
|
|
|
|
# $str = string to be parsed |
|
11605
|
|
|
|
|
|
|
# $max_tokens_wanted > 0 to stop on reaching this many tokens. |
|
11606
|
|
|
|
|
|
|
# = undef or 0 means get all tokens |
|
11607
|
|
|
|
|
|
|
|
|
11608
|
|
|
|
|
|
|
# Break a string, $str, into a sequence of preliminary tokens (pre-tokens). |
|
11609
|
|
|
|
|
|
|
# We look for these types of tokens: |
|
11610
|
|
|
|
|
|
|
# words (type='w'), example: 'max_tokens_wanted' |
|
11611
|
|
|
|
|
|
|
# digits (type = 'd'), example: '0755' |
|
11612
|
|
|
|
|
|
|
# whitespace (type = 'b'), example: ' ' |
|
11613
|
|
|
|
|
|
|
# single character punct (type = char) example: '=' |
|
11614
|
|
|
|
|
|
|
|
|
11615
|
|
|
|
|
|
|
# Later operations will combine one or more of these pre-tokens into final |
|
11616
|
|
|
|
|
|
|
# tokens. We cannot do better than this yet because we might be in a |
|
11617
|
|
|
|
|
|
|
# quoted string or pattern. |
|
11618
|
|
|
|
|
|
|
|
|
11619
|
|
|
|
|
|
|
# An advantage of doing this pre-tokenization step is that it keeps almost |
|
11620
|
|
|
|
|
|
|
# all of the regex parsing very simple and localized right here. A |
|
11621
|
|
|
|
|
|
|
# disadvantage is that in some extremely rare instances we will have to go |
|
11622
|
|
|
|
|
|
|
# back and split a pre-token. |
|
11623
|
|
|
|
|
|
|
|
|
11624
|
|
|
|
|
|
|
# Return parameters: |
|
11625
|
7175
|
|
|
|
|
10432
|
my @tokens = (); # array of the tokens themselves |
|
11626
|
7175
|
|
|
|
|
12657
|
my @token_map = (0); # string position of start of each token |
|
11627
|
7175
|
|
|
|
|
9194
|
my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct |
|
11628
|
|
|
|
|
|
|
|
|
11629
|
7175
|
100
|
|
|
|
11981
|
if ( !$max_tokens_wanted ) { $max_tokens_wanted = -1 } |
|
|
6862
|
|
|
|
|
8409
|
|
|
11630
|
|
|
|
|
|
|
|
|
11631
|
7175
|
|
|
|
|
12727
|
while ( $max_tokens_wanted-- ) { |
|
11632
|
|
|
|
|
|
|
|
|
11633
|
96069
|
100
|
|
|
|
200751
|
if ( |
|
11634
|
|
|
|
|
|
|
$str =~ m{ |
|
11635
|
|
|
|
|
|
|
\G( |
|
11636
|
|
|
|
|
|
|
(\s+) # type 'b' = whitespace - this must come before \W |
|
11637
|
|
|
|
|
|
|
| (\W) # or type=char = single-character, non-whitespace punct |
|
11638
|
|
|
|
|
|
|
| (\d+) # or type 'd' = sequence of digits - must come before \w |
|
11639
|
|
|
|
|
|
|
| (\w+) # or type 'w' = words not starting with a digit |
|
11640
|
|
|
|
|
|
|
) |
|
11641
|
|
|
|
|
|
|
}gcx |
|
11642
|
|
|
|
|
|
|
) |
|
11643
|
|
|
|
|
|
|
{ |
|
11644
|
89052
|
|
|
|
|
136248
|
push @tokens, $1; |
|
11645
|
89052
|
100
|
|
|
|
177240
|
push @type, |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
11646
|
|
|
|
|
|
|
defined($2) ? 'b' : defined($3) ? $1 : defined($4) ? 'd' : 'w'; |
|
11647
|
89052
|
|
|
|
|
137013
|
push @token_map, pos($str); |
|
11648
|
|
|
|
|
|
|
} |
|
11649
|
|
|
|
|
|
|
|
|
11650
|
|
|
|
|
|
|
# that's all.. |
|
11651
|
|
|
|
|
|
|
else { |
|
11652
|
7017
|
|
|
|
|
38716
|
return ( \@tokens, \@token_map, \@type ); |
|
11653
|
|
|
|
|
|
|
} |
|
11654
|
|
|
|
|
|
|
} ## end while ( $max_tokens_wanted...) |
|
11655
|
|
|
|
|
|
|
|
|
11656
|
158
|
|
|
|
|
617
|
return ( \@tokens, \@token_map, \@type ); |
|
11657
|
|
|
|
|
|
|
} ## end sub pre_tokenize |
|
11658
|
|
|
|
|
|
|
|
|
11659
|
|
|
|
|
|
|
sub show_tokens { |
|
11660
|
|
|
|
|
|
|
|
|
11661
|
|
|
|
|
|
|
# This is an uncalled debug routine, saved for reference |
|
11662
|
0
|
|
|
0
|
0
|
|
my ( $rtokens, $rtoken_map ) = @_; |
|
11663
|
0
|
|
|
|
|
|
my $num = scalar( @{$rtokens} ); |
|
|
0
|
|
|
|
|
|
|
|
11664
|
|
|
|
|
|
|
|
|
11665
|
0
|
|
|
|
|
|
foreach my $i ( 0 .. $num - 1 ) { |
|
11666
|
0
|
|
|
|
|
|
my $len = length( $rtokens->[$i] ); |
|
11667
|
0
|
|
|
|
|
|
print {*STDOUT} "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n"; |
|
|
0
|
|
|
|
|
|
|
|
11668
|
|
|
|
|
|
|
} |
|
11669
|
0
|
|
|
|
|
|
return; |
|
11670
|
|
|
|
|
|
|
} ## end sub show_tokens |
|
11671
|
|
|
|
|
|
|
|
|
11672
|
|
|
|
|
|
|
sub dump_token_types { |
|
11673
|
0
|
|
|
0
|
0
|
|
my ( $class, $fh ) = @_; |
|
11674
|
|
|
|
|
|
|
|
|
11675
|
|
|
|
|
|
|
# This should be the latest list of token types in use |
|
11676
|
|
|
|
|
|
|
# adding NEW_TOKENS: add a comment here |
|
11677
|
0
|
|
|
|
|
|
$fh->print(<<'END_OF_LIST'); |
|
11678
|
|
|
|
|
|
|
|
|
11679
|
|
|
|
|
|
|
Here is a list of the token types currently used for lines of type 'CODE'. |
|
11680
|
|
|
|
|
|
|
For the following tokens, the "type" of a token is just the token itself. |
|
11681
|
|
|
|
|
|
|
|
|
11682
|
|
|
|
|
|
|
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> |
|
11683
|
|
|
|
|
|
|
( ) <= >= == =~ !~ != ++ -- /= x= |
|
11684
|
|
|
|
|
|
|
... **= <<= >>= &&= ||= //= <=> |
|
11685
|
|
|
|
|
|
|
, + - / * | % ! x ~ = \ ? : . < > ^ & ^^ |
|
11686
|
|
|
|
|
|
|
|
|
11687
|
|
|
|
|
|
|
The following additional token types are defined: |
|
11688
|
|
|
|
|
|
|
|
|
11689
|
|
|
|
|
|
|
type meaning |
|
11690
|
|
|
|
|
|
|
b blank (white space) |
|
11691
|
|
|
|
|
|
|
{ indent: opening structural curly brace or square bracket or paren |
|
11692
|
|
|
|
|
|
|
(code block, anonymous hash reference, or anonymous array reference) |
|
11693
|
|
|
|
|
|
|
} outdent: right structural curly brace or square bracket or paren |
|
11694
|
|
|
|
|
|
|
[ left non-structural square bracket (enclosing an array index) |
|
11695
|
|
|
|
|
|
|
] right non-structural square bracket |
|
11696
|
|
|
|
|
|
|
( left non-structural paren (all but a list right of an =) |
|
11697
|
|
|
|
|
|
|
) right non-structural paren |
|
11698
|
|
|
|
|
|
|
L left non-structural curly brace (enclosing a key) |
|
11699
|
|
|
|
|
|
|
R right non-structural curly brace |
|
11700
|
|
|
|
|
|
|
; terminal semicolon |
|
11701
|
|
|
|
|
|
|
f indicates a semicolon in a "for" statement |
|
11702
|
|
|
|
|
|
|
h here_doc operator << |
|
11703
|
|
|
|
|
|
|
# a comment |
|
11704
|
|
|
|
|
|
|
Q indicates a quote or pattern |
|
11705
|
|
|
|
|
|
|
q indicates a qw quote block |
|
11706
|
|
|
|
|
|
|
k a perl keyword |
|
11707
|
|
|
|
|
|
|
C user-defined constant or constant function (with void prototype = ()) |
|
11708
|
|
|
|
|
|
|
U user-defined function taking parameters |
|
11709
|
|
|
|
|
|
|
G user-defined function taking block parameter (like grep/map/eval) |
|
11710
|
|
|
|
|
|
|
S sub definition (reported as type 'i' in older versions) |
|
11711
|
|
|
|
|
|
|
P package definition (reported as type 'i' in older versions) |
|
11712
|
|
|
|
|
|
|
t type indicator such as %,$,@,*,&,sub |
|
11713
|
|
|
|
|
|
|
w bare word (perhaps a subroutine call) |
|
11714
|
|
|
|
|
|
|
i identifier of some type (with leading %, $, @, *, &, sub, -> ) |
|
11715
|
|
|
|
|
|
|
n a number |
|
11716
|
|
|
|
|
|
|
v a v-string |
|
11717
|
|
|
|
|
|
|
F a file test operator (like -e) |
|
11718
|
|
|
|
|
|
|
Y File handle |
|
11719
|
|
|
|
|
|
|
Z identifier in indirect object slot: may be file handle, object |
|
11720
|
|
|
|
|
|
|
J LABEL: code block label |
|
11721
|
|
|
|
|
|
|
j LABEL after next, last, redo, goto |
|
11722
|
|
|
|
|
|
|
p unary + |
|
11723
|
|
|
|
|
|
|
m unary - |
|
11724
|
|
|
|
|
|
|
pp pre-increment operator ++ |
|
11725
|
|
|
|
|
|
|
mm pre-decrement operator -- |
|
11726
|
|
|
|
|
|
|
A : used as attribute separator |
|
11727
|
|
|
|
|
|
|
|
|
11728
|
|
|
|
|
|
|
Here are the '_line_type' codes used internally: |
|
11729
|
|
|
|
|
|
|
SYSTEM - system-specific code before hash-bang line |
|
11730
|
|
|
|
|
|
|
CODE - line of perl code (including comments) |
|
11731
|
|
|
|
|
|
|
POD_START - line starting pod, such as '=head' |
|
11732
|
|
|
|
|
|
|
POD - pod documentation text |
|
11733
|
|
|
|
|
|
|
POD_END - last line of pod section, '=cut' |
|
11734
|
|
|
|
|
|
|
HERE - text of here-document |
|
11735
|
|
|
|
|
|
|
HERE_END - last line of here-doc (target word) |
|
11736
|
|
|
|
|
|
|
FORMAT - format section |
|
11737
|
|
|
|
|
|
|
FORMAT_END - last line of format section, '.' |
|
11738
|
|
|
|
|
|
|
SKIP - code skipping section |
|
11739
|
|
|
|
|
|
|
SKIP_END - last line of code skipping section, '#>>V' |
|
11740
|
|
|
|
|
|
|
DATA_START - __DATA__ line |
|
11741
|
|
|
|
|
|
|
DATA - unidentified text following __DATA__ |
|
11742
|
|
|
|
|
|
|
END_START - __END__ line |
|
11743
|
|
|
|
|
|
|
END - unidentified text following __END__ |
|
11744
|
|
|
|
|
|
|
ERROR - we are in big trouble, probably not a perl script |
|
11745
|
|
|
|
|
|
|
END_OF_LIST |
|
11746
|
|
|
|
|
|
|
|
|
11747
|
0
|
|
|
|
|
|
return; |
|
11748
|
|
|
|
|
|
|
} ## end sub dump_token_types |
|
11749
|
|
|
|
|
|
|
|
|
11750
|
|
|
|
|
|
|
#------------------ |
|
11751
|
|
|
|
|
|
|
# About Token Types |
|
11752
|
|
|
|
|
|
|
#------------------ |
|
11753
|
|
|
|
|
|
|
|
|
11754
|
|
|
|
|
|
|
# The array "valid_token_types" in the BEGIN section has an up-to-date list |
|
11755
|
|
|
|
|
|
|
# of token types. Sub 'dump_token_types' should be kept up to date with |
|
11756
|
|
|
|
|
|
|
# token types. |
|
11757
|
|
|
|
|
|
|
|
|
11758
|
|
|
|
|
|
|
# Ideally, tokens are the smallest pieces of text |
|
11759
|
|
|
|
|
|
|
# such that a newline may be inserted between any pair of tokens without |
|
11760
|
|
|
|
|
|
|
# changing or invalidating the program. This version comes close to this, |
|
11761
|
|
|
|
|
|
|
# although there are necessarily a few exceptions which must be caught by |
|
11762
|
|
|
|
|
|
|
# the formatter. Many of these involve the treatment of bare words. |
|
11763
|
|
|
|
|
|
|
# |
|
11764
|
|
|
|
|
|
|
# To simplify things, token types are either a single character, or they |
|
11765
|
|
|
|
|
|
|
# are identical to the tokens themselves. |
|
11766
|
|
|
|
|
|
|
# |
|
11767
|
|
|
|
|
|
|
# As a debugging aid, the -D flag creates a file containing a side-by-side |
|
11768
|
|
|
|
|
|
|
# comparison of the input string and its tokenization for each line of a file. |
|
11769
|
|
|
|
|
|
|
# This is an invaluable debugging aid. |
|
11770
|
|
|
|
|
|
|
# |
|
11771
|
|
|
|
|
|
|
# In addition to tokens, and some associated quantities, the tokenizer |
|
11772
|
|
|
|
|
|
|
# also returns flags indication any special line types. These include |
|
11773
|
|
|
|
|
|
|
# quotes, here_docs, formats. |
|
11774
|
|
|
|
|
|
|
# |
|
11775
|
|
|
|
|
|
|
#------------------ |
|
11776
|
|
|
|
|
|
|
# Adding NEW_TOKENS |
|
11777
|
|
|
|
|
|
|
#------------------ |
|
11778
|
|
|
|
|
|
|
# |
|
11779
|
|
|
|
|
|
|
# Here are some notes on the minimal steps. I wrote these notes while |
|
11780
|
|
|
|
|
|
|
# adding the 'v' token type for v-strings, which are things like version |
|
11781
|
|
|
|
|
|
|
# numbers 5.6.0, and ip addresses, and will use that as an example. ( You |
|
11782
|
|
|
|
|
|
|
# can use your editor to search for the string "NEW_TOKENS" to find the |
|
11783
|
|
|
|
|
|
|
# appropriate sections to change): |
|
11784
|
|
|
|
|
|
|
|
|
11785
|
|
|
|
|
|
|
# *. For another example, search for the smartmatch operator '~~' |
|
11786
|
|
|
|
|
|
|
# with your editor to see where updates were made for it. |
|
11787
|
|
|
|
|
|
|
|
|
11788
|
|
|
|
|
|
|
# *. For another example, search for the string 'c250', which shows |
|
11789
|
|
|
|
|
|
|
# locations where changes for new types 'P' and 'S' were made. |
|
11790
|
|
|
|
|
|
|
|
|
11791
|
|
|
|
|
|
|
# *. Think of a new, unused character for the token type, and add to |
|
11792
|
|
|
|
|
|
|
# the array @valid_token_types in the BEGIN section of this package. |
|
11793
|
|
|
|
|
|
|
# For example, I used 'v' for v-strings. |
|
11794
|
|
|
|
|
|
|
# |
|
11795
|
|
|
|
|
|
|
# *. Implement coding to recognize the $type of the token in this routine. |
|
11796
|
|
|
|
|
|
|
# This is the hardest part, and is best done by imitating or modifying |
|
11797
|
|
|
|
|
|
|
# some of the existing coding. For example, to recognize v-strings, I |
|
11798
|
|
|
|
|
|
|
# patched 'sub scan_bare_identifier' to recognize v-strings beginning with |
|
11799
|
|
|
|
|
|
|
# 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. |
|
11800
|
|
|
|
|
|
|
# |
|
11801
|
|
|
|
|
|
|
# *. Update sub operator_expected. This update is critically important but |
|
11802
|
|
|
|
|
|
|
# the coding is trivial. Look at the comments in that routine for help. |
|
11803
|
|
|
|
|
|
|
# For v-strings, which should behave like numbers, I just added 'v' to the |
|
11804
|
|
|
|
|
|
|
# regex used to handle numbers and strings (types 'n' and 'Q'). |
|
11805
|
|
|
|
|
|
|
# |
|
11806
|
|
|
|
|
|
|
# *. Implement a 'bond strength' rule in sub set_bond_strengths in |
|
11807
|
|
|
|
|
|
|
# Perl::Tidy::Formatter for breaking lines around this token type. You can |
|
11808
|
|
|
|
|
|
|
# skip this step and take the default at first, then adjust later to get |
|
11809
|
|
|
|
|
|
|
# desired results. For adding type 'v', I looked at sub bond_strength and |
|
11810
|
|
|
|
|
|
|
# saw that number type 'n' was using default strengths, so I didn't do |
|
11811
|
|
|
|
|
|
|
# anything. I may tune it up someday if I don't like the way line |
|
11812
|
|
|
|
|
|
|
# breaks with v-strings look. |
|
11813
|
|
|
|
|
|
|
# |
|
11814
|
|
|
|
|
|
|
# *. Implement a 'whitespace' rule in sub set_whitespace_flags in |
|
11815
|
|
|
|
|
|
|
# Perl::Tidy::Formatter. For adding type 'v', I looked at this routine |
|
11816
|
|
|
|
|
|
|
# and saw that type 'n' used spaces on both sides, so I just added 'v' |
|
11817
|
|
|
|
|
|
|
# to the array @spaces_both_sides. |
|
11818
|
|
|
|
|
|
|
# |
|
11819
|
|
|
|
|
|
|
# *. Update HtmlWriter package so that users can colorize the token as |
|
11820
|
|
|
|
|
|
|
# desired. This is quite easy; see comments identified by 'NEW_TOKENS' in |
|
11821
|
|
|
|
|
|
|
# that package. For v-strings, I initially chose to use a default color |
|
11822
|
|
|
|
|
|
|
# equal to the default for numbers, but it might be nice to change that |
|
11823
|
|
|
|
|
|
|
# eventually. |
|
11824
|
|
|
|
|
|
|
# |
|
11825
|
|
|
|
|
|
|
# *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. |
|
11826
|
|
|
|
|
|
|
# |
|
11827
|
|
|
|
|
|
|
# *. Run lots and lots of debug tests. Start with special files designed |
|
11828
|
|
|
|
|
|
|
# to test the new token type. Run with the -D flag to create a .DEBUG |
|
11829
|
|
|
|
|
|
|
# file which shows the tokenization. When these work ok, test as many old |
|
11830
|
|
|
|
|
|
|
# scripts as possible. Start with all of the '.t' files in the 'test' |
|
11831
|
|
|
|
|
|
|
# directory of the distribution file. Compare .tdy output with previous |
|
11832
|
|
|
|
|
|
|
# version and updated version to see the differences. Then include as |
|
11833
|
|
|
|
|
|
|
# many more files as possible. My own technique has been to collect a huge |
|
11834
|
|
|
|
|
|
|
# number of perl scripts (thousands!) into one directory and run perltidy |
|
11835
|
|
|
|
|
|
|
# *, then run diff between the output of the previous version and the |
|
11836
|
|
|
|
|
|
|
# current version. |
|
11837
|
|
|
|
|
|
|
|
|
11838
|
|
|
|
|
|
|
BEGIN { |
|
11839
|
|
|
|
|
|
|
|
|
11840
|
|
|
|
|
|
|
# These names are used in error messages |
|
11841
|
44
|
|
|
44
|
|
358
|
@opening_brace_names = qw# '{' '[' '(' '?' #; |
|
11842
|
44
|
|
|
|
|
159
|
@closing_brace_names = qw# '}' ']' ')' ':' #; |
|
11843
|
|
|
|
|
|
|
|
|
11844
|
44
|
|
|
|
|
114
|
my @q; |
|
11845
|
|
|
|
|
|
|
|
|
11846
|
44
|
|
|
|
|
278
|
my @digraphs = qw# |
|
11847
|
|
|
|
|
|
|
.. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <> |
|
11848
|
|
|
|
|
|
|
<= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^. ^^ |
|
11849
|
|
|
|
|
|
|
#; |
|
11850
|
44
|
|
|
|
|
1201
|
$is_digraph{$_} = 1 for @digraphs; |
|
11851
|
|
|
|
|
|
|
|
|
11852
|
44
|
|
|
|
|
179
|
@q = qw( |
|
11853
|
|
|
|
|
|
|
. : < > * & | / - = + - % ^ ! x ~ |
|
11854
|
|
|
|
|
|
|
); |
|
11855
|
44
|
|
|
|
|
409
|
$can_start_digraph{$_} = 1 for @q; |
|
11856
|
|
|
|
|
|
|
|
|
11857
|
44
|
|
|
|
|
200
|
my @trigraphs = |
|
11858
|
|
|
|
|
|
|
qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~ ^^= ); |
|
11859
|
44
|
|
|
|
|
532
|
$is_trigraph{$_} = 1 for @trigraphs; |
|
11860
|
|
|
|
|
|
|
|
|
11861
|
44
|
|
|
|
|
192
|
my @tetragraphs = qw( <<>> ); |
|
11862
|
44
|
|
|
|
|
161
|
$is_tetragraph{$_} = 1 for @tetragraphs; |
|
11863
|
|
|
|
|
|
|
|
|
11864
|
|
|
|
|
|
|
# make a hash of all valid token types for self-checking the tokenizer |
|
11865
|
|
|
|
|
|
|
# (adding NEW_TOKENS : select a new character and add to this list) |
|
11866
|
|
|
|
|
|
|
# fix for c250: added new token type 'P' and 'S' |
|
11867
|
44
|
|
|
|
|
424
|
my @valid_token_types = qw# |
|
11868
|
|
|
|
|
|
|
A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P S |
|
11869
|
|
|
|
|
|
|
{ } ( ) [ ] ; + - / * | % ! x ~ = ? : . < > ^ & |
|
11870
|
|
|
|
|
|
|
#; |
|
11871
|
44
|
|
|
|
|
164
|
push @valid_token_types, BACKSLASH; |
|
11872
|
44
|
|
|
|
|
331
|
push( @valid_token_types, @digraphs ); |
|
11873
|
44
|
|
|
|
|
180
|
push( @valid_token_types, @trigraphs ); |
|
11874
|
44
|
|
|
|
|
95
|
push( @valid_token_types, @tetragraphs ); |
|
11875
|
44
|
|
|
|
|
96
|
push( @valid_token_types, ( '#', COMMA, 'CORE::' ) ); |
|
11876
|
44
|
|
|
|
|
2363
|
$is_valid_token_type{$_} = 1 for @valid_token_types; |
|
11877
|
|
|
|
|
|
|
|
|
11878
|
|
|
|
|
|
|
# a list of file test letters, as in -e (Table 3-4 of 'camel 3') |
|
11879
|
44
|
|
|
|
|
323
|
my @file_test_operators = |
|
11880
|
|
|
|
|
|
|
qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z ); |
|
11881
|
44
|
|
|
|
|
631
|
$is_file_test_operator{$_} = 1 for @file_test_operators; |
|
11882
|
|
|
|
|
|
|
|
|
11883
|
|
|
|
|
|
|
# these functions have prototypes of the form (&), so when they are |
|
11884
|
|
|
|
|
|
|
# followed by a block, that block MAY BE followed by an operator. |
|
11885
|
|
|
|
|
|
|
# Smartmatch operator ~~ may be followed by anonymous hash or array ref |
|
11886
|
44
|
|
|
|
|
149
|
@q = qw( do eval ); |
|
11887
|
44
|
|
|
|
|
158
|
$is_block_operator{$_} = 1 for @q; |
|
11888
|
|
|
|
|
|
|
|
|
11889
|
|
|
|
|
|
|
# these functions allow an identifier in the indirect object slot |
|
11890
|
44
|
|
|
|
|
135
|
@q = qw( print printf sort exec system say ); |
|
11891
|
44
|
|
|
|
|
281
|
$is_indirect_object_taker{$_} = 1 for @q; |
|
11892
|
|
|
|
|
|
|
|
|
11893
|
|
|
|
|
|
|
# Keywords which definitely produce error if an OPERATOR is expected |
|
11894
|
44
|
|
|
|
|
113
|
@q = qw( my our state local use require ); |
|
11895
|
44
|
|
|
|
|
282
|
$is_TERM_keyword{$_} = 1 for @q; |
|
11896
|
|
|
|
|
|
|
|
|
11897
|
|
|
|
|
|
|
# Note: 'field' will be added by sub check_options if --use-feature=class |
|
11898
|
44
|
|
|
|
|
100
|
@q = qw( my our state ); |
|
11899
|
44
|
|
|
|
|
140
|
$is_my_our_state{$_} = 1 for @q; |
|
11900
|
|
|
|
|
|
|
|
|
11901
|
|
|
|
|
|
|
# These tokens may precede a code block |
|
11902
|
|
|
|
|
|
|
# patched for SWITCH/CASE/CATCH. Actually these could be removed |
|
11903
|
|
|
|
|
|
|
# now and we could let the extended-syntax coding handle them. |
|
11904
|
|
|
|
|
|
|
# Added 'default' for Switch::Plain. |
|
11905
|
|
|
|
|
|
|
# Note: 'ADJUST' will be added by sub check_options if --use-feature=class |
|
11906
|
44
|
|
|
|
|
271
|
@q = qw( |
|
11907
|
|
|
|
|
|
|
BEGIN END CHECK INIT AUTOLOAD DESTROY |
|
11908
|
|
|
|
|
|
|
UNITCHECK continue if elsif else unless |
|
11909
|
|
|
|
|
|
|
do while until eval for foreach |
|
11910
|
|
|
|
|
|
|
map grep sort switch case given |
|
11911
|
|
|
|
|
|
|
when default catch try finally |
|
11912
|
|
|
|
|
|
|
); |
|
11913
|
44
|
|
|
|
|
1050
|
$is_code_block_token{$_} = 1 for @q; |
|
11914
|
|
|
|
|
|
|
|
|
11915
|
|
|
|
|
|
|
# These block types terminate statements and do not need a trailing |
|
11916
|
|
|
|
|
|
|
# semicolon; patched for SWITCH/CASE/; This may be updated in sub |
|
11917
|
|
|
|
|
|
|
# check_options. |
|
11918
|
44
|
|
|
|
|
266
|
@q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; |
|
11919
|
|
|
|
|
|
|
if elsif else unless while until for foreach switch case given when ); |
|
11920
|
44
|
|
|
|
|
556
|
$is_zero_continuation_block_type{$_} = 1 for @q; |
|
11921
|
|
|
|
|
|
|
|
|
11922
|
|
|
|
|
|
|
# Note: this hash was formerly named '%is_not_zero_continuation_block_type' |
|
11923
|
|
|
|
|
|
|
# to contrast it with the block types in '%is_zero_continuation_block_type' |
|
11924
|
|
|
|
|
|
|
# Note: added 'sub' for anonymous sub blocks (c443) |
|
11925
|
44
|
|
|
|
|
144
|
@q = qw( sort map grep eval do sub ); |
|
11926
|
44
|
|
|
|
|
178
|
$is_sort_map_grep_eval_do_sub{$_} = 1 for @q; |
|
11927
|
|
|
|
|
|
|
|
|
11928
|
44
|
|
|
|
|
140
|
@q = qw( sort map grep ); |
|
11929
|
44
|
|
|
|
|
112
|
$is_sort_map_grep{$_} = 1 for @q; |
|
11930
|
|
|
|
|
|
|
|
|
11931
|
44
|
|
|
|
|
101
|
%is_grep_alias = (); |
|
11932
|
|
|
|
|
|
|
|
|
11933
|
|
|
|
|
|
|
# I'll build the list of keywords incrementally |
|
11934
|
44
|
|
|
|
|
67
|
my @Keywords = (); |
|
11935
|
|
|
|
|
|
|
|
|
11936
|
|
|
|
|
|
|
# keywords and tokens after which a value or pattern is expected, |
|
11937
|
|
|
|
|
|
|
# but not an operator. In other words, these should consume terms |
|
11938
|
|
|
|
|
|
|
# to their right, or at least they are not expected to be followed |
|
11939
|
|
|
|
|
|
|
# immediately by operators. |
|
11940
|
44
|
|
|
|
|
1228
|
my @value_requestor = qw( |
|
11941
|
|
|
|
|
|
|
AUTOLOAD BEGIN CHECK DESTROY |
|
11942
|
|
|
|
|
|
|
END EQ GE GT |
|
11943
|
|
|
|
|
|
|
INIT LE LT NE |
|
11944
|
|
|
|
|
|
|
UNITCHECK abs accept alarm |
|
11945
|
|
|
|
|
|
|
and atan2 bind binmode |
|
11946
|
|
|
|
|
|
|
bless break caller chdir |
|
11947
|
|
|
|
|
|
|
chmod chomp chop chown |
|
11948
|
|
|
|
|
|
|
chr chroot close closedir |
|
11949
|
|
|
|
|
|
|
cmp connect continue cos |
|
11950
|
|
|
|
|
|
|
crypt dbmclose dbmopen defined |
|
11951
|
|
|
|
|
|
|
delete die dump each |
|
11952
|
|
|
|
|
|
|
else elsif eof eq |
|
11953
|
|
|
|
|
|
|
evalbytes exec exists exit |
|
11954
|
|
|
|
|
|
|
exp fc fcntl fileno |
|
11955
|
|
|
|
|
|
|
flock for foreach formline |
|
11956
|
|
|
|
|
|
|
ge getc getgrgid getgrnam |
|
11957
|
|
|
|
|
|
|
gethostbyaddr gethostbyname getnetbyaddr getnetbyname |
|
11958
|
|
|
|
|
|
|
getpeername getpgrp getpriority getprotobyname |
|
11959
|
|
|
|
|
|
|
getprotobynumber getpwnam getpwuid getservbyname |
|
11960
|
|
|
|
|
|
|
getservbyport getsockname getsockopt glob |
|
11961
|
|
|
|
|
|
|
gmtime goto grep gt |
|
11962
|
|
|
|
|
|
|
hex if index int |
|
11963
|
|
|
|
|
|
|
ioctl join keys kill |
|
11964
|
|
|
|
|
|
|
last lc lcfirst le |
|
11965
|
|
|
|
|
|
|
length link listen local |
|
11966
|
|
|
|
|
|
|
localtime lock log lstat |
|
11967
|
|
|
|
|
|
|
lt map mkdir msgctl |
|
11968
|
|
|
|
|
|
|
msgget msgrcv msgsnd my |
|
11969
|
|
|
|
|
|
|
ne next no not |
|
11970
|
|
|
|
|
|
|
oct open opendir or |
|
11971
|
|
|
|
|
|
|
ord our pack pipe |
|
11972
|
|
|
|
|
|
|
pop pos print printf |
|
11973
|
|
|
|
|
|
|
prototype push quotemeta rand |
|
11974
|
|
|
|
|
|
|
read readdir readlink readline |
|
11975
|
|
|
|
|
|
|
readpipe recv redo ref |
|
11976
|
|
|
|
|
|
|
rename require reset return |
|
11977
|
|
|
|
|
|
|
reverse rewinddir rindex rmdir |
|
11978
|
|
|
|
|
|
|
scalar seek seekdir select |
|
11979
|
|
|
|
|
|
|
semctl semget semop send |
|
11980
|
|
|
|
|
|
|
sethostent setnetent setpgrp setpriority |
|
11981
|
|
|
|
|
|
|
setprotoent setservent setsockopt shift |
|
11982
|
|
|
|
|
|
|
shmctl shmget shmread shmwrite |
|
11983
|
|
|
|
|
|
|
shutdown sin sleep socket |
|
11984
|
|
|
|
|
|
|
socketpair sort splice split |
|
11985
|
|
|
|
|
|
|
sprintf sqrt srand stat |
|
11986
|
|
|
|
|
|
|
state study substr symlink |
|
11987
|
|
|
|
|
|
|
syscall sysopen sysread sysseek |
|
11988
|
|
|
|
|
|
|
system syswrite tell telldir |
|
11989
|
|
|
|
|
|
|
tie tied truncate uc |
|
11990
|
|
|
|
|
|
|
ucfirst umask undef unless |
|
11991
|
|
|
|
|
|
|
unlink unpack unshift untie |
|
11992
|
|
|
|
|
|
|
until use utime values |
|
11993
|
|
|
|
|
|
|
vec waitpid warn while |
|
11994
|
|
|
|
|
|
|
write xor case catch |
|
11995
|
|
|
|
|
|
|
default err given isa |
|
11996
|
|
|
|
|
|
|
say switch when |
|
11997
|
|
|
|
|
|
|
); |
|
11998
|
|
|
|
|
|
|
|
|
11999
|
|
|
|
|
|
|
# Note: 'ADJUST', 'field' are added by sub check_options |
|
12000
|
|
|
|
|
|
|
# if --use-feature=class |
|
12001
|
|
|
|
|
|
|
|
|
12002
|
|
|
|
|
|
|
# patched above for SWITCH/CASE given/when err say |
|
12003
|
|
|
|
|
|
|
# 'err' is a fairly safe addition. |
|
12004
|
|
|
|
|
|
|
# Added 'default' for Switch::Plain. Note that we could also have |
|
12005
|
|
|
|
|
|
|
# a separate set of keywords to include if we see 'use Switch::Plain' |
|
12006
|
44
|
|
|
|
|
1429
|
push( @Keywords, @value_requestor ); |
|
12007
|
|
|
|
|
|
|
|
|
12008
|
|
|
|
|
|
|
# These are treated the same but are not keywords: |
|
12009
|
44
|
|
|
|
|
128
|
my @extra_vr = qw( constant vars ); |
|
12010
|
44
|
|
|
|
|
215
|
push( @value_requestor, @extra_vr ); |
|
12011
|
|
|
|
|
|
|
|
|
12012
|
44
|
|
|
|
|
6594
|
$expecting_term_token{$_} = 1 for @value_requestor; |
|
12013
|
|
|
|
|
|
|
|
|
12014
|
|
|
|
|
|
|
# this list contains keywords which do not look for arguments, |
|
12015
|
|
|
|
|
|
|
# so that they might be followed by an operator, or at least |
|
12016
|
|
|
|
|
|
|
# not a term. |
|
12017
|
44
|
|
|
|
|
250
|
my @operator_requestor = qw( |
|
12018
|
|
|
|
|
|
|
endgrent endhostent endnetent endprotoent |
|
12019
|
|
|
|
|
|
|
endpwent endservent fork getgrent |
|
12020
|
|
|
|
|
|
|
gethostent getlogin getnetent getppid |
|
12021
|
|
|
|
|
|
|
getprotoent getpwent getservent setgrent |
|
12022
|
|
|
|
|
|
|
setpwent time times wait |
|
12023
|
|
|
|
|
|
|
wantarray |
|
12024
|
|
|
|
|
|
|
); |
|
12025
|
|
|
|
|
|
|
|
|
12026
|
44
|
|
|
|
|
135
|
push( @Keywords, @operator_requestor ); |
|
12027
|
|
|
|
|
|
|
|
|
12028
|
|
|
|
|
|
|
# These are treated the same but are not considered keywords: |
|
12029
|
44
|
|
|
|
|
91
|
my @extra_or = qw( STDERR STDIN STDOUT ); |
|
12030
|
|
|
|
|
|
|
|
|
12031
|
44
|
|
|
|
|
92
|
push( @operator_requestor, @extra_or ); |
|
12032
|
|
|
|
|
|
|
|
|
12033
|
44
|
|
|
|
|
857
|
$expecting_operator_token{$_} = 1 for @operator_requestor; |
|
12034
|
|
|
|
|
|
|
|
|
12035
|
|
|
|
|
|
|
# these token TYPES expect trailing operator but not a term |
|
12036
|
|
|
|
|
|
|
# note: ++ and -- are post-increment and decrement, 'C' = constant |
|
12037
|
44
|
|
|
|
|
105
|
my @operator_requestor_types = qw( ++ -- C <> q ); |
|
12038
|
|
|
|
|
|
|
|
|
12039
|
|
|
|
|
|
|
# NOTE: This hash is available but not currently used |
|
12040
|
44
|
|
|
|
|
195
|
$expecting_operator_types{$_} = 1 for @operator_requestor_types; |
|
12041
|
|
|
|
|
|
|
|
|
12042
|
|
|
|
|
|
|
# these token TYPES consume values (terms) |
|
12043
|
|
|
|
|
|
|
# note: pp and mm are pre-increment and decrement |
|
12044
|
|
|
|
|
|
|
# f=semicolon in for, F=file test operator |
|
12045
|
44
|
|
|
|
|
904
|
my @value_requestor_type = qw# |
|
12046
|
|
|
|
|
|
|
L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x |
|
12047
|
|
|
|
|
|
|
**= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= |
|
12048
|
|
|
|
|
|
|
<= >= == != => > < % * / ? & | ** <=> ~~ !~~ <<~ |
|
12049
|
|
|
|
|
|
|
f F pp mm Y p m U J G j >> << ^ t |
|
12050
|
|
|
|
|
|
|
~. ^. |. &. ^.= |.= &.= ^^ |
|
12051
|
|
|
|
|
|
|
#; |
|
12052
|
44
|
|
|
|
|
163
|
push @value_requestor_type, BACKSLASH; |
|
12053
|
44
|
|
|
|
|
83
|
push @value_requestor_type, COMMA; |
|
12054
|
|
|
|
|
|
|
|
|
12055
|
|
|
|
|
|
|
# NOTE: This hash is available but not currently used |
|
12056
|
44
|
|
|
|
|
1410
|
$expecting_term_types{$_} = 1 for @value_requestor_type; |
|
12057
|
|
|
|
|
|
|
|
|
12058
|
|
|
|
|
|
|
# Note: the following valid token types are not assigned here to |
|
12059
|
|
|
|
|
|
|
# hashes requesting to be followed by values or terms, but are |
|
12060
|
|
|
|
|
|
|
# instead currently hard-coded into sub operator_expected: |
|
12061
|
|
|
|
|
|
|
# ) -> :: Q R Z ] b h i k n v w } # |
|
12062
|
|
|
|
|
|
|
|
|
12063
|
|
|
|
|
|
|
# A syntax error will occur if following operators are not followed by a |
|
12064
|
|
|
|
|
|
|
# TERM (with an exception made for tokens in sub signatures). |
|
12065
|
|
|
|
|
|
|
# NOTE: this list does not include unary operator '!' |
|
12066
|
|
|
|
|
|
|
|
|
12067
|
|
|
|
|
|
|
# Note the following omissions from the syntax checking operators below |
|
12068
|
|
|
|
|
|
|
# 'U' = user sub, depends on prototype |
|
12069
|
|
|
|
|
|
|
# 'F' = file test works on $_ if no following term |
|
12070
|
|
|
|
|
|
|
# 'Y' = indirect object, too risky to check syntax |
|
12071
|
|
|
|
|
|
|
|
|
12072
|
44
|
|
|
|
|
443
|
my @binary_ops = qw# |
|
12073
|
|
|
|
|
|
|
!~ =~ . .. : && || // = + - x |
|
12074
|
|
|
|
|
|
|
**= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= |
|
12075
|
|
|
|
|
|
|
<= >= == != > < % * / ? & | ** <=> ~~ !~~ <<~ |
|
12076
|
|
|
|
|
|
|
>> << ^ |
|
12077
|
|
|
|
|
|
|
^. |. &. ^.= |.= &.= ^^ |
|
12078
|
|
|
|
|
|
|
#; |
|
12079
|
44
|
|
|
|
|
772
|
$is_binary_operator_type{$_} = 1 for @binary_ops; |
|
12080
|
|
|
|
|
|
|
|
|
12081
|
|
|
|
|
|
|
# Note: omitting unary file test type 'F' here because it assumes $_ |
|
12082
|
44
|
|
|
|
|
129
|
my @unary_ops = qw# ! ~ ~. m p mm pp #; |
|
12083
|
44
|
|
|
|
|
477
|
$is_binary_operator_type{$_} = 1 for @binary_ops; |
|
12084
|
44
|
|
|
|
|
89
|
push @unary_ops, BACKSLASH; |
|
12085
|
44
|
|
|
|
|
1165
|
$is_binary_or_unary_operator_type{$_} = 1 for ( @binary_ops, @unary_ops ); |
|
12086
|
|
|
|
|
|
|
|
|
12087
|
44
|
|
|
|
|
207
|
my @binary_keywords = qw( and or err eq ne cmp ); |
|
12088
|
44
|
|
|
|
|
192
|
$is_binary_keyword{$_} = 1 for @binary_keywords; |
|
12089
|
|
|
|
|
|
|
|
|
12090
|
44
|
|
|
|
|
189
|
$is_binary_or_unary_keyword{$_} = 1 for ( @binary_keywords, 'not' ); |
|
12091
|
|
|
|
|
|
|
|
|
12092
|
|
|
|
|
|
|
# A syntax error occurs if a binary operator follows any of these types: |
|
12093
|
|
|
|
|
|
|
# NOTE: a ',' cannot be included because of parenless calls (c015). |
|
12094
|
|
|
|
|
|
|
# For example this is valid: print "hello\n", && print "goodbye\n"; |
|
12095
|
|
|
|
|
|
|
# NOTE: the 'not' keyword could be added to a corresponding _keyword list |
|
12096
|
|
|
|
|
|
|
# NOTE: label 'j' omitted, for example: -f $file ? redo BLOCK : last BLOCK; |
|
12097
|
|
|
|
|
|
|
# NOTE: Removed 'A': fixes git162.t |
|
12098
|
44
|
|
|
|
|
185
|
@q = qw< L { [ ( ; f J t >; |
|
12099
|
44
|
|
|
|
|
348
|
$is_not_a_TERM_producer_type{$_} = 1 for ( @q, @unary_ops ); |
|
12100
|
|
|
|
|
|
|
|
|
12101
|
44
|
|
|
|
|
182
|
@q = qw( q qq qx qr s y tr m ); |
|
12102
|
44
|
|
|
|
|
268
|
$is_q_qq_qx_qr_s_y_tr_m{$_} = 1 for @q; |
|
12103
|
|
|
|
|
|
|
|
|
12104
|
|
|
|
|
|
|
# Note added 'qw' here |
|
12105
|
44
|
|
|
|
|
144
|
@q = qw( q qq qw qx qr s y tr m ); |
|
12106
|
44
|
|
|
|
|
233
|
$is_q_qq_qw_qx_qr_s_y_tr_m{$_} = 1 for @q; |
|
12107
|
|
|
|
|
|
|
|
|
12108
|
|
|
|
|
|
|
# Quote modifiers: |
|
12109
|
|
|
|
|
|
|
# original ref: camel 3 p 147, |
|
12110
|
|
|
|
|
|
|
# but perl may accept undocumented flags |
|
12111
|
|
|
|
|
|
|
# perl 5.10 adds 'p' (preserve) |
|
12112
|
|
|
|
|
|
|
# Perl version 5.22 added 'n' |
|
12113
|
|
|
|
|
|
|
# From http://perldoc.perl.org/perlop.html we have |
|
12114
|
|
|
|
|
|
|
# /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc |
|
12115
|
|
|
|
|
|
|
# s/PATTERN/REPLACEMENT/msixpodualngcer |
|
12116
|
|
|
|
|
|
|
# y/SEARCHLIST/REPLACEMENTLIST/cdsr |
|
12117
|
|
|
|
|
|
|
# tr/SEARCHLIST/REPLACEMENTLIST/cdsr |
|
12118
|
|
|
|
|
|
|
# qr/STRING/msixpodualn |
|
12119
|
44
|
|
|
|
|
242
|
%quote_modifiers = ( |
|
12120
|
|
|
|
|
|
|
's' => '[msixpodualngcer]', |
|
12121
|
|
|
|
|
|
|
'y' => '[cdsr]', |
|
12122
|
|
|
|
|
|
|
'tr' => '[cdsr]', |
|
12123
|
|
|
|
|
|
|
'm' => '[msixpodualngc]', |
|
12124
|
|
|
|
|
|
|
'qr' => '[msixpodualn]', |
|
12125
|
|
|
|
|
|
|
'q' => EMPTY_STRING, |
|
12126
|
|
|
|
|
|
|
'qq' => EMPTY_STRING, |
|
12127
|
|
|
|
|
|
|
'qw' => EMPTY_STRING, |
|
12128
|
|
|
|
|
|
|
'qx' => EMPTY_STRING, |
|
12129
|
|
|
|
|
|
|
); |
|
12130
|
|
|
|
|
|
|
|
|
12131
|
|
|
|
|
|
|
# Note: 'class' will be added by sub check_options if -use-feature=class |
|
12132
|
44
|
|
|
|
|
133
|
@q = qw( package ); |
|
12133
|
44
|
|
|
|
|
157
|
$is_package{$_} = 1 for @q; |
|
12134
|
|
|
|
|
|
|
|
|
12135
|
44
|
|
|
|
|
100
|
@q = qw( if elsif unless ); |
|
12136
|
44
|
|
|
|
|
118
|
$is_if_elsif_unless{$_} = 1 for @q; |
|
12137
|
|
|
|
|
|
|
|
|
12138
|
44
|
|
|
|
|
107
|
@q = qw( ; t ); |
|
12139
|
44
|
|
|
|
|
122
|
$is_semicolon_or_t{$_} = 1 for @q; |
|
12140
|
|
|
|
|
|
|
|
|
12141
|
44
|
|
|
|
|
116
|
@q = qw( if elsif unless case when ); |
|
12142
|
44
|
|
|
|
|
147
|
$is_if_elsif_unless_case_when{$_} = 1 for @q; |
|
12143
|
|
|
|
|
|
|
|
|
12144
|
|
|
|
|
|
|
# Hash of other possible line endings which may occur. |
|
12145
|
|
|
|
|
|
|
# Keep these coordinated with the regex where this is used. |
|
12146
|
|
|
|
|
|
|
# Note: chr(13) = chr(015)="\r". |
|
12147
|
44
|
|
|
|
|
90
|
@q = ( chr(13), chr(29), chr(26) ); |
|
12148
|
44
|
|
|
|
|
167
|
$other_line_endings{$_} = 1 for @q; |
|
12149
|
|
|
|
|
|
|
|
|
12150
|
|
|
|
|
|
|
# These keywords are handled specially in the tokenizer code: |
|
12151
|
44
|
|
|
|
|
176
|
my @special_keywords = |
|
12152
|
|
|
|
|
|
|
qw( do eval format m package q qq qr qw qx s sub tr y ); |
|
12153
|
44
|
|
|
|
|
365
|
push( @Keywords, @special_keywords ); |
|
12154
|
|
|
|
|
|
|
|
|
12155
|
|
|
|
|
|
|
# Keywords after which list formatting may be used |
|
12156
|
|
|
|
|
|
|
# WARNING: do not include |map|grep|eval or perl may die on |
|
12157
|
|
|
|
|
|
|
# syntax errors (map1.t). |
|
12158
|
44
|
|
|
|
|
527
|
my @keyword_taking_list = qw( |
|
12159
|
|
|
|
|
|
|
and chmod chomp chop |
|
12160
|
|
|
|
|
|
|
chown dbmopen die elsif |
|
12161
|
|
|
|
|
|
|
exec fcntl for foreach |
|
12162
|
|
|
|
|
|
|
formline getsockopt given if |
|
12163
|
|
|
|
|
|
|
index ioctl join kill |
|
12164
|
|
|
|
|
|
|
local msgctl msgrcv msgsnd |
|
12165
|
|
|
|
|
|
|
my open or our |
|
12166
|
|
|
|
|
|
|
pack print printf push |
|
12167
|
|
|
|
|
|
|
read readpipe recv return |
|
12168
|
|
|
|
|
|
|
reverse rindex seek select |
|
12169
|
|
|
|
|
|
|
semctl semget send setpriority |
|
12170
|
|
|
|
|
|
|
setsockopt shmctl shmget shmread |
|
12171
|
|
|
|
|
|
|
shmwrite socket socketpair sort |
|
12172
|
|
|
|
|
|
|
splice split sprintf state |
|
12173
|
|
|
|
|
|
|
substr syscall sysopen sysread |
|
12174
|
|
|
|
|
|
|
sysseek system syswrite tie |
|
12175
|
|
|
|
|
|
|
unless unlink unpack unshift |
|
12176
|
|
|
|
|
|
|
until vec warn when |
|
12177
|
|
|
|
|
|
|
while |
|
12178
|
|
|
|
|
|
|
); |
|
12179
|
|
|
|
|
|
|
|
|
12180
|
|
|
|
|
|
|
# NOTE: This hash is available but not currently used |
|
12181
|
44
|
|
|
|
|
1306
|
$is_keyword_taking_list{$_} = 1 for @keyword_taking_list; |
|
12182
|
|
|
|
|
|
|
|
|
12183
|
|
|
|
|
|
|
# perl functions which may be unary operators. |
|
12184
|
|
|
|
|
|
|
|
|
12185
|
|
|
|
|
|
|
# This list is used to decide if a pattern delimited by slashes, /pattern/, |
|
12186
|
|
|
|
|
|
|
# can follow one of these keywords. |
|
12187
|
44
|
|
|
|
|
140
|
@q = qw( chomp eof eval fc lc pop shift uc undef ); |
|
12188
|
|
|
|
|
|
|
|
|
12189
|
44
|
|
|
|
|
276
|
$is_keyword_rejecting_slash_as_pattern_delimiter{$_} = 1 for @q; |
|
12190
|
|
|
|
|
|
|
|
|
12191
|
|
|
|
|
|
|
# These are keywords for which an arg may optionally be omitted. They are |
|
12192
|
|
|
|
|
|
|
# currently only used to disambiguate a ? used as a ternary from one used |
|
12193
|
|
|
|
|
|
|
# as a (deprecated) pattern delimiter. In the future, they might be used |
|
12194
|
|
|
|
|
|
|
# to give a warning about ambiguous syntax before a /. |
|
12195
|
|
|
|
|
|
|
# Note: split has been omitted (see note below). |
|
12196
|
44
|
|
|
|
|
487
|
my @keywords_taking_optional_arg = qw( |
|
12197
|
|
|
|
|
|
|
abs alarm caller chdir chomp chop |
|
12198
|
|
|
|
|
|
|
chr chroot close cos defined die |
|
12199
|
|
|
|
|
|
|
eof eval evalbytes exit exp fc |
|
12200
|
|
|
|
|
|
|
getc glob gmtime hex int last |
|
12201
|
|
|
|
|
|
|
lc lcfirst length localtime log lstat |
|
12202
|
|
|
|
|
|
|
mkdir next oct ord pop pos |
|
12203
|
|
|
|
|
|
|
print printf prototype quotemeta rand readline |
|
12204
|
|
|
|
|
|
|
readlink readpipe redo ref require reset |
|
12205
|
|
|
|
|
|
|
reverse rmdir say select shift sin |
|
12206
|
|
|
|
|
|
|
sleep sqrt srand stat study tell |
|
12207
|
|
|
|
|
|
|
uc ucfirst umask undef unlink warn |
|
12208
|
|
|
|
|
|
|
write |
|
12209
|
|
|
|
|
|
|
); |
|
12210
|
44
|
|
|
|
|
1055
|
$is_keyword_taking_optional_arg{$_} = 1 for @keywords_taking_optional_arg; |
|
12211
|
|
|
|
|
|
|
|
|
12212
|
|
|
|
|
|
|
# This list is used to decide if a pattern delimited by question marks, |
|
12213
|
|
|
|
|
|
|
# ?pattern?, can follow one of these keywords. Note that from perl 5.22 |
|
12214
|
|
|
|
|
|
|
# on, a ?pattern? is not recognized, so we can be much more strict than |
|
12215
|
|
|
|
|
|
|
# with a /pattern/. Note that 'split' is not in this list. In current |
|
12216
|
|
|
|
|
|
|
# versions of perl a question following split must be a ternary, but |
|
12217
|
|
|
|
|
|
|
# in older versions it could be a pattern. The guessing algorithm will |
|
12218
|
|
|
|
|
|
|
# decide. We are combining two lists here to simplify the test. |
|
12219
|
44
|
|
|
|
|
810
|
@q = ( @keywords_taking_optional_arg, @operator_requestor ); |
|
12220
|
44
|
|
|
|
|
1695
|
$is_keyword_rejecting_question_as_pattern_delimiter{$_} = 1 for @q; |
|
12221
|
|
|
|
|
|
|
|
|
12222
|
|
|
|
|
|
|
# These are not used in any way yet |
|
12223
|
|
|
|
|
|
|
# my @unused_keywords = qw( |
|
12224
|
|
|
|
|
|
|
# __FILE__ |
|
12225
|
|
|
|
|
|
|
# __LINE__ |
|
12226
|
|
|
|
|
|
|
# __PACKAGE__ |
|
12227
|
|
|
|
|
|
|
# ); |
|
12228
|
|
|
|
|
|
|
|
|
12229
|
|
|
|
|
|
|
# The list of keywords was originally extracted from function 'keyword' in |
|
12230
|
|
|
|
|
|
|
# perl file toke.c version 5.005.03, using this utility, plus a |
|
12231
|
|
|
|
|
|
|
# little editing: (file getkwd.pl): |
|
12232
|
|
|
|
|
|
|
# while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } |
|
12233
|
|
|
|
|
|
|
# Add 'get' prefix where necessary, then split into the above lists. |
|
12234
|
|
|
|
|
|
|
# This list should be updated as necessary. |
|
12235
|
|
|
|
|
|
|
# The list should not contain these special variables: |
|
12236
|
|
|
|
|
|
|
# ARGV DATA ENV SIG STDERR STDIN STDOUT |
|
12237
|
|
|
|
|
|
|
# __DATA__ __END__ |
|
12238
|
|
|
|
|
|
|
|
|
12239
|
44
|
|
|
|
|
4429
|
$is_keyword{$_} = 1 for @Keywords; |
|
12240
|
|
|
|
|
|
|
|
|
12241
|
44
|
|
|
|
|
8067
|
%matching_end_token = ( |
|
12242
|
|
|
|
|
|
|
'{' => '}', |
|
12243
|
|
|
|
|
|
|
'(' => ')', |
|
12244
|
|
|
|
|
|
|
'[' => ']', |
|
12245
|
|
|
|
|
|
|
'<' => '>', |
|
12246
|
|
|
|
|
|
|
); |
|
12247
|
|
|
|
|
|
|
} ## end BEGIN |
|
12248
|
|
|
|
|
|
|
|
|
12249
|
|
|
|
|
|
|
} ## end package Perl::Tidy::Tokenizer |
|
12250
|
|
|
|
|
|
|
1; |