File Coverage

blib/lib/Code/ART.pm
Criterion Covered Total %
statement 371 430 86.2
branch 102 160 63.7
condition 63 116 54.3
subroutine 22 25 88.0
pod 0 5 0.0
total 558 736 75.8


line stmt bran cond sub pod time code
1             package Code::ART;
2              
3 10     10   391093 use 5.016;
  10         111  
4 10     10   48 use warnings;
  10         16  
  10         279  
5 10     10   49 use Carp;
  10         16  
  10         813  
6 10     10   59 use Scalar::Util 'looks_like_number';
  10         16  
  10         861  
7 10     10   68 use List::Util qw< min max uniq>;
  10         21  
  10         1180  
8 10     10   4170 use version;
  10         18087  
  10         60  
9              
10             our $VERSION = '0.000005';
11              
12             # Default naming scheme for refactoring...
13             my $DEFAULT_SUB_NAME = '__REFACTORED_SUB__';
14             my $DEFAULT_LEXICAL_NAME = '__HOISTED_LEXICAL__';
15             my $DEFAULT_DATA_PARAM = '@__EXTRA_DATA__';
16             my $DEFAULT_AUTO_RETURN_VALUE = '@__RETURN_VALUE__';
17              
18             # These are the permitted options for refactor_to_sub()...
19             my %VALID_REFACTOR_OPTION = ( name=>1, from=>1, to=>1, data=>1, return=>1 );
20              
21             # These are the permitted options for hoist_to_lexical()...
22             my %VALID_HOIST_OPTION = ( name=>1, from=>1, to=>1, closure=>1, all=>1 );
23              
24             # Load the module...
25             sub import {
26 10     10   168 my $package = shift;
27 10   50     77 my $opt_ref = shift // {};
28              
29 10 50       51 croak("Options argument to 'use $package' must be a hash reference")
30             if ref($opt_ref) ne 'HASH';
31              
32             # # Remember lexically scoped options...
33             # for my $optname (keys %{$opt_ref}) {
34             # croak "Unknown option ('$optname') passed to 'use $package'"
35             # if !$VALID_REFACTOR_OPTION{$optname} && !$VALID_HOIST_OPTION{$optname};
36             # $^H{"Code::ART $optname"} = $opt_ref->{$optname};
37             # }
38              
39             # Export the API...
40 10     10   1594 no strict 'refs';
  10         19  
  10         6775  
41 10         29 *{caller().'::refactor_to_sub'} = \&refactor_to_sub;
  10         93  
42 10         27 *{caller().'::rename_variable'} = \&rename_variable;
  10         62  
43 10         24 *{caller().'::classify_all_vars_in'} = \&classify_all_vars_in;
  10         118  
44 10         26 *{caller().'::hoist_to_lexical'} = \&hoist_to_lexical;
  10         5431  
45             }
46              
47              
48             # This regex recognizes variables that don't need to be passed in
49             # even if they're not locally declared...
50             my $PERL_SPECIAL_VAR = qr{
51             \A
52             [\$\@%]
53             (?:
54             [][\d\{!"#\$%&'()*+,./:;<=>?\@\^`|~_-]
55             |
56             \^ .*
57             |
58             \{\^ .*
59             |
60             ACCUMULATOR | ARG | ARGV | ARRAY_BASE | AUTOLOAD | BASETIME | CHILD_ERROR |
61             COMPILING | DEBUGGING | EFFECTIVE_GROUP_ID | EFFECTIVE_USER_ID | EGID | ENV |
62             ERRNO | EUID | EVAL_ERROR | EXCEPTIONS_BEING_CAUGHT | EXECUTABLE_NAME |
63             EXTENDED_OS_ERROR | F | FORMAT_FORMFEED | FORMAT_LINES_LEFT | FORMAT_LINES_PER_PAGE |
64             FORMAT_LINE_BREAK_CHARACTERS | FORMAT_NAME | FORMAT_PAGE_NUMBER | FORMAT_TOP_NAME |
65             GID | INC | INPLACE_EDIT | INPUT_LINE_NUMBER | INPUT_RECORD_SEPARATOR |
66             LAST_MATCH_END | LAST_MATCH_START | LAST_PAREN_MATCH | LAST_REGEXP_CODE_RESULT |
67             LAST_SUBMATCH_RESULT | LIST_SEPARATOR | MATCH | NR | OFMT | OFS | OLD_PERL_VERSION |
68             ORS | OSNAME | OS_ERROR | OUTPUT_AUTOFLUSH | OUTPUT_FIELD_SEPARATOR |
69             OUTPUT_RECORD_SEPARATOR | PERLDB | PERL_VERSION | PID | POSTMATCH | PREMATCH |
70             PROCESS_ID | PROGRAM_NAME | REAL_GROUP_ID | REAL_USER_ID | RS | SIG | SUBSCRIPT_SEPARATOR |
71             SUBSEP | SYSTEM_FD_MAX | UID | WARNING | a | b
72             )
73             \Z
74             }x;
75              
76             # What a simple variable looks like...
77             my $SIMPLE_VAR = qr{ \A [\$\@%] [^\W\d] \w* \Z }xms;
78              
79             # What whitespace look like...
80             my $OWS = qr{ (?: \s++ | \# [^\n]*+ (?> \n | \Z ))*+ }xms;
81              
82             # This is where the magic happens: parse the code and extract the undeclared variables...
83 10     10   7255 use PPR::X;
  10         422266  
  10         423  
84 10     10   122 use re 'eval';
  10         22  
  10         16942  
85              
86             # Refactor the code into a subroutine...
87             sub refactor_to_sub {
88             # Unpack args...
89 8     8 0 6570 my ($opt_ref) = grep { ref($_) eq 'HASH' } @_, {};
  22         88  
90 8         22 my ($code, @extras) = grep { !ref($_) } @_;
  14         34  
91              
92             # Check raw arguments...
93 8 100 66     347 croak( "'code' argument of refactor_to_sub() must be a string" ) if !defined($code) || ref($code);
94 6         111 croak( "Unexpected extra argument passed to refactor_to_sub(): '$_'" ) for @extras;
95             croak( "'options' argument of refactor_to_sub() must be hash ref, not ", lc(ref($_)), " ref" )
96 5 100       12 for grep { ref($_) && ref($_) ne 'HASH' } @_;
  10         158  
97              
98             # Apply defaults...
99 4   100     26 my $from = $opt_ref->{from} // 0;
100 4   50     20 my $to = $opt_ref->{to} // length($code // q{}) - 1;
      66        
101 4   33     31 my $subname = $opt_ref->{name} // $DEFAULT_SUB_NAME;
102 4   33     21 my $data = $opt_ref->{data} // $DEFAULT_DATA_PARAM;
103 4         18 $data =~ s{\A\s*(\w)}{\@$1}xms;
104 4         10 my $return_expr = $opt_ref->{return};
105              
106             # Check processed arguments...
107             croak( "Unknown option ('$_') passed to refactor_to_sub()" )
108 4         8 for grep { !$VALID_REFACTOR_OPTION{$_} } keys %{$opt_ref};
  7         112  
  4         14  
109             croak( "'from' option of refactor_to_sub() must be a number" )
110 3 50       19 if !looks_like_number($opt_ref->{from});
111             croak( "'to' option of refactor_to_sub() must be a number" )
112 3 50       12 if !looks_like_number($opt_ref->{to});
113              
114             # Extract target code being factored out...
115 3         9 my $target_code = substr($code, $from, $to-$from+1);
116              
117             # Extract any trailing semicolon or comma that may need to be preserved...
118 3         328 $target_code =~ m{ (? $OWS )
119             (?
120             (?> (? ; )
121             | (? , | => | )
122             )
123             )
124             $OWS \Z
125             }xmso;
126 3         63 my %trailing = %+;
127 3         19 $trailing{punctuation} = ($trailing{ows} =~ s/\S/ /gr) . $trailing{punctuation};
128              
129             # Check if the end of the target code is the end of the file...
130 3 50       98 my $final_semicolon = substr($code, $to) =~ m{ $OWS \S }xmso ? q{} : q{;};
131              
132             # Ensure that the code is refactorable...
133 3         13 local %Code::ART::retloc = ();
134 3         6 local $Code::ART::insub; $Code::ART::insub = 0;
  3         5  
135             my $statement_sequence = qr{
136             (?>(?&PerlEntireDocument))
137              
138             (?(DEFINE)
139             (?
140 10         137 (?{ $Code::ART::insub++ })
141             (?>(?&PerlStdSubroutineDeclaration))
142 0         0 (?{ $Code::ART::insub-- })
143             |
144 10         140 (?{ $Code::ART::insub-- })
145             (?!)
146             )
147              
148             (?
149 26         99 (?{ $Code::ART::insub++ })
150             (?>(?&PerlStdAnonymousSubroutine))
151 0         0 (?{ $Code::ART::insub-- })
152             |
153 26         3915 (?{ $Code::ART::insub-- })
154             (?!)
155             )
156              
157             (?
158 26         311 (?{ pos() })
159             (?&PerlStdReturnExpression)
160             (?= (?&PerlOWS) ;? (?&PerlOWS)
161 0 0       0 (?{ $Code::ART::retloc{pos()} = $^R if !$Code::ART::insub; }) )
162             )
163             )
164              
165             $PPR::X::GRAMMAR
166 3         122145 }xmso;
167              
168 3 100       92582 my $test_code = $target_code =~ m{\A (?&PerlOWS) (?&PerlAssignmentOperator) $PPR::X::GRAMMAR }xmso
169             ? '()' . $target_code
170             : $target_code;
171 3 50       679 if ($test_code !~ $statement_sequence) {
172 0         0 return { failed => 'not a valid series of statements',
173             context => $PPR::X::ERROR,
174             args => []
175             }
176             }
177              
178 3         120 my $final_return = exists $Code::ART::retloc{length($target_code)};
179 3         11 my $interim_return = keys %Code::ART::retloc > $final_return;
180 3 50 33     15 if ($interim_return && !$final_return) {
181 0         0 return { failed => 'the code has an internal return statement',
182             context => $PPR::X::ERROR,
183             args => []
184             }
185             }
186              
187             # Find all variables and scopes in the code (if possible)...
188 3         15 my $vardata = classify_all_vars_in($code);
189 3 100       26 return { %{$vardata}, args => [] } if $vardata->{failed};
  1         23  
190              
191             # Extract relevant variables...
192 2         6 my (@in_vars, @out_vars, @lex_vars);
193 2         4 for my $decl (sort {$a<=>$b} grep { $_ >= 0 } keys %{$vardata->{vars}}) {
  9         12  
  9         22  
  2         9  
194             # No need to consider variables declared after the target...
195 7 50       16 last if $decl > $to;
196              
197             # Was the variable declared before the target, and used inside it???
198 7         11 my $used = $vardata->{vars}{$decl}{used_at};
199 7 50       13 if ($decl < $from) {
200 7 100       8 my @usages = grep { $from <= $_ && $_ <= $to } keys %{$used}
  4 100       19  
  7         20  
201             or next;
202 3         6 push @in_vars, { %{$vardata->{vars}{$decl}}, used_at => \@usages };
  3         24  
203             }
204              
205             # Was the variable declared within the target, and used after it???
206             else {
207 0         0 my @usages = grep { $_ <= $to } keys %{$used};
  0         0  
  0         0  
208 0 0       0 if (grep { $_ > $to } keys %{$used}) {
  0         0  
  0         0  
209 0         0 push @out_vars, { %{$vardata->{vars}{$decl}}, used_at => \@usages };
  0         0  
210             }
211             else {
212 0         0 push @lex_vars, { %{$vardata->{vars}{$decl}}, used_at => \@usages };
  0         0  
213             }
214             }
215             }
216              
217             # Determine minimal version of Perl 5 being used...
218 2         6 my $use_version = $vardata->{use_version};
219              
220             # Convert target code to an independent refactorable equivalent...
221 2         13 my %convert_opts
222             = (from=>$from, to=>$to, in_vars=>\@in_vars, out_vars=>\@out_vars, lex_vars =>\@lex_vars);
223 2         10 my ($arg_code, $param_code, $refactored_code, $return_candidates)
224             = _convert_target_code($target_code, \%convert_opts);
225              
226             # Extract any leading whitespace or assignment to be preserved...
227 2         61663 $refactored_code =~ s{ \A (? (?>(?&PerlOWS)) )
228             (?>
229             (?
230             (?>(?&PerlAssignmentOperator)) (?>(?&PerlOWS))
231             )
232             (?
233             (?>(?&PerlConditionalExpression))
234             )
235             )?+
236             (?= (? (?>(?&PerlOWS)) ;?+ (?>(?&PerlOWS)) \z | ) )
237             $PPR::X::GRAMMAR
238 2         2189 }{ ' ' x length($&) }exmso;
239              
240             my ($leading_ws, $leading_assignment, $leading_assignment_expr, $single_expr)
241 2         396 = @+{qw< leading_ws leading_assignment leading_assignment_expr single_expr>};
242 2   50     17 $leading_ws //= q{};
243 2   100     12 $leading_assignment //= q{};
244              
245             # Insert code to handle trailing arguments (if any)...
246 2 100 66     20 if ($trailing{comma} || !$trailing{semicolon} ) {
247 1 50       5 $param_code .= "," if $param_code =~ /\S/;
248 1         4 $param_code .= " $data";
249 1         13 $refactored_code =~ s{\s* \Z}{ $data;\n}xms;
250             }
251              
252             # Reinstate leading assignment (if any) and install return value (if any)...
253 2 100       10 if ($leading_assignment) {
    50          
    50          
254 1 50       4 if ($final_return) {
255 0         0 return { failed => "code has both a leading assignment and an explicit return",
256             args => [],
257             };
258             }
259 1 50       3 if ($single_expr) {
260 1         34 $refactored_code = $leading_ws . $leading_assignment_expr;
261             }
262             else {
263 0         0 $refactored_code =~ s{\A \s*}
264 0         0 { my $DEFAULT_AUTO_RETURN_VALUE = wantarray ? ($leading_assignment_expr) : scalar($leading_assignment_expr)}xms;
265             $refactored_code =~ s{\s* \Z}
266             {\n ;\n return wantarray ? $DEFAULT_AUTO_RETURN_VALUE : shift $DEFAULT_AUTO_RETURN_VALUE;\n}xms;
267             }
268 0         0 }
  0         0  
269 0         0 elsif (defined $return_expr) {
270             my %refactored_name = map { $_->{decl_name} => $_->{new_name} } @in_vars, @out_vars;
271             $return_expr
272             =~ s{ (? \$\# (?&PerlOWS) \K (? \w++ )
273             | \@ (?&PerlOWS) \K (? \w++ ) (?! (?&PerlOWS) \{ )
274             | [\$%] (?&PerlOWS) \K (? \w++ ) (?= (?&PerlOWS) \[ )
275             )
276             |
277             (?
278             \% (?&PerlOWS) \K (? \w++ ) (?! (?&PerlOWS) \[ )
279             | [\$\@] (?&PerlOWS) \K (? \w++ ) (?= (?&PerlOWS) \{ )
280             )
281             |
282             (? \$ (?&PerlOWS) \K (? \w++ ) (?! (?&PerlOWS) [\{\[] ) )
283             $PPR::X::GRAMMAR
284 0 0       0 }
    0          
285 0 0       0 { my $new_name = $+{array} ? $refactored_name{"\@$+{varname}"}
286             : $+{hash} ? $refactored_name{ "%$+{varname}"}
287 0         0 : $refactored_name{"\$$+{varname}"};
288             defined($new_name) ? "{$new_name}" : $+{varname};
289             }gexmso;
290 0         0 $refactored_code =~ s{\s* \Z}{\n ;\n return $return_expr\n}xms;
291             }
292             elsif ($final_return) {
293 1         10 $leading_assignment = 'return ';
294             }
295             else {
296             $refactored_code =~ s{\s* \Z}{\n ;\n # RETURN VALUE HERE?\n}xms;
297 2         11 }
  5         14  
  5         20  
298 2         49  
299 2 50       49 # Format and wrap refactored code in a subroutine declaration...
300             my $min_indent = min map { /^\s*/; length($&) } split(/\n/, $refactored_code);
301             $refactored_code =~ s{ ^ [ ]{$min_indent} }{ }gxms;
302             $refactored_code = "sub $subname"
303             . ($use_version ge v5.22
304             ? " ($param_code) {\n"
305             : " {\n my ($param_code) = \@_;\n\n"
306             )
307             . "$refactored_code\n}\n";
308              
309 2 100 66     24 my $call = $leading_ws . $leading_assignment
310             . $subname
311 2         166 . ($trailing{comma} || !$trailing{semicolon} ? " $arg_code" : "($arg_code)")
312             . $trailing{punctuation};
313              
314             return { code => $refactored_code,
315             call => $call . $final_semicolon,
316             return => $return_candidates,
317             };
318             }
319              
320 3     3 0 287 # Refactor the code into a subroutine...
  9         24  
321 3         9 sub hoist_to_lexical {
  6         15  
322             # Unpack args...
323             my ($opt_ref) = grep { ref($_) eq 'HASH' } @_, {};
324 3 50 33     22 my ($code, @extras) = grep { !ref($_) } @_;
325 3         10  
326             # Check raw arguments...
327 3 100       9 croak( "'code' argument of refactor_to_sub() must be a string" ) if !defined($code) || ref($code);
  6         25  
328             croak( "Unexpected extra argument passed to refactor_to_sub(): '$_'" ) for @extras;
329             croak( "'options' argument of refactor_to_sub() must be hash ref, not ", lc(ref($_)), " ref" )
330 3   33     23 for grep { ref($_) && ref($_) ne 'HASH' } @_;
331 3   50     12  
332 3   0     9 # Apply defaults...
      33        
333 3         6 my $varname = $opt_ref->{name} // $DEFAULT_LEXICAL_NAME;
334 3         6 my $from = $opt_ref->{from} // 0;
335             my $to = $opt_ref->{to} // length($code // q{}) - 1;
336             my $all = $opt_ref->{all};
337             my $closure = $opt_ref->{closure};
338 3         6  
  12         28  
  3         21  
339             # Check processed arguments...
340 3 50       18 croak( "Unknown option ('$_') passed to refactor_to_sub()" )
341             for grep { !$VALID_HOIST_OPTION{$_} } keys %{$opt_ref};
342 3 50       13 croak( "'from' option of hoist_to_lexical() must be a number" )
343             if !looks_like_number($opt_ref->{from});
344             croak( "'to' option of hoist_to_lexical() must be a number" )
345 3         9 if !looks_like_number($opt_ref->{to});
346 3 50       21  
347             # Analyze the file to locate replaceable instances of the expression...
348             my $expr_scope = find_expr_scope($code, $from, $to, $all);
349 3         9 return $expr_scope if $expr_scope->{failed};
350              
351             # Extract target code...
352 3   66     16 my $target = $expr_scope->{target};
      100        
353              
354             # Handle mutators...
355 3         6 $closure ||= $expr_scope->{mutators} > 0 && @{$expr_scope->{matches}} > 1;
356 3         5  
357 3 50       12 # Convert the name and the "call" name to the correct syntax...
358 3 100       34 my $varsubst = $varname;
    50          
359 1         3 my $vardecl;
360 1         3 if ($varname !~ /^[\$\@%]/) {
361             if (!$closure) {
362             $varsubst = $varname = '$'.$varname;
363 0         0 $vardecl = "my $varname = $target;\n";
364 0         0 }
365 0         0 elsif ($expr_scope->{use_version} lt v5.26) {
366             $varname = '$'.$varname;
367             $varsubst = $varname . '->()';
368 2         7 $vardecl = "my $varname = sub { $target };\n";
369 2         8 }
370             else {
371             $varsubst = $varname.'()';
372             $vardecl = "my sub $varname { $target }\n";
373             }
374             }
375              
376 3         10 # Return analysis...
  3         32  
377             return { code => $vardecl,
378             call => $varsubst,
379             %{$expr_scope},
380             };
381             }
382              
383 3     3 0 10 my $SPACE_MARKER = "\1\0\1\0\1\0";
384             my $SPACE_FINDER = quotemeta $SPACE_MARKER;
385 3         8 sub find_expr_scope {
386 3         94027 my ($source, $from, $to, $match_all) = @_;
387              
388             my $target = substr($source, $from, $to-$from+1);
389 10     10   122 $target =~ s{ \A (?>(?&PerlOWS)) | (?>(?&PerlOWS)) \Z $PPR::X::GRAMMAR }{}gxmso;
  10         24  
  10         43379  
390 3         451  
391 3         10 # Verify it's a valid target...
392             use re 'eval';
393             our %ws_locs;
394             our $mutators = 0;
395             my $valid_target = qr{
396 73         849 \A (?>(?&PerlConditionalExpression)) \Z
  73         1086  
397              
398             (?(DEFINE)
399 0         0 (? (?{pos()}) (?&PerlStdOWS) (?{ $ws_locs{$^R} = pos()-$^R; }) )
  0         0  
400              
401             (?
402             (?> \+\+ (?{$mutators++}) | -- (?{$mutators++ })
403             | [!\\+~]
404             | - (?! (?&PPR_X_filetest_name) \b )
405             )
406 1         7 )
407              
408             (?
409             (?> \+\+ | -- ) (?{ $mutators++ })
410             )
411 3         109818 )
412              
413 3 50       478 $PPR::X::GRAMMAR
414 0         0 }xms;
415 0         0  
416             if ($target !~ $valid_target) {
417             return { failed => "it's not a simple expression", target => $target };
418             return;
419             }
420 3         10  
421 3         15 # Convert the target text into a whitespace-tolerant literal search pattern
  28         39  
  20         52  
422 17         35 # and whitespace-minimized rvalue for initializing hoist variable...
423             my $rvalue = $target;
424 17         27 for my $loc (sort {$b<=>$a} grep { $_ < length($target) } keys %ws_locs) {
425 17 100       49 substr($target, $loc, $ws_locs{$loc}, $SPACE_MARKER);
426              
427 3         14 my $raw_ws = substr($rvalue, $loc, $ws_locs{$loc});
428 3         50 substr($rvalue, $loc, $ws_locs{$loc}, $raw_ws =~ /\s/ ? q{ } : q{});
429             }
430             $target = quotemeta $target;
431 3         8 $target =~ s{\Q$SPACE_FINDER\E}{\\s*+}gxms;
432 3         65  
  13         57  
433 12         127 # Locate all target instances...
434             my @matches;
435             while ($source =~ m{(?{pos()}) (? $target)}gcxms) {
436             push @matches, {from => $^R, length => length($+{match}) };
437 3         18 }
438              
439 9 100       27 # Determine every variable in scope for the target expression...
  21 50       70  
  9         18  
440 3         12 my $var_info = classify_all_vars_in($source);
  3         13  
441             my @target_vars = grep { $_->{declared_at} >= 0
442             && grep { $_ >= $from && $_ < $to } keys %{$_->{used_at} } }
443             values %{$var_info->{vars}};
444 3         8  
  12         23  
445 12         20 # Identify matches that use target variables...
446 12 50 100     16 @matches = grep {
  48         137  
447             my $match_from = $_->{from};
448 12         15 my $match_to = $match_from + $_->{length};
  12         20  
449             @target_vars == grep { grep { $match_all ? $match_from <= $_ && $_ <= $match_to
450             : $match_from == $from }
451             keys %{$_->{used_at}}
452             } @target_vars;
453 3         6 } @matches;
  3         12  
454              
455             # Identify earliest position where hoist could be placed...
456             my $hoistloc = min map { $_->{start_of_scope} } @target_vars;
457              
458             return {
459             target => $rvalue,
460             hoistloc => $hoistloc,
461 3         284 matches => \@matches,
462             mutators => $mutators,
463             use_version => $var_info->{use_version},
464             };
465 2     2   6 }
466 2         4  
467             sub _convert_target_code {
468             my ($target_code, $opts_ref) = @_;
469 2         3 my $from = $opts_ref->{from};
  2         7  
470              
471             # Label out-parameters...
472 2         4 $_->{out} = 1 for @{$opts_ref->{out_vars}};
  2         5  
  2         5  
473 2         3  
474 2         3 # Build name translation for each variable...
475 2         6 my @param_vars = (@{$opts_ref->{in_vars}}, @{$opts_ref->{out_vars}});
476             our %rename_at;
477 3 50       5 our %is_state_var;
478             for my $var (@param_vars) {
479             # Construct name of scalar parameter...
480             my $out = $var->{out} ? 'o' : q{};
481             my $new_name
482             = $var->{new_name}
483 3 50       18 = '$'.$var->{raw_name}
    100          
    100          
484             . ( $var->{sigil} eq '@' ? "_${out}aref"
485             : $var->{sigil} eq '%' ? "_${out}href"
486             : $var->{raw_name} =~ /_o?(?:[ahs]ref|sval)$/ ? "_${out}sval"
487             : "_${out}sref"
488 3   50     7 );
489 3 50       6  
490 0         0 # Add "undeclarations" to renaming map and track internal state variables...
491 0         0 my $local_decl = ($var->{declared_at} // -1) - $from;
492             if ($local_decl >= 0) {
493             $rename_at{$local_decl} = $new_name;
494             $is_state_var{$local_decl} = $var->{declarator} eq 'state';
495 3         3 }
  3         6  
496 3         8  
497             # Add all usages to renaming map...
498             for my $usage (@{$var->{used_at}}) {
499             $rename_at{$usage - $from} = $new_name;
500             }
501             }
502 3         6  
  2         5  
503 2         4 # Build argument list and parameter list for call...
  0         0  
  2         6  
504             my $args_code = join(', ',
505             map( { "\\$_->{decl_name}" } @{$opts_ref->{in_vars}} ),
506 2         5 map( { "\\$_->{declarator} $_->{decl_name}" } @{$opts_ref->{out_vars}} )
  3         7  
507             );
508              
509 2         62790 my $param_code = join(', ', map { "$_->{new_name}" } @param_vars);
510 90         242  
511 3         54 # Rename parameters within refactored code...
512             $target_code =~ s{ (?: (?> my | our | state ) (?&PerlOWS) )?+
513             (?(?{$rename_at{pos()}})|(?!))
514             (?{pos()})
515             (? (?> \$\#?+ | [\@%] ) (?&PerlOWS) )
516             (? \{ (?&PerlOWS) | )
517             \w++
518 3 50       48 $PPR::X::GRAMMAR
    100          
519             }
520             { ( $is_state_var{$^R} ? "$&=" : q{} )
521             . $+{sigil}
522 2         72090 . (length($+{braced}) ? "\{$rename_at{$^R}" : "{$rename_at{$^R}}")
523             }egxmso;
524              
525             # Rewrite list declarations to allow hoisting (skipping quoted ones)...
526             $target_code =~ s{ (\A|\W) (?&PerlQuotelike)
527             | (?
528             (? (?> my | our | state ) ) (?&PerlOWS)
529             \( (?&PerlOWS)
530             (? (?&PerlVariable)?+ (?&PerlOWS)
531             (?: , (?&PerlOWS) (?&PerlVariable) (?&PerlOWS) )*+
532             ,?+ (?&PerlOWS)
533             )
534 2 50       28 \)
535 0         0 )
  0         0  
536             $PPR::X::GRAMMAR
537             }
538 2         334 {
539             if ($+{list_decl}) {
540             '('.join(', ', map { "$+{declarator} $_" } split /,\s*/, $+{var_list}).')'
541             }
542             else {
543             $&;
544 0         0 }
545 0         0 }egxmso;
  2         15  
546 2         396  
  3         17  
547             # Build old->name mapping...
548             my $varname_mapping = {
549 2         220 map( { $_->{decl_name} => $_->{decl_name} }
550             grep { $_->{end_of_scope} >= $opts_ref->{to} } @{$opts_ref->{lex_vars}} ),
551             map( { $_->{decl_name} => $_->{sigil}."{$_->{new_name}}" } @param_vars ),
552             };
553              
554 51     51 0 5833458 return ($args_code, $param_code, $target_code, $varname_mapping);
555             }
556 51         245  
557              
558             sub rename_variable {
559 51         229 my ($source, $varpos, $new_name) = @_;
  51         230  
560              
561 51 50       157 my $extraction = _classify_var_at($source, $varpos);
562              
563 51         112 my ($varname, $declared_at, $used_at, $failed)
  522         638  
  51         303  
564 269 50       8508554 = @{ $extraction }{'raw_name', 'declared_at', 'used_at', 'failed'};
565              
566             return { failed => $failed } if $failed;
567              
568             for my $index (sort { $b <=> $a} keys %{$used_at}) {
569             substr($source,$index)
570             =~ s{\A (?: \$\#? | [\@%] ) (?&PerlOWS)
571             \{? (?&PerlOWS)
572 51 100       481 \K $varname $PPR::X::GRAMMAR
573 44 50       1381141 }{$new_name}xms
574             or warn "Internal usage rename error at position $index: '...",
575             substr($source, $index, 20), "...'\n";
576             }
577             if ($declared_at >= 0) {
578             substr($source,$declared_at)
579             =~ s{\A (?: \$\#? | [\@%] ) (?&PerlOWS)
580             \{? (?&PerlOWS)
581             \K $varname $PPR::X::GRAMMAR
582 51         9751 }{$new_name}xms
583             or warn "Internal declaration rename error at position $declared_at: '...",
584             substr($source, $declared_at, 20), "...'\n";
585             }
586              
587             return { source => $source };
588             }
589 0     0   0  
590              
591              
592 0         0 # Convert fancy vars ($# { name }) to simple ones (@name)...
593             sub _normalize_var {
594             my ($var, $accessor) = @_;
595 0 0 0     0  
596             # Remove decorations...
597             $var =~ tr/{} \t\n\f\r//d;
598 0 0 0     0  
      0        
599             # Convert maxindex ($#a) to array (@a)
600             return '@'.substr($var,2) if length($var) > 2 && substr($var,0,2) eq '$#';
601              
602 0 0       0 # Convert derefs (@$s or %$s) to scalar ($s)
603             return substr($var,1) if length($var) > 2
604             && (substr($var,0,2) eq '@$' || substr($var,0,2) eq '%$');
605 0 0       0  
606 0 0       0 # Return entire variables as-are...
607             return $var if !$accessor;
608              
609 0         0 # Convert array and hash look-ups to arrays and hashes...
610             return '@'.substr($var,1) if $accessor eq '[';
611             return '%'.substr($var,1) if $accessor eq '{';
612              
613             # "This can never happen" ;-)
614 0     0   0 die "Internal error: unexpected accessor after $var: '$accessor'";
615             }
616 0         0  
  0         0  
617             # Extract variables from a for loop declaration...
618             sub _extract_vars {
619             my ($decl) = @_;
620              
621             return map { _normalize_var($_) }
622 0     0   0 $decl =~ m{ [\$\@%] \w+ }xmsg;
623 0         0 }
624              
625             # Remove 'use experimental' declarations if not requested...
626             sub _de_experiment {
627             my ($code) = @_;
628 0         0 $code =~ s{ ^ $OWS
629             use \s+ experimental\b $OWS
630             (?>(?&PerlExpression)) $OWS
631             ; $OWS \n?
632             }{}gxmso;
633             return $code;
634             }
635              
636             # How to recognize a variable...
637             my $VAR_PAT = qr{
638             \A
639             (?
640             (? [\@\$%] ) (? \$ ) (?! [\$\{\w] )
641             |
642             (? (?> \$ (?: [#] (?= (?> [\$^\w\{:+] | - (?! > ) ) ))?+ | [\@%] ) )
643             (?>(?&PerlOWS))
644             (?> (? (?&_varname) )
645             | \{ (?>(?&PerlOWS)) (? (?&_varname) ) (?>(?&PerlOWS)) \}
646             )
647             |
648             (? [\@\$%] ) (? \# )
649             )
650              
651             (?(DEFINE)
652             (?<_varname> \d++
653             | \^ [][A-Z^_?\\]
654             | (?>(?&PerlOldQualifiedIdentifier)) (?: :: )?+
655             | [][!"#\$%&'()*+,.\\/:;<=>?\@\^`|~-]
656             )
657             )
658 51     51   157  
659             $PPR::X::GRAMMAR
660             }xms;
661 51         106  
662 51         121 sub _classify_var_at {
663 51         98 my ($source, $varpos) = @_;
664              
665             # Locate the variable...
666 51         231 my $orig_varpos = $varpos;
667             my $orig_sigil = q{};
668 51 50       2076 my %var;
669 51         2903  
670 51         237 POSITION:
671             while ($varpos >= 0) {
672             # Walk backwards, looking for the variable...
673             if (substr($source, $varpos) =~ $VAR_PAT) {
674 51 50 33     520 %var = %+;
      33        
675             $orig_sigil = $var{sigil};
676              
677 51 50       222 # Handle the very special case of $; (need to be sure it's not part of $$;)
678 0         0 next POSITION
679             if $varpos > 0 && $var{name} eq ';' && substr($source, $varpos-1, 1) =~ /[\$\@%]/;
680              
681             # Return a special value if we fail to match a variable at the specified position
682 51         135 if ($varpos + length($var{full}) <= $orig_varpos) {
683             return { failed => "No variable at specified location", at => $orig_varpos }
684             }
685 0         0  
686             # Otherwise, we found it...
687             last POSITION;
688 51 50       188 }
689             }
690             continue { $varpos-- }
691              
692 51         181 # Did we run off the start of the input?
693             return { failed => "No variable at specified location", at => $orig_varpos }
694             if $varpos < 0;
695 51 50       198  
696             # Locate and classify every variable in the source code...
697             my $analysis = classify_all_vars_in($source);
698 51         109  
699 51         88 # Return a failure report if unable to process source code...
  51         242  
700             return $analysis if $analysis->{failed};
701 478 100 100     5111  
702             # Attempt to locate and report information about the requested variable...
703             my $allvars = $analysis->{vars};
704 0         0 for my $varid (keys %{$analysis->{vars}}) {
705             return $allvars->{$varid}
706             if $varid == $varpos || $allvars->{$varid}{used_at}{$varpos};
707             }
708              
709             return { failed => 'Apparent variable is not actually a variable' };
710             }
711              
712             # Descriptions of built-in and other "standard" variables...
713             my %STD_VAR_DESC = (
714             "\$!" => {
715             aliases => { "\$ERRNO" => 1, "\$OS_ERROR" => 1 },
716             desc => "Status from most recent system call (including I/O)",
717             },
718             "\$\"" => {
719             aliases => { "\$LIST_SEPARATOR" => 1 },
720             desc => "List separator for array interpolation",
721             },
722             "\$#" => {
723             aliases => { "\$OFMT" => 1 },
724             desc => "Output number format [deprecated: use printf() instead]",
725             },
726             "\$\$" => {
727             aliases => { "\$PID" => 1, "\$PROCESS_ID" => 1 },
728             desc => "Process ID",
729             },
730             "\$%" => {
731             aliases => { "\$FORMAT_PAGE_NUMBER" => 1 },
732             desc => "Page number of the current output page",
733             },
734             "\$&" => {
735             aliases => { "\$MATCH" => 1 },
736             desc => "Most recent regex match string",
737             },
738             "\$'" => {
739             aliases => { "\$POSTMATCH" => 1 },
740             desc => "String following most recent regex match",
741             },
742             "\$(" => {
743             aliases => { "\$GID" => 1, "\$REAL_GROUP_ID" => 1 },
744             desc => "Real group ID of the current process",
745             },
746             "\$)" => {
747             aliases => { "\$EFFECTIVE_GROUP_ID" => 1, "\$EGID" => 1 },
748             desc => "Effective group ID of the current process",
749             },
750             "\$*" => {
751             aliases => {},
752             desc => "Regex multiline matching flag [removed: use /m instead]",
753             },
754             "\$+" => {
755             aliases => { "\$LAST_PAREN_MATCH" => 1 },
756             desc => "Final capture group of most recent regex match",
757             },
758             "\$," => {
759             aliases => { "\$OFS" => 1, "\$OUTPUT_FIELD_SEPARATOR" => 1 },
760             desc => "Output field separator for print() and say()",
761             },
762             "\$-" => {
763             aliases => { "\$FORMAT_LINES_LEFT" => 1 },
764             desc => "Number of lines remaining in current output page",
765             },
766             "\$." => {
767             aliases => { "\$INPUT_LINE_NUMBER" => 1, "\$NR" => 1 },
768             desc => "Line number of last input line",
769             },
770             "\$/" => {
771             aliases => { "\$INPUT_RECORD_SEPARATOR" => 1, "\$RS" => 1 },
772             desc => "Input record separator (end-of-line marker on inputs)",
773             },
774             "\$0" => { aliases => { "\$PROGRAM_NAME" => 1 }, desc => "Program name" },
775             "\$1" => {
776             aliases => {},
777             desc => "First capture group from most recent regex match",
778             },
779             "\$2" => {
780             aliases => {},
781             desc => "Second capture group from most recent regex match",
782             },
783             "\$3" => {
784             aliases => {},
785             desc => "Third capture group from most recent regex match",
786             },
787             "\$4" => {
788             aliases => {},
789             desc => "Fourth capture group from most recent regex match",
790             },
791             "\$5" => {
792             aliases => {},
793             desc => "Fifth capture group from most recent regex match",
794             },
795             "\$6" => {
796             aliases => {},
797             desc => "Sixth capture group from most recent regex match",
798             },
799             "\$7" => {
800             aliases => {},
801             desc => "Seventh capture group from most recent regex match",
802             },
803             "\$8" => {
804             aliases => {},
805             desc => "Eighth capture group from most recent regex match",
806             },
807             "\$9" => {
808             aliases => {},
809             desc => "Ninth capture group from most recent regex match",
810             },
811             "\$:" => {
812             aliases => { "\$FORMAT_LINE_BREAK_CHARACTERS" => 1 },
813             desc => "Break characters for format() lines",
814             },
815             "\$;" => {
816             aliases => { "\$SUBSCRIPT_SEPARATOR" => 1, "\$SUBSEP" => 1 },
817             desc => "Hash subscript separator for key concatenation",
818             },
819             "\$<" => {
820             aliases => { "\$REAL_USER_ID" => 1, "\$UID" => 1 },
821             desc => "Real uid of the current process",
822             },
823             "\$=" => {
824             aliases => { "\$FORMAT_LINES_PER_PAGE" => 1 },
825             desc => "Page length of selected output channel",
826             },
827             "\$>" => {
828             aliases => { "\$EFFECTIVE_USER_ID" => 1, "\$EUID" => 1 },
829             desc => "Effective uid of the current process",
830             },
831             "\$?" => {
832             aliases => { "\$CHILD_ERROR" => 1 },
833             desc => "Status from most recent system call (including I/O)",
834             },
835             "\$\@" => {
836             aliases => { "\$EVAL_ERROR" => 1 },
837             desc => "Current propagating exception",
838             },
839             "\$[" => {
840             aliases => { "\$ARRAY_BASE" => 1 },
841             desc => "Array index origin [deprecated]",
842             },
843             "\$\\" => {
844             aliases => { "\$ORS" => 1, "\$OUTPUT_RECORD_SEPARATOR" => 1 },
845             desc => "Output record separator (appended to every print())",
846             },
847             "\$]" => {
848             aliases => {},
849             desc => "Perl interpreter version [deprecated: use \$^V]",
850             },
851             "\$^" => {
852             aliases => { "\$FORMAT_TOP_NAME" => 1 },
853             desc => "Name of top-of-page format for selected output channel",
854             },
855             "\$^A" => {
856             aliases => { "\$ACCUMULATOR" => 1 },
857             desc => "Accumulator for format() lines",
858             },
859             "\$^C" => {
860             aliases => { "\$COMPILING" => 1 },
861             desc => "Is the program still compiling?",
862             },
863             "\$^D" =>
864             { aliases => { "\$DEBUGGING" => 1 }, desc => "Debugging flags" },
865             "\$^E" => {
866             aliases => { "\$EXTENDED_OS_ERROR" => 1 },
867             desc => "O/S specific error information",
868             },
869             "\$^F" => {
870             aliases => { "\$SYSTEM_FD_MAX" => 1 },
871             desc => "Maximum system file descriptor",
872             },
873             "\$^H" =>
874             { aliases => {}, desc => "Internal compile-time lexical hints" },
875             "\$^I" => {
876             aliases => { "\$INPLACE_EDIT" => 1 },
877             desc => "In-place editing value",
878             },
879             "\$^L" => {
880             aliases => { "\$FORMAT_FORMFEED" => 1 },
881             desc => "Form-feed sequence for format() pages",
882             },
883             "\$^M" => { aliases => {}, desc => "Emergency memory pool" },
884             "\$^N" => {
885             aliases => { "\$LAST_SUBMATCH_RESULT" => 1 },
886             desc => "Most recent capture group (within regex)",
887             },
888             "\$^O" =>
889             { aliases => { "\$OSNAME" => 1 }, desc => "Operating system name" },
890             "\$^P" =>
891             { aliases => { "\$PERLDB" => 1 }, desc => "Internal debugging flags" },
892             "\$^R" => {
893             aliases => { "\$LAST_REGEXP_CODE_RESULT" => 1 },
894             desc => "Result of last successful code block (within regex)",
895             },
896             "\$^S" => {
897             aliases => { "\$EXCEPTIONS_BEING_CAUGHT" => 1 },
898             desc => "Current eval() state",
899             },
900             "\$^T" =>
901             { aliases => { "\$BASETIME" => 1 }, desc => "Program start time" },
902             "\$^V" => {
903             aliases => { "\$PERL_VERSION" => 1 },
904             desc => "Perl interpreter version",
905             },
906             "\$^W" =>
907             { aliases => { "\$WARNING" => 1 }, desc => "Global warning flags" },
908             "\$^X" => {
909             aliases => { "\$EXECUTABLE_NAME" => 1 },
910             desc => "Perl interpreter invocation name",
911             },
912             "\$_" => {
913             aliases => { "\$ARG" => 1 },
914             desc =>
915             "Topic variable: default argument for matches and many builtins",
916             },
917             "\$`" => {
918             aliases => { "\$PREMATCH" => 1 },
919             desc => "String preceding most recent regex match",
920             },
921             "\$a" => {
922             aliases => {},
923             desc => "Block parameter: automatically provided to sort blocks",
924             },
925             "\$ACCUMULATOR" => {
926             aliases => { "\$^A" => 1 },
927             desc => "Accumulator for format() lines",
928             },
929             "\$ARG" => {
930             aliases => { "\$_" => 1 },
931             desc =>
932             "Topic variable: default argument for matches and many builtins",
933             },
934             "\$ARGV" => {
935             aliases => {},
936             desc => "Name of file being read by readline() or <>",
937             },
938             "\$ARRAY_BASE" => {
939             aliases => { "\$[" => 1 },
940             desc => "Array index origin [deprecated]",
941             },
942             "\$b" => {
943             aliases => {},
944             desc => "Block parameter: automatically provided to sort blocks",
945             },
946             "\$BASETIME" =>
947             { aliases => { "\$^T" => 1 }, desc => "Program start time" },
948             "\$CHILD_ERROR" => {
949             aliases => { "\$?" => 1 },
950             desc => "Status from most recent system call (including I/O)",
951             },
952             "\$COMPILING" => {
953             aliases => { "\$^C" => 1 },
954             desc => "Is the program still compiling?",
955             },
956             "\$DEBUGGING" =>
957             { aliases => { "\$^D" => 1 }, desc => "Debugging flags" },
958             "\$EFFECTIVE_GROUP_ID" => {
959             aliases => { "\$)" => 1, "\$EGID" => 1 },
960             desc => "Effective group ID of the current process",
961             },
962             "\$EFFECTIVE_USER_ID" => {
963             aliases => { "\$>" => 1, "\$EUID" => 1 },
964             desc => "Effective uid of the current process",
965             },
966             "\$EGID" => {
967             aliases => { "\$)" => 1, "\$EFFECTIVE_GROUP_ID" => 1 },
968             desc => "Effective group ID of the current process",
969             },
970             "\$ERRNO" => {
971             aliases => { "\$!" => 1, "\$OS_ERROR" => 1 },
972             desc => "Status from most recent system call (including I/O)",
973             },
974             "\$EUID" => {
975             aliases => { "\$>" => 1, "\$EFFECTIVE_USER_ID" => 1 },
976             desc => "Effective uid of the current process",
977             },
978             "\$EVAL_ERROR" =>
979             { aliases => { "\$\@" => 1 }, desc => "Current propagating exception" },
980             "\$EXCEPTIONS_BEING_CAUGHT" =>
981             { aliases => { "\$^S" => 1 }, desc => "Current eval() state" },
982             "\$EXECUTABLE_NAME" => {
983             aliases => { "\$^X" => 1 },
984             desc => "Perl interpreter invocation name",
985             },
986             "\$EXTENDED_OS_ERROR" => {
987             aliases => { "\$^E" => 1 },
988             desc => "O/S specific error information",
989             },
990             "\$FORMAT_FORMFEED" => {
991             aliases => { "\$^L" => 1 },
992             desc => "Form-feed sequence for format() pages",
993             },
994             "\$FORMAT_LINE_BREAK_CHARACTERS" => {
995             aliases => { "\$:" => 1 },
996             desc => "Break characters for format() lines",
997             },
998             "\$FORMAT_LINES_LEFT" => {
999             aliases => { "\$-" => 1 },
1000             desc => "Number of lines remaining in current output page",
1001             },
1002             "\$FORMAT_LINES_PER_PAGE" => {
1003             aliases => { "\$=" => 1 },
1004             desc => "Page length of selected output channel",
1005             },
1006             "\$FORMAT_NAME" => {
1007             aliases => { "\$~" => 1 },
1008             desc => "Name of format for selected output channel",
1009             },
1010             "\$FORMAT_PAGE_NUMBER" => {
1011             aliases => { "\$%" => 1 },
1012             desc => "Page number of the current output page",
1013             },
1014             "\$FORMAT_TOP_NAME" => {
1015             aliases => { "\$^" => 1 },
1016             desc => "Name of top-of-page format for selected output channel",
1017             },
1018             "\$GID" => {
1019             aliases => { "\$(" => 1, "\$REAL_GROUP_ID" => 1 },
1020             desc => "Real group ID of the current process",
1021             },
1022             "\$INPLACE_EDIT" =>
1023             { aliases => { "\$^I" => 1 }, desc => "In-place editing value" },
1024             "\$INPUT_LINE_NUMBER" => {
1025             aliases => { "\$." => 1, "\$NR" => 1 },
1026             desc => "Line number of last input line",
1027             },
1028             "\$INPUT_RECORD_SEPARATOR" => {
1029             aliases => { "\$/" => 1, "\$RS" => 1 },
1030             desc => "Input record separator (end-of-line marker on inputs)",
1031             },
1032             "\$LAST_PAREN_MATCH" => {
1033             aliases => { "\$+" => 1 },
1034             desc => "Final capture group of most recent regex match",
1035             },
1036             "\$LAST_REGEXP_CODE_RESULT" => {
1037             aliases => { "\$^R" => 1 },
1038             desc => "Result of last successful code block (within regex)",
1039             },
1040             "\$LAST_SUBMATCH_RESULT" => {
1041             aliases => { "\$^N" => 1 },
1042             desc => "Most recent capture group (within regex)",
1043             },
1044             "\$LIST_SEPARATOR" => {
1045             aliases => { "\$\"" => 1 },
1046             desc => "List separator for array interpolation",
1047             },
1048             "\$MATCH" =>
1049             { aliases => { "\$&" => 1 }, desc => "Most recent regex match string" },
1050             "\$NR" => {
1051             aliases => { "\$." => 1, "\$INPUT_LINE_NUMBER" => 1 },
1052             desc => "Line number of last input line",
1053             },
1054             "\$OFMT" => {
1055             aliases => { "\$#" => 1 },
1056             desc => "Output number format [deprecated: use printf() instead]",
1057             },
1058             "\$OFS" => {
1059             aliases => { "\$," => 1, "\$OUTPUT_FIELD_SEPARATOR" => 1 },
1060             desc => "Output field separator for print() and say()",
1061             },
1062             "\$ORS" => {
1063             aliases => { "\$\\" => 1, "\$OUTPUT_RECORD_SEPARATOR" => 1 },
1064             desc => "Output record separator (appended to every print())",
1065             },
1066             "\$OS_ERROR" => {
1067             aliases => { "\$!" => 1, "\$ERRNO" => 1 },
1068             desc => "Status from most recent system call (including I/O)",
1069             },
1070             "\$OSNAME" =>
1071             { aliases => { "\$^O" => 1 }, desc => "Operating system name" },
1072             "\$OUTPUT_AUTOFLUSH" => {
1073             aliases => { "\$|" => 1 },
1074             desc => "Autoflush status of selected output filehandle",
1075             },
1076             "\$OUTPUT_FIELD_SEPARATOR" => {
1077             aliases => { "\$," => 1, "\$OFS" => 1 },
1078             desc => "Output field separator for print() and say()",
1079             },
1080             "\$OUTPUT_RECORD_SEPARATOR" => {
1081             aliases => { "\$\\" => 1, "\$ORS" => 1 },
1082             desc => "Output record separator (appended to every print())",
1083             },
1084             "\$PERL_VERSION" =>
1085             { aliases => { "\$^V" => 1 }, desc => "Perl interpreter version" },
1086             "\$PERLDB" =>
1087             { aliases => { "\$^P" => 1 }, desc => "Internal debugging flags" },
1088             "\$PID" => {
1089             aliases => { "\$\$" => 1, "\$PROCESS_ID" => 1 },
1090             desc => "Process ID",
1091             },
1092             "\$POSTMATCH" => {
1093             aliases => { "\$'" => 1 },
1094             desc => "String following most recent regex match",
1095             },
1096             "\$PREMATCH" => {
1097             aliases => { "\$`" => 1 },
1098             desc => "String preceding most recent regex match",
1099             },
1100             "\$PROCESS_ID" =>
1101             { aliases => { "\$\$" => 1, "\$PID" => 1 }, desc => "Process ID" },
1102             "\$PROGRAM_NAME" => { aliases => { "\$0" => 1 }, desc => "Program name" },
1103             "\$REAL_GROUP_ID" => {
1104             aliases => { "\$(" => 1, "\$GID" => 1 },
1105             desc => "Real group ID of the current process",
1106             },
1107             "\$REAL_USER_ID" => {
1108             aliases => { "\$<" => 1, "\$UID" => 1 },
1109             desc => "Real uid of the current process",
1110             },
1111             "\$RS" => {
1112             aliases => { "\$/" => 1, "\$INPUT_RECORD_SEPARATOR" => 1 },
1113             desc => "Input record separator (end-of-line marker on inputs)",
1114             },
1115             "\$SUBSCRIPT_SEPARATOR" => {
1116             aliases => { "\$;" => 1, "\$SUBSEP" => 1 },
1117             desc => "Hash subscript separator for key concatenation",
1118             },
1119             "\$SUBSEP" => {
1120             aliases => { "\$;" => 1, "\$SUBSCRIPT_SEPARATOR" => 1 },
1121             desc => "Hash subscript separator for key concatenation",
1122             },
1123             "\$SYSTEM_FD_MAX" => {
1124             aliases => { "\$^F" => 1 },
1125             desc => "Maximum system file descriptor",
1126             },
1127             "\$UID" => {
1128             aliases => { "\$<" => 1, "\$REAL_USER_ID" => 1 },
1129             desc => "Real uid of the current process",
1130             },
1131             "\$WARNING" =>
1132             { aliases => { "\$^W" => 1 }, desc => "Global warning flags" },
1133             "\${^CHILD_ERROR_NATIVE}" => {
1134             aliases => {},
1135             desc => "Native status from most recent system-level call",
1136             },
1137             "\${^ENCODING}" => {
1138             aliases => {},
1139             desc => "Encode object for source conversion to Unicode",
1140             },
1141             "\${^GLOBAL_PHASE}" =>
1142             { aliases => {}, desc => "Current interpreter phase" },
1143             "\${^MATCH}" =>
1144             { aliases => {}, desc => "Most recent regex match string (under /p)" },
1145             "\${^OPEN}" => { aliases => {}, desc => "PerlIO I/O layers" },
1146             "\${^POSTMATCH}" => {
1147             aliases => {},
1148             desc => "String following most recent regex match (under /p)",
1149             },
1150             "\${^PREMATCH}" => {
1151             aliases => {},
1152             desc => "String preceding most recent regex match (under /p)",
1153             },
1154             "\${^RE_DEBUG_FLAGS}" =>
1155             { aliases => {}, desc => "Regex debugging flags" },
1156             "\${^RE_TRIE_MAXBUF}" =>
1157             { aliases => {}, desc => "Cache limit on regex optimizations" },
1158             "\${^TAINT}" => { aliases => {}, desc => "Taint mode" },
1159             "\${^UNICODE}" => { aliases => {}, desc => "Unicode settings" },
1160             "\${^UTF8CACHE}" =>
1161             { aliases => {}, desc => "Internal UTF-8 offset caching controls" },
1162             "\${^UTF8LOCALE}" => { aliases => {}, desc => "UTF-8 locale" },
1163             "\${^WARNING_BITS}" => { aliases => {}, desc => "Lexical warning flags" },
1164             "\${^WIN32_SLOPPY_STAT}" =>
1165             { aliases => {}, desc => "Use non-opening stat() under Windows" },
1166             "\$|" => {
1167             aliases => { "\$OUTPUT_AUTOFLUSH" => 1 },
1168             desc => "Autoflush status of selected output filehandle",
1169             },
1170             "\$~" => {
1171             aliases => { "\$FORMAT_NAME" => 1 },
1172             desc => "Name of format for selected output channel",
1173             },
1174             "%!" => {
1175             aliases => { "%ERRNO" => 1, "%OS_ERROR" => 1 },
1176             desc => "Status of all possible errors from most recent system call",
1177             },
1178             "%+" => {
1179             aliases => {},
1180             desc => "Named captures of most recent regex match (as strings)",
1181             },
1182             "%-" => {
1183             aliases => { "%LAST_MATCH_START" => 1 },
1184             desc =>
1185             "Named captures of most recent regex match (as arrays of strings)",
1186             },
1187             "%^H" => { aliases => {}, desc => "Lexical hints hash" },
1188             "%ENV" => { aliases => {}, desc => "The current shell environment" },
1189             "%ERRNO" => {
1190             aliases => { "%!" => 1, "%OS_ERROR" => 1 },
1191             desc => "Status of all possible errors from most recent system call",
1192             },
1193             "%INC" => { aliases => {}, desc => "Filepaths of loaded modules" },
1194             "%LAST_MATCH_START" => {
1195             aliases => { "%-" => 1 },
1196             desc =>
1197             "Named captures of most recent regex match (as arrays of strings)",
1198             },
1199             "%OS_ERROR" => {
1200             aliases => { "%!" => 1, "%ERRNO" => 1 },
1201             desc => "Status of all possible errors from most recent system call",
1202             },
1203             "%SIG" => { aliases => {}, desc => "Signal handlers" },
1204             "\@+" => {
1205             aliases => { "\@LAST_PAREN_MATCH" => 1 },
1206             desc =>
1207             "Offsets of ends of capture groups of most recent regex match",
1208             },
1209             "\@-" => {
1210             aliases => { "\@LAST_MATCH_START" => 1 },
1211             desc =>
1212             "Offsets of starts of capture groups of most recent regex match",
1213             },
1214             "\@_" => { aliases => { "\@ARG" => 1 }, desc => "Subroutine arguments" },
1215             "\@ARG" => { aliases => { "\@_" => 1 }, desc => "Subroutine arguments" },
1216             "\@ARGV" => { aliases => {}, desc => "Command line arguments" },
1217             "\@F" => {
1218             aliases => {},
1219             desc => "Fields of the current input line (under autosplit mode)",
1220             },
1221             "\@INC" => { aliases => {}, desc => "Search path for loading modules" },
1222             "\@LAST_MATCH_START" => {
1223             aliases => { "\@-" => 1 },
1224             desc =>
1225             "Offsets of starts of capture groups of most recent regex match",
1226             },
1227             "\@LAST_PAREN_MATCH" => {
1228             aliases => { "\@+" => 1 },
1229             desc =>
1230             "Offsets of ends of capture groups of most recent regex match",
1231             },
1232             );
1233              
1234              
1235             # Build pattern to detect "unhelpful" variable and subroutine names
1236              
1237             my @CACOGRAMS = qw<
1238             in(put)
1239             out(put)
1240             get
1241             put
1242             (re)set
1243             clear
1244             update
1245              
1246             array
1247             data
1248             dict(ionary)
1249             dictionaries
1250             elem(ent)
1251             hash
1252             heap
1253             idx
1254             indices
1255             key[]
1256             list
1257             node
1258             num(ber)
1259             obj(ect)
1260             queue
1261             rec(ord)
1262             scalar
1263             set
1264             stack
1265             str(ing)
1266             tree
1267             val(ue)[]
1268             opt(ion)
1269             arg(ument)
1270             range
1271             var(iable)
1272              
1273             desc(riptor)
1274             alt(ernate)
1275             item
1276             prev(ious)
1277             next
1278             last
1279             other
1280             res(ult)
1281             target
1282             name
1283             count
1284             size
1285             optional
1286              
1287             foo
1288 500     500   726 bar
1289             baz
1290 500         722 >;
1291 500         873  
1292 500         892 sub _inflect {
1293             my ($word) = @_;
1294 500         1207  
1295 500   100     1508 my $singular = $word =~ s{ \[ .* \]}{}rxms;
  500         2084  
1296             my $sing = $singular =~ s{ \( .* \) }{}grxms;
1297             $singular =~ s/[()]//g;
1298 500         2752  
1299             my $plur = ($word =~ s{ \( .* \) | \[ .* \]}{}grxms) .'s';
1300             my $plural = $word =~ s{ \[ (.*?) \] | \Z }{ $1 // 's'}erxms
1301             =~ s{ [()] }{}grxms;
1302              
1303             return $plural, $plur, $singular, $sing;
1304             }
1305              
1306             my $CACOGRAMS_PAT
1307             = '\b(?!_\z)(?:'.join('|', reverse(sort(uniq(map { _inflect($_) } @CACOGRAMS, '_')))).')+\b';
1308              
1309              
1310             # Build tools to detect parograms (similar, but not identical variable and sub names)...
1311              
1312             my $VOWEL = '[aeiou]';
1313             my @DOUBLE_CONSONANT
1314             = map {("$_$_(?=$VOWEL)" => { "$_$_" => "$_$_?", $_ => "$_$_?" },
1315             "(?<=$VOWEL)$_(?=$VOWEL)" => { "$_$_" => "$_$_?", $_ => "$_$_?" },
1316             )}
1317             qw< b c d f g h j k l m n p q r s t v w x y z >;
1318              
1319             my %VARIANT_SPELLING = (
1320             'ou?r' => { or => 'ou?r', our => 'ou?r', },
1321             'en[cs](?=e)' => { enc => 'en[cs]', ens => 'en[cs]', },
1322             '\B(?:er|re)' => { er => '(?:er|re)', re => '(?:er|re)', },
1323             '(?:x|ct)ion' => { xion => '(?:x|ct)ion', ction => '(?:x|ct)ion', },
1324             'ae' => { ae => 'a?e', },
1325             'oe' => { oe => 'o?e', },
1326             'i[sz](?=e)' => { is => 'i[sz]', iz => 'i[sz]', },
1327             'y[sz](?=e)' => { ys => 'y[sz]', yz => 'y[sz]', },
1328             'og(?:ue)?' => { og => 'og(?:ue)?', ogue => 'og(?:ue)?', },
1329             'e?abl' => { eabl => 'e?abl', abl => 'e?abl', },
1330             @DOUBLE_CONSONANT,
1331             );
1332             my %VARIANT_PAT = map { %{$_}; } values %VARIANT_SPELLING;
1333             my $VARIANT_SPELLING = join('|', reverse sort keys %VARIANT_SPELLING);
1334              
1335             my @CONFLATION_GROUPS = ('aeiou', 'bdfhklt', 'cmnrsvwxz', 'gjpqy');
1336             my %CONFLATION_CHARS;
1337             for my $group (@CONFLATION_GROUPS) {
1338             for my $letter (split('', $group)) {
1339 843     843   1391 $CONFLATION_CHARS{$letter} = "[$group]" =~ s/$letter//gr;
1340             }
1341             }
1342 843         1779  
  3658         4429  
1343 3658 100 66     6527 sub _parograms_of {
  3658         14706  
  10637         26782  
1344             my ($word) = @_;
1345              
1346             my $typos = join '|',
1347 843   33     9859 map { our $pos = $_;
  605         5980  
1348             $word =~ s{(??{pos==$pos?'':'(?!)'}) .}{$CONFLATION_CHARS{$&} // $&}eixmsr;
1349 843 100       2939 }
1350             0..length($word)-1;
1351              
1352             my $spelling = $word =~ s{$VARIANT_SPELLING}{$VARIANT_PAT{lc $&}//$&}egixmsr;
1353              
1354             return $spelling ne $word ? "(?i:$spelling|$typos)" : "(?i:$typos)";
1355 14480     14480   18128 }
1356 14480         18810  
1357 14480         15980  
1358 14480         36374 # Determine if two variables overlap in scope...
1359             sub _share_scope {
1360             my ($var1, $var2) = @_;
1361             my $from_delta = $var1->{start_of_scope} - $var2->{start_of_scope};
1362             my $to_delta = $var1->{end_of_scope} - $var2->{end_of_scope};
1363 57     57 0 192 return $from_delta * $to_delta <= 0;
1364             }
1365              
1366 10     10   104 # Locate all mentions of all variable in the specified code...
  10         26  
  10         18172  
1367 57         395 sub classify_all_vars_in {
1368             my ($source) = @_;
1369              
1370             # A stack to track the scope of each variable
1371 57         200 no warnings 'once';
1372 57         180 local @Code::ART::varscope = { ids => {}, decls => [] };
1373 57         144  
1374             # Hashes to track their variable descriptions and uses
1375             # (Variables are identified by the offset of their declaration from the start of the source)...
1376 57         368558 local %Code::ART::varinfo = ();
1377             local %Code::ART::varuse = ();
1378             local $Code::ART::use_version = 0;
1379              
1380             # Detect and record all instances of variable within the source code...
1381             my $matched = $source =~ m{
1382             \A
1383             (?&_push_scope)
1384             (?&PerlDocument)
1385             (?&_pop_scope)
1386             \Z
1387              
1388 5         253 (?(DEFINE)
1389             (?
1390             (?>
1391             use (?>(?&PerlOWS))
1392             (? \d++ (?: \. \d++)?+ | v\d++ (?: \. \d++)*+ )
1393             (?{ $Code::ART::use_version = version->parse("$+{version}") })
1394             |
1395             (?&PerlStdUseStatement)
1396             )
1397             )
1398              
1399             (?
1400             (?>
1401             (?&_push_scope)
1402             (?&PerlStdBlock)
1403             (?&_pop_scope)
1404             |
1405             (?&_revert_scope_on_failure)
1406             )
1407             )
1408              
1409             (?
1410             (?>
1411             (?&_push_scope)
1412             (?&PerlStdAnonymousHash)
1413             (?&_pop_scope)
1414             |
1415             (?&_revert_scope_on_failure)
1416             )
1417             )
1418              
1419             (?
1420             (?>
1421             (?&PerlStdStatement)
1422             (?&_install_pending_decls)
1423             |
1424             (?&_clear_pending_declaration)
1425             )
1426             )
1427              
1428             (?
1429             (?&_push_scope)
1430             (?>
1431             # Conditionals can have var declarations in their conditions...
1432             (?> if | unless ) \b (?>(?&PerlOWS))
1433             (?>(?&PerlParenthesesList)) (?>(?&PerlOWS))
1434             (?= [^\n]*
1435             (?
1436             (?
1437             \h* \# \h*
1438             (? [^\n]* )
1439             |
1440             (?)
1441             )
1442             (?&_install_pending_decls)
1443             (?>(?&PerlBlock))
1444             (?&_pop_scope)
1445              
1446             (?:
1447             (?>(?&PerlOWS))
1448             (?>(?&PerlPodSequence))
1449             elsif \b (?>(?&PerlOWS))
1450             (?>
1451             (?&_push_scope)
1452             (?>(?&PerlParenthesesList)) (?>(?&PerlOWS))
1453             (?= [^\n]*
1454             (?
1455             (?
1456             \h* \# \h*
1457             (? [^\n]* )
1458             |
1459             (?)
1460             )
1461             (?&_install_pending_decls)
1462             (?&PerlBlock)
1463             (?&_pop_scope)
1464             |
1465             (?&_revert_scope_on_failure)
1466             )
1467             )*+
1468              
1469             (?:
1470             (?>(?&PerlOWS))
1471             (?>(?&PerlPodSequence))
1472             else \b (?>(?&PerlOWS))
1473             (?&PerlBlock)
1474             )?+
1475             |
1476             # Have to handle loops specially (may have var declarations)...
1477             (?>
1478             (? for(?:each)?+ \b )
1479             (?>(?&PerlOWS))
1480             (?>
1481             (?&_allow_decls)
1482             (?> # Explicitly aliased iterator variable...
1483             (?>
1484             \\ (?>(?&PerlOWS))
1485             (? (?> my | our | state ) )
1486             |
1487             (? (?> my | our | state ) )
1488             (?>(?&PerlOWS)) \\
1489             )
1490             (?>(?&PerlOWS))
1491             (?
1492             (?> (?&PerlVariableScalar)
1493             | (?&PerlVariableArray)
1494             | (?&PerlVariableHash)
1495             )
1496             )
1497             |
1498             # Implicitly aliased iterator variable...
1499             (?> (? my | our | state ) (?>(?&PerlOWS)) )?+
1500             (? (?&PerlVariableScalar) )
1501             )?+
1502             (?= [^\n]*
1503             (?
1504             (?
1505             \h* \# \h*
1506             (? [^\n]* )
1507             |
1508             (?)
1509             )
1510             (?&_record_and_disallow_decls)
1511              
1512             (?>(?&PerlOWS))
1513             (?: (?> (?&PerlParenthesesList) | (?&PerlQuotelikeQW) ) )
1514             )
1515             |
1516             (?> while | until) \b (?>(?&PerlOWS))
1517             (?&_allow_decls)
1518             (?&PerlParenthesesList)
1519             (?= [^\n]*
1520             (?
1521             (?
1522             \h* \# \h*
1523             (? [^\n]* )
1524             |
1525             (?)
1526             )
1527             (?&_record_and_disallow_decls)
1528             )
1529              
1530             (?>(?&PerlOWS))
1531             (?&_install_pending_decls)
1532             (?>(?&PerlBlock))
1533              
1534             (?:
1535             (?>(?&PerlOWS)) continue
1536             (?>(?&PerlOWS)) (?&PerlBlock)
1537             )?+
1538             (?&_pop_scope)
1539             |
1540             (?> given | when ) \b (?>(?&PerlOWS))
1541             (?>(?&PerlParenthesesList)) (?>(?&PerlOWS))
1542             (?&_install_pending_decls)
1543             (?&PerlBlock)
1544             (?&_pop_scope)
1545             |
1546             (?&PerlStdControlBlock)
1547             (?&_pop_scope)
1548             |
1549             (?&_revert_scope_on_failure)
1550             )
1551             )
1552              
1553             (?
1554             (?&_push_scope)
1555             (?>
1556             (?> (?> my | state | our ) \b (?>(?&PerlOWS)) )?+
1557             (? sub \b ) (?>(?&PerlOWS))
1558             (?>(?&PerlOldQualifiedIdentifier)) (?&PerlOWS)
1559             |
1560             AUTOLOAD (?&PerlOWS)
1561             |
1562             DESTROY (?&PerlOWS)
1563             )
1564              
1565             (?&_allow_decls)
1566             (?>
1567             # Perl pre 5.028
1568             (?:
1569             (?>
1570             (?&PerlParenthesesList) # Parameter list
1571             |
1572             \( [^)]*+ \) # Prototype (
1573             )
1574             (?&PerlOWS)
1575             )?+
1576             (?: (?>(?&PerlAttributes)) (?&PerlOWS) )?+
1577             (?&_record_and_disallow_decls)
1578             |
1579             # Perl post 5.028
1580             (?: (?>(?&PerlAttributes)) (?&PerlOWS) )?+
1581             (?: (?>(?&PerlParenthesesList)) (?&PerlOWS) )?+ # Parameter list
1582             (?&_record_and_disallow_decls)
1583             )?+
1584             (?&_install_pending_decls)
1585             (?> ; | (?&PerlBlock))
1586             (?&_pop_scope)
1587             |
1588             (?&_revert_scope_on_failure)
1589             )
1590              
1591             (?
1592             (?&_push_scope)
1593             (? sub \b )
1594             (?>(?&PerlOWS))
1595              
1596             (?&_allow_decls)
1597             (?:
1598             # Perl pre 5.028
1599             (?:
1600             (?>
1601             (?&PerlParenthesesList) # Parameter list
1602             |
1603             \( [^)]*+ \) # Prototype (
1604             )
1605             (?&PerlOWS)
1606             )?+
1607             (?: (?>(?&PerlAttributes)) (?&PerlOWS) )?+
1608             (?= [^\n]*
1609             (?
1610             (?
1611             \h* \# \h*
1612             (? [^\n]* )
1613             |
1614             (?)
1615             )
1616             (?&_record_and_disallow_decls)
1617             |
1618             # Perl post 5.028
1619             (?: (?>(?&PerlAttributes)) (?&PerlOWS) )?+
1620             (?: (?&PerlParenthesesList) (?&PerlOWS) )?+ # Parameter list
1621             (?= [^\n]*
1622             (?
1623             (?
1624             \h* \# \h*
1625             (? [^\n]* )
1626             |
1627             (?)
1628             )
1629             (?&_record_and_disallow_decls)
1630             )?+
1631             (?&_install_pending_decls)
1632             (?&PerlBlock)
1633             (?&_pop_scope)
1634             |
1635             (?&_revert_scope_on_failure)
1636             )
1637              
1638             (?
1639             (?> (? my | state | our ) ) \b (?>(?&PerlOWS))
1640             (?: (?&PerlQualifiedIdentifier) (?&PerlOWS) )?+
1641             (?&_allow_decls)
1642             (?:
1643             (?&PerlLvalue)
1644             (?= [^\n]*
1645             (?
1646             (?
1647             \h* \# \h*
1648             (? [^\n]* )
1649             |
1650             (?)
1651             )
1652             (?&_record_and_disallow_decls)
1653             |
1654             (?&_record_and_disallow_decls)
1655             (?!)
1656             )
1657             (?>(?&PerlOWS))
1658             (?&PerlAttributes)?+
1659             )
1660              
1661             (?
1662             (?>
1663             \\?+
1664             (?:
1665             (? (?> \$\#? | [@%] ) (?>(?&PerlOWS)) (?&PerlIdentifier) )
1666             (?&_save_var_after_ows)
1667             )
1668             |
1669             \(
1670             (?>(?&PerlOWS))
1671             (?> \\?+
1672             (? (?> \$\#? | [@%] ) (?>(?&PerlOWS)) (?&PerlIdentifier) )
1673             (?&_save_var_after_ows)
1674             |
1675             undef
1676             )
1677             (?>(?&PerlOWS))
1678             (?:
1679             (?>(?&PerlComma))
1680             (?>(?&PerlOWS))
1681             (?> \\?+
1682             (? (?> \$\#? | [@%] ) (?>(?&PerlOWS)) (?&PerlIdentifier) )
1683             (?&_save_var_after_ows)
1684             |
1685             undef
1686             )
1687             (?>(?&PerlOWS))
1688             )*+
1689             (?: (?>(?&PerlComma)) (?&PerlOWS) )?+
1690             \)
1691             )
1692             )
1693              
1694             (?
1695             (?> (? my | state | our ) ) \b (?>(?&PerlOWS))
1696             (?: (?&PerlQualifiedIdentifier) (?&PerlOWS) )?+
1697             (?&_allow_decls)
1698             (?:
1699             (?&PerlLvalue)
1700             (?= [^\n]*
1701             (?
1702             (?
1703             \h* \# \h*
1704             (? [^\n]* )
1705             |
1706             (?)
1707             )
1708             (?&_record_and_disallow_decls)
1709             |
1710             (?&_disallow_decls)
1711             (?!)
1712             )
1713             (?>(?&PerlOWS))
1714             (?&PerlAttributes)?+
1715             |
1716             (?&PerlStdTerm)
1717             )
1718              
1719             (? (? (?&PerlStdVariableScalar) ) (?&_save_var_after_ows) )
1720             (? (? (?&PerlStdVariableArray) ) (?&_save_var_after_ows) )
1721             (? (? (?&PerlStdVariableHash) ) (?&_save_var_after_ows) )
1722              
1723             (?
1724             (? (?&PerlStdVariableScalarNoSpace) ) (?&_save_var_no_ows)
1725             )
1726             (?
1727             (? (?&PerlStdVariableArrayNoSpace) ) (?&_save_var_no_ows)
1728             )
1729              
1730             (?
1731             " [^"\$\@\\]*+
1732             (?: (?> \\. | (?&PerlScalarAccessNoSpace) | (?&PerlArrayAccessNoSpace) )
1733             [^"\$\@\\]*+
1734             )*+
1735             "
1736             |
1737 2514         7401 (?&PerlStdString)
  25140         33151  
  2514         17121  
  2514         49808  
1738             )
1739              
1740             # Test and record instances of any variable encountered...
1741             (?<_save_var_after_ows>
1742             (?{ my $var = (grep {defined} @{$-{var}})[-1]; [$var, pos() - length($var) ] })
1743 126         287 (?= (?>(?&PerlOWS)) (?> (? \[ ) | (? \{ ) | ) )
  1260         1667  
  126         839  
  126         1105  
1744             (?&_save_var)
1745             )
1746              
1747             (?<_save_var_no_ows>
1748             (?{ my $var = (grep {defined} @{$-{var}})[-1]; [$var, pos() - length($var) ] })
1749             (?= (? \[ ) | (? \{ ) | )
1750 2640         8260 (?&_save_var)
  2640         4801  
1751 2640 100       4934 )
1752 2554         2987  
1753 2554 50       5247 (?<_save_var>
1754 0         0 (?{
1755 0         0 my ($var, $varid) = @{$^R};
1756             if (length($var) > 2) {
1757             while (1) {
1758             last if substr($var,1,1) ne '$';
1759             substr($var, 0, 1, q{});
1760 2640 100       4808 $varid++;
1761 949         988 }
  949         64988  
1762             }
1763              
1764             # Update the scope's information if this variable is being declared...
1765             if ($Code::ART::varscope[-1]{allow_decls}) {
1766             push @{$Code::ART::varscope[-1]{decls}},
1767 1691         1860 { id => $varid, decl_name => $var, raw_name => substr($var,1) };
1768 1691         2699 }
1769 1691 100 66     5479  
1770             # Otherwise record its usage in the appropriate slot (if any)...
1771             else {
1772 1691         4197 my $varlen = length($var);
1773 1691 50       3468 my $sigil = substr($var, 0, 1, q{});
1774             my $twigil = $varlen > 1 && substr($var, 0, 1) eq '#'
1775 1691 100 100     11194 ? substr($var, 0, 1, q{})
    100          
1776             : q{};
1777             (my $cleanvar = $var) =~ s/[^\w:'^]+//g;
1778             $var = $cleanvar if length($cleanvar) > 0;
1779 1691   66     337753 $var = ( $+{array} || $twigil ? '@'
1780             : $+{hash} ? '%'
1781             : $sigil) . $var;
1782             $Code::ART::varuse
1783             {$Code::ART::varscope[-1]{ids}{$var} // $var}
1784             {$varid} = $varlen;
1785             }
1786             })
1787 10491         207086 )
  10491         152916  
1788              
1789             # Set up a new nested scope replicating the surrounding scope...
1790             (?<_push_scope>
1791             (?{ push @Code::ART::varscope, {
1792             ids => {%{$Code::ART::varscope[-1]{ids}}},
1793             decls => [],
1794             };
1795             })
1796 758         6223 )
1797 758         1017  
1798 758         808 # Tear down a nested scope...
  758         1711  
1799             (?<_pop_scope>
1800 3157         29847 (?{
1801             $Code::ART::oldscope = pop @Code::ART::varscope;
1802             $Code::ART::end_of_scope = pos();
1803             for my $id (values %{$Code::ART::oldscope->{ids}}) {
1804             $Code::ART::varinfo{$id}{end_of_scope}
1805             = $Code::ART::end_of_scope;
1806             }
1807 9733         510260 })
1808             )
1809              
1810             # Clean up a scope that's closing, but also propagate failure...
1811             (?<_revert_scope_on_failure>
1812             (?{ pop @Code::ART::varscope; })
1813 742         29105 (?!)
1814             )
1815              
1816             # Allow/disallow variables to be recorded as declarations...
1817 0         0 (?<_allow_decls>
1818             (?{ $Code::ART::varscope[-1]{allow_decls} = 1; })
1819             )
1820              
1821             (?<_disallow_decls>
1822             (?{ $Code::ART::varscope[-1]{allow_decls} = 0; })
1823 742         1174 )
  742         1529  
1824 991   33     2115  
1825 991         4556 # Disallow declarations but remember the ones that were already found...
1826 7928         14036 (?<_record_and_disallow_decls>
  991         4850  
1827             (?{
1828             for my $decl (@{$Code::ART::varscope[-1]{decls}}) {
1829             my $decl_name = $decl->{decl_name} // $+{var};
1830             @{$decl}{'declarator', 'sigil', 'desc', 'decl_name', 'raw_name', 'aliases'}
1831 991   50     1145 = ( (grep {defined} @{$-{declarator}})[-1] // q{},
      100        
1832             substr($_, $decl->{id}, 1),
1833             $+{desc} // q{},
1834 742         74371 $decl_name,
1835             $decl->{raw_name},
1836             []
1837             );
1838             }
1839             $Code::ART::varscope[-1]{allow_decls} = 0;
1840             })
1841             )
1842 1533         43165  
  1533         3439  
1843 739         1807 # Make new variable declarations effective in the current scope...
1844 739         3244 (?<_install_pending_decls>
1845             (?: (?&PerlOWS) \{ )?+
1846 739         1037 (?{
  739         1135  
1847             for my $decl (@{$Code::ART::varscope[-1]{decls}}) {
1848 739   33     1856 $Code::ART::varscope[-1]{ids}{$decl->{decl_name}} = $decl->{id};
1849 739         1333 @{$Code::ART::varinfo{$decl->{id}}}
1850 739         1624 {'declarator', 'sigil', 'desc', 'decl_name', 'raw_name'}
1851             = @{$decl}{'declarator', 'sigil', 'desc', 'decl_name', 'raw_name'};
1852 1533         37734 $Code::ART::varinfo{$decl->{id}}->{sigil}
1853             //= substr($_, $decl->{id},1);
1854             $Code::ART::varinfo{$decl->{id}}->{start_of_scope} = pos();
1855             $Code::ART::varuse{$decl->{id}} = {};
1856             }
1857             $Code::ART::varscope[-1]{decls} = [];
1858             })
1859             (?!) # Backtrack to unwind matching the trailing block delimiter
1860             |
1861 493         27071 (?=) # Then match anyway, but at the original position
1862             )
1863              
1864             # Reset pending variable declarations in current scope...
1865             (?<_clear_pending_declaration>
1866             (?{ $Code::ART::varscope[-1]{decls} = []; })
1867             )
1868             )
1869 57 100       2549  
1870             $PPR::X::GRAMMAR
1871             }xmso;
1872              
1873 56         148 # Return a failure report if unable to process source code...
1874 56         370 return { failed => 'invalid source code', context => $PPR::X::ERROR }
1875 843 100       1790 if !$matched;
1876              
1877             # Install usages and declaration locations...
1878             my $undecl_id = -1;
1879             for my $id (keys %Code::ART::varuse) {
1880             if ($id !~ /^\d+$/) {
1881             $Code::ART::varinfo{$undecl_id--}
1882             = { decl_name => $id,
1883 104   50     1036 sigil => substr($id,0,1),
1884             raw_name => substr($id,1),
1885             declarator => "",
1886             desc => "",
1887             declared_at => -1,
1888             used_at => $Code::ART::varuse{$id} // [],
1889 739         1684 start_of_scope => -1,
1890 739   50     1474 end_of_scope => length($source),
1891             };
1892 739   50     1717 }
      33        
1893             else {
1894             $Code::ART::varinfo{$id}{declared_at} = $id;
1895             $Code::ART::varinfo{$id}{used_at} = $Code::ART::varuse{$id} // [];
1896             $Code::ART::varinfo{$id}{start_of_scope} //= -1,
1897 56         141 $Code::ART::varinfo{$id}{end_of_scope} //= length($source);
1898 56         250 }
1899 843         1382 }
1900 843         1109  
1901             # Install standard descriptions and apply analyses...
1902             my %var_at;
1903 843         1021 for my $varid (keys %Code::ART::varinfo) {
  843         2491  
1904 1486         2673 my $var = $Code::ART::varinfo{$varid};
1905 11612         22058 my $var_name = $var->{raw_name};
1906              
1907             # Invert usages...
1908             for my $startpos (keys %{$var->{used_at}}) {
1909             for my $offset (0 .. $var->{used_at}{$startpos}) {
1910 843         1363 $var_at{ $startpos + $offset } = $varid;
1911 843 100       2169 }
1912 43         98 }
  43         110  
  43         107  
1913 43         91  
1914             # Check whether variable is a built-in...
1915             $var->{is_builtin} = 0;
1916             if (my $std_desc = $STD_VAR_DESC{$var->{decl_name}}) {
1917 843 100       8001 @{$var}{'desc', 'aliases'} = @{$std_desc}{'desc', 'aliases'};
1918             $var->{is_builtin} = 1;
1919             }
1920 843         1638  
1921 843         1637 # Check whether its name is unhelpful...
1922 843         1304 $var->{is_cacogram} = $var_name =~ /\A$CACOGRAMS_PAT\Z/ ? 1 : 0;
1923 843         2222  
1924 15323 100 100     31415 # Check for homograms and parograms...
1925             my $parograms_pat = _parograms_of($var_name);
1926 9020         13632 $var->{homograms} = {};
1927 9020 100       14819 $var->{parograms} = {};
1928             for my $other_var (values %Code::ART::varinfo) {
1929 9020 100       110464 next if $var == $other_var || !_share_scope($var, $other_var);
1930              
1931 3366   100     8646 my $other_name = $other_var->{raw_name};
1932             my ($gram_type, $matcher) = $other_name eq $var_name ? ('homograms', $var_name)
1933 3366         7958 : ('parograms', $parograms_pat);
1934             if ($other_name =~ /\A$matcher\z/) {
1935 3366         7700 $var->{$gram_type}{$other_name}
1936             //= { from=>$var->{declared_at}, to=>$var->{end_of_scope} };
1937             $var->{$gram_type}{$other_name}{from}
1938             = min $var->{$gram_type}{$other_name}{from}, $other_var->{declared_at};
1939             $var->{$gram_type}{$other_name}{to}
1940             = max $var->{$gram_type}{$other_name}{to}, $other_var->{end_of_scope};
1941 843   50     3489 }
1942             }
1943              
1944             # Measure its scope...
1945             $var->{scope_scale}
1946 56         744 = ($var->{end_of_scope} - ($var->{declared_at} // 0)) / length($source);
1947             }
1948              
1949             # Return all the information acquired...
1950             return {
1951             vars => \%Code::ART::varinfo,
1952             var_at => \%var_at,
1953             use_version => $Code::ART::use_version,
1954             }
1955             }
1956              
1957              
1958             1; # Magic true value required at end of module
1959             __END__