File Coverage

lib/Perl500503Syntax/OrDie.pm
Criterion Covered Total %
statement 454 490 92.6
branch 181 214 84.5
condition 82 111 73.8
subroutine 24 25 96.0
pod 2 2 100.0
total 743 842 88.2


line stmt bran cond sub pod time code
1             package Perl500503Syntax::OrDie;
2              
3             ######################################################################
4             #
5             # Perl500503Syntax::OrDie - Validate Perl 5.005_03 source compatibility
6             #
7             # https://metacpan.org/dist/Perl500503Syntax-OrDie
8             #
9             # Copyright (c) 2026 INABA Hitoshi
10             #
11             ######################################################################
12              
13 27     20   1409662 use 5.00503;
  27         88  
14 27     20   347 use strict;
  27         44  
  27         1812  
15 27 100 33 20   582 BEGIN { if ($] < 5.006 && !defined(&warnings::import)) {
16 7         245 $INC{'warnings.pm'} = 'stub'; eval 'package warnings; sub import {}' } }
  7         13  
17 27     20   358 use warnings; local $^W = 1;
  27         38  
  27         1521  
18              
19 27     20   134 use vars qw($VERSION @BLACKLIST @REGEX_BLACKLIST @RAW_BLACKLIST $_OPEN_GUARDED $_MKDIR_GUARDED);
  27         938  
  27         143668  
20              
21             $VERSION = '0.03';
22              
23             # ======================================================================
24             # BLACKLIST
25             # Each entry: [ qr/pattern/, 'human-readable description' ]
26             # Matched against source after masking comments and string literals.
27             # ======================================================================
28             @BLACKLIST = (
29              
30             # ------------------------------------------------------------------
31             # Perl 5.6 features
32             # ------------------------------------------------------------------
33              
34             # our VARIABLE
35             [ qr/\bour\s*[\$\@\%\*]/,
36             "'our' declaration (introduced in Perl 5.6; use 'use vars' instead)" ],
37              
38             # 3-argument open() is detected in a dedicated paren-aware stage
39             # (see "Stage 1c" in _check_source); a flat comma-counting regex
40             # cannot tell a top-level argument comma from one nested inside an
41             # expression such as open(FH, '>' . File::Spec->catfile($d, $f)).
42              
43             # use utf8
44             [ qr/\buse\s+utf8\b/,
45             "'use utf8' (introduced in Perl 5.6)" ],
46              
47             # use VERSION >= 5.6 (numeric forms: 5.6, 5.06, 5.006, 5.010, 5.100 ...)
48             # Pattern: 5. followed by (0*[6-9]) OR (0[1-9]\d+) OR ([1-9]\d+)
49             # This catches 5.6, 5.06, 5.006, 5.010, 5.012, 5.100 etc.
50             # but not 5.5, 5.005, 5.004
51             [ qr/\buse\s+5\s*\.\s*(?:0*[6-9]|0[1-9]\d+|[1-9]\d+)\b/,
52             "use VERSION >= 5.6 (target is Perl 5.005_03)" ],
53              
54             # use v5.6, use v5.10, ...
55             [ qr/\buse\s+v5\s*\.\s*[6-9]/,
56             "use vVERSION >= v5.6 (target is Perl 5.005_03)" ],
57             [ qr/\buse\s+v5\s*\.1/,
58             "use vVERSION >= v5.10 (target is Perl 5.005_03)" ],
59              
60             # \x{HHHH} Unicode escape
61             [ qr/\\x\{[0-9A-Fa-f]+\}/,
62             "\\x{} Unicode escape (introduced in Perl 5.6)" ],
63              
64             # \N{name} named character
65             [ qr/\\N\{[^}]+\}/,
66             "\\N{} named character escape (introduced in Perl 5.6)" ],
67              
68             # @+ / @- match-position arrays: $+[N] $-[N] @+ @-
69             [ qr/(?:\$[+\-]\[|\@[+\-](?!\w))/,
70             'match-position arrays @+/@- or $+[N]/$-[N] (introduced in Perl 5.6)' ],
71              
72             # CHECK { } / INIT { } named phase blocks
73             [ qr/\b(?:CHECK|INIT)\s*\{/,
74             "CHECK/INIT phase blocks (introduced in Perl 5.6)" ],
75              
76             # v-strings: v1.2.3 or v5.6.0
77             [ qr/(?
78             "v-string notation (introduced in Perl 5.6; use a plain number instead)" ],
79              
80             # $^V (version object; use $] instead)
81             [ qr/\$\^V\b/,
82             '$^V version object (introduced in Perl 5.6; use $] instead)' ],
83              
84             # :lvalue subroutine attribute
85             [ qr/\bsub\s+\w[\w:]*\s*(?:\([^)]*\)\s*)?:[^{]*\blvalue\b/,
86             "':lvalue' subroutine attribute (introduced in Perl 5.6)" ],
87              
88             # typeglob component slice *FH{IO} *foo{SCALAR} *foo{CODE} etc.
89             # Perl 5.005_03 does not support typeglob-element access via *name{SLOT}.
90             # Guard: require leading sigil or word boundary to avoid false positives.
91             [ qr/\*\w[\w:]*\s*\{\s*(?:IO|SCALAR|ARRAY|HASH|CODE|GLOB|FORMAT|NAME|PACKAGE)\s*\}/,
92             "typeglob component access *name{SLOT} (introduced in Perl 5.6)" ],
93              
94             # ------------------------------------------------------------------
95             # Perl 5.8 features
96             # ------------------------------------------------------------------
97              
98             # use encoding
99             [ qr/\buse\s+encoding\b/,
100             "'use encoding' (introduced in Perl 5.8)" ],
101              
102             # use constant with a hashref (hash of constants) -- Perl 5.8
103             # Single-value form use constant NAME => VALUE is 5.004+, always OK.
104             # Hash-ref form use constant { A => 1, B => 2 } was added in Perl 5.8.
105             [ qr/\buse\s+constant\s*\{/,
106             "'use constant { HASH }' multi-constant form (introduced in Perl 5.8; use separate 'use constant' statements)" ],
107              
108             # ------------------------------------------------------------------
109             # Perl 5.10 features
110             # ------------------------------------------------------------------
111              
112             # defined-or-assignment operator
113             [ do { my $p = '/' . '/='; qr/$p/ },
114             "defined-or assignment '" . '/' . "/=' (introduced in Perl 5.10)" ],
115              
116             # say HANDLE or say LIST
117             # Exclude ->say(...) method calls and say => hash-key usage
118             [ do { my $kw = 'sa' . 'y'; qr/(?)\b$kw\b(?!\s*=>)/ },
119             "'say' (introduced in Perl 5.10)" ],
120              
121             # sta-te $var
122             [ do { my $kw = 'sta' . 'te'; qr/\b$kw\s+[\$\@\%]/ },
123             "'sta" . "te' variable (introduced in Perl 5.10)" ],
124              
125             # given(...)
126             [ qr/\bgiven\s*\(/,
127             "'given' (introduced in Perl 5.10)" ],
128              
129             # when(...)
130             [ qr/\bwhen\s*\(/,
131             "'when' (introduced in Perl 5.10)" ],
132              
133             # smart-match ~~
134             [ qr/~~/,
135             "smart-match operator '~~' (introduced in Perl 5.10)" ],
136              
137             # use feature
138             # The empty-import form use feature () imports nothing and, when paired
139             # with a BEGIN { $INC{'feature.pm'} = '' if $] < 5.010 } stub, loads
140             # nothing on Perl 5.005_03. It is a no-op on every Perl version and is the
141             # standard cross-version guard idiom (parallel to the tolerated
142             # use warnings stub), so it is NOT a violation. A non-empty import list
143             # such as use feature 'say' or use feature qw(...) still is.
144             [ qr/\buse\s+feature\b(?!\s*\(\s*\))/,
145             "'use feature' (introduced in Perl 5.10)" ],
146              
147             # defined-or operator (two slashes, not part of s///, m//, =~, !~)
148             # and not the defined-or-assign variant
149             [ qr/(?
150             "defined-or operator (introduced in Perl 5.10)" ],
151              
152             # UNITCHECK phase block
153             [ qr/\bUNITCHECK\s*\{/,
154             "UNITCHECK phase block (introduced in Perl 5.10)" ],
155              
156             # ${^MATCH} ${^PREMATCH} ${^POSTMATCH} (used with /p flag -- 5.10)
157             [ qr/\$\{\^(?:MATCH|PREMATCH|POSTMATCH)\}/,
158             "\${^MATCH}/\${^PREMATCH}/\${^POSTMATCH} (introduced in Perl 5.10; require /p flag)" ],
159              
160             # ------------------------------------------------------------------
161             # Perl 5.12 features
162             # ------------------------------------------------------------------
163              
164             # package NAME VERSION
165             [ qr/\bpackage\s+\w[\w:]*\s+v?\d/,
166             "'package NAME VERSION' (introduced in Perl 5.12)" ],
167              
168             # yada-yada operator ... (not to be confused with .. range)
169             [ do { my $p = '\\.' x 3; qr/(?
170             "yada-yada operator '...' (introduced in Perl 5.12)" ],
171              
172             # ------------------------------------------------------------------
173             # Perl 5.14 features
174             # ------------------------------------------------------------------
175              
176             # /r (non-destructive) flag on s/// or tr///
177             # _mask_source emits __SR__ or __TRR__ marker when r flag is present.
178             [ qr/__SR__|__TRR__/,
179             "s///r or tr///r non-destructive flag (introduced in Perl 5.14)" ],
180              
181             # ------------------------------------------------------------------
182             # Perl 5.16 features
183             # ------------------------------------------------------------------
184              
185             # __SUB__ token (reference to the current subroutine)
186             [ qr/__SUB__/,
187             "__SUB__ (introduced in Perl 5.16; use explicit sub name or \$_[0] recursion)" ],
188              
189             # ------------------------------------------------------------------
190             # Perl 5.18 features
191             # ------------------------------------------------------------------
192              
193             # my sub NAME / state sub NAME -- lexical subroutines (Perl 5.18)
194             [ do { my $kw = 'su' . 'b'; qr/\b(?:my|state)\s+$kw\s+\w/ },
195             "'my sub'/'state sub' lexical subroutine (introduced in Perl 5.18)" ],
196              
197             # ------------------------------------------------------------------
198             # Perl 5.20 features
199             # ------------------------------------------------------------------
200              
201             # subroutine signatures: sub foo ($x, $y) { (Perl 5.20)
202             [ qr/\bsub\s+\w+\s*\([^\)]*(?
203             "subroutine signature (introduced in Perl 5.20)" ],
204              
205             # postfix dereference $ref->@* $ref->%* $ref->&*
206             [ qr/->\s*[\@\%\&\$]\s*\*/,
207             "postfix dereference (introduced in Perl 5.20)" ],
208              
209             # %hash{LIST} and %array[LIST] key/value (index/value) slices (Perl 5.20)
210             # Exclude plain %hash alone (no subscript) and %hash = (...) assignment.
211             # Require preceding context that cannot be an assignment target start.
212             [ qr/(?:[\(=,;!&|?:]|\breturn\b)\s*\%\w[\w:]*\s*[\{\[]/,
213             "key/value hash/array slice %hash{} or %array[] (introduced in Perl 5.20)" ],
214              
215             # ------------------------------------------------------------------
216             # Perl 5.22 features
217             # ------------------------------------------------------------------
218              
219             # &. |. ^. ~. string bitwise operators (use feature 'bitwise')
220             [ qr/(?:[&|^]\.=?|~\.)/,
221             "string bitwise operator '&.' '|.' '^.' '~.' (introduced in Perl 5.22)" ],
222              
223             # \$scalar in foreach -- reference aliasing (use feature 'refaliasing')
224             [ qr/\bforeach\s+\\(?:my\s+)?[\$\@\%\*]/,
225             "reference aliasing in foreach (introduced in Perl 5.22; use index-based loop instead)" ],
226              
227             # <<>> double-diamond operator
228             [ qr/<
229             "<<>> double-diamond operator (introduced in Perl 5.22)" ],
230              
231             # /n flag (non-capturing groups make all captures non-capturing)
232             [ qr{(?:=~|!~|(?
233             "/n non-capturing regex flag (introduced in Perl 5.22)" ],
234              
235             # ------------------------------------------------------------------
236             # Perl 5.26 features
237             # ------------------------------------------------------------------
238              
239             # <<~ indented heredoc
240             [ qr/<<~/,
241             "<<~ indented heredoc (introduced in Perl 5.26)" ],
242              
243             # ------------------------------------------------------------------
244             # Perl 5.32 features
245             # ------------------------------------------------------------------
246              
247             # isa infix operator: $obj isa ClassName
248             # Exclude ->isa() method calls (> before) and UNIVERSAL::isa() (:: before)
249             # Exclude isa( call form (followed by open paren = sub call, not infix)
250             [ qr/(?:])(?])\bisa\b(?!::)(?!\s*\()/,
251             "'isa' infix operator (introduced in Perl 5.32; use UNIVERSAL::isa() instead)" ],
252              
253             # ------------------------------------------------------------------
254             # Perl 5.34+ features
255             # ------------------------------------------------------------------
256              
257             # try { } catch ($e) { }
258             [ qr/\btry\s*\{/,
259             "'try' block (introduced in Perl 5.34)" ],
260              
261             # ------------------------------------------------------------------
262             # Perl 5.36 features
263             # ------------------------------------------------------------------
264              
265             # use builtin
266             [ qr/\buse\s+builtin\b/,
267             "'use builtin' (introduced in Perl 5.36)" ],
268              
269             # for my ($a, $b) (@list) -- iterable-value variables in for loop
270             [ qr/\bfor\s+my\s*\(/,
271             "'for my (\$a,\$b)' paired iteration (introduced in Perl 5.36)" ],
272              
273             # ------------------------------------------------------------------
274             # Perl 5.38+ features
275             # ------------------------------------------------------------------
276              
277             # class Foo { }
278             [ qr/\bclass\s+\w/,
279             "'class' keyword (introduced in Perl 5.38)" ],
280              
281             # ------------------------------------------------------------------
282             # Perl 5.40 features
283             # ------------------------------------------------------------------
284              
285             # ^^ high-precedence logical XOR operator ($x ^^ $y)
286             [ qr/\^\^[^>]/,
287             "'^^ / ^^=' high-precedence logical XOR (introduced in Perl 5.40)" ],
288              
289             # __CLASS__ keyword
290             [ qr/__CLASS__/,
291             "'__CLASS__' keyword (introduced in Perl 5.40; use __PACKAGE__ or \$class instead)" ],
292              
293             # ------------------------------------------------------------------
294             # Perl 5.42 features
295             # ------------------------------------------------------------------
296              
297             # any { } LIST and all { } LIST -- experimental keyword operators
298             # Exclude files that have "use List::Util" with any/all in the import list,
299             # since those are sub calls, not keywords. Detection deferred to
300             # _check_source() which performs the import pre-scan.
301             # (Pattern is applied conditionally; see _check_source().)
302              
303             # my method NAME -- lexical method declaration inside class block
304             [ do { my $kw = 'meth' . 'od'; qr/\bmy\s+$kw\b/ },
305             "'my method' lexical method declaration (introduced in Perl 5.42)" ],
306              
307             # ->& invocation of lexical method (Perl 5.42)
308             [ qr/->&\s*\w/,
309             "'->&name' lexical method call operator (introduced in Perl 5.42)" ],
310              
311             );
312              
313             # ======================================================================
314             # BLACKLIST_CONDITIONAL
315             # Entries that are only applied when certain file-level conditions hold.
316             # Each entry: [ $condition_key, qr/pattern/, 'description' ]
317             # $condition_key is checked against %cond hash in _check_source().
318             # ======================================================================
319             # (Currently used for any/all keyword detection.)
320              
321             # ======================================================================
322             # REGEX_BLACKLIST
323             # Patterns checked against the CONTENT of regex literals only.
324             # Each entry: [ qr/pattern/, 'human-readable description' ]
325             # ======================================================================
326             @REGEX_BLACKLIST = (
327              
328             # \x{HHHH} Unicode escape in regex -- Perl 5.6
329             [ qr/\\x\{[0-9A-Fa-f]+\}/,
330             "\\x{} Unicode escape (introduced in Perl 5.6)" ],
331              
332             # \N{name} named character in regex -- Perl 5.6
333             [ qr/\\N\{[^}]+\}/,
334             "\\N{} named character escape (introduced in Perl 5.6)" ],
335              
336             # \p{} \P{} Unicode property escapes -- Perl 5.6
337             [ qr/\\[pP]\{/,
338             "\\p{}\\P{} Unicode property in regex (introduced in Perl 5.6)" ],
339              
340             # \K (keep) -- Perl 5.10
341             [ qr/\\K/,
342             "\\K (keep) in regex (introduced in Perl 5.10)" ],
343              
344             # Named capture (?...) or \k backreference -- Perl 5.10
345             [ qr/(?:\(\?<[A-Za-z_]|\\k<)/,
346             "named capture (?...) or \\k (introduced in Perl 5.10)" ],
347              
348             # Branch reset group (?|...) -- Perl 5.10
349             [ qr/\(\?[|]/,
350             "branch reset (?|...) in regex (introduced in Perl 5.10)" ],
351              
352             # Backtrack control verbs (*FAIL) (*ACCEPT) (*PRUNE) (*SKIP) etc -- Perl 5.10
353             [ qr/\(\*[A-Z]/,
354             "backtrack control verb (*VERB) in regex (introduced in Perl 5.10)" ],
355              
356             # \h \H \v \V \R -- horizontal/vertical whitespace -- Perl 5.10
357             [ qr/\\[hHvVR](?!\w)/,
358             "\\h/\\H/\\v/\\V/\\R regex escape (introduced in Perl 5.10)" ],
359              
360             # Variable-length lookbehind -- experimental Perl 5.30, stable Perl 5.38
361             [ qr/\(\?<[=!][^)]*(?:\{|\*|\+)[^)]*\)/,
362             "variable-length lookbehind in regex (experimental from Perl 5.30, stable in Perl 5.38; use fixed-length)" ],
363              
364             # Possessive quantifiers in regex: a++ a*+ a?+ a{n,m}+ -- Perl 5.10
365             [ qr/(?:[A-Za-z0-9_)\].])(?:[+*?]|\{\d+(?:,\d*)?\})\+/,
366             "possessive quantifier (++/*+/?+/{n,m}+) in regex (introduced in Perl 5.10)" ],
367              
368             # Recursive patterns (?PARNO) (?&name) (?R) -- Perl 5.10
369             [ qr/\(\?(?:[0-9]+|[+-][0-9]+|R|&\w+)\)/,
370             "recursive pattern (?PARNO) / (?&name) / (?R) (introduced in Perl 5.10)" ],
371              
372             # \g{N} relative or absolute backreference -- Perl 5.10
373             [ qr/\\g\{/,
374             "\\g{N} relative/absolute backreference (introduced in Perl 5.10)" ],
375              
376             );
377              
378             # ======================================================================
379             # RAW_BLACKLIST
380             # Abolished: string and regex contents must be freely writable in
381             # Perl 5.005_03 code. Checking runtime string values (PerlIO layer
382             # names, sprintf format flags, etc.) is not the role of a static
383             # syntax checker.
384             #
385             # Former entries and why they were removed:
386             # PerlIO layer (:utf8 etc) in open()/binmode()
387             # -> open() with a PerlIO layer requires 3-argument form, already
388             # caught by the 3-argument open() entry in @BLACKLIST.
389             # binmode() layer string is a runtime value, not syntax.
390             # sprintf/printf "%v" format flag
391             # -> The format string is a runtime value, not a syntax construct.
392             # A string literal containing "%v" is valid Perl 5.005_03 syntax.
393             # ======================================================================
394             @RAW_BLACKLIST = ();
395              
396             # ======================================================================
397             # import() -- called when the caller writes: use Perl500503Syntax::OrDie;
398             # ======================================================================
399             sub import {
400 0     0   0 my $class = shift;
401              
402 0         0 my ($pkg, $file, $line) = caller(0);
403              
404 0 0 0     0 if (defined $file && $file ne '' && $file ne '-e' && $file ne '-') {
      0        
      0        
405 0 0       0 if (-f $file) {
406 0         0 my @violations = _check_file($file);
407 0 0       0 if (@violations) {
408 0         0 die join('', @violations);
409             }
410             }
411             }
412              
413 0         0 _install_runtime_guards();
414 0         0 return;
415             }
416              
417             # ======================================================================
418             # _check_file($path)
419             # Returns a list of violation strings (empty list = no violations).
420             # ======================================================================
421             sub _check_file {
422 10     10   28 my ($file) = @_;
423              
424 10         27 local *_ORDIE_FH;
425 10         916 eval 'CORE::' . 'open(_ORDIE_FH, $file) or die $!';
426 10 50       58 if ($@) {
427 0         0 warn "Perl500503Syntax::OrDie: cannot open '$file': $!\n";
428 0         0 return ();
429             }
430 10         14 my $source = do { local $/; <_ORDIE_FH> };
  10         40  
  10         440  
431 10         107 close _ORDIE_FH;
432              
433 10         37 return _check_source($source, $file);
434             }
435              
436             # ======================================================================
437             # _check_source($source, $filename)
438             # Returns a list of violation strings (empty list = no violations).
439             # ======================================================================
440             sub _check_source {
441 394     394   8157 my ($source, $file) = @_;
442 394 50 33     1350 $file = '(unknown)' unless defined $file && $file ne '';
443              
444 394         523 my @violations;
445              
446 394         662 my ($masked, $regex_bodies_ref) = _mask_source($source);
447 394         18868 my @lines = split(/\n/, $masked, -1);
448 394         17012 my @rawlines = split(/\n/, $source, -1);
449              
450             # ------------------------------------------------------------------
451             # Pre-scan: detect use List::Util qw(... any ... all ...)
452             # If found, the any/all BLOCK keyword check is suppressed because
453             # those names are imported subs, not the new keyword form.
454             # ------------------------------------------------------------------
455 394         517 my $listutil_any_imported = 0;
456 394         518 my $listutil_all_imported = 0;
457             {
458 394         423 my $cm = _mask_comments($source);
  394         654  
459 394 100       3060 if ($cm =~ /\buse\s+List::Util\b([^;]+);/s) {
460 5         24 my $args = $1;
461 5 100       14 $listutil_any_imported = 1 if $args =~ /\bany\b/;
462 5 100       20 $listutil_all_imported = 1 if $args =~ /\ball\b/;
463             }
464             }
465              
466             # Stage 1: BLACKLIST -- scan masked source (comments + strings masked)
467 394         716 for my $entry (@BLACKLIST) {
468 18518         19533 my ($pattern, $desc) = @{$entry};
  18518         25279  
469 18518         19890 my $lineno = 0;
470 18518         20670 for my $mline (@lines) {
471 3413751         2428596 $lineno++;
472 3413751 100       6079938 if ($mline =~ $pattern) {
473 111         379 push @violations,
474             "Perl500503Syntax::OrDie: VIOLATION at $file line $lineno:\n"
475             . " $desc\n";
476             }
477             }
478             }
479              
480             # Stage 1b: any/all BLOCK keyword -- conditional on import pre-scan.
481             # The List::Util keyword form is an expression any { ... } LIST .
482             # A subroutine *definition* sub any { ... } (or a method call such
483             # as $obj->all { ... } ) must NOT be flagged. We therefore inspect
484             # the text immediately preceding each any{ / all{ match and skip it
485             # when it is preceded by sub or by an arrow operator.
486             {
487 394 100       715 my $check_any = $listutil_any_imported ? 0 : 1;
488 394 100       563 my $check_all = $listutil_all_imported ? 0 : 1;
489 394         537 my $desc_any = "'any BLOCK LIST' keyword operator (introduced in Perl 5.42; use List::Util instead)";
490 394         474 my $desc_all = "'all BLOCK LIST' keyword operator (introduced in Perl 5.42; use List::Util instead)";
491 394         432 my $lineno = 0;
492 394         497 for my $mline (@lines) {
493 72633         51041 $lineno++;
494 72633         84189 while ($mline =~ /\b(any|all)(\s*)\{/g) {
495 13         39 my $word = $1;
496 13         50 my $matchlen = length($1) + length($2) + 1;
497 13         25 my $start = pos($mline) - $matchlen;
498 13         27 my $pre = substr($mline, 0, $start);
499 13 100       52 next if $pre =~ /\bsub\s+$/; # sub any { ... } definition
500 9 100       26 next if $pre =~ /->\s*$/; # $obj->any { ... } method
501 8 100 100     38 if ($word eq 'any' && $check_any) {
    100 100        
502 4         22 push @violations,
503             "Perl500503Syntax::OrDie: VIOLATION at $file line $lineno:\n"
504             . " $desc_any\n";
505             }
506             elsif ($word eq 'all' && $check_all) {
507 2         9 push @violations,
508             "Perl500503Syntax::OrDie: VIOLATION at $file line $lineno:\n"
509             . " $desc_all\n";
510             }
511             }
512             }
513             }
514              
515             # Stage 1c: 3-argument open() -- paren-aware top-level comma count.
516             # open(FH, MODE, EXPR) has two commas at the top level of the call;
517             # open(FH, EXPR) has one. Commas nested inside parentheses -- e.g. a
518             # function call used as the second argument, as in
519             # open(FH, '>' . File::Spec->catfile($dir, $file))
520             # -- sit at depth >= 2 and are not counted, so the 2-argument form is
521             # no longer mistaken for the 3-argument form.
522             {
523 394         474 my $kw = 'op' . 'en';
  394         490  
524 394         2408 my $open_re = qr/\b$kw\s*\(/;
525 394         556 my $lineno = 0;
526 394         551 for my $mline (@lines) {
527 72633         51445 $lineno++;
528 72633         114885 while ($mline =~ /$open_re/g) {
529 129         175 my $i = pos($mline); # index just after the '('
530 129         158 my $len = length($mline);
531 129         123 my $depth = 1;
532 129         124 my $commas = 0;
533 129   100     335 while ($i < $len && $depth > 0) {
534 2840         2543 my $c = substr($mline, $i, 1);
535 2840 100 100     5050 if ($c eq '(') {
    100          
    100          
536 3         6 $depth++;
537             }
538             elsif ($c eq ')') {
539 132         99 $depth--;
540             }
541             elsif ($c eq ',' && $depth == 1) {
542 133         815 $commas++;
543             }
544 2840         4692 $i++;
545             }
546 129 100       425 if ($commas >= 2) {
547 6         45 push @violations,
548             "Perl500503Syntax::OrDie: VIOLATION at $file line $lineno:\n"
549             . " 3-argument $kw() (introduced in Perl 5.6; use 2-argument form)\n";
550             }
551             }
552             }
553             }
554              
555             # Possessive quantifiers in code: a++ a*+ a?+ a{n,m}+
556             {
557 394         443 my $lineno2 = 0;
  394         492  
  394         452  
558 394         497 for my $mline (@lines) {
559 72633         49966 $lineno2++;
560 72633         83165 while ($mline =~ /(?<=[a-zA-Z0-9_.)\]])((?:[+*?]|\{\d+(?:,\d*)?\})\+)/g) {
561 393         463 my $quant = $1;
562             # In masked code a "++" is always the postfix-increment
563             # operator (e.g. $pkg::var++, $a[0]++); a genuine possessive
564             # quantifier can only occur inside a regex, which is masked,
565             # and is caught by REGEX_BLACKLIST instead.
566 393 50       705 next if $quant eq '++';
567 0         0 my $qpos = pos($mline) - length($quant) - 1;
568 0         0 my $before = substr($mline, 0, $qpos + 1);
569 0 0       0 next if $before =~ /\$\w+$/;
570 0 0       0 next if $before =~ /\@\w+$/;
571 0         0 push @violations,
572             "Perl500503Syntax::OrDie: VIOLATION at $file line $lineno2:\n"
573             . " possessive quantifier (++/*+/?+/{n,m}+) (introduced in Perl 5.10)\n";
574             }
575             }
576             }
577              
578             # Stage 2: RAW_BLACKLIST -- comment-stripped raw source
579 394         762 my $comment_masked = _mask_comments($source);
580 394         16593 my @cmlines = split(/\n/, $comment_masked, -1);
581 394         750 for my $entry (@RAW_BLACKLIST) {
582 0         0 my ($pattern, $desc) = @{$entry};
  0         0  
583 0         0 my $lineno = 0;
584 0         0 for my $cmline (@cmlines) {
585 0         0 $lineno++;
586 0 0       0 if ($cmline =~ $pattern) {
587 0         0 push @violations,
588             "Perl500503Syntax::OrDie: VIOLATION at $file line $lineno:\n"
589             . " $desc\n";
590             }
591             }
592             }
593              
594             # Stage 3: REGEX_BLACKLIST -- content of regex literals only
595             # Each element of @$regex_bodies_ref is [$body_text, $line_number].
596             # Escaped backslashes are first neutralised so that, for example, the
597             # two-character literal sequence "\\h" (an escaped backslash followed
598             # by the letter h, valid in every Perl) is not mistaken for the \h
599             # horizontal-whitespace escape introduced in Perl 5.10. Backslash
600             # pairs are consumed left to right, leaving a genuine escape introducer
601             # intact (so "\\\h" still exposes a real \h).
602 394         496 my @scan_bodies;
603 394         454 for my $rbody (@{$regex_bodies_ref}) {
  394         637  
604 2274         1646 my ($body_text, $body_line) = @{$rbody};
  2274         3063  
605 2274         2471 (my $scan = $body_text) =~ s/\\\\/\0\0/g;
606 2274         5580 push @scan_bodies, [$scan, $body_line];
607             }
608 394         590 for my $entry (@REGEX_BLACKLIST) {
609 4728         4782 my ($pattern, $desc) = @{$entry};
  4728         6279  
610 4728         6112 for my $rbody (@scan_bodies) {
611 27288         19580 my ($body_text, $body_line) = @{$rbody};
  27288         24538  
612 27288 100       49389 if ($body_text =~ $pattern) {
613 39         119 push @violations,
614             "Perl500503Syntax::OrDie: VIOLATION at $file line $body_line:\n"
615             . " $desc\n";
616             }
617             }
618             }
619              
620 394         17952 return @violations;
621             }
622              
623             # ======================================================================
624             # _mask_comments($source)
625             # ======================================================================
626             sub _mask_comments {
627 788     788   1151 my ($src) = @_;
628 788         1028 my $out = '';
629 788         939 my $pos = 0;
630 788         899 my $len = length($src);
631 788         1269 while ($pos < $len) {
632 6827312         5322363 my $ch = substr($src, $pos, 1);
633 6827312 100       6185912 if ($ch eq '#') {
634 9152         8688 my $end = index($src, "\n", $pos);
635 9152 50       9123 $end = $len if $end == -1;
636 9152         9488 $out .= '#' . ('X' x ($end - $pos - 1));
637 9152         6754 $pos = $end;
638 9152         10005 next;
639             }
640 6818160 100       6068541 if ($ch eq '"') {
641 3998         4549 my ($rep, $len2) = _mask_dquote($src, $pos);
642 3998         6663 $out .= substr($src, $pos, $len2);
643 3998         3103 $pos += $len2;
644 3998         4694 next;
645             }
646 6814162 100       6092886 if ($ch eq "'") {
647 9280         9205 my ($rep, $len2) = _mask_squote($src, $pos);
648 9280         11119 $out .= substr($src, $pos, $len2);
649 9280         6846 $pos += $len2;
650 9280         10310 next;
651             }
652 6804882         4709391 $out .= $ch;
653 6804882         6281828 $pos++;
654             }
655 788         11766 return $out;
656             }
657              
658             # ======================================================================
659             # _mask_source($source)
660             #
661             # Returns ($masked_source, \@regex_bodies).
662             # Each element of @regex_bodies is [$body_text, $start_line_number].
663             # ======================================================================
664             sub _mask_source {
665 463     463   16159 my ($src) = @_;
666 463         601 my $out = '';
667 463         600 my $pos = 0;
668 463         564 my $len = length($src);
669 463         503 my @regex_bodies;
670 463         516 my $in_pod = 0;
671 463         551 my @pending_heredocs;
672              
673 463         808 while ($pos < $len) {
674 998634         897624 my $ch = substr($src, $pos, 1);
675              
676             # -- flush pending heredoc bodies at the end of their line ------
677             # Heredoc bodies follow, in order, the line on which their <<
678             # operators appear. When that line's newline is reached, consume
679             # every queued body so that two or more heredocs sharing one line
680             # (e.g. $_ = <<'A'; $x eq <<'B';) are all masked correctly.
681 998634 100 100     1186464 if ($ch eq "\n" && @pending_heredocs) {
682 43         79 $out .= "\n";
683 43         51 $pos++;
684 43         123 while (@pending_heredocs) {
685 48         101 my $sentinel = shift @pending_heredocs;
686 48         2976 my $remain = substr($src, $pos);
687 48         70 my $bodylen;
688 48 50       979 if ($remain =~ /^\Q$sentinel\E[\t ]*\r?\n/m) {
    0          
689 48         165 $bodylen = length($`) + length($&);
690             }
691             elsif ($remain =~ /^\Q$sentinel\E[\t ]*\r?\z/m) {
692 0         0 $bodylen = length($`) + length($&);
693             }
694             else {
695 0         0 $bodylen = length($remain);
696             }
697 48         175 my $raw = substr($src, $pos, $bodylen);
698 48         11038 (my $masked_body = $raw) =~ s/[^\n]/X/g;
699 48         96 $out .= $masked_body;
700 48         114 $pos += $bodylen;
701             }
702 43         81 next;
703             }
704              
705             # -- __END__ / __DATA__ : mask everything after this line ------
706 998591 100 100     1039714 if ($ch eq '_' && ($pos == 0 || substr($src, $pos - 1, 1) eq "\n")) {
      100        
707 34 100 100     157 if (substr($src, $pos, 7) eq '__END__' || substr($src, $pos, 8) eq '__DATA__') {
708 23         378 my $rest = substr($src, $pos);
709 23         650756 $rest =~ s/[^\n]/X/g;
710 23         2722 $out .= $rest;
711 23         60 last;
712             }
713             }
714              
715             # -- POD block -------------------------------------------------
716 998568 100 100     1571652 if (!$in_pod && $ch eq '=' && ($pos == 0 || substr($src, $pos - 1, 1) eq "\n")) {
      66        
      100        
717 9         18 my $nxt = substr($src, $pos, 12);
718 9 100       60 if ($nxt =~ /^=(head|over|item|back|pod|begin|end|for|encoding)\b/) {
719 5         9 $in_pod = 1;
720             }
721             }
722 998568 100       997949 if ($in_pod) {
723 313443 100 66     299205 if ($ch eq '=' && ($pos == 0 || substr($src, $pos - 1, 1) eq "\n")) {
      100        
724 1079 100       1335 if (substr($src, $pos, 4) eq '=cut') {
725 5         29 my $end2 = index($src, "\n", $pos);
726 5 50       65 $end2 = $len if $end2 == -1;
727 5         38 $out .= 'X' x ($end2 - $pos);
728 5         7 $pos = $end2;
729 5         8 $in_pod = 0;
730 5         9 next;
731             }
732             }
733 313438 100       278615 $out .= ($ch eq "\n") ? "\n" : 'X';
734 313438         224802 $pos++;
735 313438         326978 next;
736             }
737              
738             # -- single-line comment # ... --------------------------------
739 685125 100       708378 if ($ch eq '#') {
740 6236         8283 my $end = index($src, "\n", $pos);
741 6236 100       7366 $end = $len if $end == -1;
742 6236         12600 $out .= '#' . ('X' x ($end - $pos - 1));
743 6236         5363 $pos = $end;
744 6236         8566 next;
745             }
746              
747             # -- double-quoted string "..." --------------------------------
748 678889 100       730257 if ($ch eq '"') {
749 2104         3925 my ($rep, $len2) = _mask_dquote($src, $pos);
750 2104         3421 $out .= $rep;
751 2104         2062 $pos += $len2;
752 2104         2827 next;
753             }
754              
755             # -- single-quoted string '...' --------------------------------
756 676785 100       679410 if ($ch eq "'") {
757             # Old-style package separator: &jcode'tr is &jcode::tr .
758             # When a sigil-led identifier ($pkg'var, &pkg'sub, *pkg'glob,
759             # ...) abuts the apostrophe and a word character follows, the
760             # apostrophe separates package components and is NOT a string
761             # delimiter. Emit it verbatim so the following name is not read
762             # as the start of a new string.
763 4996 50       17448 my $nextc = ($pos + 1 < $len) ? substr($src, $pos + 1, 1) : '';
764 4996 100 66     767162 if ($out =~ /[\$\@\%\&\*]\w*\z/ && $nextc =~ /[A-Za-z_]/) {
765             # Emit the apostrophe together with the whole following
766             # identifier, so that a quote-like name such as the tr in
767             # &jcode'tr is not subsequently read as the tr/// operator
768             # (the operator detector keys off the preceding character).
769 5         10 my $rest = substr($src, $pos);
770 5         35 $rest =~ /\A('(\w+))/;
771 5         12 $out .= $1;
772 5         8 $pos += length($1);
773 5         7 next;
774             }
775 4991         11371 my ($rep, $len2) = _mask_squote($src, $pos);
776 4991         6891 $out .= $rep;
777 4991         4863 $pos += $len2;
778 4991         7060 next;
779             }
780              
781             # -- backtick string `...` -------------------------------------
782 671789 100       680606 if ($ch eq '`') {
783 9         47 my ($rep, $len2) = _mask_delimited($src, $pos, '`', '`');
784 9         17 $out .= $rep;
785 9         14 $pos += $len2;
786 9         17 next;
787             }
788              
789             # -- q{} qq{} qw{} qx{} (but NOT qr) ---------------------------
790 671780 100 66     764634 if ($ch eq 'q' && $pos + 1 < $len) {
791 2577         3133 my $nxt = substr($src, $pos + 1, 1);
792 2577 100 66     7927 if ($nxt =~ /^[qwx]$/ && $pos + 2 < $len) {
    100          
793 441         643 my $d = substr($src, $pos + 2, 1);
794 441 100       1261 if ($d =~ /\S/) {
795 394         933 my $cl = _matching_delim($d);
796 394         942 my ($rep, $len2) = _mask_delimited($src, $pos + 2, $d, $cl);
797 394         1125 $out .= substr($src, $pos, 2) . $rep;
798 394         420 $pos += 2 + $len2;
799 394         663 next;
800             }
801             }
802             elsif ($nxt =~ /^[\{\(\[\/\|!<]$/) {
803 42         118 my $cl = _matching_delim($nxt);
804 42         136 my ($rep, $len2) = _mask_delimited($src, $pos + 1, $nxt, $cl);
805 42         78 $out .= 'q' . $rep;
806 42         51 $pos += 1 + $len2;
807 42         70 next;
808             }
809             }
810              
811             # -- heredoc start <
812             # Only the operator token (<< plus the sentinel) is masked here;
813             # the body is consumed when the current line's newline is reached
814             # (see the flush handler at the top of the loop). This lets two or
815             # more heredocs that share a single line all be masked in order.
816 671344 100 66     755027 if ($ch eq '<' && $pos + 1 < $len && substr($src, $pos + 1, 1) eq '<') {
      100        
817 50         1473 my $rest = substr($src, $pos);
818 50 100       355 if ($rest =~ /\A(<<\s*([\"']?)(\w+)\2)/) {
819 48         163 my $token = $1;
820 48         145 my $sentinel = $3;
821 48         129 push @pending_heredocs, $sentinel;
822 48         273 $out .= 'X' x length($token);
823 48         60 $pos += length($token);
824 48         96 next;
825             }
826             }
827              
828             # -- Regex/subst/transliteration operators ----------------------
829             # A quote-like operator (q/qq/qw/qx/qr/m/s/tr/y) is not recognised
830             # when the immediately preceding character is a typeglob sigil '*',
831             # a subroutine sigil '&', or a package-separator colon ':'; there
832             # the following word is a symbol name, not an operator -- e.g. the
833             # typeglob *s in local(*s, $n) = @_; or the function name tr in a
834             # package-qualified call such as mb::tr($_, 'A', '1').
835 671296 100       1933307 if ($out !~ /[\w\$\@\%*&:]\z/) {
836 376412         6547719 my $rest = substr($src, $pos);
837 376412 100       613235 if ($rest =~ /\A(qr|m(?!y(?:\b|\s*=>))|s(?!ub(?:\b|\s*\{))|tr|y)\s*([^\w\s#])/s) {
838 1093         2549 my $op = $1;
839 1093         2237 my $delim = $2;
840 1093         2920 my $close = _matching_delim($delim);
841 1093         1952 my $ws = '';
842 1093 50       61410 if (substr($rest, length($op)) =~ /\A(\s*)([^\w\s#])/s) {
843 1093         1671 $ws = $1;
844 1093         1336 $delim = $2;
845 1093         1534 $close = _matching_delim($delim);
846             }
847 1093         1742 my $op_len = length($op) + length($ws) + 1;
848              
849             # Compute line number of the start of this regex
850 1093         2508 my $cur_line = 1 + _count_newlines($src, 0, $pos);
851              
852 1093         2180 $pos += $op_len;
853              
854 1093         3842 my ($body1, $blen1) = _mask_delimited_raw($src, $pos, $delim, $close);
855 1093         1843 $pos += $blen1;
856 1093         2446 my $pat1 = substr($body1, 0, length($body1) - 1);
857 1093 100 66     7331 push @regex_bodies, [$pat1, $cur_line]
858             unless $op eq 'tr' || $op eq 'y';
859              
860 1093         1594 my $repl_len = 0;
861 1093 100 100     3436 if ($op eq 's' || $op eq 'tr' || $op eq 'y') {
      66        
862 786         2287 my $delim2 = $delim;
863 786         1093 my $close2 = $close;
864 786 100       1579 if ($delim ne $close) {
865 36         1449 my $gap = substr($src, $pos);
866 36 50       243 if ($gap =~ /\A(\s*)([^\w\s])/s) {
867 36         112 $pos += length($1) + 1;
868 36         55 $repl_len += length($1) + 1;
869 36         69 $delim2 = $2;
870 36         93 $close2 = _matching_delim($delim2);
871             }
872             }
873 786         1488 my ($body2, $blen2) = _mask_delimited_raw($src, $pos, $delim2, $close2);
874 786         1075 $pos += $blen2;
875 786         3176 $repl_len += $blen2;
876             }
877              
878 1093         1579 my $flags = '';
879 1093 50       111072 if (substr($src, $pos) =~ /\A([msixpgeodualncrs]*)/s) {
880 1093         3984 $flags = $1;
881 1093         1499 $pos += length($flags);
882             }
883              
884 1093         1580 my $marker = '';
885 1093 100       3053 if ($flags =~ /r/) {
886 4 100 33     13 if ($op eq 's') { $marker = '__SR__'; }
  3 50       35  
887 1         2 elsif ($op eq 'tr' || $op eq 'y') { $marker = '__TRR__'; }
888             }
889              
890 1093         2375 my $raw_body = substr($src,
891             $pos - length($flags) - $repl_len - $blen1,
892             $blen1 + $repl_len);
893 1093         18587 (my $mbody = $ws . $raw_body) =~ s/[^\n]/X/g;
894 1093         2699 $out .= $op . $mbody . $flags . $marker;
895 1093         3886 next;
896             }
897             }
898              
899             # -- bare /regex/ -- only in regex context ----------------------
900             # The keyword alternation also lists the regex-first-argument list
901             # operators split/grep/map: a '/' immediately following one of these
902             # barewords can only begin a pattern (e.g. split //, $str), never a
903             # division, so it is treated as a regex. This keeps the empty
904             # pattern // in split //, ... from being misread as the Perl 5.10
905             # defined-or operator.
906 670203 100 100     12794644 if ($ch eq '/' &&
907             $out =~ /(?:=~|!~|[=(,\{\[!&|;]|\b(?:if|while|unless|until|not|and|or|return|split|grep|map))\s*\z/s)
908             {
909 1522         6761 my $cur_line = 1 + _count_newlines($src, 0, $pos);
910 1522         2929 $pos++;
911 1522         6021 my ($body, $blen) = _mask_delimited_raw($src, $pos, '/', '/');
912 1522         2315 $pos += $blen;
913 1522         6212 my $pat = substr($body, 0, length($body) - 1);
914 1522         7131 push @regex_bodies, [$pat, $cur_line];
915 1522         2183 my $flags = '';
916 1522 50       93934 if (substr($src, $pos) =~ /\A([msixpgeodualn]*)/s) {
917 1522         6277 $flags = $1;
918 1522         2354 $pos += length($flags);
919             }
920 1522         22313 (my $mtext = '/' . $body) =~ s/[^\n]/X/g;
921 1522         50126 $out .= $mtext . $flags;
922 1522         5633 next;
923             }
924              
925 668681         2014511 $out .= $ch;
926 668681         749185 $pos++;
927             }
928              
929 463         1980 return ($out, \@regex_bodies);
930             }
931              
932             # ======================================================================
933             # _count_newlines($str, $from, $to)
934             # Count newline characters in $str between positions $from and $to-1.
935             # Used to compute line numbers from byte offsets.
936             # ======================================================================
937             sub _count_newlines {
938 2615     2615   5972 my ($str, $from, $to) = @_;
939 2615         2826 my $count = 0;
940 2615         2843 my $p = $from;
941 2615         4644 while ($p < $to) {
942 134043015 100       129248151 $count++ if substr($str, $p, 1) eq "\n";
943 134043015         121593590 $p++;
944             }
945 2615         17062 return $count;
946             }
947              
948             # _mask_delimited_raw($src, $start, $open, $close)
949             sub _mask_delimited_raw {
950 3401     3401   11293 my ($src, $start, $open, $close) = @_;
951 3401         3901 my $pos = $start;
952 3401         4844 my $slen = length($src);
953 3401         4125 my $depth = 1;
954 3401         5502 my $paired = ($open ne $close);
955 3401         5422 my $out = '';
956              
957 3401         6273 while ($pos < $slen) {
958 65934         53499 my $c = substr($src, $pos, 1);
959 65934         50520 $out .= $c;
960 65934         45303 $pos++;
961 65934 100 66     80943 if ($c eq '\\' && $pos < $slen) {
962 7578         7805 $out .= substr($src, $pos, 1);
963 7578         5841 $pos++;
964 7578         10171 next;
965             }
966 58356 100       52963 if ($paired) {
967 7805 100       7387 $depth++ if $c eq $open;
968 7805 100       7240 $depth-- if $c eq $close;
969 7805 100       9394 last if $depth == 0;
970             }
971             else {
972 50551 100       64743 last if $c eq $close;
973             }
974             }
975 3401         14435 return ($out, $pos - $start);
976             }
977              
978             # ----------------------------------------------------------------------
979             sub _mask_dquote {
980 6102     6102   7480 my ($src, $start) = @_;
981 6102         5482 my $pos = $start + 1;
982 6102         5727 my $len = length($src);
983 6102         5491 my $out = '"';
984 6102         7630 while ($pos < $len) {
985 742520         579044 my $ch = substr($src, $pos, 1);
986 742520 100       845721 if ($ch eq '\\') {
    100          
    100          
987 26884 50       27000 my $nx = ($pos + 1 < $len) ? substr($src, $pos + 1, 1) : '';
988 26884 50       24841 $out .= ($nx eq "\n") ? "\\\n" : 'XX';
989 26884         26855 $pos += 2;
990             }
991             elsif ($ch eq '"') {
992 6092         5185 $out .= '"'; $pos++; last;
  6092         5195  
  6092         6111  
993             }
994             elsif ($ch eq "\n") {
995 12181         8767 $out .= "\n"; $pos++;
  12181         12021  
996             }
997             else {
998 697363         487186 $out .= 'X'; $pos++;
  697363         658505  
999             }
1000             }
1001 6102         15333 return ($out, $pos - $start);
1002             }
1003              
1004             sub _mask_squote {
1005 14271     14271   16875 my ($src, $start) = @_;
1006 14271         12897 my $pos = $start + 1;
1007 14271         11977 my $len = length($src);
1008 14271         12446 my $out = "'";
1009 14271         16833 while ($pos < $len) {
1010 1154563         901416 my $ch = substr($src, $pos, 1);
1011 1154563 100       1343438 if ($ch eq '\\') {
    100          
    100          
1012 6482 50       7059 my $nx = ($pos + 1 < $len) ? substr($src, $pos + 1, 1) : '';
1013 6482 100 100     11235 $out .= ($nx eq "'" || $nx eq '\\') ? 'XX' : ('\\' . $nx);
1014 6482         7851 $pos += 2;
1015             }
1016             elsif ($ch eq "'") {
1017 14255         11556 $out .= "'"; $pos++; last;
  14255         11658  
  14255         13193  
1018             }
1019             elsif ($ch eq "\n") {
1020 26441         18572 $out .= "\n"; $pos++;
  26441         25055  
1021             }
1022             else {
1023 1107385         780114 $out .= 'X'; $pos++;
  1107385         1047195  
1024             }
1025             }
1026 14271         26455 return ($out, $pos - $start);
1027             }
1028              
1029             sub _mask_delimited {
1030 445     445   784 my ($src, $start, $open, $close) = @_;
1031 445         516 my $pos = $start + 1;
1032 445         564 my $len = length($src);
1033 445         393 my $depth = 1;
1034 445         599 my $out = $open;
1035 445   100     1247 while ($pos < $len && $depth > 0) {
1036 20956         17265 my $ch = substr($src, $pos, 1);
1037 20956 100 100     41161 if ($ch eq '\\') {
    100          
    100          
    100          
1038 827         649 $out .= 'XX'; $pos += 2;
  827         1318  
1039             }
1040             elsif ($open ne $close && $ch eq $open) {
1041 48         55 $depth++; $out .= $open; $pos++;
  48         44  
  48         80  
1042             }
1043             elsif ($ch eq $close) {
1044 493         462 $depth--; $out .= $close; $pos++;
  493         433  
  493         970  
1045             }
1046             elsif ($ch eq "\n") {
1047 290         248 $out .= "\n"; $pos++;
  290         473  
1048             }
1049             else {
1050 19298         14447 $out .= 'X'; $pos++;
  19298         30834  
1051             }
1052             }
1053 445         1295 return ($out, $pos - $start);
1054             }
1055              
1056             sub _matching_delim {
1057 2658     2658   4075 my ($o) = @_;
1058 2658 100       8767 return '{' eq $o ? '}' : '(' eq $o ? ')' :
    100          
    100          
    100          
1059             '[' eq $o ? ']' : '<' eq $o ? '>' : $o;
1060             }
1061              
1062             # ======================================================================
1063             # _install_runtime_guards()
1064             # ======================================================================
1065             sub _install_runtime_guards {
1066 9 100   9   138 unless ($_OPEN_GUARDED) {
1067 7         29 $_OPEN_GUARDED = 1;
1068 27     20   336 no strict 'refs';
  27         164  
  27         1556  
1069 7         13 *{'CORE::GLOBAL::open'} = \&_guarded_open;
  7         75  
1070             }
1071 9 100       33 unless ($_MKDIR_GUARDED) {
1072 7         13 $_MKDIR_GUARDED = 1;
1073 27     20   170 no strict 'refs';
  20         26  
  20         3296  
1074 7         11 *{'CORE::GLOBAL::mkdir'} = \&_guarded_mkdir;
  7         32  
1075             }
1076 9         5680 return;
1077             }
1078              
1079             sub _guarded_open {
1080 19 100   19   658960 if (@_ >= 3) {
1081 13         82 my ($p, $f, $l) = caller(0);
1082 13         162 die "Perl500503Syntax::OrDie: RUNTIME VIOLATION at $f line $l:\n"
1083             . " 3-argument open() is not supported in Perl 5.005_03\n";
1084             }
1085 19 100 66     31 if (@_ >= 2 && ref $_[1]) {
1086 2         10 my ($p, $f, $l) = caller(0);
1087 2         14 die "Perl500503Syntax::OrDie: RUNTIME VIOLATION at $f line $l:\n"
1088             . " open() with a reference as mode is not supported"
1089             . " in Perl 5.005_03\n";
1090             }
1091 20     20   100 no strict 'refs';
  20         38  
  20         21815  
1092 19 50       8 if (@_ == 1) {
1093 0         0 my $ofn = 'CORE::' . 'open';
1094 0         0 return &{$ofn}($_[0]);
  0         0  
1095             }
1096 19         7250 return CORE::open($_[0], $_[1]);
1097             }
1098              
1099             sub _guarded_mkdir {
1100 6 100   6   312 if (@_ < 2) {
1101 3         10 my ($p, $f, $l) = caller(0);
1102 3         14 die "Perl500503Syntax::OrDie: RUNTIME VIOLATION at $f line $l:\n"
1103             . " mkdir() requires an explicit mode argument in Perl 5.005_03\n";
1104             }
1105 6         354 return CORE::mkdir($_[0], $_[1]);
1106             }
1107              
1108             # ======================================================================
1109             # Public API
1110             # ======================================================================
1111             sub check_file {
1112 3     3 1 837 my $first = shift;
1113 3 100 66     15 my $path = ($first eq 'Perl500503Syntax::OrDie' || ref $first) ? shift : $first;
1114 3         9 my @v = _check_file($path);
1115 3 100       7 if (@v) {
1116 1         7 die join('', @v);
1117             }
1118 2         6 return;
1119             }
1120              
1121             sub check_source {
1122 37     37 1 288968 my $first = shift;
1123 37         86 my ($src, $label);
1124 37 100 66     265 if ($first eq 'Perl500503Syntax::OrDie' || ref $first) {
1125 2         3 ($src, $label) = @_;
1126             }
1127             else {
1128 35         82 ($src, $label) = ($first, shift);
1129             }
1130 37         110 my @violations = _check_source($src, $label);
1131 37         192 return @violations;
1132             }
1133              
1134             # ======================================================================
1135             # Command-line interface
1136             # ======================================================================
1137             sub _run_as_command {
1138 7 50 66 7   60 if (!@ARGV || ($ARGV[0] eq '--help') || ($ARGV[0] eq '-h')) {
      66        
1139 1         4 print "Usage: perl Perl500503Syntax/OrDie.pm [ ...]\n";
1140 1         1 print " perl Perl500503Syntax/OrDie.pm -\n";
1141 1         4 print "\n";
1142 1         2 print " Check each for constructs not available in Perl 5.005_03.\n";
1143 1         1 print " Use '-' to read from standard input.\n";
1144 1         1 print " Violations are reported with file name and line number.\n";
1145 1         2 print "\n";
1146 1         1 print "Example:\n";
1147 1         1 print " perl lib/Perl500503Syntax/OrDie.pm myscript.pl\n";
1148 1         0 exit 0;
1149             }
1150              
1151 6         8 my $ok = 0;
1152 6         17 my $fail = 0;
1153 6         32 my $sep = '-' x 60;
1154 6         17 for my $path (@ARGV) {
1155 7         36 print "$sep\n";
1156 7         12 my @violations;
1157 7 50       202 if ($path eq '-') {
    50          
1158 0         0 print "Checking: (standard input)\n";
1159 0         0 my $source = do { local $/; };
  0         0  
  0         0  
1160 0         0 @violations = _check_source($source, '(stdin)');
1161             }
1162             elsif (!-f $path) {
1163 0         0 print "ERROR: file not found: $path\n";
1164 0         0 $fail++;
1165 0         0 next;
1166             }
1167             else {
1168 7         34 print "Checking: $path\n";
1169 7         31 @violations = _check_file($path);
1170             }
1171 7 100       26 if (@violations) {
1172 2         8 print join('', @violations);
1173 2         5 $fail++;
1174             }
1175             else {
1176 5         90 print " -> No violations found.\n";
1177 5         56 $ok++;
1178             }
1179             }
1180 6         16 print "$sep\n";
1181 6         11 my $total = $ok + $fail;
1182 6         18 print "Results: $ok/$total passed";
1183 6 100       152 if ($fail) {
1184 2         3 print ", $fail failed";
1185             }
1186 6         11 print "\n";
1187 6 100         exit($fail ? 1 : 0);
1188             }
1189              
1190              
1191             _run_as_command() if $0 eq __FILE__;
1192              
1193             1;
1194              
1195             __END__