File Coverage

blib/lib/Perl/Critic/Policy/Plicease/ProhibitUnicodeDigitInRegexp.pm
Criterion Covered Total %
statement 37 38 97.3
branch 8 8 100.0
condition 3 3 100.0
subroutine 12 13 92.3
pod 4 5 80.0
total 64 67 95.5


line stmt bran cond sub pod time code
1             package Perl::Critic::Policy::Plicease::ProhibitUnicodeDigitInRegexp;
2              
3 5     5   3466 use strict;
  5         9  
  5         176  
4 5     5   21 use warnings;
  5         9  
  5         258  
5 5     5   71 use 5.010001;
  5         14  
6 5     5   28 use Perl::Critic::Utils qw( $SEVERITY_LOW );
  5         22  
  5         566  
7 5     5   2159 use PPIx::Regexp;
  5         998818  
  5         218  
8 5     5   41 use base qw( Perl::Critic::Policy );
  5         25  
  5         749  
9              
10             # ABSTRACT: Prohibit non-ASCII \d in regular expressions
11             our $VERSION = '0.10'; # VERSION
12              
13              
14 5     5   33 use constant DESC => 'Using non-ASCII \d';
  5         9  
  5         449  
15 5         1202 use constant EXPL => 'The character class \d (also the POSIX character class [:digit:]) matches non-ASCII unicode digits. ' .
16 5     5   29 'Use [0-9] or the /a modifier (Perl 5.14+) instead.';
  5         47  
17              
18 15     15 0 2108173 sub supported_parameters { () }
19 14     14 1 180 sub default_severity { $SEVERITY_LOW }
20 0     0 1 0 sub default_themes { () }
21 15     15 1 87864 sub applies_to { return ('PPI::Token::Regexp::Match',
22             'PPI::Token::Regexp::Substitute',
23             'PPI::Token::QuoteLike::Regexp') }
24              
25             sub violates
26             {
27 18     18 1 1441 my($self, $elem) = @_;
28              
29 18         103 my %mods = $elem->get_modifiers();
30              
31             # if the whole expression uses /a then we are in the clear.
32 18 100       316 return if $mods{'a'};
33              
34             # if the user has explicitly specified the /u modifier then
35             # we should assume that they want unicode digits. Done.
36 17 100       42 return if $mods{'u'};
37              
38 16         49 my $re = PPIx::Regexp->new($elem->content);
39 16         41438 my $ccs = $re->find('PPIx::Regexp::Token::CharClass');
40 16 100       2673 return unless $ccs;
41 15         33 foreach my $cc (@$ccs)
42             {
43 15 100 100     65 next if ($cc->content ne '\\d' && $cc->content ne '[:digit:]');
44 14         168 return $self->violation( DESC, EXPL, $elem );
45             }
46              
47 1         13 return;
48             }
49              
50             1;
51              
52             __END__
53              
54             =pod
55              
56             =encoding UTF-8
57              
58             =head1 NAME
59              
60             Perl::Critic::Policy::Plicease::ProhibitUnicodeDigitInRegexp - Prohibit non-ASCII \d in regular expressions
61              
62             =head1 VERSION
63              
64             version 0.10
65              
66             =head1 SYNOPSIS
67              
68             perlcriticrc:
69              
70             [Plicease::ProhibitUnicodeDigitInRegexp]
71              
72             code:
73              
74             /\d/; # not ok
75             /[0-9]/; # ok
76              
77             =head1 DESCRIPTION
78              
79             The character class C<\d> (also the POSIX character class C<[:digit:]>) in a regular expression matches all unicode digit character, which
80             might not be what you expect if you are testing if a string can be used as a number in Perl.
81             Instead use either C<[0-9]>, or if you are on Perl 5.14 or better you can use the C</a>
82             modifier. This policy allows C<\d> in expressions with an explicit C</u> modifier (normally
83             on by default), as it indicates that the code is expecting Unicode semantics, including Unicode
84             digits.
85              
86             /\d/; # not ok
87             /[[:digit:]]/; # not ok
88             /\d/a; # ok
89             /\d/u; # ok
90             /[[:digit:]]/a; # ok
91             /[[:digit:]]/u; # ok
92             /[0-9]/; # ok
93              
94             =head1 AFFILIATION
95              
96             None.
97              
98             =head1 CONFIGURATION
99              
100             This policy is not configurable except for the standard options.
101              
102             =head1 CAVEATS
103              
104             This is not a general policy, and should not be applied toward all applications without
105             some thought. This is frequently true for L<Perl::Critic> policies, but especially so
106             for this policy.
107              
108             In the general the ability to match against unicode digits is a useful ability, and doesn't
109             constitute bad code. On the other hand, some applications don't ever need to match non-ASCII
110             digit characters, and incorrectly rely on C<\d> to validate as a number as Perl understands
111             it (and Perl understands non-ASCII digits as zero regardless of what they mean in their
112             respective languages).
113              
114             This policy doesn't take into account using the L<re> pragma.
115              
116             use re '/a';
117            
118             /\d/; # (still) not ok
119              
120             =head1 AUTHOR
121              
122             Author: Graham Ollis E<lt>plicease@cpan.orgE<gt>
123              
124             Contributors:
125              
126             Ville Skyttä (SCOP)
127              
128             Yoshikazu Sawa (yoshikazusawa)
129              
130             Christian Walde (wchristian, MITHALDU)
131              
132             =head1 COPYRIGHT AND LICENSE
133              
134             This software is copyright (c) 2019-2024 by Graham Ollis.
135              
136             This is free software; you can redistribute it and/or modify it under
137             the same terms as the Perl 5 programming language system itself.
138              
139             =cut