File Coverage

blib/lib/PPIx/Regexp/Constant.pm
Criterion Covered Total %
statement 142 145 97.9
branch 6 10 60.0
condition 3 6 50.0
subroutine 51 54 94.4
pod n/a
total 202 215 93.9


line stmt bran cond sub pod time code
1             package PPIx::Regexp::Constant;
2              
3             # Yes, I know this is horrible style. But I need a separate package for
4             # overloading to work, and I do *NOT* want stray files lying around if I
5             # end up ditching this mess.
6             package PPIx::Regexp::Constant::Inf; ## no critic (ProhibitMultiplePackages)
7              
8 10     10   10153 use 5.006;
  10         29  
9              
10 10     10   37 use strict;
  10         12  
  10         186  
11 10     10   39 use warnings;
  10         20  
  10         329  
12              
13 10     10   35 use Carp;
  10         18  
  10         665  
14 10     10   47 use Scalar::Util qw{ refaddr };
  10         27  
  10         1385  
15              
16             our $VERSION = '0.091';
17              
18             use overload
19             # Arithmetic
20 10         111 '+' => \&__preserve,
21             '-' => \&__subtract,
22             '*' => \&__preserve,
23             neg => \&__err_neg_inf,
24             # Comparison
25             '<=>' => \&__space_ship,
26             cmp => \&__cmp,
27             # Conversion
28             '""' => \&__stringify,
29             '0+' => \&__stringify, # For looks_like_number
30             bool => \&__bool,
31             # Catch all
32             nomethod => \&__bug_unimplemented,
33 10     10   3315 ;
  10         8619  
34              
35             {
36             my $inf;
37              
38             # Scheduled block because __pos_inf() can be called at compile time.
39             BEGIN {
40 10     10   4393 $inf = bless( \( my $x = 1 ), __PACKAGE__ );
41             }
42              
43 1     1   77098 sub __pos_inf { return $inf }
44              
45 5   66 5   39 sub __is_inf { return ref $_[0] && refaddr( $_[0] ) == refaddr( $inf ) }
46             }
47              
48 1     1   179916 sub __bool { return 1 }
49              
50             sub __bug_unimplemented {
51 0     0   0 confess "Bug - Operation '$_[3]' is unimplemented";
52             }
53              
54             sub __cmp {
55 1     1   943 my ( $x, $y, $swap ) = @_;
56 1 50       6 $swap
57             and return "$y" cmp "$x";
58 1         5 return "$x" cmp "$y";
59             }
60              
61             sub __err_nan {
62 0     0   0 croak 'NaN not supported';
63             }
64              
65             sub __err_neg_inf {
66 0     0   0 croak 'Negative infinity not supported';
67             }
68              
69             # Any operation that does not change the value
70 1     1   14 sub __preserve { return $_[0] };
71              
72             sub __space_ship {
73 4     4   2823 my ( undef, $y, $swap ) = @_; # We don't need our invocant
74             # Infinity is equal to itself
75 4 100       12 __is_inf( $y )
76             and return 0;
77             # Any number is less than infinity
78 1 50       6 $swap
79             and return -1;
80             # Infinity is greater than any number
81 1         4 return 1;
82             }
83              
84             sub __stringify {
85 1     1   7 return 'Inf';
86             }
87              
88             sub __subtract {
89 1     1   3 my ( $x, $y, $swap ) = @_;
90 1 50       6 __is_inf( $y )
91             and __err_nan(); # Which croaks
92 1 50       2 $swap
93             and __err_neg_inf(); # Which croaks
94 1         1094 return $x;
95             }
96              
97             1;
98              
99             package PPIx::Regexp::Constant; ## no critic (ProhibitMultiplePackages)
100              
101 10     10   63 use strict;
  10         14  
  10         191  
102 10     10   36 use warnings;
  10         10  
  10         408  
103              
104 10     10   34 use base qw{ Exporter };
  10         11  
  10         2343  
105              
106             # CAVEAT: do not include any other PPIx-Regexp modules in this one, or
107             # you will end up with a circular dependency.
108              
109             our $VERSION = '0.085_04';
110              
111             our @EXPORT_OK = qw{
112             ARRAY_REF
113             CODE_REF
114             COOKIE_CLASS
115             COOKIE_LOOKAROUND_ASSERTION
116             COOKIE_QUANT
117             COOKIE_QUOTE
118             COOKIE_REGEX_SET
119             FALSE
120             HASH_REF
121             INFINITY
122             LITERAL_LEFT_CURLY_ALLOWED
123             LITERAL_LEFT_CURLY_REMOVED_PHASE_1
124             LITERAL_LEFT_CURLY_REMOVED_PHASE_2
125             LITERAL_LEFT_CURLY_REMOVED_PHASE_3
126             LOCATION_LINE
127             LOCATION_CHARACTER
128             LOCATION_COLUMN
129             LOCATION_LOGICAL_LINE
130             LOCATION_LOGICAL_FILE
131             MINIMUM_PERL
132             MODIFIER_GROUP_MATCH_SEMANTICS
133             MSG_LOOK_BEHIND_TOO_LONG
134             MSG_PROHIBITED_BY_STRICT
135             NODE_UNKNOWN
136             RE_CAPTURE_NAME
137             REGEXP_REF
138             SCALAR_REF
139             STRUCTURE_UNKNOWN
140             SUFFICIENT_UTF8_SUPPORT_FOR_WEIRD_DELIMITERS
141             TOKEN_LITERAL
142             TOKEN_UNKNOWN
143             TRUE
144             VARIABLE_LENGTH_LOOK_BEHIND_INTRODUCED
145             @CARP_NOT
146             };
147              
148             our @CARP_NOT = qw{
149             PPIx::Regexp
150             PPIx::Regexp::Constant
151             PPIx::Regexp::Dumper
152             PPIx::Regexp::Element
153             PPIx::Regexp::Lexer
154             PPIx::Regexp::Node
155             PPIx::Regexp::Node::Range
156             PPIx::Regexp::Node::Unknown
157             PPIx::Regexp::Structure
158             PPIx::Regexp::Structure::Assertion
159             PPIx::Regexp::Structure::Atomic_Script_Run
160             PPIx::Regexp::Structure::BranchReset
161             PPIx::Regexp::Structure::Capture
162             PPIx::Regexp::Structure::CharClass
163             PPIx::Regexp::Structure::Code
164             PPIx::Regexp::Structure::Main
165             PPIx::Regexp::Structure::Modifier
166             PPIx::Regexp::Structure::NamedCapture
167             PPIx::Regexp::Structure::Quantifier
168             PPIx::Regexp::Structure::RegexSet
169             PPIx::Regexp::Structure::Regexp
170             PPIx::Regexp::Structure::Replacement
171             PPIx::Regexp::Structure::Script_Run
172             PPIx::Regexp::Structure::Subexpression
173             PPIx::Regexp::Structure::Switch
174             PPIx::Regexp::Structure::Unknown
175             PPIx::Regexp::Support
176             PPIx::Regexp::Token
177             PPIx::Regexp::Token::Assertion
178             PPIx::Regexp::Token::Backreference
179             PPIx::Regexp::Token::Backtrack
180             PPIx::Regexp::Token::CharClass
181             PPIx::Regexp::Token::CharClass::POSIX
182             PPIx::Regexp::Token::CharClass::POSIX::Unknown
183             PPIx::Regexp::Token::CharClass::Simple
184             PPIx::Regexp::Token::Code
185             PPIx::Regexp::Token::Comment
186             PPIx::Regexp::Token::Condition
187             PPIx::Regexp::Token::Control
188             PPIx::Regexp::Token::Delimiter
189             PPIx::Regexp::Token::Greediness
190             PPIx::Regexp::Token::GroupType
191             PPIx::Regexp::Token::GroupType::Assertion
192             PPIx::Regexp::Token::GroupType::Atomic_Script_Run
193             PPIx::Regexp::Token::GroupType::BranchReset
194             PPIx::Regexp::Token::GroupType::Code
195             PPIx::Regexp::Token::GroupType::Modifier
196             PPIx::Regexp::Token::GroupType::NamedCapture
197             PPIx::Regexp::Token::GroupType::Script_Run
198             PPIx::Regexp::Token::GroupType::Subexpression
199             PPIx::Regexp::Token::GroupType::Switch
200             PPIx::Regexp::Token::Interpolation
201             PPIx::Regexp::Token::Literal
202             PPIx::Regexp::Token::Modifier
203             PPIx::Regexp::Token::NoOp
204             PPIx::Regexp::Token::Operator
205             PPIx::Regexp::Token::Quantifier
206             PPIx::Regexp::Token::Recursion
207             PPIx::Regexp::Token::Reference
208             PPIx::Regexp::Token::Structure
209             PPIx::Regexp::Token::Unknown
210             PPIx::Regexp::Token::Unmatched
211             PPIx::Regexp::Token::Whitespace
212             PPIx::Regexp::Tokenizer
213             PPIx::Regexp::Util
214             };
215              
216 10     10   116 use constant COOKIE_CLASS => ']';
  10         20  
  10         834  
217 10     10   53 use constant COOKIE_QUANT => '}';
  10         13  
  10         427  
218 10     10   34 use constant COOKIE_QUOTE => '\\E';
  10         16  
  10         424  
219 10     10   37 use constant COOKIE_REGEX_SET => '])';
  10         41  
  10         357  
220 10     10   39 use constant COOKIE_LOOKAROUND_ASSERTION => 'lookaround';
  10         27  
  10         408  
221              
222 10     10   36 use constant FALSE => 0;
  10         15  
  10         1752  
223 10     10   36 use constant TRUE => 1;
  10         14  
  10         793  
224              
225             # This hack is because it appears that Strawberry Perl evaluates 0 +
226             # 'inf' as zero under Perl 5.12.3 and below. But the real problem is
227             # that, from a pure portability standpoint, I can not count on IEEE 754
228             # being in use. So if 0 + 'Inf' (documented in perldata) is zero or an
229             # error, I fall back to an object that mimics its behavior to the
230             # extent I think I need.
231             # NOTE that the only way I have to test the ::Inf object is to comment
232             # out the eval{} code. This is ugly, but I can not think of anything
233             # better.
234             BEGIN {
235 10     10   42 local $@ = undef;
236 10         29 require constant;
237             constant->import( INFINITY =>
238 10   33     14 eval { 0 + 'Inf' } ||
239             PPIx::Regexp::Constant::Inf->__pos_inf() );
240             }
241              
242 10     10   36 use constant ARRAY_REF => ref [];
  10         12  
  10         481  
243 10     10   50 use constant CODE_REF => ref sub {};
  10         22  
  10         481  
244 10     10   39 use constant HASH_REF => ref {};
  10         13  
  10         449  
245 10     10   35 use constant REGEXP_REF => ref qr{};
  10         11  
  10         429  
246 10     10   34 use constant SCALAR_REF => ref \0;
  10         16  
  10         357  
247              
248             # In the cases where an unescaped literal left curly 'could not' be a
249             # quantifier, they are allowed. At least, that was the original idea.
250             # But read on.
251 10     10   42 use constant LITERAL_LEFT_CURLY_ALLOWED => undef;
  10         18  
  10         344  
252              
253             # 'Most' unescaped literal left curlys were removed in 5.26.
254 10     10   36 use constant LITERAL_LEFT_CURLY_REMOVED_PHASE_1 => '5.025001';
  10         11  
  10         389  
255              
256             # Unescaped literal left curlys after literals and certain other
257             # elements are scheduled to be removed in 5.30.
258 10     10   34 use constant LITERAL_LEFT_CURLY_REMOVED_PHASE_2 => undef; # x{ 5.30
  10         13  
  10         326  
259              
260             # In 5.27.8 it was decided that unescaped literal left curlys after an
261             # open paren will be removed in 5.32. This does not include the case
262             # where the entire regex is delimited by parens -- they are still legal
263             # there.
264 10     10   46 use constant LITERAL_LEFT_CURLY_REMOVED_PHASE_3 => undef; # ({ 5.32
  10         11  
  10         294  
265              
266             # Location constants. Must align with PPI
267 10     10   45 use constant LOCATION_LINE => 0;
  10         17  
  10         362  
268 10     10   30 use constant LOCATION_CHARACTER => 1;
  10         11  
  10         307  
269 10     10   48 use constant LOCATION_COLUMN => 2;
  10         28  
  10         252  
270 10     10   34 use constant LOCATION_LOGICAL_LINE => 3;
  10         16  
  10         273  
271 10     10   28 use constant LOCATION_LOGICAL_FILE => 4;
  10         12  
  10         271  
272              
273 10     10   40 use constant MINIMUM_PERL => '5.000';
  10         36  
  10         357  
274              
275 10     10   35 use constant MODIFIER_GROUP_MATCH_SEMANTICS => 'match_semantics';
  10         19  
  10         390  
276              
277 10         355 use constant MSG_LOOK_BEHIND_TOO_LONG =>
278 10     10   33 'Lookbehind longer than 255 not implemented';
  10         24  
279 10         329 use constant MSG_PROHIBITED_BY_STRICT =>
280 10     10   34 q<prohibited by "use re 'strict'">;
  10         11  
281              
282 10     10   37 use constant NODE_UNKNOWN => 'PPIx::Regexp::Node::Unknown';
  10         16  
  10         467  
283              
284             # The perlre for Perl 5.010 says:
285             #
286             # Currently NAME is restricted to simple identifiers only. In
287             # other words, it must match "/^[_A-Za-z][_A-Za-z0-9]*\z/" or
288             # its Unicode extension (see utf8), though it isn't extended by
289             # the locale (see perllocale).
290              
291 10     10   44 use constant RE_CAPTURE_NAME => ' [_[:alpha:]] \w* ';
  10         11  
  10         398  
292              
293 10     10   42 use constant STRUCTURE_UNKNOWN => 'PPIx::Regexp::Structure::Unknown';
  10         48  
  10         372  
294              
295 10     10   57 use constant SUFFICIENT_UTF8_SUPPORT_FOR_WEIRD_DELIMITERS => $] ge '5.008003';
  10         34  
  10         380  
296              
297 10     10   32 use constant TOKEN_LITERAL => 'PPIx::Regexp::Token::Literal';
  10         31  
  10         295  
298 10     10   30 use constant TOKEN_UNKNOWN => 'PPIx::Regexp::Token::Unknown';
  10         36  
  10         344  
299              
300 10     10   64 use constant VARIABLE_LENGTH_LOOK_BEHIND_INTRODUCED => '5.029009';
  10         13  
  10         557  
301              
302             1;
303              
304             __END__
305              
306             =head1 NAME
307              
308             PPIx::Regexp::Constant - Constants for the PPIx::Regexp system
309              
310             =head1 SYNOPSIS
311              
312             use PPIx::Regexp::Constant qw{ TOKEN_UNKNOWN };
313             print "An unknown token's class is ", TOKEN_UNKNOWN, "\n";
314              
315             =head1 INHERITANCE
316              
317             C<PPIx::Regexp::Constant> is an L<Exporter|Exporter>.
318              
319             C<PPIx::Regexp::Constant> has no descendants.
320              
321             =head1 DETAILS
322              
323             This module defines manifest constants for use by the various
324             C<PPIx::Regexp> modules. These constants are to be considered B<private>
325             to the C<PPIx::Regexp> system, and the author reserves the right to
326             change them without notice.
327              
328             This module exports the following manifest constants:
329              
330             =head2 @CARP_NOT
331              
332             This global variable contains the names of all modules in the distribution.
333              
334             =head2 ARRAY_REF
335              
336             This is the result of C<ref []>.
337              
338             =head2 CODE_REF
339              
340             This is the result of C<ref sub {}>.
341              
342             =head2 COOKIE_CLASS
343              
344             The name of the cookie used to control the construction of character
345             classes.
346              
347             This cookie is set in
348             L<PPIx::Regexp::Token::Structure|PPIx::Regexp::Token::Structure> when
349             the left square bracket is encountered, and cleared in the same module
350             when a right square bracket is encountered.
351              
352             =head2 COOKIE_LOOKAROUND_ASSERTION
353              
354             The name of the cookie used to control the parsing of zero-width
355             assertions.
356              
357             This cookie is set in
358             L<PPIx::Regexp::Token::GroupType::Assertion|PPIx::Regexp::Token::GroupType::Assertion>,
359             and it persists until the end of the assertion.
360              
361             =head2 COOKIE_QUANT
362              
363             The name of the cookie used to control the construction of curly
364             bracketed quantifiers.
365              
366             This cookie is set in
367             L<PPIx::Regexp::Token::Structure|PPIx::Regexp::Token::Structure> when a
368             left curly bracket is encountered. It requests itself to be cleared on
369             encountering anything other than a literal comma, a literal digit, or an
370             interpolation, or if more than one comma is encountered. If it survives
371             until L<PPIx::Regexp::Token::Structure|PPIx::Regexp::Token::Structure>
372             processes the right curly bracket, it is cleared there.
373              
374             =head2 COOKIE_QUOTE
375              
376             The name of the cookie used to control the parsing of C<\Q ... \E>
377             quoted literals.
378              
379             This cookie is set in
380             L<PPIx::Regexp::Token::Control|PPIx::Regexp::Token::Control> when a
381             C<\Q> is encountered, and it persists until the next C<\E>.
382              
383             =head2 COOKIE_REGEX_SET
384              
385             The name of the cookie used to control regular expression sets.
386              
387             =head2 FALSE
388              
389             A false value. The author makes no commitment what the exact value is,
390             only that Boolean operations will see it as false.
391              
392             =head2 HASH_REF
393              
394             This is the result of C<ref {}>.
395              
396             =head2 INFINITY
397              
398             This is the IEEE 754 value of C<Inf> if that can be generated, or an
399             opaque overloaded object if not. Because the object does not (and I
400             think can not) implement the complete behavior of IEEE 754 C<Inf>, this
401             manifest constant should only be used for stringification and numeric
402             comparison. Be aware that the numification of the object has to be the
403             same as its stringification to keep C<looks_like_number()> happy.
404             B<Caveat coder>.
405              
406             =head2 LITERAL_LEFT_CURLY_ALLOWED
407              
408             The Perl version at which allowed unescaped literal left curly brackets
409             were removed. This may make more sense if I mention that its value is
410             C<undef>.
411              
412             =head2 LITERAL_LEFT_CURLY_REMOVED_PHASE_1
413              
414             The Perl version at which the first phase of unescaped literal left
415             curly bracket removal took place. The value of this constant is
416             C<'5.025001'>.
417              
418             =head2 LITERAL_LEFT_CURLY_REMOVED_PHASE_2
419              
420             The Perl version at which the second phase of unescaped literal left
421             curly bracket removal took place. The value of this constant is
422             C<undef>, but it will be assigned a value when the timing of the second
423             phase is known.
424              
425             =head2 LITERAL_LEFT_CURLY_REMOVED_PHASE_3
426              
427             The Perl version at which the third phase of unescaped literal left
428             curly bracket removal took place. This is the removal of curly brackets
429             after a left parenthesis. The value of this constant is C<undef>, but it
430             will be assigned a value when the timing of the third phase is known.
431              
432             =head2 MINIMUM_PERL
433              
434             The minimum version of Perl understood by this parser, as a float. It is
435             currently set to 5.000, since that is the minimum version of Perl
436             accessible to the author.
437              
438             =head2 MODIFIER_GROUP_MATCH_SEMANTICS
439              
440             The name of the
441             L<PPIx::Regexp::Token::Modifier|PPIx::Regexp::Token::Modifier> group
442             used to control match semantics.
443              
444             =head2 MSG_LOOK_BEHIND_TOO_LONG
445              
446             An appropriate error message for an unknown entity created from a
447             quantifier which would make a look-behind assertion too long. This is
448             cribbed verbatim from the Perl error message.
449              
450             =head2 MSG_PROHIBITED_BY_STRICT
451              
452             An appropriate error message for an unknown entity created because
453             C<'strict'> was in effect. This is rank ad-hocery, and more than usually
454             subject to being changed, without any notice whatsoever. Caveat user.
455              
456             =head2 NODE_UNKNOWN
457              
458             The name of the class that represents an unknown node. That is,
459             L<PPIx::Regexp::Node::Unknown|PPIx::Regexp::Node::Unknown>.
460              
461             =head2 RE_CAPTURE_NAME
462              
463             A string representation of a regular expression that matches the name of
464             a named capture buffer.
465              
466             =head2 REGEXP_REF
467              
468             This is the result of C<ref qr{}>.
469              
470             =head2 SCALAR_REF
471              
472             This is the result of C<ref \0>.
473              
474             =head2 STRUCTURE_UNKNOWN
475              
476             The name of the class that represents an unknown structure. That is,
477             L<PPIx::Regexp::Structure::Unknown|PPIx::Regexp::Structure::Unknown>.
478              
479             =head2 SUFFICIENT_UTF8_SUPPORT_FOR_WEIRD_DELIMITERS
480              
481             A Boolean which is true if the running version of Perl has UTF-8 support
482             sufficient for our purposes.
483              
484             Currently that means C<5.8.3> or greater, with the specific requirements
485             being C<use open qw{ :std :encoding(utf-8) }>, C</\p{Mark}/>, and the
486             ability to parse things like C<qr \N{U+FFFF}foo\N{U+FFFF}>.
487              
488             =head2 TOKEN_LITERAL
489              
490             The name of the class that represents a literal token. That is,
491             L<PPIx::Regexp::Token::Literal|PPIx::Regexp::Token::Literal>.
492              
493             =head2 TOKEN_UNKNOWN
494              
495             The name of the class that represents the unknown token. That is,
496             L<PPIx::Regexp::Token::Unknown|PPIx::Regexp::Token::Unknown>.
497              
498             =head2 TRUE
499              
500             A true value. The author makes no commitment what the exact value is,
501             only that Boolean operations will see it as true.
502              
503             =head2 VARIABLE_LENGTH_LOOK_BEHIND_INTRODUCED
504              
505             The version in which variable-length look-behinds were introduced.
506             Currently this is C<'5.029009'>, and implies the limited lookbehind
507             introduced at or about that version.
508              
509             =head1 SUPPORT
510              
511             Support is by the author. Please file bug reports at
512             L<https://rt.cpan.org/Public/Dist/Display.html?Name=PPIx-Regexp>,
513             L<https://github.com/trwyant/perl-PPIx-Regexp/issues>, or in
514             electronic mail to the author.
515              
516             =head1 AUTHOR
517              
518             Thomas R. Wyant, III F<wyant at cpan dot org>
519              
520             =head1 COPYRIGHT AND LICENSE
521              
522             Copyright (C) 2009-2023, 2025 by Thomas R. Wyant, III
523              
524             This program is free software; you can redistribute it and/or modify it
525             under the same terms as Perl 5.10.0. For more details, see the full text
526             of the licenses in the directory LICENSES.
527              
528             This program is distributed in the hope that it will be useful, but
529             without any warranty; without even the implied warranty of
530             merchantability or fitness for a particular purpose.
531              
532             =cut
533              
534             # ex: set textwidth=72 :