File Coverage

blib/lib/PPIx/Regexp/Structure.pm
Criterion Covered Total %
statement 128 151 84.7
branch 45 70 64.2
condition 23 33 69.7
subroutine 17 18 94.4
pod 8 8 100.0
total 221 280 78.9


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PPIx::Regexp::Structure - Represent a structure.
4              
5             =head1 SYNOPSIS
6              
7             use PPIx::Regexp::Dumper;
8             PPIx::Regexp::Dumper->new( 'qr{(foo)}' )->print();
9              
10             =head1 INHERITANCE
11              
12             C<PPIx::Regexp::Structure> is a
13             L<PPIx::Regexp::Node|PPIx::Regexp::Node>.
14              
15             C<PPIx::Regexp::Structure> is the parent of
16             L<PPIx::Regexp::Structure::Assertion|PPIx::Regexp::Structure::Assertion>,
17             L<PPIx::Regexp::Structure::BranchReset|PPIx::Regexp::Structure::BranchReset>,
18             L<PPIx::Regexp::Structure::Capture|PPIx::Regexp::Structure::Capture>,
19             L<PPIx::Regexp::Structure::CharClass|PPIx::Regexp::Structure::CharClass>,
20             L<PPIx::Regexp::Structure::Code|PPIx::Regexp::Structure::Code>,
21             L<PPIx::Regexp::Structure::Main|PPIx::Regexp::Structure::Main>,
22             L<PPIx::Regexp::Structure::Modifier|PPIx::Regexp::Structure::Modifier>,
23             L<PPIx::Regexp::Structure::Quantifier|PPIx::Regexp::Structure::Quantifier>,
24             L<PPIx::Regexp::Structure::Script_Run|PPIx::Regexp::Structure::Script_Run>,
25             L<PPIx::Regexp::Structure::Subexpression|PPIx::Regexp::Structure::Subexpression>,
26             L<PPIx::Regexp::Structure::Switch|PPIx::Regexp::Structure::Switch> and
27             L<PPIx::Regexp::Structure::Unknown|PPIx::Regexp::Structure::Unknown>.
28              
29             =head1 DESCRIPTION
30              
31             This class represents a bracketed construction of some sort. The
32             brackets are considered part of the structure, but not inside it. So the
33             C<elements()> method returns the brackets if they are defined, but the
34             C<children()> method does not.
35              
36             =head1 METHODS
37              
38             This class provides the following public methods. Methods not documented
39             here are private, and unsupported in the sense that the author reserves
40             the right to change or remove them without notice.
41              
42             =cut
43              
44             package PPIx::Regexp::Structure;
45              
46 9     9   64 use strict;
  9         30  
  9         255  
47 9     9   28 use warnings;
  9         15  
  9         323  
48              
49 9     9   31 use base qw{ PPIx::Regexp::Node };
  9         10  
  9         692  
50              
51 9     9   83 use Carp qw{ confess };
  9         114  
  9         487  
52 9         800 use PPIx::Regexp::Constant qw{
53             ARRAY_REF
54             HASH_REF
55             STRUCTURE_UNKNOWN
56             @CARP_NOT
57 9     9   41 };
  9         10  
58 9     9   43 use PPIx::Regexp::Util qw{ __instance };
  9         10  
  9         541  
59 9     9   42 use Scalar::Util qw{ refaddr };
  9         13  
  9         447  
60              
61             our $VERSION = '0.091';
62              
63 9     9   43 use constant ELEMENT_UNKNOWN => STRUCTURE_UNKNOWN;
  9         11  
  9         12557  
64              
65             sub __new {
66 620     620   1510 my ( $class, @args ) = @_;
67 620         831 my %brkt;
68 620 100       1499 if ( HASH_REF eq ref $args[0] ) {
69 38         53 %brkt = %{ shift @args };
  38         111  
70 38         78 foreach my $key ( qw{ start type finish } ) {
71             ARRAY_REF eq ref $brkt{$key}
72 114 50       328 or $brkt{$key} = [ $brkt{$key} ];
73             }
74             } else {
75 582 100       1862 $brkt{finish} = [ @args ? pop @args : () ];
76 582 100       1553 $brkt{start} = [ @args ? shift @args : () ];
77 582   100     2059 while ( @args && ! $args[0]->significant() ) {
78 29         48 push @{ $brkt{start} }, shift @args;
  29         113  
79             }
80 582         1107 $brkt{type} = [];
81 582 100       1487 if ( __instance( $args[0], 'PPIx::Regexp::Token::GroupType' ) ) {
82 110         178 push @{ $brkt{type} }, shift @args;
  110         251  
83 110   100     412 while ( @args && ! $args[0]->significant() ) {
84 1         2 push @{ $brkt{type} }, shift @args;
  1         3  
85             }
86             }
87             }
88              
89 620         2290 $class->_check_for_interpolated_match( \%brkt, \@args );
90              
91 620 50       2301 my $self = $class->SUPER::__new( @args )
92             or return;
93              
94 620 100       1461 if ( __instance( $brkt{type}[0], 'PPIx::Regexp::Token::GroupType' ) ) {
95 114         669 ( my $reclass = ref $brkt{type}[0] ) =~
96             s/ Token::GroupType /Structure/smx;
97 114 50       1180 $reclass->can( 'start' )
98             or confess "Programming error - $reclass not loaded";
99 114         497 bless $self, $reclass;
100             }
101              
102 620         1320 foreach my $key ( qw{ start type finish } ) {
103 1860         3215 $self->{$key} = [];
104 1860 50       3433 ARRAY_REF eq ref $brkt{$key}
105             or confess "Programming error - '$brkt{$key}' not an ARRAY";
106 1860         1909 foreach my $val ( @{ $brkt{$key} } ) {
  1860         2664  
107 1428 100       2049 defined $val or next;
108 1370 50       1934 __instance( $val, 'PPIx::Regexp::Element' )
109             or confess "Programming error - '$val' not a ",
110             "PPIx::Regexp::Element";
111 1370         1562 push @{ $self->{$key} }, $val;
  1370         2191  
112 1370         2315 $val->_parent( $self );
113             }
114             }
115              
116 620         1154 @{ $self->{finish} }
117 620 100       751 or $self->{error} = 'Missing end delimiter';
118              
119 620         2739 return $self;
120             }
121              
122             =head2 elements
123              
124             This override returns all components of the structure, including those
125             that define it.
126              
127             =cut
128              
129             sub elements {
130 2951     2951 1 3700 my ( $self ) = @_;
131              
132 2951 100       3736 if ( wantarray ) {
    50          
133             return (
134 2950         3609 @{ $self->{start} },
135 2950         3519 @{ $self->{type} },
136 2950         3470 @{ $self->{children} },
137 2950         2962 @{ $self->{finish} },
  2950         6278  
138             );
139             } elsif ( defined wantarray ) {
140 1         2 my $size = scalar @{ $self->{start} };
  1         3  
141 1         1 $size += scalar @{ $self->{type} };
  1         2  
142 1         1 $size += scalar @{ $self->{children} };
  1         2  
143 1         2 $size += scalar @{ $self->{finish} };
  1         1  
144 1         2 return $size;
145             } else {
146 0         0 return;
147             }
148             }
149              
150             {
151             my %explanation = (
152             q<(> => 'Grouping', # )
153             );
154              
155             sub explain {
156 1     1 1 3 my ( $self ) = @_;
157 1 50       4 if ( my $type = $self->type() ) {
158 0         0 return $type->explain();
159             }
160 1 50       3 if ( my $start = $self->start() ) {
161             # The check for a left parenthesis before returning
162             # 'Grouping' is probably superflous, since it appears that
163             # this method is overridden in all other cases where we
164             # might get here (i.e. '[...]', '{...}'). But I'm paranoid.
165 1   33     4 return $explanation{ $start->content() } || $start->explain();
166             }
167 0         0 return $self->__no_explanation();
168             }
169             }
170              
171             =head2 finish
172              
173             my $elem = $struct->finish();
174             my @elem = $struct->finish();
175             my $elem = $struct->finish( 0 );
176              
177             Returns the finishing structure element. This is included in the
178             C<elements> but not in the C<children>.
179              
180             The finishing element is actually an array, though it should never have
181             more than one element. Calling C<finish> in list context gets you all
182             elements of the array. Calling it in scalar context gets you an element
183             of the array, defaulting to element 0 if no argument is passed.
184              
185             =cut
186              
187             sub finish {
188 710     710 1 1325 my ( $self, $inx ) = @_;
189 710 100       1590 wantarray and return @{ $self->{finish} };
  193         795  
190 517 100       2133 return $self->{finish}[ defined $inx ? $inx : 0 ];
191             }
192              
193             sub first_element {
194 3     3 1 7 my ( $self ) = @_;
195              
196 3 50       16 $self->{start}[0] and return $self->{start}[0];
197              
198 0 0       0 $self->{type}[0] and return $self->{type}[0];
199              
200 0 0       0 if ( my $elem = $self->SUPER::first_element() ) {
201 0         0 return $elem;
202             }
203              
204 0 0       0 $self->{finish}[0] and return $self->{finish}[0];
205              
206 0         0 return;
207             }
208              
209             sub last_element {
210 45     45 1 59 my ( $self ) = @_;
211              
212 45 50       153 $self->{finish}[-1] and return $self->{finish}[-1];
213              
214 0 0       0 if ( my $elem = $self->SUPER::last_element() ) {
215 0         0 return $elem;
216             }
217              
218 0 0       0 $self->{type}[-1] and return $self->{type}[-1];
219              
220 0 0       0 $self->{start}[-1] and return $self->{start}[-1];
221              
222 0         0 return;
223             }
224              
225             sub remove_insignificant {
226 0     0 1 0 my ( $self ) = @_;
227             return $self->__new(
228 0         0 map { $_->remove_insignificant() } $self->elements() );
  0         0  
229             }
230              
231             =head2 start
232              
233             my $elem = $struct->start();
234             my @elem = $struct->start();
235             my $elem = $struct->start( 0 );
236              
237             Returns the starting structure element. This is included in the
238             C<elements> but not in the C<children>.
239              
240             The starting element is actually an array. The first element (element 0)
241             is the actual starting delimiter. Subsequent elements, if any, are
242             insignificant elements (comments or white space) absorbed into the start
243             element for ease of parsing subsequent elements.
244              
245             Calling C<start> in list context gets you all elements of the array.
246             Calling it in scalar context gets you an element of the array,
247             defaulting to element 0 if no argument is passed.
248              
249             =cut
250              
251             sub start {
252 739     739 1 1527 my ( $self, $inx ) = @_;
253 739 100       1598 wantarray and return @{ $self->{start} };
  222         849  
254 517 100       2185 return $self->{start}[ defined $inx ? $inx : 0 ];
255             }
256              
257             =head2 type
258              
259             my $elem = $struct->type();
260             my @elem = $struct->type();
261             my $elem = $struct->type( 0 );
262              
263             Returns the group type if any. This will be the leading
264             L<PPIx::Regexp::Token::GroupType|PPIx::Regexp::Token::GroupType>
265             token if any. This is included in C<elements> but not in C<children>.
266              
267             The type is actually an array. The first element (element 0) is the
268             actual type determiner. Subsequent elements, if any, are insignificant
269             elements (comments or white space) absorbed into the type element for
270             consistency with the way the start element is handled.
271              
272             Calling C<type> in list context gets you all elements of the array.
273             Calling it in scalar context gets you an element of the array,
274             defaulting to element 0 if no argument is passed.
275              
276             =cut
277              
278             sub type {
279 389     389 1 907 my ( $self, $inx ) = @_;
280 389 100       1005 wantarray and return @{ $self->{type} };
  196         677  
281 193 100       982 return $self->{type}[ defined $inx ? $inx : 0 ];
282             }
283              
284             # Check for things like (?$foo:...) or (?$foo)
285             sub _check_for_interpolated_match {
286 620     620   1130 my ( undef, $brkt, $args ) = @_; # Invocant unused
287              
288             # Everything we are interested in begins with a literal '?' followed
289             # by an interpolation.
290 620 100 100     1211 __instance( $args->[0], 'PPIx::Regexp::Token::Unknown' )
      66        
291             and $args->[0]->content() eq '?'
292             and __instance( $args->[1], 'PPIx::Regexp::Token::Interpolation' )
293             or return;
294              
295 4         8 my $hiwater = 2; # Record how far we got into the arguments for
296             # subsequent use detecting things like
297             # (?$foo).
298              
299             # If we have a literal ':' as the third argument:
300             # GroupType::Modifier, rebless the ':' so we know not to match
301             # against it, and splice all three tokens into the type.
302 4 100 100     9 if ( __instance( $args->[2], 'PPIx::Regexp::Token::Literal' )
303             && $args->[2]->content() eq ':' ) {
304              
305             # Rebless the '?' as a GroupType::Modifier.
306 1         11 PPIx::Regexp::Token::GroupType::Modifier->__PPIX_ELEM__rebless(
307             $args->[0] );
308              
309             # Rebless the ':' as a GroupType, just so it does not look like
310             # something to match against.
311 1         5 PPIx::Regexp::Token::GroupType->__PPIX_ELEM__rebless(
312             $args->[2] );
313              
314             # Shove our three significant tokens into the type.
315 1         1 push @{ $brkt->{type} }, splice @{ $args }, 0, 3;
  1         2  
  1         3  
316              
317             # Stuff all the immediately-following insignificant tokens into
318             # the type as well.
319 1   33     1 while ( @{ $args } && ! $args->[0]->significant() ) {
  1         20  
320 0         0 push @{ $brkt->{type} }, shift @{ $args };
  0         0  
  0         0  
321             }
322              
323             # Return to the caller, since we have done all the damage we
324             # can.
325 1         2 return;
326             }
327              
328             # If we have a literal '-' as the third argument, we might have
329             # something like (?$on-$off:$foo).
330 3 50 66     9 if ( __instance( $args->[2], 'PPIx::Regexp::Token::Literal' )
      66        
331             && $args->[2]->content() eq '-'
332             && __instance( $args->[3], 'PPIx::Regexp::Token::Interpolation' )
333             ) {
334 2         4 $hiwater = 4;
335              
336 2 100 66     5 if ( __instance( $args->[4], 'PPIx::Regexp::Token::Literal' )
337             && $args->[4]->content() eq ':' ) {
338              
339             # Rebless the '?' as a GroupType::Modifier.
340 1         7 PPIx::Regexp::Token::GroupType::Modifier->__PPIX_ELEM__rebless(
341             $args->[0] );
342              
343             # Rebless the '-' and ':' as GroupType, just so they do not
344             # look like something to match against.
345 1         3 PPIx::Regexp::Token::GroupType->__PPIX_ELEM__rebless(
346             $args->[2] );
347 1         2 PPIx::Regexp::Token::GroupType->__PPIX_ELEM__rebless(
348             $args->[4] );
349              
350             # Shove our five significant tokens into the type.
351 1         2 push @{ $brkt->{type} }, splice @{ $args }, 0, 5;
  1         2  
  1         3  
352              
353             # Stuff all the immediately-following insignificant tokens
354             # into the type as well.
355 1   33     2 while ( @{ $args } && ! $args->[0]->significant() ) {
  1         4  
356 0         0 push @{ $brkt->{type} }, shift @{ $args };
  0         0  
  0         0  
357             }
358              
359             # Return to the caller, since we have done all the damage we
360             # can.
361 1         1 return;
362             }
363             }
364              
365             # If the group contains _any_ significant tokens at this point, we
366             # do _not_ have something like (?$foo).
367 2         8 foreach my $inx ( $hiwater .. $#$args ) {
368 0 0       0 $args->[$inx]->significant() and return;
369             }
370              
371             # Rebless the '?' as a GroupType::Modifier.
372             PPIx::Regexp::Token::GroupType::Modifier->__PPIX_ELEM__rebless(
373 2         32 $args->[0] );
374              
375             # Shove all the contents of $args into type, using splice to leave
376             # @{ $args } empty after we do this.
377 2         4 push @{ $brkt->{type} }, splice @{ $args };
  2         4  
  2         5  
378              
379             # We have done all the damage we can.
380 2         4 return;
381             }
382              
383             1;
384              
385             __END__
386              
387             =head1 SUPPORT
388              
389             Support is by the author. Please file bug reports at
390             L<https://rt.cpan.org/Public/Dist/Display.html?Name=PPIx-Regexp>,
391             L<https://github.com/trwyant/perl-PPIx-Regexp/issues>, or in
392             electronic mail to the author.
393              
394             =head1 AUTHOR
395              
396             Thomas R. Wyant, III F<wyant at cpan dot org>
397              
398             =head1 COPYRIGHT AND LICENSE
399              
400             Copyright (C) 2009-2023, 2025 by Thomas R. Wyant, III
401              
402             This program is free software; you can redistribute it and/or modify it
403             under the same terms as Perl 5.10.0. For more details, see the full text
404             of the licenses in the directory LICENSES.
405              
406             This program is distributed in the hope that it will be useful, but
407             without any warranty; without even the implied warranty of
408             merchantability or fitness for a particular purpose.
409              
410             =cut
411              
412             # ex: set textwidth=72 :