File Coverage

blib/lib/PPIx/Regexp/Node.pm

Criterion	Covered	Total	%
statement	214	234	91.4
branch	87	124	70.1
condition	3	6	50.0
subroutine	43	45	95.5
pod	22	22	100.0
total	369	431	85.6

line	stmt	bran	cond	sub	pod	time	code
1							=head1 NAME
2
3							PPIx::Regexp::Node - Represent a container
4
5							=head1 SYNOPSIS
6
7							use PPIx::Regexp::Dumper;
8							PPIx::Regexp::Dumper->new( 'qr{(foo)}' )->print();
9
10							=head1 INHERITANCE
11
12							C is a
13							L.
14
15							C is the parent of L,
16							L and
17							L.
18
19							=head1 DESCRIPTION
20
21							This class represents a structural element that contains other classes.
22							It is an abstract class, not instantiated by the lexer.
23
24							=head1 METHODS
25
26							This class provides the following public methods. Methods not documented
27							here are private, and unsupported in the sense that the author reserves
28							the right to change or remove them without notice.
29
30							=cut
31
32							package PPIx::Regexp::Node;
33
34	9			9		55	use strict;
	9					13
	9					255
35	9			9		33	use warnings;
	9					14
	9					333
36
37	9			9		34	use base qw{ PPIx::Regexp::Element };
	9					11
	9					4454
38
39	9			9		52	use Carp;
	9					12
	9					462
40	9			9		35	use List::Util qw{ max min };
	9					10
	9					395
41	9					742	use PPIx::Regexp::Constant qw{
42							CODE_REF
43							FALSE
44							INFINITY
45							MINIMUM_PERL
46							NODE_UNKNOWN
47							TRUE
48							@CARP_NOT
49	9			9		42	};
	9					11
50	9			9		57	use PPIx::Regexp::Util qw{ __instance __merge_perl_requirements width };
	9					10
	9					287
51	9			9		27	use Scalar::Util qw{ refaddr };
	9					10
	9					365
52
53							our $VERSION = '0.092';
54
55	9			9		30	use constant ELEMENT_UNKNOWN => NODE_UNKNOWN;
	9					22
	9					3086
56
57							sub __new {
58	976			976		1977	my ( $class, @children ) = @_;
59	976					1332	foreach my $elem ( @children ) {
60	2588	50				3470	__instance( $elem, 'PPIx::Regexp::Element' ) or return;
61							}
62	976					2215	my $self = {
63							children => \@children,
64							};
65	976		33			2523	bless $self, ref $class \|\| $class;
66	976					1235	foreach my $elem ( @children ) {
67	2588					4631	$elem->_parent( $self );
68							}
69	976					2196	return $self;
70							}
71
72							=head2 child
73
74							my $kid = $node->child( 0 );
75
76							This method returns the child at the given index. The indices start from
77							zero, and negative indices are from the end of the list, so that
78							C<< $node->child( -1 ) >> returns the last child of the node.
79
80							=cut
81
82							sub child {
83	6159			6159	1	8927	my ( $self, $inx ) = @_;
84	6159	50				10048	defined $inx or $inx = 0;
85	6159					20289	return $self->{children}[$inx];
86							}
87
88							=head2 children
89
90							This method returns the children of the Node. If called in scalar
91							context it returns the number of children.
92
93							=cut
94
95							sub children {
96	2485			2485	1	3310	my ( $self ) = @_;
97	2485					2539	return @{ $self->{children} };
	2485					5114
98							}
99
100							=head2 contains
101
102							print $node->contains( $elem ) ? "yes\n" : "no\n";
103
104							This method returns true if the given element is contained in the node,
105							or false otherwise.
106
107							=cut
108
109							sub contains {
110	1			1	1	2	my ( $self, $elem ) = @_;
111	1	50				3	__instance( $elem, 'PPIx::Regexp::Element' ) or return;
112
113	1					2	my $addr = refaddr( $self );
114
115	1					3	while ( $elem = $elem->parent() ) {
116	3	100				11	$addr == refaddr( $elem ) and return 1;
117							}
118
119	0					0	return;
120							}
121
122							sub content {
123	516			516	1	2849	my ( $self ) = @_;
124	516					1322	return join( '', map{ $_->content() } $self->elements() );
	2138					3928
125							}
126
127							=head2 elements
128
129							This method returns the elements in the Node. For a
130							C proper, it is the same as C.
131
132							=cut
133
134							{
135	9			9		71	no warnings qw{ once };
	9					19
	9					10478
136							*elements = \&children; # sub slements
137							}
138
139							=head2 find
140
141							my $rslt = $node->find( 'PPIx::Regexp::Token::Literal' );
142							my $rslt = $node->find( 'Token::Literal' );
143							my $rslt = $node->find( sub {
144							return $_[1]->isa( 'PPIx::Regexp::Token::Literal' )
145							&& $_[1]->ordinal < ord(' ');
146							} );
147
148							This method finds things.
149
150							If given a string as argument, it is assumed to be a class name
151							(possibly without the leading 'PPIx::Regexp::'), and all elements of the
152							given class are found.
153
154							If given a code reference, that code reference is called once for each
155							element, and passed C<$self> and the element. The code should return
156							true to accept the element, false to reject it, and ( for subclasses of
157							C) C to prevent recursion into the node. If
158							the code throws an exception, you get nothing back from this method.
159
160							Either way, the return is a reference to the list of things found, a
161							false (but defined) value if nothing was found, or C if an error
162							occurred.
163
164							=cut
165
166							sub _find_routine {
167	1347			1347		1636	my ( $want ) = @_;
168	1347	100				2386	CODE_REF eq ref $want
169							and return $want;
170	691	100				988	ref $want and return;
171	690	100				2008	$want =~ m/ \A PPIx::Regexp:: /smx
172							or $want = 'PPIx::Regexp::' . $want;
173							return sub {
174	6198	100		6198		7409	return __instance( $_[1], $want ) ? 1 : 0;
175	690					2574	};
176							}
177
178							sub find {
179	1340			1340	1	1765	my ( $self, $want ) = @_;
180
181	1340	100				1862	$want = _find_routine( $want ) or return;
182
183	1339					1593	my @found;
184
185							# We use a recursion to find what we want. PPI::Node uses an
186							# iteration.
187	1339					2051	foreach my $elem ( $self->elements() ) {
188	6197	100				5852	my $rslt = eval { $want->( $self, $elem ) }
	6197					6487
189							and push @found, $elem;
190	6197	50				7746	$@ and return;
191
192	6197	100				7382	__instance( $elem, 'PPIx::Regexp::Node' ) or next;
193	651	50				1025	defined $rslt or next;
194							$rslt = $elem->find( $want )
195	651	100				1135	and push @found, @{ $rslt };
	52					95
196							}
197
198	1339	100				5421	return @found ? \@found : 0;
199
200							}
201
202							=head2 find_parents
203
204							my $rslt = $node->find_parents( sub {
205							return $_[1]->isa( 'PPIx::Regexp::Token::Operator' )
206							&& $_[1]->content() eq '\|';
207							} );
208
209							This convenience method takes the same arguments as C, but instead
210							of the found objects themselves returns their parents. No parent will
211							appear more than once in the output.
212
213							This method returns a reference to the array of parents if any were
214							found. If no parents were found the return is false but defined. If an
215							error occurred the return is C.
216
217							=cut
218
219							sub find_parents {
220	1			1	1	3	my ( $self, $want ) = @_;
221
222	1					1	my $found;
223	1	50				3	$found = $self->find( $want ) or return $found;
224
225	1					2	my %parents;
226							my @rslt;
227	1					2	foreach my $elem ( @{ $found } ) {
	1					1
228	2	50				6	my $dad = $elem->parent() or next;
229	2	100				7	$parents{ refaddr( $dad ) }++
230							or push @rslt, $dad;
231							}
232
233	1					4	return \@rslt;
234							}
235
236							=head2 find_first
237
238							This method has the same arguments as L, but returns either a
239							reference to the first element found, a false (but defined) value if no
240							elements were found, or C if an error occurred.
241
242							=cut
243
244							sub find_first {
245	7			7	1	9	my ( $self, $want ) = @_;
246
247	7	50				11	$want = _find_routine( $want ) or return;
248
249							# We use a recursion to find what we want. PPI::Node uses an
250							# iteration.
251	7					20	foreach my $elem ( $self->elements() ) {
252	16	100				21	my $rslt = eval { $want->( $self, $elem ) }
	16					19
253							and return $elem;
254	13	50				21	$@ and return;
255
256	13	100				16	__instance( $elem, 'PPIx::Regexp::Node' ) or next;
257	4	50				7	defined $rslt or next;
258
259	4	50				13	defined( $rslt = $elem->find_first( $want ) )
260							or return;
261	4	50				22	$rslt and return $rslt;
262							}
263
264	0					0	return 0;
265
266							}
267
268							=head2 first_element
269
270							This method returns the first element in the node.
271
272							=cut
273
274							sub first_element {
275	3			3	1	6	my ( $self ) = @_;
276	3					13	return $self->{children}[0];
277							}
278
279							=head2 first_token
280
281							This method returns the first token in the node. If there is none, it
282							returns nothing.
283
284							=cut
285
286							sub first_token {
287	4			4	1	9	my ( $self ) = @_;
288	4	50				15	my $elem = $self->first_element()
289							or return;
290	4					6	my $token;
291	4					26	while ( ! ( $token = $elem->first_token() ) ) {
292	0	0				0	$elem = $elem->next_element()
293							or return;
294							}
295	4					15	return $token;
296							}
297
298							=head2 last_element
299
300							This method returns the last element in the node.
301
302							=cut
303
304							sub last_element {
305	2			2	1	4	my ( $self ) = @_;
306	2					8	return $self->{children}[-1];
307							}
308
309							=head2 last_token
310
311							This method returns the last token in the node. If there is none, it
312							returns nothing.
313
314							=cut
315
316							sub last_token {
317	45			45	1	55	my ( $self ) = @_;
318	45	50				113	my $elem = $self->last_element()
319							or return;
320	45					50	my $token;
321	45					102	while ( ! ( $token = $elem->last_token() ) ) {
322	0	0				0	$elem = $elem->previous_element()
323							or return;
324							}
325	45					117	return $token;
326							}
327
328							sub location {
329	1			1	1	2	my ( $self ) = @_;
330	1	50				6	my $token = $self->first_token()
331							or return undef; ## no critic (ProhibitExplicitReturnUndef)
332	1					3	return $token->location();
333							}
334
335							=head2 is_matcher
336
337							This method returns a true value if any of the node's children does.
338							Otherwise it returns C if any of the node's children does.
339							Otherwise it returns a false (but defined) value.
340
341							=cut
342
343							sub is_matcher {
344	5			5	1	7	my ( $self ) = @_;
345	5					6	my $rslt = 0;
346	5					14	foreach my $kid ( @{ $self->{children} } ) {
	5					8
347	5	50				19	my $kid_rslt = $kid->is_matcher()
348							and return 1;
349	0	0				0	defined $kid_rslt
350							or $rslt = $kid_rslt;
351							}
352	0					0	return $rslt;
353							}
354
355							=head2 perl_version_introduced
356
357							This method returns the maximum value of C
358							returned by any of its elements. In other words, it returns the minimum
359							version of Perl under which this node is valid. If there are no
360							elements, 5.000 is returned, since that is the minimum value of Perl
361							supported by this package.
362
363							=cut
364
365							sub perl_version_introduced {
366	176			176	1	270	my ( $self ) = @_;
367	1038					1667	return max( grep { defined $_ } MINIMUM_PERL,
368							$self->{perl_version_introduced},
369	176					555	map { $_->perl_version_introduced() } $self->elements() );
	686					1760
370							}
371
372							=head2 perl_version_removed
373
374							This method returns the minimum defined value of C
375							returned by any of the node's elements. In other words, it returns the
376							lowest version of Perl in which this node is C valid. If there are
377							no elements, or if no element has a defined C,
378							C is returned.
379
380							=cut
381
382							sub perl_version_removed {
383	182			182	1	247	my ( $self ) = @_;
384	182					200	my $max;
385	182					462	foreach my $elem ( $self->elements() ) {
386	697	100				1685	if ( defined ( my $ver = $elem->perl_version_removed() ) ) {
387	15	50				21	if ( defined $max ) {
388	0	0				0	$ver < $max and $max = $ver;
389							} else {
390	15					18	$max = $ver;
391							}
392							}
393							}
394	182					336	return $max;
395							}
396
397							sub remove_insignificant {
398	0			0	1	0	my ( $self ) = @_;
399	0					0	return $self->__new( map { $_->remove_insignificant() }
	0					0
400							$self->children() );
401							}
402
403							=head2 schild
404
405							This method returns the significant child at the given index; that is,
406							C<< $node->schild(0) >> returns the first significant child,
407							C<< $node->schild(1) >> returns the second significant child, and so on.
408							Negative indices count from the end.
409
410							=cut
411
412							sub schild {
413	22			22	1	39	my ( $self, $inx ) = @_;
414	22	50				53	defined $inx or $inx = 0;
415
416	22					35	my $kids = $self->{children};
417
418	22	100				45	if ( $inx >= 0 ) {
419
420	20					51	my $loc = 0;
421
422	20					54	while ( exists $kids->[$loc] ) {
423	22	100				60	$kids->[$loc]->significant() or next;
424	21	100				49	--$inx >= 0 and next;
425	20					144	return $kids->[$loc];
426							} continue {
427	2					4	$loc++;
428							}
429
430							} else {
431
432	2					5	my $loc = -1;
433
434	2					9	while ( exists $kids->[$loc] ) {
435	5	100				34	$kids->[$loc]->significant() or next;
436	3	100				7	$inx++ < -1 and next;
437	2					17	return $kids->[$loc];
438							} continue {
439	3					6	--$loc;
440							}
441
442							}
443
444	0					0	return;
445							}
446
447							=head2 schildren
448
449							This method returns the significant children of the Node. If called in
450							scalar context it returns the number of significant children.
451
452							=cut
453
454							sub schildren {
455	4			4	1	9	my ( $self ) = @_;
456	4	50				10	if ( wantarray ) {
		0
457	4					5	return ( grep { $_->significant() } @{ $self->{children} } );
	13					26
	4					9
458							} elsif ( defined wantarray ) {
459	0					0	my $kids = 0;
460	0					0	foreach ( @{ $self->{children} } ) {
	0					0
461	0	0				0	$_->significant() and $kids++;
462							}
463	0					0	return $kids;
464							} else {
465	0					0	return;
466							}
467							}
468
469							sub scontent {
470	9			9	1	10	my ( $self ) = @_;
471							# As of the invention of this method all nodes are significant, so
472							# the following statement is pure paranoia on my part. -- TRW
473	9	50				22	$self->significant()
474							or return;
475							# This needs to be elements(), not children() or schildren() -- or
476							# selements() if that is ever invented. Not children() or
477							# schildren() because those ignore the delimiters. Not selements()
478							# (if that ever comes to pass) because scontent() has to make the
479							# significance check, so selements() would be wasted effort.
480	9					24	return join( '', map{ $_->scontent() } $self->elements() );
	75					136
481							}
482
483							sub tokens {
484	35			35	1	54	my ( $self ) = @_;
485	35					64	return ( map { $_->tokens() } $self->elements() );
	144					290
486							}
487
488							sub unescaped_content {
489	0			0	1	0	my ( $self ) = @_;
490	0					0	return join '', map { $_->unescaped_content() } $self->elements();
	0					0
491							}
492
493	9			9		83	use constant ALTERNATION => q<\|>;
	9					14
	9					6750
494
495							{
496							my $obj;
497							sub _alternation_object {
498	390	100		390		640	unless ( $obj ) {
499
500							=begin comment
501
502							# This is a pain because PPIx::Regexp::Token requires a
503							# tokenizer object.
504							require PPIx::Regexp::Tokenizer;
505							require PPIx::Regexp::Token::Operator;
506							$obj = PPIx::Regexp::Token::Operator->__new(
507							ALTERNATION,
508							tokenizer => PPIx::Regexp::Tokenizer->new( ALTERNATION ),
509							);
510
511							=end comment
512
513							=cut
514
515							# DANGER WILL ROBINSON!
516							# This is a horrible encapsulation violation, which I get
517							# away with because I am using the object as a sentinel.
518
519	2					7	$obj = bless {
520							content => ALTERNATION,
521							}, 'PPIx::Regexp::Token::Operator';
522							}
523	390					596	return $obj;
524							}
525							}
526
527							sub raw_width {
528	396			396	1	509	my ( $self ) = @_;
529	396					765	return ( $self->__raw_width() )[ 0, 1 ];
530							}
531
532							# PRIVATE TO THIS PACKAGE.
533							# This is the machinery for raw_width(), but because the datum is needed
534							# internally it also returns the number of alternatives found.
535							sub __raw_width {
536	390			390		501	my ( $self ) = @_;
537	390					553	my ( $node_min, $node_max ) = ( INFINITY, 0 );
538	390					480	my ( $raw_min, $raw_max ) = ( 0, 0 );
539	390					518	my $alternatives = 0;
540	390					582	foreach my $elem ( $self->elements(), _alternation_object() ) {
541	1926	100	66			5793	if ( $elem->isa( 'PPIx::Regexp::Token::Operator' ) &&
542							$elem->content() eq ALTERNATION
543							) {
544	428					428	$alternatives++;
545	428	100				889	defined $node_min
		100
546							and $node_min = defined $raw_min ?
547							min( $node_min, $raw_min ) :
548							undef;
549	428					403	$raw_min = 0;
550	428	100				716	defined $node_max
		100
551							and $node_max = defined $raw_max ?
552							max( $node_max, $raw_max ) :
553							undef;
554	428					698	$raw_max = 0;
555							} else {
556	1498					2601	my ( $e_min, $e_max ) = $elem->width();
557	1498	100				2484	defined $raw_min
		100
558							and $raw_min = defined $e_min ? $raw_min + $e_min : undef;
559	1498	100				2501	defined $raw_max
		100
560							and $raw_max = defined $e_max ? $raw_max + $e_max : undef;
561							}
562							}
563	390					931	return ( $node_min, $node_max, $alternatives );
564							}
565
566							# Help for nav();
567							sub __nav {
568	23			23		29	my ( $self, $child ) = @_;
569	23	50				40	refaddr( $child->parent() ) == refaddr( $self )
570							or return;
571	23	50				44	my ( $method, $inx ) = $child->__my_nav()
572							or return;
573
574	23					54	return ( $method => [ $inx ] );
575							}
576
577							sub __error {
578	3			3		9	my ( $self, $msg, %arg ) = @_;
579	3	50				9	defined $msg
580							or $msg = 'Was class ' . ref $self;
581	3					43	$self->ELEMENT_UNKNOWN()->__PPIX_ELEM__rebless( $self, error => $msg );
582	3					7	foreach my $key ( keys %arg ) {
583	2					5	$self->{$key} = $arg{$key};
584							}
585	3					6	return 1;
586							}
587
588							sub __perl_requirements {
589	23			23		31	my ( $self ) = @_;
590	23	100				42	unless ( $self->{perl_requirements} ) {
591	9					26	my @req = $self->__perl_requirements_setup();
592	9					15	foreach my $kid ( $self->children() ) {
593	19					69	push @req, $kid->__perl_requirements();
594							}
595	9					22	$self->{perl_requirements} = [ __merge_perl_requirements( @req ) ];
596							}
597	23					21	return @{ $self->{perl_requirements} };
	23					49
598							}
599
600							sub _token_order {
601	5			5		9	my ( $self ) = @_;
602	5					5	my $order = 0;
603	5					9	delete $self->{_token_order};
604	5					13	foreach my $elem ( $self->tokens() ) {
605	58					89	$self->{_token_order}{ refaddr $elem } = $order++;
606							}
607	5					10	return;
608							}
609
610							# Order two elements according to the position of their last tokens. The
611							# elements must both be descendants of the invocant or an exception is
612							# thrown. The return is equivalent to the space ship operator (<=>).
613							#
614							# For the moment at least this is private to the PPIx-Regexp package.
615							# It is needed by the width() functionality to (try to) determine which
616							# capture group a back reference refers to.
617							sub __token_post_order {
618	42			42		59	my ( $self, $left, $right ) = @_;
619							$self->{_token_order}
620	42	100				83	or $self->_token_order();
621	42					45	my @order;
622	42					55	foreach ( $left, $right ) {
623	84	50				112	ref $_
624							or confess 'Bug - Operand must be a PPIx::Regexp::Element';
625	84	50				143	defined( my $inx = $self->{_token_order}{ refaddr( $_->last_token() ) } )
626							or confess 'Bug - Operand not descendant of invocant';
627	84					109	push @order, $inx;
628							}
629	42					106	return $order[0] <=> $order[1];
630							}
631
632							# Called by the lexer once it has done its worst to all the tokens.
633							# Called as a method with the lexer as argument. The return is the
634							# number of parse failures discovered when finalizing.
635							sub __PPIX_LEXER__finalize {
636	265			265		423	my ( $self, $lexer ) = @_;
637	265					323	my $rslt = 0;
638	265					571	foreach my $elem ( $self->elements() ) {
639	1213					2078	$rslt += $elem->__PPIX_LEXER__finalize( $lexer );
640							}
641	265					449	return $rslt;
642							}
643
644							# Called by the lexer to record the capture number.
645							sub __PPIX_LEXER__record_capture_number {
646	520			520		796	my ( $self, $number ) = @_;
647	520					984	foreach my $kid ( $self->children() ) {
648	1288					2859	$number = $kid->__PPIX_LEXER__record_capture_number( $number );
649							}
650	520					1236	return $number;
651							}
652
653							1;
654
655							__END__