File Coverage

blib/lib/PPI/Lexer.pm
Criterion Covered Total %
statement 441 462 95.4
branch 260 296 87.8
condition 157 206 76.2
subroutine 30 30 100.0
pod 5 6 83.3
total 893 1000 89.3


line stmt bran cond sub pod time code
1             package PPI::Lexer;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Lexer - The PPI Lexer
8              
9             =head1 SYNOPSIS
10              
11             use PPI;
12            
13             # Create a new Lexer
14             my $Lexer = PPI::Lexer->new;
15            
16             # Build a PPI::Document object from a Token stream
17             my $Tokenizer = PPI::Tokenizer->load('My/Module.pm');
18             my $Document = $Lexer->lex_tokenizer($Tokenizer);
19            
20             # Build a PPI::Document object for some raw source
21             my $source = "print 'Hello World!'; kill(Humans->all);";
22             $Document = $Lexer->lex_source($source);
23            
24             # Build a PPI::Document object for a particular file name
25             $Document = $Lexer->lex_file('My/Module.pm');
26              
27             =head1 DESCRIPTION
28              
29             The is the L Lexer. In the larger scheme of things, its job is to take
30             token streams, in a variety of forms, and "lex" them into nested structures.
31              
32             Pretty much everything in this module happens behind the scenes at this
33             point. In fact, at the moment you don't really need to instantiate the lexer
34             at all, the three main methods will auto-instantiate themselves a
35             C object as needed.
36              
37             All methods do a one-shot "lex this and give me a L object".
38              
39             In fact, if you are reading this, what you B want to do is to
40             just "load a document", in which case you can do this in a much more
41             direct and concise manner with one of the following.
42              
43             use PPI;
44            
45             $Document = PPI::Document->load( $filename );
46             $Document = PPI::Document->new( $string );
47              
48             See L for more details.
49              
50             For more unusual tasks, by all means forge onwards.
51              
52             =head1 METHODS
53              
54             =cut
55              
56 68     68   1595 use strict;
  68         1350  
  68         2552  
57 68     68   756 use Scalar::Util ();
  68         325  
  68         1373  
58 68     68   231 use Params::Util qw{_STRING _INSTANCE};
  68         90  
  68         3131  
59 68     68   305 use PPI ();
  68         98  
  68         836  
60 68     68   199 use PPI::Exception ();
  68         110  
  68         1036  
61 68     68   246 use PPI::Singletons '%_PARENT';
  68         98  
  68         312114  
62              
63             our $VERSION = '1.291';
64              
65             our $errstr = "";
66              
67             # Keyword -> Structure class maps
68             my %ROUND = (
69             # Conditions
70             'if' => 'PPI::Structure::Condition',
71             'elsif' => 'PPI::Structure::Condition',
72             'unless' => 'PPI::Structure::Condition',
73             'while' => 'PPI::Structure::Condition',
74             'until' => 'PPI::Structure::Condition',
75              
76             # For(each)
77             'for' => 'PPI::Structure::For',
78             'foreach' => 'PPI::Structure::For',
79             );
80              
81             # Opening brace to refining method
82             my %RESOLVE = (
83             '(' => '_round',
84             '[' => '_square',
85             '{' => '_curly',
86             );
87              
88             # Allows for experimental overriding of the tokenizer
89             our $X_TOKENIZER = "PPI::Tokenizer";
90 16847     16847 0 64381 sub X_TOKENIZER { $X_TOKENIZER }
91              
92              
93              
94              
95              
96             #####################################################################
97             # Constructor
98              
99             =pod
100              
101             =head2 new
102              
103             The C constructor creates a new C object. The object itself
104             is merely used to hold various buffers and state data during the lexing
105             process, and holds no significant data between -Elex_xxxxx calls.
106              
107             Returns a new C object
108              
109             =cut
110              
111             sub new {
112 16847     16847 1 41999 my $class = shift->_clear;
113 16847         95555 bless {
114             Tokenizer => undef, # Where we store the tokenizer for a run
115             buffer => [], # The input token buffer
116             delayed => [], # The "delayed insignificant tokens" buffer
117             features_stack => [], # Stack of features in scope
118             }, $class;
119             }
120              
121              
122              
123              
124              
125             #####################################################################
126             # Main Lexing Methods
127              
128             =pod
129              
130             =head2 lex_file $filename
131              
132             The C method takes a filename as argument. It then loads the file,
133             creates a L for the content and lexes the token stream
134             produced by the tokenizer. Basically, a sort of all-in-one method for
135             getting a L object from a file name.
136              
137             Additional arguments are passed to the tokenizer as a hash.
138              
139             Returns a L object, or C on error.
140              
141             =cut
142              
143 521     521 1 2070 sub lex_file { shift->_lex_input(@_) }
144              
145             =pod
146              
147             =head2 lex_source $string
148              
149             The C method takes a normal scalar string as argument. It
150             creates a L object for the string, and then lexes the
151             resulting token stream.
152              
153             Additional arguments are passed to the tokenizer as a hash.
154              
155             Returns a L object, or C on error.
156              
157             =cut
158              
159 16326     16326 1 952092 sub lex_source { shift->_lex_input( \shift, @_ ) }
160              
161             sub _lex_input {
162 16847     16847   32758 my ( $self, $input, %args ) = @_;
163 16847 100       43774 $self = ref $self ? $self : $self->new;
164              
165             # Create the Tokenizer
166 16847         26511 my $Tokenizer = eval { X_TOKENIZER->new($input) };
  16847         33759  
167             return #
168 16847 50       133624 $@
    50          
    100          
169             ? $self->_error( _INSTANCE( $@, 'PPI::Exception' ) ? $@->message : $@ )
170             : !_INSTANCE( $Tokenizer, 'PPI::Tokenizer' )
171             ? $self->_error($Tokenizer)
172             : $self->lex_tokenizer( $Tokenizer, %args );
173             }
174              
175             =pod
176              
177             =head2 lex_tokenizer $Tokenizer
178              
179             The C takes as argument a L object. It
180             lexes the token stream from the tokenizer into a L object.
181              
182             Additional arguments are set on the L produced.
183              
184             Returns a L object, or C on error.
185              
186             =cut
187              
188             sub lex_tokenizer {
189 16844 50   16844 1 32497 my $self = ref $_[0] ? shift : shift->new;
190 16844         40489 my $Tokenizer = _INSTANCE(shift, 'PPI::Tokenizer');
191 16844 50       35276 return $self->_error(
192             "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer"
193             ) unless $Tokenizer;
194 16844         27511 my %args = @_;
195              
196             # Create the empty document
197 16844         43993 my $Document = PPI::Document->new;
198 16844         57291 ref($Document)->_setattr( $Document, %args );
199 16844         43599 $Tokenizer->_document($Document);
200 16844 100       34735 if (my $feat = $Document->feature_mods) {
201 18         33 push @{$self->{features_stack}}, $feat;
  18         59  
202 18         68 $Tokenizer->_features($feat);
203             }
204              
205             # Lex the token stream into the document
206 16844         25838 $self->{Tokenizer} = $Tokenizer;
207 16844 100       20528 if ( !eval { $self->_lex_document($Document); 1 } ) {
  16844         34902  
  16843         28956  
208             # If an error occurs DESTROY the partially built document.
209 1         3 $Tokenizer->_document(undef);
210 1         3 undef $Document;
211 1 50       5 if ( _INSTANCE($@, 'PPI::Exception') ) {
212 1         3 return $self->_error( $@->message );
213             } else {
214 0         0 return $self->_error( $errstr );
215             }
216             }
217              
218 16843         149664 return $Document;
219             }
220              
221              
222              
223              
224              
225             #####################################################################
226             # Lex Methods - Document Object
227              
228             sub _lex_document {
229 16844     16844   25982 my ($self, $Document) = @_;
230             # my $self = shift;
231             # my $Document = _INSTANCE(shift, 'PPI::Document') or return undef;
232              
233             # Start the processing loop
234 16844         19554 my $Token;
235 16844         39854 while ( ref($Token = $self->_get_token) ) {
236             # Add insignificant tokens directly beneath us
237 54110 100       109079 unless ( $Token->significant ) {
238 21244         39786 $self->_add_element( $Document, $Token );
239 21244         30312 next;
240             }
241              
242 32866 100       61468 if ( $Token->content eq ';' ) {
243             # It's a semi-colon on its own.
244             # We call this a null statement.
245 470         1571 $self->_add_element(
246             $Document,
247             PPI::Statement::Null->new($Token),
248             );
249 470         915 next;
250             }
251              
252             # Handle anything other than a structural element
253 32396 100       59382 unless ( ref $Token eq 'PPI::Token::Structure' ) {
254             # Determine the class for the Statement, and create it
255 29270         65167 my $Statement = $self->_statement($Document, $Token)->new($Token);
256              
257             # Move the lexing down into the statement
258 29270         69133 $self->_add_delayed( $Document );
259 29270         59566 $self->_add_element( $Document, $Statement );
260 29270         63616 $self->_lex_statement( $Statement );
261              
262 29270         59018 next;
263             }
264              
265             # Is this the opening of a structure?
266 3126 100       5343 if ( $Token->__LEXER__opens ) {
267             # This should actually have a Statement instead
268 977         2327 $self->_rollback( $Token );
269 977         3216 my $Statement = PPI::Statement->new;
270 977         2187 $self->_add_element( $Document, $Statement );
271 977         2218 $self->_lex_statement( $Statement );
272 977         1663 next;
273             }
274              
275             # Is this the close of a structure.
276 2149 50       3221 if ( $Token->__LEXER__closes ) {
277             # Because we are at the top of the tree, this is an error.
278             # This means either a mis-parsing, or a mistake in the code.
279             # To handle this, we create a "Naked Close" statement
280 2149         5846 $self->_add_element( $Document,
281             PPI::Statement::UnmatchedBrace->new($Token)
282             );
283 2149         3720 next;
284             }
285              
286             # Shouldn't be able to get here
287 0         0 PPI::Exception->throw('Lexer reached an illegal state');
288             }
289              
290             # Did we leave the main loop because of a Tokenizer error?
291 16843 50       26908 unless ( defined $Token ) {
292 0 0       0 my $errstr = $self->{Tokenizer} ? $self->{Tokenizer}->errstr : '';
293 0   0     0 $errstr ||= 'Unknown Tokenizer Error';
294 0         0 PPI::Exception->throw($errstr);
295             }
296              
297             # No error, it's just the end of file.
298             # Add any insignificant trailing tokens.
299 16843         31400 $self->_add_delayed( $Document );
300              
301             # If the Tokenizer has any v6 blocks to attach, do so now.
302             # Checking once at the end is faster than adding a special
303             # case check for every statement parsed.
304 16843         24730 my $perl6 = $self->{Tokenizer}->{'perl6'};
305 16843 100       27545 if ( @$perl6 ) {
306 2         9 my $includes = $Document->find( 'PPI::Statement::Include::Perl6' );
307 2         5 foreach my $include ( @$includes ) {
308 2 50       4 unless ( @$perl6 ) {
309 0         0 PPI::Exception->throw('Failed to find a perl6 section');
310             }
311 2         6 $include->{perl6} = shift @$perl6;
312             }
313             }
314              
315 16843         23180 return 1;
316             }
317              
318              
319              
320              
321              
322             #####################################################################
323             # Lex Methods - Statement Object
324              
325             # Keyword -> Statement Subclass
326             my %STATEMENT_CLASSES = (
327             # Things that affect the timing of execution
328             'BEGIN' => 'PPI::Statement::Scheduled',
329             'CHECK' => 'PPI::Statement::Scheduled',
330             'UNITCHECK' => 'PPI::Statement::Scheduled',
331             'INIT' => 'PPI::Statement::Scheduled',
332             'END' => 'PPI::Statement::Scheduled',
333              
334             # Special subroutines for which 'sub' is optional
335             'AUTOLOAD' => 'PPI::Statement::Sub',
336             'DESTROY' => 'PPI::Statement::Sub',
337              
338             # Loading and context statement
339             'package' => 'PPI::Statement::Package',
340             # 'use' => 'PPI::Statement::Include',
341             'no' => 'PPI::Statement::Include',
342             'require' => 'PPI::Statement::Include',
343              
344             # Various declarations
345             'my' => 'PPI::Statement::Variable',
346             'local' => 'PPI::Statement::Variable',
347             'our' => 'PPI::Statement::Variable',
348             'state' => 'PPI::Statement::Variable',
349             # Statements starting with 'sub' could be any one of...
350             # 'sub' => 'PPI::Statement::Sub',
351             # 'sub' => 'PPI::Statement::Scheduled',
352             # 'sub' => 'PPI::Statement',
353              
354             # Compound statement
355             'if' => 'PPI::Statement::Compound',
356             'unless' => 'PPI::Statement::Compound',
357             'for' => 'PPI::Statement::Compound',
358             'foreach' => 'PPI::Statement::Compound',
359             'while' => 'PPI::Statement::Compound',
360             'until' => 'PPI::Statement::Compound',
361              
362             # Switch statement
363             'given' => 'PPI::Statement::Given',
364             'when' => 'PPI::Statement::When',
365             'default' => 'PPI::Statement::When',
366              
367             # Various ways of breaking out of scope
368             'redo' => 'PPI::Statement::Break',
369             'next' => 'PPI::Statement::Break',
370             'last' => 'PPI::Statement::Break',
371             'return' => 'PPI::Statement::Break',
372             'goto' => 'PPI::Statement::Break',
373              
374             # Special sections of the file
375             '__DATA__' => 'PPI::Statement::Data',
376             '__END__' => 'PPI::Statement::End',
377             );
378              
379             sub _statement {
380 66768     66768   96538 my ($self, $Parent, $Token) = @_;
381             # my $self = shift;
382             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
383             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
384              
385             # Check for things like ( parent => ... )
386 66768 100 100     343892 if (
387             $Parent->isa('PPI::Structure::List')
388             or
389             $Parent->isa('PPI::Structure::Constructor')
390             ) {
391 11846 100       32298 if ( $Token->isa('PPI::Token::Word') ) {
392             # Is the next significant token a =>
393             # Read ahead to the next significant token
394 2018         2253 my $Next;
395 2018         3196 while ( $Next = $self->_get_token ) {
396 2854 100       5787 unless ( $Next->significant ) {
397 883         1096 push @{$self->{delayed}}, $Next;
  883         1290  
398             # $self->_delay_element( $Next );
399 883         1375 next;
400             }
401              
402             # Got the next token
403 1971 100 100     6690 if (
404             $Next->isa('PPI::Token::Operator')
405             and
406             $Next->content eq '=>'
407             ) {
408             # Is an ordinary expression
409 947         1830 $self->_rollback( $Next );
410 947         3235 return 'PPI::Statement::Expression';
411             } else {
412 1024         1288 last;
413             }
414             }
415              
416             # Rollback and continue
417 1071         2080 $self->_rollback( $Next );
418             }
419             }
420              
421 65821         75395 my $is_lexsub = 0;
422              
423             # Is it a token in our known classes list
424 65821         107246 my $content = $Token->content;
425             my $class =
426             ( $content eq 'try' and ( $self->{features_stack}[-1] || {} )->{try} )
427             ? 'PPI::Statement::Compound'
428 65821 100 100     166994 : $STATEMENT_CLASSES{$content};
429              
430 65821 100       101375 if ( $class ) {
431             # Is the next significant token a =>
432             # Read ahead to the next significant token
433 9918         10914 my $Next;
434 9918         16728 while ( $Next = $self->_get_token ) {
435 19467 100       37022 if ( !$Next->significant ) {
436 9596         10237 push @{$self->{delayed}}, $Next;
  9596         15755  
437 9596         15557 next;
438             }
439              
440             # Scheduled block must be followed by left curly or
441             # semicolon. Otherwise we have something else (e.g.
442             # open( CHECK, ... );
443 9871 100 66     19904 if (
      100        
444             'PPI::Statement::Scheduled' eq $class
445             and not ( $Next->isa( 'PPI::Token::Structure' )
446             and $Next->content =~ m/\A[{;]\z/ ) # }
447             ) {
448 1         1 $class = undef;
449 1         1 last;
450             }
451              
452             # Lexical subroutine
453 9870 100 100     44575 if (
      66        
454             $content =~ /^(?:my|our|state)\z/
455             and $Next->isa( 'PPI::Token::Word' ) and $Next->content eq 'sub'
456             ) {
457             # This should be PPI::Statement::Sub rather than PPI::Statement::Variable
458 7         10 $class = undef;
459 7         8 $is_lexsub = 1;
460 7         7 last;
461             }
462              
463             last if
464 9863 100 100     37510 !$Next->isa( 'PPI::Token::Operator' ) or $Next->content ne '=>';
465              
466             # Got the next token
467             # Is an ordinary expression
468 21         59 $self->_rollback( $Next );
469 21         89 return 'PPI::Statement';
470             }
471              
472             # Rollback and continue
473 9897         19493 $self->_rollback( $Next );
474             }
475              
476             # Handle potential barewords for subscripts
477 65800 100       171840 if ( $Parent->isa('PPI::Structure::Subscript') ) {
478             # Fast obvious case, just an expression
479 7579 100 100     15027 unless ( $class and $class->isa('PPI::Statement::Expression') ) {
480 7456         20163 return 'PPI::Statement::Expression';
481             }
482              
483             # This is something like "my" or "our" etc... more subtle.
484             # Check if the next token is a closing curly brace.
485             # This means we are something like $h{my}
486 123         141 my $Next;
487 123         198 while ( $Next = $self->_get_token ) {
488 119 50       259 unless ( $Next->significant ) {
489 0         0 push @{$self->{delayed}}, $Next;
  0         0  
490             # $self->_delay_element( $Next );
491 0         0 next;
492             }
493              
494             # Found the next significant token.
495             # Is it a closing curly brace?
496 119 50       190 if ( $Next->content eq '}' ) {
497 119         195 $self->_rollback( $Next );
498 119         387 return 'PPI::Statement::Expression';
499             } else {
500 0         0 $self->_rollback( $Next );
501 0         0 return $class;
502             }
503             }
504              
505             # End of file... this means it is something like $h{our
506             # which is probably going to be $h{our} ... I think
507 4         10 $self->_rollback( $Next );
508 4         12 return 'PPI::Statement::Expression';
509             }
510              
511             # If it's a token in our list, use that class
512 58221 100       123890 return $class if $class;
513              
514             # Handle the more in-depth sub detection
515 48485 100 100     133361 if ( $is_lexsub || $content eq 'sub' ) {
516             # Read ahead to the next significant token
517 3416         3838 my $Next;
518 3416         6104 while ( $Next = $self->_get_token ) {
519 6769 100       13714 unless ( $Next->significant ) {
520 3377         3959 push @{$self->{delayed}}, $Next;
  3377         6313  
521             # $self->_delay_element( $Next );
522 3377         5612 next;
523             }
524              
525             # Got the next significant token
526 3392         6260 my $sclass = $STATEMENT_CLASSES{$Next->content};
527 3392 100 100     7844 if ( $sclass and $sclass eq 'PPI::Statement::Scheduled' ) {
528 28         74 $self->_rollback( $Next );
529 28         143 return 'PPI::Statement::Scheduled';
530             }
531 3364 100       8945 if ( $Next->isa('PPI::Token::Word') ) {
532 3255         7471 $self->_rollback( $Next );
533 3255         14720 return 'PPI::Statement::Sub';
534             }
535              
536             ### Comment out these two, as they would return PPI::Statement anyway
537             # if ( $content eq '{' ) {
538             # Anonymous sub at start of statement
539             # return 'PPI::Statement';
540             # }
541             #
542             # if ( $Next->isa('PPI::Token::Prototype') ) {
543             # Anonymous sub at start of statement
544             # return 'PPI::Statement';
545             # }
546              
547             # PPI::Statement is the safest fall-through
548 109         267 $self->_rollback( $Next );
549 109         457 return 'PPI::Statement';
550             }
551              
552             # End of file... PPI::Statement::Sub is the most likely
553 24         58 $self->_rollback( $Next );
554 24         124 return 'PPI::Statement::Sub';
555             }
556              
557 45069 100       67203 if ( $content eq 'use' ) {
558             # Add a special case for "use v6" lines.
559 2293         2398 my $Next;
560 2293         4539 while ( $Next = $self->_get_token ) {
561 4581 100       8844 unless ( $Next->significant ) {
562 2290         2680 push @{$self->{delayed}}, $Next;
  2290         4323  
563             # $self->_delay_element( $Next );
564 2290         4027 next;
565             }
566              
567             # Found the next significant token.
568 2291 100 66     10814 if (
    100          
569             $Next->isa('PPI::Token::Operator')
570             and
571             $Next->content eq '=>'
572             ) {
573             # Is an ordinary expression
574 1         4 $self->_rollback( $Next );
575 1         5 return 'PPI::Statement';
576             # Is it a v6 use?
577             } elsif ( $Next->content eq 'v6' ) {
578 2         8 $self->_rollback( $Next );
579 2         25 return 'PPI::Statement::Include::Perl6';
580             } else {
581 2288         6099 $self->_rollback( $Next );
582 2288         10915 return 'PPI::Statement::Include';
583             }
584             }
585              
586             # End of file... this means it is an incomplete use
587             # line, just treat it as a normal include.
588 2         6 $self->_rollback( $Next );
589 2         16 return 'PPI::Statement::Include';
590             }
591              
592             # If our parent is a Condition, we are an Expression
593 42776 100       96425 if ( $Parent->isa('PPI::Structure::Condition') ) {
594 1244         4050 return 'PPI::Statement::Expression';
595             }
596              
597             # If our parent is a List, we are also an expression
598 41532 100       84683 if ( $Parent->isa('PPI::Structure::List') ) {
599 8933         27919 return 'PPI::Statement::Expression';
600             }
601              
602             # Switch statements use expressions, as well.
603 32599 100 100     145351 if (
604             $Parent->isa('PPI::Structure::Given')
605             or
606             $Parent->isa('PPI::Structure::When')
607             ) {
608 6         44 return 'PPI::Statement::Expression';
609             }
610              
611 32593 100       166579 if ( _INSTANCE($Token, 'PPI::Token::Label') ) {
612 392         1864 return 'PPI::Statement::Compound';
613             }
614              
615             # Beyond that, I have no idea for the moment.
616             # Just keep adding more conditions above this.
617 32201         121659 return 'PPI::Statement';
618             }
619              
620             sub _update_features {
621 68218     68218   88155 my ( $self, $statement ) = @_;
622              
623 68218 100       121401 return if ref $statement ne 'PPI::Statement::Include';
624             return unless #
625 3326 100       11848 my $new_features = $statement->feature_mods;
626 18         33 push @{ $self->{features_stack} }, {}
627 19 100       31 if not @{ $self->{features_stack} };
  19         52  
628 19         33 my $current_features = $self->{features_stack}[-1];
629             $self->{Tokenizer}->_features #
630             ( $self->{features_stack}[-1] =
631 19         30 { %{$current_features}, %{$new_features} } );
  19         38  
  19         83  
632             }
633              
634             sub _lex_statement {
635 68226     68226   85757 my ($self, $Statement) = @_;
636             # my $self = shift;
637             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
638              
639             # Handle some special statements
640 68226 100       195998 if ( $Statement->isa('PPI::Statement::End') ) {
641 8         23 return $self->_lex_end( $Statement );
642             }
643              
644             # Begin processing tokens
645 68218         73074 my $Token;
646 68218         99348 while ( ref( $Token = $self->_get_token ) ) {
647             # Delay whitespace and comment tokens
648 448403 100       755039 unless ( $Token->significant ) {
649 147601         141676 push @{$self->{delayed}}, $Token;
  147601         192110  
650             # $self->_delay_element( $Token );
651 147601         195527 next;
652             }
653              
654             # Structual closes, and __DATA__ and __END__ tags implicitly
655             # end every type of statement
656 300802 100 66     446549 if (
657             $Token->__LEXER__closes
658             or
659             $Token->isa('PPI::Token::Separator')
660             ) {
661             # Rollback and end the statement
662 25355         54035 $self->_update_features( $Statement );
663 25355         43686 return $self->_rollback( $Token );
664             }
665              
666             # Normal statements never implicitly end
667 275447 100       479941 unless ( $Statement->__LEXER__normal ) {
668             # Have we hit an implicit end to the statement
669 25228 100       47886 unless ( $self->_continues( $Statement, $Token ) ) {
670             # Rollback and finish the statement
671 4549         11277 $self->_update_features( $Statement );
672 4549         8712 return $self->_rollback( $Token );
673             }
674             }
675              
676             # Any normal character just gets added
677 270898 100       521777 unless ( $Token->isa('PPI::Token::Structure') ) {
678 213865         347853 $self->_add_element( $Statement, $Token );
679 213865         297006 next;
680             }
681              
682             # Handle normal statement terminators
683 57033 100       90024 if ( $Token->content eq ';' ) {
684 26836         50055 $self->_add_element( $Statement, $Token );
685 26836         55354 $self->_update_features( $Statement );
686 26836         36661 return 1;
687             }
688              
689             # Which leaves us with a new structure
690              
691             # Determine the class for the structure and create it
692 30197         49861 my $method = $RESOLVE{$Token->content};
693 30197         78031 my $Structure = $self->$method($Statement)->new($Token);
694              
695             # Move the lexing down into the Structure
696 30197         69239 $self->_add_delayed( $Statement );
697 30197         60999 $self->_add_element( $Statement, $Structure );
698 30197         56474 $self->_lex_structure( $Structure );
699             }
700              
701             # Was it an error in the tokenizer?
702 11478 50       17298 unless ( defined $Token ) {
703 0         0 PPI::Exception->throw;
704             }
705              
706             # No, it's just the end of the file...
707             # Roll back any insignificant tokens, they'll get added at the Document level
708 11478         21470 $self->_update_features( $Statement );
709 11478         16640 $self->_rollback;
710             }
711              
712             sub _lex_end {
713 8     8   16 my ($self, $Statement) = @_;
714             # my $self = shift;
715             # my $Statement = _INSTANCE(shift, 'PPI::Statement::End') or die "Bad param 1";
716              
717             # End of the file, EVERYTHING is ours
718 8         11 my $Token;
719 8         15 while ( $Token = $self->_get_token ) {
720             # Inlined $Statement->__add_element($Token);
721             Scalar::Util::weaken(
722 15         35 $_PARENT{Scalar::Util::refaddr $Token} = $Statement
723             );
724 15         14 push @{$Statement->{children}}, $Token;
  15         26  
725             }
726              
727             # Was it an error in the tokenizer?
728 8 50       37 unless ( defined $Token ) {
729 0         0 PPI::Exception->throw;
730             }
731              
732             # No, it's just the end of the file...
733             # Roll back any insignificant tokens, they get added at the Document level
734 8         18 $self->_rollback;
735             }
736              
737             # For many statements, it can be difficult to determine the end-point.
738             # This method takes a statement and the next significant token, and attempts
739             # to determine if the there is a statement boundary between the two, or if
740             # the statement can continue with the token.
741             sub _continues {
742 25228     25228   33812 my ($self, $Statement, $Token) = @_;
743             # my $self = shift;
744             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
745             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
746              
747             # Handle the simple block case
748             # { print 1; }
749 25228 100 100     53753 if (
750             $Statement->schildren == 1
751             and
752             $Statement->schild(0)->isa('PPI::Structure::Block')
753             ) {
754 50         144 return '';
755             }
756              
757             # Alrighty then, there are six implied-end statement types:
758             # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, ::When,
759             # and ::Package statements.
760 25178 50       49193 return 1
761             if ref $Statement !~ /\b(?:Scheduled|Sub|Compound|Given|When|Package)$/;
762              
763             # Of these six, ::Scheduled, ::Sub, ::Given, and ::When follow the same
764             # simple rule and can be handled first. The block form of ::Package
765             # follows the rule, too. (The non-block form of ::Package
766             # requires a statement terminator, and thus doesn't need to have
767             # an implied end detected.)
768 25178         45904 my @part = $Statement->schildren;
769 25178         30081 my $LastChild = $part[-1];
770             # If the last significant element of the statement is a block,
771             # then an implied-end statement is done, no questions asked.
772 25178 100       111575 return !$LastChild->isa('PPI::Structure::Block')
773             if !$Statement->isa('PPI::Statement::Compound');
774              
775             # Now we get to compound statements, which kind of suck (to lex).
776             # However, of them all, the 'if' type, which includes unless, are
777             # relatively easy to handle compared to the others.
778 5574         14451 my $type = $Statement->type;
779 5574 100       9303 if ( $type eq 'if' ) {
780             # This should be one of the following
781             # if (EXPR) BLOCK
782             # if (EXPR) BLOCK else BLOCK
783             # if (EXPR) BLOCK elsif (EXPR) BLOCK ... else BLOCK
784              
785             # We only implicitly end on a block
786 3444 100       8431 unless ( $LastChild->isa('PPI::Structure::Block') ) {
787             # if (EXPR) ...
788             # if (EXPR) BLOCK else ...
789             # if (EXPR) BLOCK elsif (EXPR) BLOCK ...
790 2380         5492 return 1;
791             }
792              
793             # If the token before the block is an 'else',
794             # it's over, no matter what.
795 1064         2239 my $NextLast = $Statement->schild(-2);
796 1064 50 66     7192 if (
      66        
      66        
797             $NextLast
798             and
799             $NextLast->isa('PPI::Token')
800             and
801             $NextLast->isa('PPI::Token::Word')
802             and
803             $NextLast->content eq 'else'
804             ) {
805 73         263 return '';
806             }
807              
808             # Otherwise, we continue for 'elsif' or 'else' only.
809 991 100 100     3546 if (
      100        
810             $Token->isa('PPI::Token::Word')
811             and (
812             $Token->content eq 'else'
813             or
814             $Token->content eq 'elsif'
815             )
816             ) {
817 303         1022 return 1;
818             }
819              
820 688         1942 return '';
821             }
822              
823 2130 100       3667 if ( $type eq 'label' ) {
824             # We only have the label so far, could be any of
825             # LABEL while (EXPR) BLOCK
826             # LABEL while (EXPR) BLOCK continue BLOCK
827             # LABEL for (EXPR; EXPR; EXPR) BLOCK
828             # LABEL foreach VAR (LIST) BLOCK
829             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
830             # LABEL BLOCK continue BLOCK
831              
832             # Handle cases with a word after the label
833 370 100 100     1639 if (
834             $Token->isa('PPI::Token::Word')
835             and
836             $Token->content =~ /^(?:while|until|for|foreach)$/
837             ) {
838 38         90 return 1;
839             }
840              
841             # Handle labelled blocks
842 332 100 100     1210 if ( $Token->isa('PPI::Token::Structure') && $Token->content eq '{' ) {
843 243         639 return 1;
844             }
845              
846 89         226 return '';
847             }
848              
849             # Handle the common "after round braces" case
850 1760 100 100     6550 if ( $LastChild->isa('PPI::Structure') and $LastChild->braces eq '()' ) {
851             # LABEL while (EXPR) ...
852             # LABEL while (EXPR) ...
853             # LABEL for (EXPR; EXPR; EXPR) ...
854             # LABEL for VAR (LIST) ...
855             # LABEL foreach VAR (LIST) ...
856             # Only a block will do
857 384   33     1457 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
858             }
859              
860 1376 100       2402 if ( $type eq 'for' ) {
861             # LABEL for (EXPR; EXPR; EXPR) BLOCK
862 143 100 66     507 if (
    50          
    0          
863             $LastChild->isa('PPI::Token::Word')
864             and
865             $LastChild->content =~ /^for(?:each)?\z/
866             ) {
867             # LABEL for ...
868 130 100 66     840 if (
      100        
869             (
870             $Token->isa('PPI::Token::Structure')
871             and
872             $Token->content eq '('
873             )
874             or
875             $Token->isa('PPI::Token::QuoteLike::Words')
876             ) {
877 21         63 return 1;
878             }
879              
880 109 50       341 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
881             # LABEL for VAR QW{} ...
882             # LABEL foreach VAR QW{} ...
883             # Only a block will do
884 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
885             }
886              
887             # In this case, we can also behave like a foreach
888 109         143 $type = 'foreach';
889              
890             } elsif ( $LastChild->isa('PPI::Structure::Block') ) {
891             # LABEL for (EXPR; EXPR; EXPR) BLOCK
892             # That's it, nothing can continue
893 13         51 return '';
894              
895             } elsif ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
896             # LABEL for VAR QW{} ...
897             # LABEL foreach VAR QW{} ...
898             # Only a block will do
899 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
900             }
901             }
902              
903             # Handle the common continue case
904 1342 100 100     4708 if ( $LastChild->isa('PPI::Token::Word') and $LastChild->content eq 'continue' ) {
905             # LABEL while (EXPR) BLOCK continue ...
906             # LABEL foreach VAR (LIST) BLOCK continue ...
907             # LABEL BLOCK continue ...
908             # Only a block will do
909 6   33     31 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
910             }
911              
912 1336 50 66     2480 if ( $type eq 'try' and $LastChild->presumed_features->{try} ) {
913 6 100       23 return 1 if not $LastChild->isa('PPI::Structure::Block');
914              
915 2         5 my $NextLast = $Statement->schild(-2);
916 2 50 33     29 return ''
      33        
      33        
917             if $NextLast
918             and $NextLast->isa('PPI::Token')
919             and $NextLast->isa('PPI::Token::Word')
920             and $NextLast->content eq 'catch';
921              
922 2 50 33     7 return 1 #
923             if $Token->isa('PPI::Token::Word') and $Token->content eq 'catch';
924              
925 0         0 return '';
926             }
927              
928             # Handle the common continuable block case
929 1330 100       3299 if ( $LastChild->isa('PPI::Structure::Block') ) {
930             # LABEL while (EXPR) BLOCK
931             # LABEL while (EXPR) BLOCK ...
932             # LABEL for (EXPR; EXPR; EXPR) BLOCK
933             # LABEL foreach VAR (LIST) BLOCK
934             # LABEL foreach VAR (LIST) BLOCK ...
935             # LABEL BLOCK ...
936             # Is this the block for a continue?
937 465 100 66     3079 if ( _INSTANCE($part[-2], 'PPI::Token::Word') and $part[-2]->content eq 'continue' ) {
938             # LABEL while (EXPR) BLOCK continue BLOCK
939             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
940             # LABEL BLOCK continue BLOCK
941             # That's it, nothing can continue this
942 6         17 return '';
943             }
944              
945             # Only a continue will do
946 459   100     2252 return $Token->isa('PPI::Token::Word') && $Token->content eq 'continue';
947             }
948              
949 865 50       1368 if ( $type eq 'block' ) {
950             # LABEL BLOCK continue BLOCK
951             # Every possible case is covered in the common cases above
952             }
953              
954 865 100       1337 if ( $type eq 'while' ) {
955             # LABEL while (EXPR) BLOCK
956             # LABEL while (EXPR) BLOCK continue BLOCK
957             # LABEL until (EXPR) BLOCK
958             # LABEL until (EXPR) BLOCK continue BLOCK
959             # The only case not covered is the while ...
960 157 50 66     710 if (
      66        
961             $LastChild->isa('PPI::Token::Word')
962             and (
963             $LastChild->content eq 'while'
964             or
965             $LastChild->content eq 'until'
966             )
967             ) {
968             # LABEL while ...
969             # LABEL until ...
970             # Only a condition structure will do
971 157   33     545 return $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
972             }
973             }
974              
975 708 50       1170 if ( $type eq 'foreach' ) {
976             # LABEL foreach VAR (LIST) BLOCK
977             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
978             # The only two cases that have not been covered already are
979             # 'foreach ...' and 'foreach VAR ...'
980              
981 708 100       1650 if ( $LastChild->isa('PPI::Token::Symbol') ) {
982             # LABEL foreach my $scalar ...
983             # Open round brace, or a quotewords
984 210 100 66     784 return 1 if $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
985 16 50       53 return 1 if $Token->isa('PPI::Token::QuoteLike::Words');
986 0         0 return '';
987             }
988              
989 498 100 100     739 if ( $LastChild->content eq 'foreach' or $LastChild->content eq 'for' ) {
990             # There are three possibilities here
991 281 100 100     1062 if (
    100 100        
    100 66        
    100          
992             $Token->isa('PPI::Token::Word')
993             and (
994             ($STATEMENT_CLASSES{ $Token->content } || '')
995             eq
996             'PPI::Statement::Variable'
997             )
998             ) {
999             # VAR == 'my ...'
1000 196         563 return 1;
1001             } elsif ( $Token->content =~ /^\$/ ) {
1002             # VAR == '$scalar'
1003 34         98 return 1;
1004             } elsif ( $Token->isa('PPI::Token::Structure') and $Token->content eq '(' ) {
1005 42         176 return 1;
1006             } elsif ( $Token->isa('PPI::Token::QuoteLike::Words') ) {
1007 6         18 return 1;
1008             } else {
1009 3         10 return '';
1010             }
1011             }
1012              
1013 217 100 100     461 if (
1014             ($STATEMENT_CLASSES{ $LastChild->content } || '')
1015             eq
1016             'PPI::Statement::Variable'
1017             ) {
1018             # LABEL foreach my ...
1019             # Only a scalar will do
1020 192         335 return $Token->content =~ /^\$/;
1021             }
1022              
1023             # Handle the rare for my $foo qw{bar} ... case
1024 25 50       55 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
1025             # LABEL for VAR QW ...
1026             # LABEL foreach VAR QW ...
1027             # Only a block will do
1028 25   33     88 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
1029             }
1030             }
1031              
1032             # Something we don't know about... what could it be
1033 0         0 PPI::Exception->throw("Illegal state in '$type' compound statement");
1034             }
1035              
1036              
1037              
1038              
1039              
1040             #####################################################################
1041             # Lex Methods - Structure Object
1042              
1043             # Given a parent element, and a ( token to open a structure, determine
1044             # the class that the structure should be.
1045             sub _round {
1046 11886     11886   16638 my ($self, $Parent) = @_;
1047             # my $self = shift;
1048             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1049              
1050             # Get the last significant element in the parent
1051 11886         28040 my $Element = $Parent->schild(-1);
1052 11886 100       50407 if ( _INSTANCE($Element, 'PPI::Token::Word') ) {
1053             # Can it be determined because it is a keyword?
1054 10247         17401 my $rclass = $ROUND{$Element->content};
1055 10247 100       21797 return $rclass if $rclass;
1056             }
1057              
1058             # If we are part of a for or foreach statement, we are a ForLoop
1059 10543 100       79068 if ( $Parent->isa('PPI::Statement::Compound') ) {
    100          
    100          
    100          
1060 195 100       673 if ( $Parent->type =~ /^for(?:each)?$/ ) {
1061 194         807 return 'PPI::Structure::For';
1062             }
1063             } elsif ( $Parent->isa('PPI::Statement::Given') ) {
1064 3         53 return 'PPI::Structure::Given';
1065             } elsif ( $Parent->isa('PPI::Statement::When') ) {
1066 3         39 return 'PPI::Structure::When';
1067             } elsif ( $Parent->isa('PPI::Statement::Sub') ) {
1068 156         636 return 'PPI::Structure::Signature';
1069             }
1070              
1071             # Otherwise, it must be a list
1072              
1073             # If the previous element is -> then we mark it as a dereference
1074 10187 100 100     38261 if ( _INSTANCE($Element, 'PPI::Token::Operator') and $Element->content eq '->' ) {
1075 10         24 $Element->{_dereference} = 1;
1076             }
1077              
1078             'PPI::Structure::List'
1079 10187         31390 }
1080              
1081             # Given a parent element, and a [ token to open a structure, determine
1082             # the class that the structure should be.
1083             sub _square {
1084 6795     6795   10833 my ($self, $Parent) = @_;
1085             # my $self = shift;
1086             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1087              
1088             # Get the last significant element in the parent
1089 6795         15795 my $Element = $Parent->schild(-1);
1090              
1091             # Is this a subscript, like $foo[1] or $foo{expr}
1092            
1093 6795 100       14168 if ( $Element ) {
1094 6536 100 100     20415 if ( $Element->isa('PPI::Token::Operator') and $Element->content eq '->' ) {
1095             # $foo->[]
1096 4060         7437 $Element->{_dereference} = 1;
1097 4060         11609 return 'PPI::Structure::Subscript';
1098             }
1099 2476 100       6686 if ( $Element->isa('PPI::Structure::Subscript') ) {
1100             # $foo{}[]
1101 15         40 return 'PPI::Structure::Subscript';
1102             }
1103 2461 100 100     8044 if ( $Element->isa('PPI::Token::Symbol') and $Element->content =~ /^(?:\$|\@)/ ) {
1104             # $foo[], @foo[]
1105 737         2398 return 'PPI::Structure::Subscript';
1106             }
1107 1724 100 100     5747 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%)/ ) {
1108 49         88 my $prior = $Parent->schild(-2);
1109 49 100 100     238 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1110             # Postfix dereference: ->@[...] ->%[...]
1111 2         8 return 'PPI::Structure::Subscript';
1112             }
1113             }
1114             # FIXME - More cases to catch
1115             }
1116              
1117             # Otherwise, we assume that it's an anonymous arrayref constructor
1118 1981         6230 'PPI::Structure::Constructor';
1119             }
1120              
1121             # Keyword -> Structure class maps
1122             my %CURLY_CLASSES = (
1123             # Blocks
1124             'sub' => 'PPI::Structure::Block',
1125             'grep' => 'PPI::Structure::Block',
1126             'map' => 'PPI::Structure::Block',
1127             'sort' => 'PPI::Structure::Block',
1128             'do' => 'PPI::Structure::Block',
1129             # rely on 'continue' + block being handled elsewhere
1130             # rely on 'eval' + block being handled elsewhere
1131              
1132             # Hash constructors
1133             'scalar' => 'PPI::Structure::Constructor',
1134             '=' => 'PPI::Structure::Constructor',
1135             '||=' => 'PPI::Structure::Constructor',
1136             '&&=' => 'PPI::Structure::Constructor',
1137             '//=' => 'PPI::Structure::Constructor',
1138             '||' => 'PPI::Structure::Constructor',
1139             '&&' => 'PPI::Structure::Constructor',
1140             '//' => 'PPI::Structure::Constructor',
1141             '?' => 'PPI::Structure::Constructor',
1142             ':' => 'PPI::Structure::Constructor',
1143             ',' => 'PPI::Structure::Constructor',
1144             '=>' => 'PPI::Structure::Constructor',
1145             '+' => 'PPI::Structure::Constructor', # per perlref
1146             'return' => 'PPI::Structure::Constructor', # per perlref
1147             'bless' => 'PPI::Structure::Constructor', # pragmatic --
1148             # perlfunc says first arg is a reference, and
1149             # bless {; ... } fails to compile.
1150             );
1151              
1152             my @CURLY_LOOKAHEAD_CLASSES = (
1153             {}, # not used
1154             {
1155             ';' => 'PPI::Structure::Block', # per perlref
1156             '}' => 'PPI::Structure::Constructor',
1157             },
1158             {
1159             '=>' => 'PPI::Structure::Constructor',
1160             },
1161             );
1162              
1163              
1164             # Given a parent element, and a { token to open a structure, determine
1165             # the class that the structure should be.
1166             sub _curly {
1167 11516     11516   17358 my ($self, $Parent) = @_;
1168             # my $self = shift;
1169             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1170              
1171             # Get the last significant element in the parent
1172 11516         25360 my $Element = $Parent->schild(-1);
1173 11516 100       32103 my $content = $Element ? $Element->content : '';
1174              
1175             # Is this a subscript, like $foo[1] or $foo{expr}
1176 11516 100       24235 if ( $Element ) {
1177 10857 100 66     25985 if ( $content eq '->' and $Element->isa('PPI::Token::Operator') ) {
1178             # $foo->{}
1179 2116         3837 $Element->{_dereference} = 1;
1180 2116         6459 return 'PPI::Structure::Subscript';
1181             }
1182 8741 100       26034 if ( $Element->isa('PPI::Structure::Subscript') ) {
1183             # $foo[]{}
1184 73         176 return 'PPI::Structure::Subscript';
1185             }
1186 8668 100 100     28433 if ( $content =~ /^(?:\$|\@)/ and $Element->isa('PPI::Token::Symbol') ) {
1187             # $foo{}, @foo{}
1188 584         2029 return 'PPI::Structure::Subscript';
1189             }
1190 8084 100 100     27797 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%|\*)/ ) {
1191 337         729 my $prior = $Parent->schild(-2);
1192 337 100 100     1861 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1193             # Postfix dereference: ->@{...} ->%{...} ->*{...}
1194 3         12 return 'PPI::Structure::Subscript';
1195             }
1196             }
1197 8081 100       20206 if ( $Element->isa('PPI::Structure::Block') ) {
1198             # deference - ${$hash_ref}{foo}
1199             # or even ${burfle}{foo}
1200             # hash slice - @{$hash_ref}{'foo', 'bar'}
1201 2 50       4 if ( my $prior = $Parent->schild(-2) ) {
1202 2         4 my $prior_content = $prior->content();
1203 2 50 66     24 $prior->isa( 'PPI::Token::Cast' )
      33        
1204             and ( $prior_content eq '@' ||
1205             $prior_content eq '$' )
1206             and return 'PPI::Structure::Subscript';
1207             }
1208             }
1209              
1210             # Are we the last argument of sub?
1211             # E.g.: 'sub foo {}', 'sub foo ($) {}'
1212 8079 100       28922 return 'PPI::Structure::Block' if $Parent->isa('PPI::Statement::Sub');
1213              
1214             # Are we the second or third argument of package?
1215             # E.g.: 'package Foo {}' or 'package Foo v1.2.3 {}'
1216 5617 100       20158 return 'PPI::Structure::Block'
1217             if $Parent->isa('PPI::Statement::Package');
1218              
1219 4324 100       10438 if ( $CURLY_CLASSES{$content} ) {
1220             # Known type
1221 893         3429 return $CURLY_CLASSES{$content};
1222             }
1223             }
1224              
1225             # Are we in a compound statement
1226 4090 100       10760 if ( $Parent->isa('PPI::Statement::Compound') ) {
1227             # We will only encounter blocks in compound statements
1228 1924         5472 return 'PPI::Structure::Block';
1229             }
1230              
1231             # Are we the second or third argument of use
1232 2166 100       6065 if ( $Parent->isa('PPI::Statement::Include') ) {
1233 53 50 33     130 if ( $Parent->schildren == 2 ||
      66        
1234             $Parent->schildren == 3 &&
1235             $Parent->schild(2)->isa('PPI::Token::Number')
1236             ) {
1237             # This is something like use constant { ... };
1238 53         213 return 'PPI::Structure::Constructor';
1239             }
1240             }
1241              
1242             # Unless we are at the start of the statement, everything else should be a block
1243             ### FIXME This is possibly a bad choice, but will have to do for now.
1244 2113 100       7358 return 'PPI::Structure::Block' if $Element;
1245              
1246 659 100 66     2782 if (
1247             $Parent->isa('PPI::Statement')
1248             and
1249             _INSTANCE($Parent->parent, 'PPI::Structure::List')
1250             ) {
1251 179         380 my $function = $Parent->parent->parent->schild(-2);
1252              
1253             # Special case: Are we the param of a core function
1254             # i.e. map({ $_ => 1 } @foo)
1255 179 100 100     682 return 'PPI::Structure::Block'
1256             if $function and $function->content =~ /^(?:map|grep|sort|eval|do)$/;
1257              
1258             # If not part of a block print, list-embedded curlies are most likely constructors
1259 85 100 100     363 return 'PPI::Structure::Constructor'
1260             if not $function or $function->content !~ /^(?:print|say)$/;
1261             }
1262              
1263             # We need to scan ahead.
1264 486         681 my $Next;
1265 486         575 my $position = 0;
1266 486         608 my @delayed;
1267 486         788 while ( $Next = $self->_get_token ) {
1268 1209 100       2478 unless ( $Next->significant ) {
1269 212         310 push @delayed, $Next;
1270 212         348 next;
1271             }
1272              
1273             # If we are off the end of the lookahead array,
1274 997 100       2244 if ( ++$position >= @CURLY_LOOKAHEAD_CLASSES ) {
    100          
1275             # default to block.
1276 130         485 $self->_buffer( splice(@delayed), $Next );
1277 130         187 last;
1278             # If the content at this position is known
1279             } elsif ( my $class = $CURLY_LOOKAHEAD_CLASSES[$position]
1280             {$Next->content} ) {
1281             # return the associated class.
1282 266         628 $self->_buffer( splice(@delayed), $Next );
1283 266         1134 return $class;
1284             }
1285              
1286             # Delay and continue
1287 601         1218 push @delayed, $Next;
1288             }
1289              
1290             # Hit the end of the document, or bailed out, go with block
1291 220         521 $self->_buffer( splice(@delayed) );
1292 220 50       449 if ( ref $Parent eq 'PPI::Statement' ) {
1293 220         384 bless $Parent, 'PPI::Statement::Compound';
1294             }
1295 220         844 return 'PPI::Structure::Block';
1296             }
1297              
1298              
1299             sub _lex_structure {
1300 30197     30197   39418 my ($self, $Structure) = @_;
1301             # my $self = shift;
1302             # my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1";
1303              
1304 30197   100     30245 push @{$self->{features_stack}}, $self->{features_stack}[-1] || {};
  30197         74504  
1305              
1306             # Start the processing loop
1307 30197         33778 my $Token;
1308 30197         42950 while ( ref($Token = $self->_get_token) ) {
1309             # Is this a direct type token
1310 116597 100       218135 unless ( $Token->significant ) {
1311 50286         51839 push @{$self->{delayed}}, $Token;
  50286         70407  
1312             # $self->_delay_element( $Token );
1313 50286         80167 next;
1314             }
1315              
1316             # Anything other than a Structure starts a Statement
1317 66311 100       177836 unless ( $Token->isa('PPI::Token::Structure') ) {
1318             # Because _statement may well delay and rollback itself,
1319             # we need to add the delayed tokens early
1320 37498         68949 $self->_add_delayed( $Structure );
1321              
1322             # Determine the class for the Statement and create it
1323 37498         68872 my $Statement = $self->_statement($Structure, $Token)->new($Token);
1324              
1325             # Move the lexing down into the Statement
1326 37498         79757 $self->_add_element( $Structure, $Statement );
1327 37498         78875 $self->_lex_statement( $Statement );
1328              
1329 37498         65825 next;
1330             }
1331              
1332             # Is this the opening of another structure directly inside us?
1333 28813 100       49669 if ( $Token->__LEXER__opens ) {
1334             # Rollback the Token, and recurse into the statement
1335 481         1222 $self->_rollback( $Token );
1336 481         1491 my $Statement = PPI::Statement->new;
1337 481         1208 $self->_add_element( $Structure, $Statement );
1338 481         1308 $self->_lex_statement( $Statement );
1339 481         1051 next;
1340             }
1341              
1342             # Is this the close of a structure ( which would be an error )
1343 28332 100       47313 if ( $Token->__LEXER__closes ) {
1344 28279         29921 pop @{$self->{features_stack}};
  28279         50066  
1345              
1346             # Is this OUR closing structure
1347 28279 100       47978 if ( $Token->content eq $Structure->start->__LEXER__opposite ) {
1348             # Add any delayed tokens, and the finishing token (the ugly way)
1349 27537         64189 $self->_add_delayed( $Structure );
1350 27537         46337 $Structure->{finish} = $Token;
1351             Scalar::Util::weaken(
1352 27537         64710 $_PARENT{Scalar::Util::refaddr $Token} = $Structure
1353             );
1354              
1355             # Confirm that ForLoop structures are actually so, and
1356             # aren't really a list.
1357 27537 100       76594 if ( $Structure->isa('PPI::Structure::For') ) {
1358 232 100       701 if ( 2 > scalar grep {
1359 594         1699 $_->isa('PPI::Statement')
1360             } $Structure->children ) {
1361 211         383 bless($Structure, 'PPI::Structure::List');
1362             }
1363             }
1364 27537         65922 return 1;
1365             }
1366              
1367             # Unmatched closing brace.
1368             # Either they typed the wrong thing, or haven't put
1369             # one at all. Either way it's an error we need to
1370             # somehow handle gracefully. For now, we'll treat it
1371             # as implicitly ending the structure. This causes the
1372             # least damage across the various reasons why this
1373             # might have happened.
1374 742         1282 return $self->_rollback( $Token );
1375             }
1376              
1377             # It's a semi-colon on its own, just inside the block.
1378             # This is a null statement.
1379             $self->_add_element(
1380 53         217 $Structure,
1381             PPI::Statement::Null->new($Token),
1382             );
1383             }
1384              
1385             # Is this an error
1386 1918 50       2944 unless ( defined $Token ) {
1387 0         0 PPI::Exception->throw;
1388             }
1389              
1390 1918         1857 pop @{$self->{features_stack}};
  1918         2761  
1391              
1392             # No, it's just the end of file.
1393             # Add any insignificant trailing tokens.
1394 1918         3646 $self->_add_delayed( $Structure );
1395             }
1396              
1397              
1398              
1399              
1400              
1401             #####################################################################
1402             # Support Methods
1403              
1404             # Get the next token for processing, handling buffering
1405             sub _get_token {
1406 684586 100   684586   608248 shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token;
  684586         1612681  
1407             }
1408              
1409             # Old long version of the above
1410             # my $self = shift;
1411             # # First from the buffer
1412             # if ( @{$self->{buffer}} ) {
1413             # return shift @{$self->{buffer}};
1414             # }
1415             #
1416             # # Then from the Tokenizer
1417             # $self->{Tokenizer}->get_token;
1418             # }
1419              
1420             # Delay the addition of insignificant elements.
1421             # This ended up being inlined.
1422             # sub _delay_element {
1423             # my $self = shift;
1424             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 1";
1425             # push @{ $_[0]->{delayed} }, $_[1];
1426             # }
1427              
1428             # Add an Element to a Node, including any delayed Elements
1429             sub _add_element {
1430 363040     363040   440492 my ($self, $Parent, $Element) = @_;
1431             # my $self = shift;
1432             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1433             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 2";
1434              
1435             # Handle a special case, where a statement is not fully resolved
1436 363040 100 100     638569 if ( ref $Parent eq 'PPI::Statement'
1437             and my $first = $Parent->schild(0) ) {
1438 77527 50 33     196457 if ( $first->isa('PPI::Token::Label')
1439             and !(my $second = $Parent->schild(1)) ) {
1440 0         0 my $new_class = $STATEMENT_CLASSES{$second->content};
1441             # It's a labelled statement
1442 0 0       0 bless $Parent, $new_class if $new_class;
1443             }
1444             }
1445              
1446             # Add first the delayed, from the front, then the passed element
1447 363040         345423 foreach my $el ( @{$self->{delayed}} ) {
  363040         475319  
1448             Scalar::Util::weaken(
1449 115955         325537 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1450             );
1451             # Inlined $Parent->__add_element($el);
1452             }
1453             Scalar::Util::weaken(
1454 363040         875193 $_PARENT{Scalar::Util::refaddr $Element} = $Parent
1455             );
1456 363040         332134 push @{$Parent->{children}}, @{$self->{delayed}}, $Element;
  363040         399476  
  363040         511683  
1457              
1458             # Clear the delayed elements
1459 363040         519523 $self->{delayed} = [];
1460             }
1461              
1462             # Specifically just add any delayed tokens, if any.
1463             sub _add_delayed {
1464 143263     143263   184820 my ($self, $Parent) = @_;
1465             # my $self = shift;
1466             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1467              
1468             # Add any delayed
1469 143263         139837 foreach my $el ( @{$self->{delayed}} ) {
  143263         217835  
1470             Scalar::Util::weaken(
1471 61604         154977 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1472             );
1473             # Inlined $Parent->__add_element($el);
1474             }
1475 143263         146004 push @{$Parent->{children}}, @{$self->{delayed}};
  143263         179756  
  143263         195126  
1476              
1477             # Clear the delayed elements
1478 143263         219237 $self->{delayed} = [];
1479             }
1480              
1481             # Rollback the delayed tokens, plus any passed. Once all the tokens
1482             # have been moved back on to the buffer, the order should be.
1483             # <--- @{$self->{delayed}}, @_, @{$self->{buffer}} <----
1484             sub _rollback {
1485 61358     61358   68857 my $self = shift;
1486              
1487             # First, put any passed objects back
1488 61358 100       95223 if ( @_ ) {
1489 49872         49405 unshift @{$self->{buffer}}, splice @_;
  49872         101597  
1490             }
1491              
1492             # Then, put back anything delayed
1493 61358 100       66464 if ( @{$self->{delayed}} ) {
  61358         98091  
1494 29529         29604 unshift @{$self->{buffer}}, splice @{$self->{delayed}};
  29529         35309  
  29529         42546  
1495             }
1496              
1497 61358         86174 1;
1498             }
1499              
1500             # Partial rollback, just return a single list to the buffer
1501             sub _buffer {
1502 616     616   659 my $self = shift;
1503              
1504             # Put any passed objects back
1505 616 100       989 if ( @_ ) {
1506 474         587 unshift @{$self->{buffer}}, splice @_;
  474         914  
1507             }
1508              
1509 616         720 1;
1510             }
1511              
1512              
1513              
1514              
1515              
1516             #####################################################################
1517             # Error Handling
1518              
1519             # Set the error message
1520             sub _error {
1521 4     4   9 $errstr = "Lexer failed: $_[1]";
1522 4         17 undef;
1523             }
1524              
1525             # Clear the error message.
1526             # Returns the object as a convenience.
1527             sub _clear {
1528 16850     16850   27366 $errstr = '';
1529 16850         28090 $_[0];
1530             }
1531              
1532             =pod
1533              
1534             =head2 errstr
1535              
1536             For any error that occurs, you can use the C, as either
1537             a static or object method, to access the error message.
1538              
1539             If no error occurs for any particular action, C will return false.
1540              
1541             =cut
1542              
1543             sub errstr {
1544 8     8 1 23 $errstr;
1545             }
1546              
1547              
1548              
1549              
1550              
1551             #####################################################################
1552             # PDOM Extensions
1553             #
1554             # This is something of a future expansion... ignore it for now :)
1555             #
1556             # use PPI::Statement::Sub ();
1557             #
1558             # sub PPI::Statement::Sub::__LEXER__normal { '' }
1559              
1560             1;
1561              
1562             =pod
1563              
1564             =head1 TO DO
1565              
1566             - Add optional support for some of the more common source filters
1567              
1568             - Some additional checks for blessing things into various Statement
1569             and Structure subclasses.
1570              
1571             =head1 SUPPORT
1572              
1573             See the L in the main module.
1574              
1575             =head1 AUTHOR
1576              
1577             Adam Kennedy Eadamk@cpan.orgE
1578              
1579             =head1 COPYRIGHT
1580              
1581             Copyright 2001 - 2011 Adam Kennedy.
1582              
1583             This program is free software; you can redistribute
1584             it and/or modify it under the same terms as Perl itself.
1585              
1586             The full text of the license can be found in the
1587             LICENSE file included with this module.
1588              
1589             =cut