File Coverage

blib/lib/PPI/Lexer.pm
Criterion Covered Total %
statement 426 452 94.2
branch 254 294 86.3
condition 156 207 75.3
subroutine 28 28 100.0
pod 5 6 83.3
total 869 987 88.0


line stmt bran cond sub pod time code
1             package PPI::Lexer;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Lexer - The PPI Lexer
8              
9             =head1 SYNOPSIS
10              
11             use PPI;
12            
13             # Create a new Lexer
14             my $Lexer = PPI::Lexer->new;
15            
16             # Build a PPI::Document object from a Token stream
17             my $Tokenizer = PPI::Tokenizer->load('My/Module.pm');
18             my $Document = $Lexer->lex_tokenizer($Tokenizer);
19            
20             # Build a PPI::Document object for some raw source
21             my $source = "print 'Hello World!'; kill(Humans->all);";
22             $Document = $Lexer->lex_source($source);
23            
24             # Build a PPI::Document object for a particular file name
25             $Document = $Lexer->lex_file('My/Module.pm');
26              
27             =head1 DESCRIPTION
28              
29             The is the L<PPI> Lexer. In the larger scheme of things, its job is to take
30             token streams, in a variety of forms, and "lex" them into nested structures.
31              
32             Pretty much everything in this module happens behind the scenes at this
33             point. In fact, at the moment you don't really need to instantiate the lexer
34             at all, the three main methods will auto-instantiate themselves a
35             C<PPI::Lexer> object as needed.
36              
37             All methods do a one-shot "lex this and give me a L<PPI::Document> object".
38              
39             In fact, if you are reading this, what you B<probably> want to do is to
40             just "load a document", in which case you can do this in a much more
41             direct and concise manner with one of the following.
42              
43             use PPI;
44            
45             $Document = PPI::Document->load( $filename );
46             $Document = PPI::Document->new( $string );
47              
48             See L<PPI::Document> for more details.
49              
50             For more unusual tasks, by all means forge onwards.
51              
52             =head1 METHODS
53              
54             =cut
55              
56 66     66   2726 use strict;
  66         1438  
  66         2488  
57 66     66   744 use Scalar::Util ();
  66         535  
  66         1532  
58 66     66   209 use Params::Util qw{_STRING _INSTANCE};
  66         91  
  66         2956  
59 66     66   264 use PPI ();
  66         79  
  66         687  
60 66     66   194 use PPI::Exception ();
  66         81  
  66         1099  
61 66     66   205 use PPI::Singletons '%_PARENT';
  66         99  
  66         290121  
62              
63             our $VERSION = '1.284';
64              
65             our $errstr = "";
66              
67             # Keyword -> Structure class maps
68             my %ROUND = (
69             # Conditions
70             'if' => 'PPI::Structure::Condition',
71             'elsif' => 'PPI::Structure::Condition',
72             'unless' => 'PPI::Structure::Condition',
73             'while' => 'PPI::Structure::Condition',
74             'until' => 'PPI::Structure::Condition',
75              
76             # For(each)
77             'for' => 'PPI::Structure::For',
78             'foreach' => 'PPI::Structure::For',
79             );
80              
81             # Opening brace to refining method
82             my %RESOLVE = (
83             '(' => '_round',
84             '[' => '_square',
85             '{' => '_curly',
86             );
87              
88             # Allows for experimental overriding of the tokenizer
89             our $X_TOKENIZER = "PPI::Tokenizer";
90 16799     16799 0 69063 sub X_TOKENIZER { $X_TOKENIZER }
91              
92              
93              
94              
95              
96             #####################################################################
97             # Constructor
98              
99             =pod
100              
101             =head2 new
102              
103             The C<new> constructor creates a new C<PPI::Lexer> object. The object itself
104             is merely used to hold various buffers and state data during the lexing
105             process, and holds no significant data between -E<gt>lex_xxxxx calls.
106              
107             Returns a new C<PPI::Lexer> object
108              
109             =cut
110              
111             sub new {
112 16800     16800 1 40652 my $class = shift->_clear;
113 16800         77569 bless {
114             Tokenizer => undef, # Where we store the tokenizer for a run
115             buffer => [], # The input token buffer
116             delayed => [], # The "delayed insignificant tokens" buffer
117             }, $class;
118             }
119              
120              
121              
122              
123              
124             #####################################################################
125             # Main Lexing Methods
126              
127             =pod
128              
129             =head2 lex_file $filename
130              
131             The C<lex_file> method takes a filename as argument. It then loads the file,
132             creates a L<PPI::Tokenizer> for the content and lexes the token stream
133             produced by the tokenizer. Basically, a sort of all-in-one method for
134             getting a L<PPI::Document> object from a file name.
135              
136             Additional arguments are passed to the tokenizer as a hash.
137              
138             Returns a L<PPI::Document> object, or C<undef> on error.
139              
140             =cut
141              
142             sub lex_file {
143 507 100   507 1 2394 my $self = ref $_[0] ? shift : shift->new;
144 507         2379 my $file = _STRING(shift);
145 507 100       1693 unless ( defined $file ) {
146 1         3 return $self->_error("Did not pass a filename to PPI::Lexer::lex_file");
147             }
148 506         1617 my %args = @_;
149              
150             # Create the Tokenizer
151 506         936 my $Tokenizer = eval {
152 506         1604 X_TOKENIZER->new($file);
153             };
154 506 50       3029 if ( _INSTANCE($@, 'PPI::Exception') ) {
    50          
155 0         0 return $self->_error( $@->message );
156             } elsif ( $@ ) {
157 0         0 return $self->_error( $errstr );
158             }
159              
160 506         8471 $self->lex_tokenizer( $Tokenizer, %args );
161             }
162              
163             =pod
164              
165             =head2 lex_source $string
166              
167             The C<lex_source> method takes a normal scalar string as argument. It
168             creates a L<PPI::Tokenizer> object for the string, and then lexes the
169             resulting token stream.
170              
171             Additional arguments are passed to the tokenizer as a hash.
172              
173             Returns a L<PPI::Document> object, or C<undef> on error.
174              
175             =cut
176              
177             sub lex_source {
178 16293 50   16293 1 954632 my $self = ref $_[0] ? shift : shift->new;
179 16293         24095 my $source = shift;
180 16293 50 33     66097 unless ( defined $source and not ref $source ) {
181 0         0 return $self->_error("Did not pass a string to PPI::Lexer::lex_source");
182             }
183 16293         25675 my %args = @_;
184              
185             # Create the Tokenizer and hand off to the next method
186 16293         22140 my $Tokenizer = eval {
187 16293         32702 X_TOKENIZER->new(\$source);
188             };
189 16293 50       57431 if ( _INSTANCE($@, 'PPI::Exception') ) {
    50          
190 0         0 return $self->_error( $@->message );
191             } elsif ( $@ ) {
192 0         0 return $self->_error( $errstr );
193             }
194              
195 16293         39588 $self->lex_tokenizer( $Tokenizer, %args );
196             }
197              
198             =pod
199              
200             =head2 lex_tokenizer $Tokenizer
201              
202             The C<lex_tokenizer> takes as argument a L<PPI::Tokenizer> object. It
203             lexes the token stream from the tokenizer into a L<PPI::Document> object.
204              
205             Additional arguments are set on the L<PPI::Document> produced.
206              
207             Returns a L<PPI::Document> object, or C<undef> on error.
208              
209             =cut
210              
211             sub lex_tokenizer {
212 16799 50   16799 1 32452 my $self = ref $_[0] ? shift : shift->new;
213 16799         83221 my $Tokenizer = _INSTANCE(shift, 'PPI::Tokenizer');
214 16799 50       33016 return $self->_error(
215             "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer"
216             ) unless $Tokenizer;
217 16799         24644 my %args = @_;
218              
219             # Create the empty document
220 16799         43523 my $Document = PPI::Document->new;
221 16799         56959 ref($Document)->_setattr( $Document, %args );
222 16799         43784 $Tokenizer->_document($Document);
223              
224             # Lex the token stream into the document
225 16799         26432 $self->{Tokenizer} = $Tokenizer;
226 16799 100       20300 if ( !eval { $self->_lex_document($Document); 1 } ) {
  16799         38114  
  16798         30598  
227             # If an error occurs DESTROY the partially built document.
228 1         4 $Tokenizer->_document(undef);
229 1         3 undef $Document;
230 1 50       5 if ( _INSTANCE($@, 'PPI::Exception') ) {
231 1         3 return $self->_error( $@->message );
232             } else {
233 0         0 return $self->_error( $errstr );
234             }
235             }
236              
237 16798         130570 return $Document;
238             }
239              
240              
241              
242              
243              
244             #####################################################################
245             # Lex Methods - Document Object
246              
247             sub _lex_document {
248 16799     16799   27212 my ($self, $Document) = @_;
249             # my $self = shift;
250             # my $Document = _INSTANCE(shift, 'PPI::Document') or return undef;
251              
252             # Start the processing loop
253 16799         19338 my $Token;
254 16799         36481 while ( ref($Token = $self->_get_token) ) {
255             # Add insignificant tokens directly beneath us
256 53370 100       111672 unless ( $Token->significant ) {
257 20902         41586 $self->_add_element( $Document, $Token );
258 20902         32247 next;
259             }
260              
261 32468 100       61178 if ( $Token->content eq ';' ) {
262             # It's a semi-colon on its own.
263             # We call this a null statement.
264 465         1771 $self->_add_element(
265             $Document,
266             PPI::Statement::Null->new($Token),
267             );
268 465         1123 next;
269             }
270              
271             # Handle anything other than a structural element
272 32003 100       64305 unless ( ref $Token eq 'PPI::Token::Structure' ) {
273             # Determine the class for the Statement, and create it
274 28990         67605 my $Statement = $self->_statement($Document, $Token)->new($Token);
275              
276             # Move the lexing down into the statement
277 28990         73269 $self->_add_delayed( $Document );
278 28990         59596 $self->_add_element( $Document, $Statement );
279 28990         61054 $self->_lex_statement( $Statement );
280              
281 28990         57888 next;
282             }
283              
284             # Is this the opening of a structure?
285 3013 100       5593 if ( $Token->__LEXER__opens ) {
286             # This should actually have a Statement instead
287 946         2749 $self->_rollback( $Token );
288 946         3226 my $Statement = PPI::Statement->new;
289 946         2427 $self->_add_element( $Document, $Statement );
290 946         2266 $self->_lex_statement( $Statement );
291 946         1879 next;
292             }
293              
294             # Is this the close of a structure.
295 2067 50       4496 if ( $Token->__LEXER__closes ) {
296             # Because we are at the top of the tree, this is an error.
297             # This means either a mis-parsing, or a mistake in the code.
298             # To handle this, we create a "Naked Close" statement
299 2067         6485 $self->_add_element( $Document,
300             PPI::Statement::UnmatchedBrace->new($Token)
301             );
302 2067         3562 next;
303             }
304              
305             # Shouldn't be able to get here
306 0         0 PPI::Exception->throw('Lexer reached an illegal state');
307             }
308              
309             # Did we leave the main loop because of a Tokenizer error?
310 16798 50       29082 unless ( defined $Token ) {
311 0 0       0 my $errstr = $self->{Tokenizer} ? $self->{Tokenizer}->errstr : '';
312 0   0     0 $errstr ||= 'Unknown Tokenizer Error';
313 0         0 PPI::Exception->throw($errstr);
314             }
315              
316             # No error, it's just the end of file.
317             # Add any insignificant trailing tokens.
318 16798         36496 $self->_add_delayed( $Document );
319              
320             # If the Tokenizer has any v6 blocks to attach, do so now.
321             # Checking once at the end is faster than adding a special
322             # case check for every statement parsed.
323 16798         28996 my $perl6 = $self->{Tokenizer}->{'perl6'};
324 16798 100       30143 if ( @$perl6 ) {
325 2         9 my $includes = $Document->find( 'PPI::Statement::Include::Perl6' );
326 2         5 foreach my $include ( @$includes ) {
327 2 50       4 unless ( @$perl6 ) {
328 0         0 PPI::Exception->throw('Failed to find a perl6 section');
329             }
330 2         7 $include->{perl6} = shift @$perl6;
331             }
332             }
333              
334 16798         22502 return 1;
335             }
336              
337              
338              
339              
340              
341             #####################################################################
342             # Lex Methods - Statement Object
343              
344             # Keyword -> Statement Subclass
345             my %STATEMENT_CLASSES = (
346             # Things that affect the timing of execution
347             'BEGIN' => 'PPI::Statement::Scheduled',
348             'CHECK' => 'PPI::Statement::Scheduled',
349             'UNITCHECK' => 'PPI::Statement::Scheduled',
350             'INIT' => 'PPI::Statement::Scheduled',
351             'END' => 'PPI::Statement::Scheduled',
352              
353             # Special subroutines for which 'sub' is optional
354             'AUTOLOAD' => 'PPI::Statement::Sub',
355             'DESTROY' => 'PPI::Statement::Sub',
356              
357             # Loading and context statement
358             'package' => 'PPI::Statement::Package',
359             # 'use' => 'PPI::Statement::Include',
360             'no' => 'PPI::Statement::Include',
361             'require' => 'PPI::Statement::Include',
362              
363             # Various declarations
364             'my' => 'PPI::Statement::Variable',
365             'local' => 'PPI::Statement::Variable',
366             'our' => 'PPI::Statement::Variable',
367             'state' => 'PPI::Statement::Variable',
368             # Statements starting with 'sub' could be any one of...
369             # 'sub' => 'PPI::Statement::Sub',
370             # 'sub' => 'PPI::Statement::Scheduled',
371             # 'sub' => 'PPI::Statement',
372              
373             # Compound statement
374             'if' => 'PPI::Statement::Compound',
375             'unless' => 'PPI::Statement::Compound',
376             'for' => 'PPI::Statement::Compound',
377             'foreach' => 'PPI::Statement::Compound',
378             'while' => 'PPI::Statement::Compound',
379             'until' => 'PPI::Statement::Compound',
380              
381             # Switch statement
382             'given' => 'PPI::Statement::Given',
383             'when' => 'PPI::Statement::When',
384             'default' => 'PPI::Statement::When',
385              
386             # Various ways of breaking out of scope
387             'redo' => 'PPI::Statement::Break',
388             'next' => 'PPI::Statement::Break',
389             'last' => 'PPI::Statement::Break',
390             'return' => 'PPI::Statement::Break',
391             'goto' => 'PPI::Statement::Break',
392              
393             # Special sections of the file
394             '__DATA__' => 'PPI::Statement::Data',
395             '__END__' => 'PPI::Statement::End',
396             );
397              
398             sub _statement {
399 55199     55199   90180 my ($self, $Parent, $Token) = @_;
400             # my $self = shift;
401             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
402             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
403              
404             # Check for things like ( parent => ... )
405 55199 100 100     335790 if (
406             $Parent->isa('PPI::Structure::List')
407             or
408             $Parent->isa('PPI::Structure::Constructor')
409             ) {
410 7999 100       26233 if ( $Token->isa('PPI::Token::Word') ) {
411             # Is the next significant token a =>
412             # Read ahead to the next significant token
413 2034         2976 my $Next;
414 2034         4267 while ( $Next = $self->_get_token ) {
415 2880 100       7385 unless ( $Next->significant ) {
416 897         1288 push @{$self->{delayed}}, $Next;
  897         3050  
417             # $self->_delay_element( $Next );
418 897         1646 next;
419             }
420              
421             # Got the next token
422 1983 100 100     9452 if (
423             $Next->isa('PPI::Token::Operator')
424             and
425             $Next->content eq '=>'
426             ) {
427             # Is an ordinary expression
428 949         2424 $self->_rollback( $Next );
429 949         4314 return 'PPI::Statement::Expression';
430             } else {
431 1034         1882 last;
432             }
433             }
434              
435             # Rollback and continue
436 1085         2248 $self->_rollback( $Next );
437             }
438             }
439              
440 54250         72209 my $is_lexsub = 0;
441              
442             # Is it a token in our known classes list
443             my $class = {
444             %STATEMENT_CLASSES,
445             ( try => 'PPI::Statement::Compound' ) x
446             !!( $Parent->schild(-1) || $Parent )->presumed_features->{try},
447 54250   66     448582 }->{ $Token->content };
448              
449 54250 100       311139 if ( $class ) {
450             # Is the next significant token a =>
451             # Read ahead to the next significant token
452 9909         14047 my $Next;
453 9909         25171 while ( $Next = $self->_get_token ) {
454 19442 100       49178 if ( !$Next->significant ) {
455 9580         13398 push @{$self->{delayed}}, $Next;
  9580         20437  
456 9580         18724 next;
457             }
458              
459             # Scheduled block must be followed by left curly or
460             # semicolon. Otherwise we have something else (e.g.
461             # open( CHECK, ... );
462 9862 100 66     26091 if (
      100        
463             'PPI::Statement::Scheduled' eq $class
464             and not ( $Next->isa( 'PPI::Token::Structure' )
465             and $Next->content =~ m/\A[{;]\z/ ) # }
466             ) {
467 1         4 $class = undef;
468 1         4 last;
469             }
470              
471             # Lexical subroutine
472 9861 100 100     20565 if (
      66        
473             $Token->content =~ /^(?:my|our|state)$/
474             and $Next->isa( 'PPI::Token::Word' ) and $Next->content eq 'sub'
475             ) {
476             # This should be PPI::Statement::Sub rather than PPI::Statement::Variable
477 7         11 $class = undef;
478 7         7 $is_lexsub = 1;
479 7         8 last;
480             }
481              
482             last if
483 9854 100 100     51902 !$Next->isa( 'PPI::Token::Operator' ) or $Next->content ne '=>';
484              
485             # Got the next token
486             # Is an ordinary expression
487 21         60 $self->_rollback( $Next );
488 21         143 return 'PPI::Statement';
489             }
490              
491             # Rollback and continue
492 9888         25550 $self->_rollback( $Next );
493             }
494              
495             # Handle potential barewords for subscripts
496 54229 100       203752 if ( $Parent->isa('PPI::Structure::Subscript') ) {
497             # Fast obvious case, just an expression
498 3883 100 100     11922 unless ( $class and $class->isa('PPI::Statement::Expression') ) {
499 3760         18495 return 'PPI::Statement::Expression';
500             }
501              
502             # This is something like "my" or "our" etc... more subtle.
503             # Check if the next token is a closing curly brace.
504             # This means we are something like $h{my}
505 123         165 my $Next;
506 123         194 while ( $Next = $self->_get_token ) {
507 119 50       245 unless ( $Next->significant ) {
508 0         0 push @{$self->{delayed}}, $Next;
  0         0  
509             # $self->_delay_element( $Next );
510 0         0 next;
511             }
512              
513             # Found the next significant token.
514             # Is it a closing curly brace?
515 119 50       203 if ( $Next->content eq '}' ) {
516 119         198 $self->_rollback( $Next );
517 119         415 return 'PPI::Statement::Expression';
518             } else {
519 0         0 $self->_rollback( $Next );
520 0         0 return $class;
521             }
522             }
523              
524             # End of file... this means it is something like $h{our
525             # which is probably going to be $h{our} ... I think
526 4         11 $self->_rollback( $Next );
527 4         15 return 'PPI::Statement::Expression';
528             }
529              
530             # If it's a token in our list, use that class
531 50346 100       129568 return $class if $class;
532              
533             # Handle the more in-depth sub detection
534 40619 100 100     96442 if ( $is_lexsub || $Token->content eq 'sub' ) {
535             # Read ahead to the next significant token
536 3287         4444 my $Next;
537 3287         7095 while ( $Next = $self->_get_token ) {
538 6511 100       13888 unless ( $Next->significant ) {
539 3248         4325 push @{$self->{delayed}}, $Next;
  3248         6403  
540             # $self->_delay_element( $Next );
541 3248         6202 next;
542             }
543              
544             # Got the next significant token
545 3263         5881 my $sclass = $STATEMENT_CLASSES{$Next->content};
546 3263 100 100     8585 if ( $sclass and $sclass eq 'PPI::Statement::Scheduled' ) {
547 28         67 $self->_rollback( $Next );
548 28         123 return 'PPI::Statement::Scheduled';
549             }
550 3235 100       9077 if ( $Next->isa('PPI::Token::Word') ) {
551 3126         7671 $self->_rollback( $Next );
552 3126         15194 return 'PPI::Statement::Sub';
553             }
554              
555             ### Comment out these two, as they would return PPI::Statement anyway
556             # if ( $content eq '{' ) {
557             # Anonymous sub at start of statement
558             # return 'PPI::Statement';
559             # }
560             #
561             # if ( $Next->isa('PPI::Token::Prototype') ) {
562             # Anonymous sub at start of statement
563             # return 'PPI::Statement';
564             # }
565              
566             # PPI::Statement is the safest fall-through
567 109         360 $self->_rollback( $Next );
568 109         556 return 'PPI::Statement';
569             }
570              
571             # End of file... PPI::Statement::Sub is the most likely
572 24         71 $self->_rollback( $Next );
573 24         142 return 'PPI::Statement::Sub';
574             }
575              
576 37332 100       61313 if ( $Token->content eq 'use' ) {
577             # Add a special case for "use v6" lines.
578 2268         2885 my $Next;
579 2268         5680 while ( $Next = $self->_get_token ) {
580 4531 100       9932 unless ( $Next->significant ) {
581 2265         3007 push @{$self->{delayed}}, $Next;
  2265         4665  
582             # $self->_delay_element( $Next );
583 2265         4414 next;
584             }
585              
586             # Found the next significant token.
587 2266 100 66     11029 if (
    100          
588             $Next->isa('PPI::Token::Operator')
589             and
590             $Next->content eq '=>'
591             ) {
592             # Is an ordinary expression
593 1         3 $self->_rollback( $Next );
594 1         5 return 'PPI::Statement';
595             # Is it a v6 use?
596             } elsif ( $Next->content eq 'v6' ) {
597 2         8 $self->_rollback( $Next );
598 2         26 return 'PPI::Statement::Include::Perl6';
599             } else {
600 2263         6990 $self->_rollback( $Next );
601 2263         12736 return 'PPI::Statement::Include';
602             }
603             }
604              
605             # End of file... this means it is an incomplete use
606             # line, just treat it as a normal include.
607 2         11 $self->_rollback( $Next );
608 2         24 return 'PPI::Statement::Include';
609             }
610              
611             # If our parent is a Condition, we are an Expression
612 35064 100       96190 if ( $Parent->isa('PPI::Structure::Condition') ) {
613 1263         7466 return 'PPI::Statement::Expression';
614             }
615              
616             # If our parent is a List, we are also an expression
617 33801 100       83079 if ( $Parent->isa('PPI::Structure::List') ) {
618 5157         26993 return 'PPI::Statement::Expression';
619             }
620              
621             # Switch statements use expressions, as well.
622 28644 100 100     147424 if (
623             $Parent->isa('PPI::Structure::Given')
624             or
625             $Parent->isa('PPI::Structure::When')
626             ) {
627 6         33 return 'PPI::Statement::Expression';
628             }
629              
630 28638 100       148493 if ( _INSTANCE($Token, 'PPI::Token::Label') ) {
631 395         2191 return 'PPI::Statement::Compound';
632             }
633              
634             # Beyond that, I have no idea for the moment.
635             # Just keep adding more conditions above this.
636 28243         106556 return 'PPI::Statement';
637             }
638              
639             sub _lex_statement {
640 56589     56589   77700 my ($self, $Statement) = @_;
641             # my $self = shift;
642             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
643              
644             # Handle some special statements
645 56589 100       193897 if ( $Statement->isa('PPI::Statement::End') ) {
646 8         46 return $self->_lex_end( $Statement );
647             }
648              
649             # Begin processing tokens
650 56581         65444 my $Token;
651 56581         90463 while ( ref( $Token = $self->_get_token ) ) {
652             # Delay whitespace and comment tokens
653 263555 100       489459 unless ( $Token->significant ) {
654 93232         96222 push @{$self->{delayed}}, $Token;
  93232         132406  
655             # $self->_delay_element( $Token );
656 93232         134411 next;
657             }
658              
659             # Structual closes, and __DATA__ and __END__ tags implicitly
660             # end every type of statement
661 170323 100 66     309150 if (
662             $Token->__LEXER__closes
663             or
664             $Token->isa('PPI::Token::Separator')
665             ) {
666             # Rollback and end the statement
667 17730         44261 return $self->_rollback( $Token );
668             }
669              
670             # Normal statements never implicitly end
671 152593 100       308310 unless ( $Statement->__LEXER__normal ) {
672             # Have we hit an implicit end to the statement
673 24743 100       47007 unless ( $self->_continues( $Statement, $Token ) ) {
674             # Rollback and finish the statement
675 4415         11262 return $self->_rollback( $Token );
676             }
677             }
678              
679             # Any normal character just gets added
680 148178 100       317021 unless ( $Token->isa('PPI::Token::Structure') ) {
681 102718         176341 $self->_add_element( $Statement, $Token );
682 102718         152682 next;
683             }
684              
685             # Handle normal statement terminators
686 45460 100       72225 if ( $Token->content eq ';' ) {
687 23035         46523 $self->_add_element( $Statement, $Token );
688 23035         35099 return 1;
689             }
690              
691             # Which leaves us with a new structure
692              
693             # Determine the class for the structure and create it
694 22425         39658 my $method = $RESOLVE{$Token->content};
695 22425         74227 my $Structure = $self->$method($Statement)->new($Token);
696              
697             # Move the lexing down into the Structure
698 22425         51965 $self->_add_delayed( $Statement );
699 22425         48382 $self->_add_element( $Statement, $Structure );
700 22425         56150 $self->_lex_structure( $Structure );
701             }
702              
703             # Was it an error in the tokenizer?
704 11401 50       17848 unless ( defined $Token ) {
705 0         0 PPI::Exception->throw;
706             }
707              
708             # No, it's just the end of the file...
709             # Roll back any insignificant tokens, they'll get added at the Document level
710 11401         19215 $self->_rollback;
711             }
712              
713             sub _lex_end {
714 8     8   16 my ($self, $Statement) = @_;
715             # my $self = shift;
716             # my $Statement = _INSTANCE(shift, 'PPI::Statement::End') or die "Bad param 1";
717              
718             # End of the file, EVERYTHING is ours
719 8         12 my $Token;
720 8         26 while ( $Token = $self->_get_token ) {
721             # Inlined $Statement->__add_element($Token);
722             Scalar::Util::weaken(
723 15         47 $_PARENT{Scalar::Util::refaddr $Token} = $Statement
724             );
725 15         17 push @{$Statement->{children}}, $Token;
  15         88  
726             }
727              
728             # Was it an error in the tokenizer?
729 8 50       28 unless ( defined $Token ) {
730 0         0 PPI::Exception->throw;
731             }
732              
733             # No, it's just the end of the file...
734             # Roll back any insignificant tokens, they get added at the Document level
735 8         20 $self->_rollback;
736             }
737              
738             # For many statements, it can be difficult to determine the end-point.
739             # This method takes a statement and the next significant token, and attempts
740             # to determine if the there is a statement boundary between the two, or if
741             # the statement can continue with the token.
742             sub _continues {
743 24743     24743   34771 my ($self, $Statement, $Token) = @_;
744             # my $self = shift;
745             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
746             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
747              
748             # Handle the simple block case
749             # { print 1; }
750 24743 100 100     52886 if (
751             $Statement->schildren == 1
752             and
753             $Statement->schild(0)->isa('PPI::Structure::Block')
754             ) {
755 46         178 return '';
756             }
757              
758             # Alrighty then, there are six implied-end statement types:
759             # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, ::When,
760             # and ::Package statements.
761 24697 50       54120 return 1
762             if ref $Statement !~ /\b(?:Scheduled|Sub|Compound|Given|When|Package)$/;
763              
764             # Of these six, ::Scheduled, ::Sub, ::Given, and ::When follow the same
765             # simple rule and can be handled first. The block form of ::Package
766             # follows the rule, too. (The non-block form of ::Package
767             # requires a statement terminator, and thus doesn't need to have
768             # an implied end detected.)
769 24697         45626 my @part = $Statement->schildren;
770 24697         30748 my $LastChild = $part[-1];
771             # If the last significant element of the statement is a block,
772             # then an implied-end statement is done, no questions asked.
773 24697 100       113788 return !$LastChild->isa('PPI::Structure::Block')
774             if !$Statement->isa('PPI::Statement::Compound');
775              
776             # Now we get to compound statements, which kind of suck (to lex).
777             # However, of them all, the 'if' type, which includes unless, are
778             # relatively easy to handle compared to the others.
779 5600         20748 my $type = $Statement->type;
780 5600 100       14321 if ( $type eq 'if' ) {
781             # This should be one of the following
782             # if (EXPR) BLOCK
783             # if (EXPR) BLOCK else BLOCK
784             # if (EXPR) BLOCK elsif (EXPR) BLOCK ... else BLOCK
785              
786             # We only implicitly end on a block
787 3483 100       15536 unless ( $LastChild->isa('PPI::Structure::Block') ) {
788             # if (EXPR) ...
789             # if (EXPR) BLOCK else ...
790             # if (EXPR) BLOCK elsif (EXPR) BLOCK ...
791 2411         9098 return 1;
792             }
793              
794             # If the token before the block is an 'else',
795             # it's over, no matter what.
796 1072         2974 my $NextLast = $Statement->schild(-2);
797 1072 50 66     10154 if (
      66        
      66        
798             $NextLast
799             and
800             $NextLast->isa('PPI::Token')
801             and
802             $NextLast->isa('PPI::Token::Word')
803             and
804             $NextLast->content eq 'else'
805             ) {
806 72         319 return '';
807             }
808              
809             # Otherwise, we continue for 'elsif' or 'else' only.
810 1000 100 100     4537 if (
      100        
811             $Token->isa('PPI::Token::Word')
812             and (
813             $Token->content eq 'else'
814             or
815             $Token->content eq 'elsif'
816             )
817             ) {
818 310         1543 return 1;
819             }
820              
821 690         2904 return '';
822             }
823              
824 2117 100       5742 if ( $type eq 'label' ) {
825             # We only have the label so far, could be any of
826             # LABEL while (EXPR) BLOCK
827             # LABEL while (EXPR) BLOCK continue BLOCK
828             # LABEL for (EXPR; EXPR; EXPR) BLOCK
829             # LABEL foreach VAR (LIST) BLOCK
830             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
831             # LABEL BLOCK continue BLOCK
832              
833             # Handle cases with a word after the label
834 368 100 100     2337 if (
835             $Token->isa('PPI::Token::Word')
836             and
837             $Token->content =~ /^(?:while|until|for|foreach)$/
838             ) {
839 38         114 return 1;
840             }
841              
842             # Handle labelled blocks
843 330 100 100     1608 if ( $Token->isa('PPI::Token::Structure') && $Token->content eq '{' ) {
844 242         1115 return 1;
845             }
846              
847 88         240 return '';
848             }
849              
850             # Handle the common "after round braces" case
851 1749 100 100     8679 if ( $LastChild->isa('PPI::Structure') and $LastChild->braces eq '()' ) {
852             # LABEL while (EXPR) ...
853             # LABEL while (EXPR) ...
854             # LABEL for (EXPR; EXPR; EXPR) ...
855             # LABEL for VAR (LIST) ...
856             # LABEL foreach VAR (LIST) ...
857             # Only a block will do
858 383   33     1841 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
859             }
860              
861 1366 100       3117 if ( $type eq 'for' ) {
862             # LABEL for (EXPR; EXPR; EXPR) BLOCK
863 140 100 66     689 if (
    50          
    0          
864             $LastChild->isa('PPI::Token::Word')
865             and
866             $LastChild->content =~ /^for(?:each)?\z/
867             ) {
868             # LABEL for ...
869 127 100 66     1029 if (
      100        
870             (
871             $Token->isa('PPI::Token::Structure')
872             and
873             $Token->content eq '('
874             )
875             or
876             $Token->isa('PPI::Token::QuoteLike::Words')
877             ) {
878 21         75 return 1;
879             }
880              
881 106 50       350 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
882             # LABEL for VAR QW{} ...
883             # LABEL foreach VAR QW{} ...
884             # Only a block will do
885 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
886             }
887              
888             # In this case, we can also behave like a foreach
889 106         212 $type = 'foreach';
890              
891             } elsif ( $LastChild->isa('PPI::Structure::Block') ) {
892             # LABEL for (EXPR; EXPR; EXPR) BLOCK
893             # That's it, nothing can continue
894 13         51 return '';
895              
896             } elsif ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
897             # LABEL for VAR QW{} ...
898             # LABEL foreach VAR QW{} ...
899             # Only a block will do
900 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
901             }
902             }
903              
904             # Handle the common continue case
905 1332 100 100     5004 if ( $LastChild->isa('PPI::Token::Word') and $LastChild->content eq 'continue' ) {
906             # LABEL while (EXPR) BLOCK continue ...
907             # LABEL foreach VAR (LIST) BLOCK continue ...
908             # LABEL BLOCK continue ...
909             # Only a block will do
910 6   33     33 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
911             }
912              
913 1326 50 66     4285 if ( $type eq 'try' and $LastChild->presumed_features->{try} ) {
914 6 100       20 return 1 if not $LastChild->isa('PPI::Structure::Block');
915              
916 2         4 my $NextLast = $Statement->schild(-2);
917 2 50 33     27 return ''
      33        
      33        
918             if $NextLast
919             and $NextLast->isa('PPI::Token')
920             and $NextLast->isa('PPI::Token::Word')
921             and $NextLast->content eq 'catch';
922              
923 2 50 33     8 return 1 #
924             if $Token->isa('PPI::Token::Word') and $Token->content eq 'catch';
925              
926 0         0 return '';
927             }
928              
929             # Handle the common continuable block case
930 1320 100       3532 if ( $LastChild->isa('PPI::Structure::Block') ) {
931             # LABEL while (EXPR) BLOCK
932             # LABEL while (EXPR) BLOCK ...
933             # LABEL for (EXPR; EXPR; EXPR) BLOCK
934             # LABEL foreach VAR (LIST) BLOCK
935             # LABEL foreach VAR (LIST) BLOCK ...
936             # LABEL BLOCK ...
937             # Is this the block for a continue?
938 462 100 66     3429 if ( _INSTANCE($part[-2], 'PPI::Token::Word') and $part[-2]->content eq 'continue' ) {
939             # LABEL while (EXPR) BLOCK continue BLOCK
940             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
941             # LABEL BLOCK continue BLOCK
942             # That's it, nothing can continue this
943 6         18 return '';
944             }
945              
946             # Only a continue will do
947 456   100     2634 return $Token->isa('PPI::Token::Word') && $Token->content eq 'continue';
948             }
949              
950 858 50       1521 if ( $type eq 'block' ) {
951             # LABEL BLOCK continue BLOCK
952             # Every possible case is covered in the common cases above
953             }
954              
955 858 100       1420 if ( $type eq 'while' ) {
956             # LABEL while (EXPR) BLOCK
957             # LABEL while (EXPR) BLOCK continue BLOCK
958             # LABEL until (EXPR) BLOCK
959             # LABEL until (EXPR) BLOCK continue BLOCK
960             # The only case not covered is the while ...
961 159 50 66     866 if (
      66        
962             $LastChild->isa('PPI::Token::Word')
963             and (
964             $LastChild->content eq 'while'
965             or
966             $LastChild->content eq 'until'
967             )
968             ) {
969             # LABEL while ...
970             # LABEL until ...
971             # Only a condition structure will do
972 159   33     705 return $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
973             }
974             }
975              
976 699 50       1127 if ( $type eq 'foreach' ) {
977             # LABEL foreach VAR (LIST) BLOCK
978             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
979             # The only two cases that have not been covered already are
980             # 'foreach ...' and 'foreach VAR ...'
981              
982 699 100       1671 if ( $LastChild->isa('PPI::Token::Symbol') ) {
983             # LABEL foreach my $scalar ...
984             # Open round brace, or a quotewords
985 207 100 66     829 return 1 if $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
986 16 50       71 return 1 if $Token->isa('PPI::Token::QuoteLike::Words');
987 0         0 return '';
988             }
989              
990 492 100 100     942 if ( $LastChild->content eq 'foreach' or $LastChild->content eq 'for' ) {
991             # There are three possibilities here
992 278 100 100     1143 if (
    100 100        
    100 66        
    100          
993             $Token->isa('PPI::Token::Word')
994             and (
995             ($STATEMENT_CLASSES{ $Token->content } || '')
996             eq
997             'PPI::Statement::Variable'
998             )
999             ) {
1000             # VAR == 'my ...'
1001 193         695 return 1;
1002             } elsif ( $Token->content =~ /^\$/ ) {
1003             # VAR == '$scalar'
1004 34         106 return 1;
1005             } elsif ( $Token->isa('PPI::Token::Structure') and $Token->content eq '(' ) {
1006 42         158 return 1;
1007             } elsif ( $Token->isa('PPI::Token::QuoteLike::Words') ) {
1008 6         34 return 1;
1009             } else {
1010 3         17 return '';
1011             }
1012             }
1013              
1014 214 100 100     531 if (
1015             ($STATEMENT_CLASSES{ $LastChild->content } || '')
1016             eq
1017             'PPI::Statement::Variable'
1018             ) {
1019             # LABEL foreach my ...
1020             # Only a scalar will do
1021 189         406 return $Token->content =~ /^\$/;
1022             }
1023              
1024             # Handle the rare for my $foo qw{bar} ... case
1025 25 50       89 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
1026             # LABEL for VAR QW ...
1027             # LABEL foreach VAR QW ...
1028             # Only a block will do
1029 25   33     110 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
1030             }
1031             }
1032              
1033             # Something we don't know about... what could it be
1034 0         0 PPI::Exception->throw("Illegal state in '$type' compound statement");
1035             }
1036              
1037              
1038              
1039              
1040              
1041             #####################################################################
1042             # Lex Methods - Structure Object
1043              
1044             # Given a parent element, and a ( token to open a structure, determine
1045             # the class that the structure should be.
1046             sub _round {
1047 8090     8090   14948 my ($self, $Parent) = @_;
1048             # my $self = shift;
1049             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1050              
1051             # Get the last significant element in the parent
1052 8090         21420 my $Element = $Parent->schild(-1);
1053 8090 100       41265 if ( _INSTANCE($Element, 'PPI::Token::Word') ) {
1054             # Can it be determined because it is a keyword?
1055 6454         16075 my $rclass = $ROUND{$Element->content};
1056 6454 100       21836 return $rclass if $rclass;
1057             }
1058              
1059             # If we are part of a for or foreach statement, we are a ForLoop
1060 6730 100       70445 if ( $Parent->isa('PPI::Statement::Compound') ) {
    100          
    100          
    100          
1061 192 100       451 if ( $Parent->type =~ /^for(?:each)?$/ ) {
1062 191         1107 return 'PPI::Structure::For';
1063             }
1064             } elsif ( $Parent->isa('PPI::Statement::Given') ) {
1065 3         34 return 'PPI::Structure::Given';
1066             } elsif ( $Parent->isa('PPI::Statement::When') ) {
1067 3         25 return 'PPI::Structure::When';
1068             } elsif ( $Parent->isa('PPI::Statement::Sub') ) {
1069 34         147 return 'PPI::Structure::Signature';
1070             }
1071              
1072             # Otherwise, it must be a list
1073              
1074             # If the previous element is -> then we mark it as a dereference
1075 6499 100 100     32376 if ( _INSTANCE($Element, 'PPI::Token::Operator') and $Element->content eq '->' ) {
1076 10         28 $Element->{_dereference} = 1;
1077             }
1078              
1079             'PPI::Structure::List'
1080 6499         26468 }
1081              
1082             # Given a parent element, and a [ token to open a structure, determine
1083             # the class that the structure should be.
1084             sub _square {
1085 3030     3030   6430 my ($self, $Parent) = @_;
1086             # my $self = shift;
1087             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1088              
1089             # Get the last significant element in the parent
1090 3030         8292 my $Element = $Parent->schild(-1);
1091              
1092             # Is this a subscript, like $foo[1] or $foo{expr}
1093            
1094 3030 100       8642 if ( $Element ) {
1095 2781 100 100     12760 if ( $Element->isa('PPI::Token::Operator') and $Element->content eq '->' ) {
1096             # $foo->[]
1097 400         913 $Element->{_dereference} = 1;
1098 400         1559 return 'PPI::Structure::Subscript';
1099             }
1100 2381 100       7943 if ( $Element->isa('PPI::Structure::Subscript') ) {
1101             # $foo{}[]
1102 5         17 return 'PPI::Structure::Subscript';
1103             }
1104 2376 100 100     9767 if ( $Element->isa('PPI::Token::Symbol') and $Element->content =~ /^(?:\$|\@)/ ) {
1105             # $foo[], @foo[]
1106 735         3190 return 'PPI::Structure::Subscript';
1107             }
1108 1641 100 100     7036 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%)/ ) {
1109 42         81 my $prior = $Parent->schild(-2);
1110 42 100 100     157 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1111             # Postfix dereference: ->@[...] ->%[...]
1112 2         10 return 'PPI::Structure::Subscript';
1113             }
1114             }
1115             # FIXME - More cases to catch
1116             }
1117              
1118             # Otherwise, we assume that it's an anonymous arrayref constructor
1119 1888         7296 'PPI::Structure::Constructor';
1120             }
1121              
1122             # Keyword -> Structure class maps
1123             my %CURLY_CLASSES = (
1124             # Blocks
1125             'sub' => 'PPI::Structure::Block',
1126             'grep' => 'PPI::Structure::Block',
1127             'map' => 'PPI::Structure::Block',
1128             'sort' => 'PPI::Structure::Block',
1129             'do' => 'PPI::Structure::Block',
1130             # rely on 'continue' + block being handled elsewhere
1131             # rely on 'eval' + block being handled elsewhere
1132              
1133             # Hash constructors
1134             'scalar' => 'PPI::Structure::Constructor',
1135             '=' => 'PPI::Structure::Constructor',
1136             '||=' => 'PPI::Structure::Constructor',
1137             '&&=' => 'PPI::Structure::Constructor',
1138             '//=' => 'PPI::Structure::Constructor',
1139             '||' => 'PPI::Structure::Constructor',
1140             '&&' => 'PPI::Structure::Constructor',
1141             '//' => 'PPI::Structure::Constructor',
1142             '?' => 'PPI::Structure::Constructor',
1143             ':' => 'PPI::Structure::Constructor',
1144             ',' => 'PPI::Structure::Constructor',
1145             '=>' => 'PPI::Structure::Constructor',
1146             '+' => 'PPI::Structure::Constructor', # per perlref
1147             'return' => 'PPI::Structure::Constructor', # per perlref
1148             'bless' => 'PPI::Structure::Constructor', # pragmatic --
1149             # perlfunc says first arg is a reference, and
1150             # bless {; ... } fails to compile.
1151             );
1152              
1153             my @CURLY_LOOKAHEAD_CLASSES = (
1154             {}, # not used
1155             {
1156             ';' => 'PPI::Structure::Block', # per perlref
1157             '}' => 'PPI::Structure::Constructor',
1158             },
1159             {
1160             '=>' => 'PPI::Structure::Constructor',
1161             },
1162             );
1163              
1164              
1165             # Given a parent element, and a { token to open a structure, determine
1166             # the class that the structure should be.
1167             sub _curly {
1168 11305     11305   20556 my ($self, $Parent) = @_;
1169             # my $self = shift;
1170             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1171              
1172             # Get the last significant element in the parent
1173 11305         26007 my $Element = $Parent->schild(-1);
1174 11305 100       36661 my $content = $Element ? $Element->content : '';
1175              
1176             # Is this a subscript, like $foo[1] or $foo{expr}
1177 11305 100       27993 if ( $Element ) {
1178 10678 100 66     32099 if ( $content eq '->' and $Element->isa('PPI::Token::Operator') ) {
1179             # $foo->{}
1180 2089         5491 $Element->{_dereference} = 1;
1181 2089         8405 return 'PPI::Structure::Subscript';
1182             }
1183 8589 100       32129 if ( $Element->isa('PPI::Structure::Subscript') ) {
1184             # $foo[]{}
1185 71         212 return 'PPI::Structure::Subscript';
1186             }
1187 8518 100 100     43012 if ( $content =~ /^(?:\$|\@)/ and $Element->isa('PPI::Token::Symbol') ) {
1188             # $foo{}, @foo{}
1189 579         2229 return 'PPI::Structure::Subscript';
1190             }
1191 7939 100 100     34418 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%|\*)/ ) {
1192 303         702 my $prior = $Parent->schild(-2);
1193 303 100 100     2107 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1194             # Postfix dereference: ->@{...} ->%{...} ->*{...}
1195 3         11 return 'PPI::Structure::Subscript';
1196             }
1197             }
1198 7936 100       25013 if ( $Element->isa('PPI::Structure::Block') ) {
1199             # deference - ${$hash_ref}{foo}
1200             # or even ${burfle}{foo}
1201             # hash slice - @{$hash_ref}{'foo', 'bar'}
1202 4 50       11 if ( my $prior = $Parent->schild(-2) ) {
1203 4         10 my $prior_content = $prior->content();
1204 4 50 66     24 $prior->isa( 'PPI::Token::Cast' )
      66        
1205             and ( $prior_content eq '@' ||
1206             $prior_content eq '$' )
1207             and return 'PPI::Structure::Subscript';
1208             }
1209             }
1210              
1211             # Are we the last argument of sub?
1212             # E.g.: 'sub foo {}', 'sub foo ($) {}'
1213 7934 100       34320 return 'PPI::Structure::Block' if $Parent->isa('PPI::Statement::Sub');
1214              
1215             # Are we the second or third argument of package?
1216             # E.g.: 'package Foo {}' or 'package Foo v1.2.3 {}'
1217 5601 100       24252 return 'PPI::Structure::Block'
1218             if $Parent->isa('PPI::Statement::Package');
1219              
1220 4308 100       12746 if ( $CURLY_CLASSES{$content} ) {
1221             # Known type
1222 898         4149 return $CURLY_CLASSES{$content};
1223             }
1224             }
1225              
1226             # Are we in a compound statement
1227 4037 100       14353 if ( $Parent->isa('PPI::Statement::Compound') ) {
1228             # We will only encounter blocks in compound statements
1229 1938         7697 return 'PPI::Structure::Block';
1230             }
1231              
1232             # Are we the second or third argument of use
1233 2099 100       7221 if ( $Parent->isa('PPI::Statement::Include') ) {
1234 53 50 33     174 if ( $Parent->schildren == 2 ||
      66        
1235             $Parent->schildren == 3 &&
1236             $Parent->schild(2)->isa('PPI::Token::Number')
1237             ) {
1238             # This is something like use constant { ... };
1239 53         287 return 'PPI::Structure::Constructor';
1240             }
1241             }
1242              
1243             # Unless we are at the start of the statement, everything else should be a block
1244             ### FIXME This is possibly a bad choice, but will have to do for now.
1245 2046 100       7966 return 'PPI::Structure::Block' if $Element;
1246              
1247 627 100 66     2896 if (
1248             $Parent->isa('PPI::Statement')
1249             and
1250             _INSTANCE($Parent->parent, 'PPI::Structure::List')
1251             ) {
1252 165         336 my $function = $Parent->parent->parent->schild(-2);
1253              
1254             # Special case: Are we the param of a core function
1255             # i.e. map({ $_ => 1 } @foo)
1256 165 100 100     658 return 'PPI::Structure::Block'
1257             if $function and $function->content =~ /^(?:map|grep|sort|eval|do)$/;
1258              
1259             # If not part of a block print, list-embedded curlies are most likely constructors
1260 71 100 100     352 return 'PPI::Structure::Constructor'
1261             if not $function or $function->content !~ /^(?:print|say)$/;
1262             }
1263              
1264             # We need to scan ahead.
1265 468         725 my $Next;
1266 468         595 my $position = 0;
1267 468         681 my @delayed;
1268 468         895 while ( $Next = $self->_get_token ) {
1269 1129 100       2571 unless ( $Next->significant ) {
1270 188         319 push @delayed, $Next;
1271 188         347 next;
1272             }
1273              
1274             # If we are off the end of the lookahead array,
1275 941 100       4070 if ( ++$position >= @CURLY_LOOKAHEAD_CLASSES ) {
    100          
1276             # default to block.
1277 111         407 $self->_buffer( splice(@delayed), $Next );
1278 111         187 last;
1279             # If the content at this position is known
1280             } elsif ( my $class = $CURLY_LOOKAHEAD_CLASSES[$position]
1281             {$Next->content} ) {
1282             # return the associated class.
1283 271         935 $self->_buffer( splice(@delayed), $Next );
1284 271         1479 return $class;
1285             }
1286              
1287             # Delay and continue
1288 559         1203 push @delayed, $Next;
1289             }
1290              
1291             # Hit the end of the document, or bailed out, go with block
1292 197         525 $self->_buffer( splice(@delayed) );
1293 197 50       521 if ( ref $Parent eq 'PPI::Statement' ) {
1294 197         316 bless $Parent, 'PPI::Statement::Compound';
1295             }
1296 197         810 return 'PPI::Structure::Block';
1297             }
1298              
1299              
1300             sub _lex_structure {
1301 22425     22425   34226 my ($self, $Structure) = @_;
1302             # my $self = shift;
1303             # my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1";
1304              
1305             # Start the processing loop
1306 22425         24091 my $Token;
1307 22425         37479 while ( ref($Token = $self->_get_token) ) {
1308             # Is this a direct type token
1309 89938 100       183310 unless ( $Token->significant ) {
1310 42668         46260 push @{$self->{delayed}}, $Token;
  42668         70007  
1311             # $self->_delay_element( $Token );
1312 42668         81514 next;
1313             }
1314              
1315             # Anything other than a Structure starts a Statement
1316 47270 100       141446 unless ( $Token->isa('PPI::Token::Structure') ) {
1317             # Because _statement may well delay and rollback itself,
1318             # we need to add the delayed tokens early
1319 26209         52479 $self->_add_delayed( $Structure );
1320              
1321             # Determine the class for the Statement and create it
1322 26209         57286 my $Statement = $self->_statement($Structure, $Token)->new($Token);
1323              
1324             # Move the lexing down into the Statement
1325 26209         83708 $self->_add_element( $Structure, $Statement );
1326 26209         79270 $self->_lex_statement( $Statement );
1327              
1328 26209         54344 next;
1329             }
1330              
1331             # Is this the opening of another structure directly inside us?
1332 21061 100       43954 if ( $Token->__LEXER__opens ) {
1333             # Rollback the Token, and recurse into the statement
1334 444         1340 $self->_rollback( $Token );
1335 444         1442 my $Statement = PPI::Statement->new;
1336 444         1105 $self->_add_element( $Structure, $Statement );
1337 444         1256 $self->_lex_statement( $Statement );
1338 444         1061 next;
1339             }
1340              
1341             # Is this the close of a structure ( which would be an error )
1342 20617 100       39865 if ( $Token->__LEXER__closes ) {
1343             # Is this OUR closing structure
1344 20562 100       41534 if ( $Token->content eq $Structure->start->__LEXER__opposite ) {
1345             # Add any delayed tokens, and the finishing token (the ugly way)
1346 19851         42528 $self->_add_delayed( $Structure );
1347 19851         42851 $Structure->{finish} = $Token;
1348             Scalar::Util::weaken(
1349 19851         59752 $_PARENT{Scalar::Util::refaddr $Token} = $Structure
1350             );
1351              
1352             # Confirm that ForLoop structures are actually so, and
1353             # aren't really a list.
1354 19851 100       63133 if ( $Structure->isa('PPI::Structure::For') ) {
1355 229 100       917 if ( 2 > scalar grep {
1356 585         2086 $_->isa('PPI::Statement')
1357             } $Structure->children ) {
1358 208         411 bless($Structure, 'PPI::Structure::List');
1359             }
1360             }
1361 19851         54724 return 1;
1362             }
1363              
1364             # Unmatched closing brace.
1365             # Either they typed the wrong thing, or haven't put
1366             # one at all. Either way it's an error we need to
1367             # somehow handle gracefully. For now, we'll treat it
1368             # as implicitly ending the structure. This causes the
1369             # least damage across the various reasons why this
1370             # might have happened.
1371 711         1456 return $self->_rollback( $Token );
1372             }
1373              
1374             # It's a semi-colon on its own, just inside the block.
1375             # This is a null statement.
1376             $self->_add_element(
1377 55         218 $Structure,
1378             PPI::Statement::Null->new($Token),
1379             );
1380             }
1381              
1382             # Is this an error
1383 1863 50       3206 unless ( defined $Token ) {
1384 0         0 PPI::Exception->throw;
1385             }
1386              
1387             # No, it's just the end of file.
1388             # Add any insignificant trailing tokens.
1389 1863         2986 $self->_add_delayed( $Structure );
1390             }
1391              
1392              
1393              
1394              
1395              
1396             #####################################################################
1397             # Support Methods
1398              
1399             # Get the next token for processing, handling buffering
1400             sub _get_token {
1401 471775 100   471775   443825 shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token;
  471775         1304878  
1402             }
1403              
1404             # Old long version of the above
1405             # my $self = shift;
1406             # # First from the buffer
1407             # if ( @{$self->{buffer}} ) {
1408             # return shift @{$self->{buffer}};
1409             # }
1410             #
1411             # # Then from the Tokenizer
1412             # $self->{Tokenizer}->get_token;
1413             # }
1414              
1415             # Delay the addition of insignificant elements.
1416             # This ended up being inlined.
1417             # sub _delay_element {
1418             # my $self = shift;
1419             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 1";
1420             # push @{ $_[0]->{delayed} }, $_[1];
1421             # }
1422              
1423             # Add an Element to a Node, including any delayed Elements
1424             sub _add_element {
1425 228256     228256   289671 my ($self, $Parent, $Element) = @_;
1426             # my $self = shift;
1427             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1428             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 2";
1429              
1430             # Handle a special case, where a statement is not fully resolved
1431 228256 100 100     468938 if ( ref $Parent eq 'PPI::Statement'
1432             and my $first = $Parent->schild(0) ) {
1433 69101 50 33     186978 if ( $first->isa('PPI::Token::Label')
1434             and !(my $second = $Parent->schild(1)) ) {
1435 0         0 my $new_class = $STATEMENT_CLASSES{$second->content};
1436             # It's a labelled statement
1437 0 0       0 bless $Parent, $new_class if $new_class;
1438             }
1439             }
1440              
1441             # Add first the delayed, from the front, then the passed element
1442 228256         229585 foreach my $el ( @{$self->{delayed}} ) {
  228256         324639  
1443             Scalar::Util::weaken(
1444 62074         155784 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1445             );
1446             # Inlined $Parent->__add_element($el);
1447             }
1448             Scalar::Util::weaken(
1449 228256         503164 $_PARENT{Scalar::Util::refaddr $Element} = $Parent
1450             );
1451 228256         211958 push @{$Parent->{children}}, @{$self->{delayed}}, $Element;
  228256         261158  
  228256         326538  
1452              
1453             # Clear the delayed elements
1454 228256         340054 $self->{delayed} = [];
1455             }
1456              
1457             # Specifically just add any delayed tokens, if any.
1458             sub _add_delayed {
1459 116136     116136   153608 my ($self, $Parent) = @_;
1460             # my $self = shift;
1461             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1462              
1463             # Add any delayed
1464 116136         119466 foreach my $el ( @{$self->{delayed}} ) {
  116136         196061  
1465             Scalar::Util::weaken(
1466 53875         158379 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1467             );
1468             # Inlined $Parent->__add_element($el);
1469             }
1470 116136         120032 push @{$Parent->{children}}, @{$self->{delayed}};
  116136         157862  
  116136         157863  
1471              
1472             # Clear the delayed elements
1473 116136         203714 $self->{delayed} = [];
1474             }
1475              
1476             # Rollback the delayed tokens, plus any passed. Once all the tokens
1477             # have been moved back on to the buffer, the order should be.
1478             # <--- @{$self->{delayed}}, @_, @{$self->{buffer}} <----
1479             sub _rollback {
1480 53276     53276   62234 my $self = shift;
1481              
1482             # First, put any passed objects back
1483 53276 100       91955 if ( @_ ) {
1484 41867         43782 unshift @{$self->{buffer}}, splice @_;
  41867         95801  
1485             }
1486              
1487             # Then, put back anything delayed
1488 53276 100       56642 if ( @{$self->{delayed}} ) {
  53276         100536  
1489 29144         34347 unshift @{$self->{buffer}}, splice @{$self->{delayed}};
  29144         37720  
  29144         46638  
1490             }
1491              
1492 53276         85388 1;
1493             }
1494              
1495             # Partial rollback, just return a single list to the buffer
1496             sub _buffer {
1497 579     579   773 my $self = shift;
1498              
1499             # Put any passed objects back
1500 579 100       1046 if ( @_ ) {
1501 455         606 unshift @{$self->{buffer}}, splice @_;
  455         1049  
1502             }
1503              
1504 579         818 1;
1505             }
1506              
1507              
1508              
1509              
1510              
1511             #####################################################################
1512             # Error Handling
1513              
1514             # Set the error message
1515             sub _error {
1516 2     2   3 $errstr = $_[1];
1517 2         10 undef;
1518             }
1519              
1520             # Clear the error message.
1521             # Returns the object as a convenience.
1522             sub _clear {
1523 16801     16801   30263 $errstr = '';
1524 16801         28324 $_[0];
1525             }
1526              
1527             =pod
1528              
1529             =head2 errstr
1530              
1531             For any error that occurs, you can use the C<errstr>, as either
1532             a static or object method, to access the error message.
1533              
1534             If no error occurs for any particular action, C<errstr> will return false.
1535              
1536             =cut
1537              
1538             sub errstr {
1539 2     2 1 12 $errstr;
1540             }
1541              
1542              
1543              
1544              
1545              
1546             #####################################################################
1547             # PDOM Extensions
1548             #
1549             # This is something of a future expansion... ignore it for now :)
1550             #
1551             # use PPI::Statement::Sub ();
1552             #
1553             # sub PPI::Statement::Sub::__LEXER__normal { '' }
1554              
1555             1;
1556              
1557             =pod
1558              
1559             =head1 TO DO
1560              
1561             - Add optional support for some of the more common source filters
1562              
1563             - Some additional checks for blessing things into various Statement
1564             and Structure subclasses.
1565              
1566             =head1 SUPPORT
1567              
1568             See the L<support section|PPI/SUPPORT> in the main module.
1569              
1570             =head1 AUTHOR
1571              
1572             Adam Kennedy E<lt>adamk@cpan.orgE<gt>
1573              
1574             =head1 COPYRIGHT
1575              
1576             Copyright 2001 - 2011 Adam Kennedy.
1577              
1578             This program is free software; you can redistribute
1579             it and/or modify it under the same terms as Perl itself.
1580              
1581             The full text of the license can be found in the
1582             LICENSE file included with this module.
1583              
1584             =cut