File Coverage

blib/lib/PPI/Lexer.pm
Criterion Covered Total %
statement 441 462 95.4
branch 260 296 87.8
condition 158 206 76.7
subroutine 30 30 100.0
pod 5 6 83.3
total 894 1000 89.4


line stmt bran cond sub pod time code
1             package PPI::Lexer;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Lexer - The PPI Lexer
8              
9             =head1 SYNOPSIS
10              
11             use PPI;
12            
13             # Create a new Lexer
14             my $Lexer = PPI::Lexer->new;
15            
16             # Build a PPI::Document object from a Token stream
17             my $Tokenizer = PPI::Tokenizer->load('My/Module.pm');
18             my $Document = $Lexer->lex_tokenizer($Tokenizer);
19            
20             # Build a PPI::Document object for some raw source
21             my $source = "print 'Hello World!'; kill(Humans->all);";
22             $Document = $Lexer->lex_source($source);
23            
24             # Build a PPI::Document object for a particular file name
25             $Document = $Lexer->lex_file('My/Module.pm');
26              
27             =head1 DESCRIPTION
28              
29             The is the L Lexer. In the larger scheme of things, its job is to take
30             token streams, in a variety of forms, and "lex" them into nested structures.
31              
32             Pretty much everything in this module happens behind the scenes at this
33             point. In fact, at the moment you don't really need to instantiate the lexer
34             at all, the three main methods will auto-instantiate themselves a
35             C object as needed.
36              
37             All methods do a one-shot "lex this and give me a L object".
38              
39             In fact, if you are reading this, what you B want to do is to
40             just "load a document", in which case you can do this in a much more
41             direct and concise manner with one of the following.
42              
43             use PPI;
44            
45             $Document = PPI::Document->load( $filename );
46             $Document = PPI::Document->new( $string );
47              
48             See L for more details.
49              
50             For more unusual tasks, by all means forge onwards.
51              
52             =head1 METHODS
53              
54             =cut
55              
56 67     67   2474 use strict;
  67         933  
  67         2700  
57 67     67   255 use Scalar::Util ();
  67         83  
  67         1165  
58 67     67   198 use Params::Util qw{_STRING _INSTANCE};
  67         81  
  67         2855  
59 67     67   278 use PPI ();
  67         86  
  67         780  
60 67     67   213 use PPI::Exception ();
  67         92  
  67         991  
61 67     67   189 use PPI::Singletons '%_PARENT';
  67         91  
  67         298146  
62              
63             our $VERSION = '1.287';
64              
65             our $errstr = "";
66              
67             # Keyword -> Structure class maps
68             my %ROUND = (
69             # Conditions
70             'if' => 'PPI::Structure::Condition',
71             'elsif' => 'PPI::Structure::Condition',
72             'unless' => 'PPI::Structure::Condition',
73             'while' => 'PPI::Structure::Condition',
74             'until' => 'PPI::Structure::Condition',
75              
76             # For(each)
77             'for' => 'PPI::Structure::For',
78             'foreach' => 'PPI::Structure::For',
79             );
80              
81             # Opening brace to refining method
82             my %RESOLVE = (
83             '(' => '_round',
84             '[' => '_square',
85             '{' => '_curly',
86             );
87              
88             # Allows for experimental overriding of the tokenizer
89             our $X_TOKENIZER = "PPI::Tokenizer";
90 16841     16841 0 64873 sub X_TOKENIZER { $X_TOKENIZER }
91              
92              
93              
94              
95              
96             #####################################################################
97             # Constructor
98              
99             =pod
100              
101             =head2 new
102              
103             The C constructor creates a new C object. The object itself
104             is merely used to hold various buffers and state data during the lexing
105             process, and holds no significant data between -Elex_xxxxx calls.
106              
107             Returns a new C object
108              
109             =cut
110              
111             sub new {
112 16841     16841 1 38617 my $class = shift->_clear;
113 16841         89680 bless {
114             Tokenizer => undef, # Where we store the tokenizer for a run
115             buffer => [], # The input token buffer
116             delayed => [], # The "delayed insignificant tokens" buffer
117             features_stack => [], # Stack of features in scope
118             }, $class;
119             }
120              
121              
122              
123              
124              
125             #####################################################################
126             # Main Lexing Methods
127              
128             =pod
129              
130             =head2 lex_file $filename
131              
132             The C method takes a filename as argument. It then loads the file,
133             creates a L for the content and lexes the token stream
134             produced by the tokenizer. Basically, a sort of all-in-one method for
135             getting a L object from a file name.
136              
137             Additional arguments are passed to the tokenizer as a hash.
138              
139             Returns a L object, or C on error.
140              
141             =cut
142              
143 518     518 1 2163 sub lex_file { shift->_lex_input(@_) }
144              
145             =pod
146              
147             =head2 lex_source $string
148              
149             The C method takes a normal scalar string as argument. It
150             creates a L object for the string, and then lexes the
151             resulting token stream.
152              
153             Additional arguments are passed to the tokenizer as a hash.
154              
155             Returns a L object, or C on error.
156              
157             =cut
158              
159 16323     16323 1 968202 sub lex_source { shift->_lex_input( \shift, @_ ) }
160              
161             sub _lex_input {
162 16841     16841   32624 my ( $self, $input, %args ) = @_;
163 16841 100       41117 $self = ref $self ? $self : $self->new;
164              
165             # Create the Tokenizer
166 16841         24693 my $Tokenizer = eval { X_TOKENIZER->new($input) };
  16841         33239  
167             return #
168 16841 50       135681 $@
    50          
    100          
169             ? $self->_error( _INSTANCE( $@, 'PPI::Exception' ) ? $@->message : $@ )
170             : !_INSTANCE( $Tokenizer, 'PPI::Tokenizer' )
171             ? $self->_error($Tokenizer)
172             : $self->lex_tokenizer( $Tokenizer, %args );
173             }
174              
175             =pod
176              
177             =head2 lex_tokenizer $Tokenizer
178              
179             The C takes as argument a L object. It
180             lexes the token stream from the tokenizer into a L object.
181              
182             Additional arguments are set on the L produced.
183              
184             Returns a L object, or C on error.
185              
186             =cut
187              
188             sub lex_tokenizer {
189 16839 50   16839 1 34478 my $self = ref $_[0] ? shift : shift->new;
190 16839         43255 my $Tokenizer = _INSTANCE(shift, 'PPI::Tokenizer');
191 16839 50       33266 return $self->_error(
192             "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer"
193             ) unless $Tokenizer;
194 16839         27795 my %args = @_;
195              
196             # Create the empty document
197 16839         44111 my $Document = PPI::Document->new;
198 16839         55213 ref($Document)->_setattr( $Document, %args );
199 16839         43349 $Tokenizer->_document($Document);
200 16839 100       34174 if (my $feat = $Document->feature_mods) {
201 18         19 push @{$self->{features_stack}}, $feat;
  18         34  
202 18         33 $Tokenizer->_features($feat);
203             }
204              
205             # Lex the token stream into the document
206 16839         25034 $self->{Tokenizer} = $Tokenizer;
207 16839 100       20375 if ( !eval { $self->_lex_document($Document); 1 } ) {
  16839         37254  
  16838         29765  
208             # If an error occurs DESTROY the partially built document.
209 1         5 $Tokenizer->_document(undef);
210 1         4 undef $Document;
211 1 50       9 if ( _INSTANCE($@, 'PPI::Exception') ) {
212 1         6 return $self->_error( $@->message );
213             } else {
214 0         0 return $self->_error( $errstr );
215             }
216             }
217              
218 16838         154734 return $Document;
219             }
220              
221              
222              
223              
224              
225             #####################################################################
226             # Lex Methods - Document Object
227              
228             sub _lex_document {
229 16839     16839   24862 my ($self, $Document) = @_;
230             # my $self = shift;
231             # my $Document = _INSTANCE(shift, 'PPI::Document') or return undef;
232              
233             # Start the processing loop
234 16839         20084 my $Token;
235 16839         35574 while ( ref($Token = $self->_get_token) ) {
236             # Add insignificant tokens directly beneath us
237 54119 100       110035 unless ( $Token->significant ) {
238 21220         39984 $self->_add_element( $Document, $Token );
239 21220         31951 next;
240             }
241              
242 32899 100       69121 if ( $Token->content eq ';' ) {
243             # It's a semi-colon on its own.
244             # We call this a null statement.
245 475         1869 $self->_add_element(
246             $Document,
247             PPI::Statement::Null->new($Token),
248             );
249 475         989 next;
250             }
251              
252             # Handle anything other than a structural element
253 32424 100       58983 unless ( ref $Token eq 'PPI::Token::Structure' ) {
254             # Determine the class for the Statement, and create it
255 29306         62957 my $Statement = $self->_statement($Document, $Token)->new($Token);
256              
257             # Move the lexing down into the statement
258 29306         68993 $self->_add_delayed( $Document );
259 29306         60512 $self->_add_element( $Document, $Statement );
260 29306         62126 $self->_lex_statement( $Statement );
261              
262 29306         57046 next;
263             }
264              
265             # Is this the opening of a structure?
266 3118 100       5930 if ( $Token->__LEXER__opens ) {
267             # This should actually have a Statement instead
268 956         2730 $self->_rollback( $Token );
269 956         3296 my $Statement = PPI::Statement->new;
270 956         2580 $self->_add_element( $Document, $Statement );
271 956         2557 $self->_lex_statement( $Statement );
272 956         2010 next;
273             }
274              
275             # Is this the close of a structure.
276 2162 50       3659 if ( $Token->__LEXER__closes ) {
277             # Because we are at the top of the tree, this is an error.
278             # This means either a mis-parsing, or a mistake in the code.
279             # To handle this, we create a "Naked Close" statement
280 2162         6243 $self->_add_element( $Document,
281             PPI::Statement::UnmatchedBrace->new($Token)
282             );
283 2162         3586 next;
284             }
285              
286             # Shouldn't be able to get here
287 0         0 PPI::Exception->throw('Lexer reached an illegal state');
288             }
289              
290             # Did we leave the main loop because of a Tokenizer error?
291 16838 50       28583 unless ( defined $Token ) {
292 0 0       0 my $errstr = $self->{Tokenizer} ? $self->{Tokenizer}->errstr : '';
293 0   0     0 $errstr ||= 'Unknown Tokenizer Error';
294 0         0 PPI::Exception->throw($errstr);
295             }
296              
297             # No error, it's just the end of file.
298             # Add any insignificant trailing tokens.
299 16838         33723 $self->_add_delayed( $Document );
300              
301             # If the Tokenizer has any v6 blocks to attach, do so now.
302             # Checking once at the end is faster than adding a special
303             # case check for every statement parsed.
304 16838         26605 my $perl6 = $self->{Tokenizer}->{'perl6'};
305 16838 100       29314 if ( @$perl6 ) {
306 2         9 my $includes = $Document->find( 'PPI::Statement::Include::Perl6' );
307 2         5 foreach my $include ( @$includes ) {
308 2 50       4 unless ( @$perl6 ) {
309 0         0 PPI::Exception->throw('Failed to find a perl6 section');
310             }
311 2         6 $include->{perl6} = shift @$perl6;
312             }
313             }
314              
315 16838         22757 return 1;
316             }
317              
318              
319              
320              
321              
322             #####################################################################
323             # Lex Methods - Statement Object
324              
325             # Keyword -> Statement Subclass
326             my %STATEMENT_CLASSES = (
327             # Things that affect the timing of execution
328             'BEGIN' => 'PPI::Statement::Scheduled',
329             'CHECK' => 'PPI::Statement::Scheduled',
330             'UNITCHECK' => 'PPI::Statement::Scheduled',
331             'INIT' => 'PPI::Statement::Scheduled',
332             'END' => 'PPI::Statement::Scheduled',
333              
334             # Special subroutines for which 'sub' is optional
335             'AUTOLOAD' => 'PPI::Statement::Sub',
336             'DESTROY' => 'PPI::Statement::Sub',
337              
338             # Loading and context statement
339             'package' => 'PPI::Statement::Package',
340             # 'use' => 'PPI::Statement::Include',
341             'no' => 'PPI::Statement::Include',
342             'require' => 'PPI::Statement::Include',
343              
344             # Various declarations
345             'my' => 'PPI::Statement::Variable',
346             'local' => 'PPI::Statement::Variable',
347             'our' => 'PPI::Statement::Variable',
348             'state' => 'PPI::Statement::Variable',
349             # Statements starting with 'sub' could be any one of...
350             # 'sub' => 'PPI::Statement::Sub',
351             # 'sub' => 'PPI::Statement::Scheduled',
352             # 'sub' => 'PPI::Statement',
353              
354             # Compound statement
355             'if' => 'PPI::Statement::Compound',
356             'unless' => 'PPI::Statement::Compound',
357             'for' => 'PPI::Statement::Compound',
358             'foreach' => 'PPI::Statement::Compound',
359             'while' => 'PPI::Statement::Compound',
360             'until' => 'PPI::Statement::Compound',
361              
362             # Switch statement
363             'given' => 'PPI::Statement::Given',
364             'when' => 'PPI::Statement::When',
365             'default' => 'PPI::Statement::When',
366              
367             # Various ways of breaking out of scope
368             'redo' => 'PPI::Statement::Break',
369             'next' => 'PPI::Statement::Break',
370             'last' => 'PPI::Statement::Break',
371             'return' => 'PPI::Statement::Break',
372             'goto' => 'PPI::Statement::Break',
373              
374             # Special sections of the file
375             '__DATA__' => 'PPI::Statement::Data',
376             '__END__' => 'PPI::Statement::End',
377             );
378              
379             sub _statement {
380 66692     66692   96158 my ($self, $Parent, $Token) = @_;
381             # my $self = shift;
382             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
383             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
384              
385             # Check for things like ( parent => ... )
386 66692 100 100     348596 if (
387             $Parent->isa('PPI::Structure::List')
388             or
389             $Parent->isa('PPI::Structure::Constructor')
390             ) {
391 11809 100       30437 if ( $Token->isa('PPI::Token::Word') ) {
392             # Is the next significant token a =>
393             # Read ahead to the next significant token
394 2056         3005 my $Next;
395 2056         3704 while ( $Next = $self->_get_token ) {
396 2894 100       6646 unless ( $Next->significant ) {
397 888         1438 push @{$self->{delayed}}, $Next;
  888         1443  
398             # $self->_delay_element( $Next );
399 888         1507 next;
400             }
401              
402             # Got the next token
403 2006 100 100     7723 if (
404             $Next->isa('PPI::Token::Operator')
405             and
406             $Next->content eq '=>'
407             ) {
408             # Is an ordinary expression
409 945         2421 $self->_rollback( $Next );
410 945         3865 return 'PPI::Statement::Expression';
411             } else {
412 1061         1429 last;
413             }
414             }
415              
416             # Rollback and continue
417 1111         2474 $self->_rollback( $Next );
418             }
419             }
420              
421 65747         76027 my $is_lexsub = 0;
422              
423             # Is it a token in our known classes list
424 65747         110778 my $content = $Token->content;
425             my $class =
426             ( $content eq 'try' and ( $self->{features_stack}[-1] || {} )->{try} )
427             ? 'PPI::Statement::Compound'
428 65747 100 100     170594 : $STATEMENT_CLASSES{$content};
429              
430 65747 100       97490 if ( $class ) {
431             # Is the next significant token a =>
432             # Read ahead to the next significant token
433 9893         10920 my $Next;
434 9893         15970 while ( $Next = $self->_get_token ) {
435 19418 100       40968 if ( !$Next->significant ) {
436 9572         10583 push @{$self->{delayed}}, $Next;
  9572         16552  
437 9572         15898 next;
438             }
439              
440             # Scheduled block must be followed by left curly or
441             # semicolon. Otherwise we have something else (e.g.
442             # open( CHECK, ... );
443 9846 100 66     21521 if (
      100        
444             'PPI::Statement::Scheduled' eq $class
445             and not ( $Next->isa( 'PPI::Token::Structure' )
446             and $Next->content =~ m/\A[{;]\z/ ) # }
447             ) {
448 1         2 $class = undef;
449 1         2 last;
450             }
451              
452             # Lexical subroutine
453 9845 100 100     44542 if (
      66        
454             $content =~ /^(?:my|our|state)\z/
455             and $Next->isa( 'PPI::Token::Word' ) and $Next->content eq 'sub'
456             ) {
457             # This should be PPI::Statement::Sub rather than PPI::Statement::Variable
458 7         12 $class = undef;
459 7         9 $is_lexsub = 1;
460 7         7 last;
461             }
462              
463             last if
464 9838 100 100     41768 !$Next->isa( 'PPI::Token::Operator' ) or $Next->content ne '=>';
465              
466             # Got the next token
467             # Is an ordinary expression
468 21         50 $self->_rollback( $Next );
469 21         78 return 'PPI::Statement';
470             }
471              
472             # Rollback and continue
473 9872         20767 $self->_rollback( $Next );
474             }
475              
476             # Handle potential barewords for subscripts
477 65726 100       151495 if ( $Parent->isa('PPI::Structure::Subscript') ) {
478             # Fast obvious case, just an expression
479 7556 100 100     15074 unless ( $class and $class->isa('PPI::Statement::Expression') ) {
480 7433         20961 return 'PPI::Statement::Expression';
481             }
482              
483             # This is something like "my" or "our" etc... more subtle.
484             # Check if the next token is a closing curly brace.
485             # This means we are something like $h{my}
486 123         130 my $Next;
487 123         179 while ( $Next = $self->_get_token ) {
488 119 50       232 unless ( $Next->significant ) {
489 0         0 push @{$self->{delayed}}, $Next;
  0         0  
490             # $self->_delay_element( $Next );
491 0         0 next;
492             }
493              
494             # Found the next significant token.
495             # Is it a closing curly brace?
496 119 50       203 if ( $Next->content eq '}' ) {
497 119         209 $self->_rollback( $Next );
498 119         400 return 'PPI::Statement::Expression';
499             } else {
500 0         0 $self->_rollback( $Next );
501 0         0 return $class;
502             }
503             }
504              
505             # End of file... this means it is something like $h{our
506             # which is probably going to be $h{our} ... I think
507 4         9 $self->_rollback( $Next );
508 4         12 return 'PPI::Statement::Expression';
509             }
510              
511             # If it's a token in our list, use that class
512 58170 100       118061 return $class if $class;
513              
514             # Handle the more in-depth sub detection
515 48459 100 100     121267 if ( $is_lexsub || $content eq 'sub' ) {
516             # Read ahead to the next significant token
517 3415         4070 my $Next;
518 3415         6217 while ( $Next = $self->_get_token ) {
519 6767 100       13479 unless ( $Next->significant ) {
520 3376         3753 push @{$self->{delayed}}, $Next;
  3376         6400  
521             # $self->_delay_element( $Next );
522 3376         5919 next;
523             }
524              
525             # Got the next significant token
526 3391         6258 my $sclass = $STATEMENT_CLASSES{$Next->content};
527 3391 100 100     7928 if ( $sclass and $sclass eq 'PPI::Statement::Scheduled' ) {
528 28         74 $self->_rollback( $Next );
529 28         123 return 'PPI::Statement::Scheduled';
530             }
531 3363 100       9126 if ( $Next->isa('PPI::Token::Word') ) {
532 3254         7265 $self->_rollback( $Next );
533 3254         14138 return 'PPI::Statement::Sub';
534             }
535              
536             ### Comment out these two, as they would return PPI::Statement anyway
537             # if ( $content eq '{' ) {
538             # Anonymous sub at start of statement
539             # return 'PPI::Statement';
540             # }
541             #
542             # if ( $Next->isa('PPI::Token::Prototype') ) {
543             # Anonymous sub at start of statement
544             # return 'PPI::Statement';
545             # }
546              
547             # PPI::Statement is the safest fall-through
548 109         287 $self->_rollback( $Next );
549 109         460 return 'PPI::Statement';
550             }
551              
552             # End of file... PPI::Statement::Sub is the most likely
553 24         73 $self->_rollback( $Next );
554 24         134 return 'PPI::Statement::Sub';
555             }
556              
557 45044 100       65159 if ( $content eq 'use' ) {
558             # Add a special case for "use v6" lines.
559 2275         2635 my $Next;
560 2275         4054 while ( $Next = $self->_get_token ) {
561 4545 100       8989 unless ( $Next->significant ) {
562 2272         2961 push @{$self->{delayed}}, $Next;
  2272         4311  
563             # $self->_delay_element( $Next );
564 2272         4032 next;
565             }
566              
567             # Found the next significant token.
568 2273 100 66     10458 if (
    100          
569             $Next->isa('PPI::Token::Operator')
570             and
571             $Next->content eq '=>'
572             ) {
573             # Is an ordinary expression
574 1         3 $self->_rollback( $Next );
575 1         4 return 'PPI::Statement';
576             # Is it a v6 use?
577             } elsif ( $Next->content eq 'v6' ) {
578 2         7 $self->_rollback( $Next );
579 2         23 return 'PPI::Statement::Include::Perl6';
580             } else {
581 2270         5765 $self->_rollback( $Next );
582 2270         11341 return 'PPI::Statement::Include';
583             }
584             }
585              
586             # End of file... this means it is an incomplete use
587             # line, just treat it as a normal include.
588 2         5 $self->_rollback( $Next );
589 2         14 return 'PPI::Statement::Include';
590             }
591              
592             # If our parent is a Condition, we are an Expression
593 42769 100       101187 if ( $Parent->isa('PPI::Structure::Condition') ) {
594 1242         4311 return 'PPI::Statement::Expression';
595             }
596              
597             # If our parent is a List, we are also an expression
598 41527 100       82201 if ( $Parent->isa('PPI::Structure::List') ) {
599 8954         28493 return 'PPI::Statement::Expression';
600             }
601              
602             # Switch statements use expressions, as well.
603 32573 100 100     152652 if (
604             $Parent->isa('PPI::Structure::Given')
605             or
606             $Parent->isa('PPI::Structure::When')
607             ) {
608 6         46 return 'PPI::Statement::Expression';
609             }
610              
611 32567 100       168439 if ( _INSTANCE($Token, 'PPI::Token::Label') ) {
612 383         2081 return 'PPI::Statement::Compound';
613             }
614              
615             # Beyond that, I have no idea for the moment.
616             # Just keep adding more conditions above this.
617 32184         114342 return 'PPI::Statement';
618             }
619              
620             sub _update_features {
621 68110     68110   87299 my ( $self, $statement ) = @_;
622              
623 68110 100       127801 return if ref $statement ne 'PPI::Statement::Include';
624             return unless #
625 3308 100       11412 my $new_features = $statement->feature_mods;
626 18         33 push @{ $self->{features_stack} }, {}
627 19 100       35 if not @{ $self->{features_stack} };
  19         65  
628 19         34 my $current_features = $self->{features_stack}[-1];
629             $self->{Tokenizer}->_features #
630             ( $self->{features_stack}[-1] =
631 19         33 { %{$current_features}, %{$new_features} } );
  19         32  
  19         85  
632             }
633              
634             sub _lex_statement {
635 68118     68118   88382 my ($self, $Statement) = @_;
636             # my $self = shift;
637             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
638              
639             # Handle some special statements
640 68118 100       199725 if ( $Statement->isa('PPI::Statement::End') ) {
641 8         25 return $self->_lex_end( $Statement );
642             }
643              
644             # Begin processing tokens
645 68110         73174 my $Token;
646 68110         100181 while ( ref( $Token = $self->_get_token ) ) {
647             # Delay whitespace and comment tokens
648 447436 100       739862 unless ( $Token->significant ) {
649 147142         138115 push @{$self->{delayed}}, $Token;
  147142         183910  
650             # $self->_delay_element( $Token );
651 147142         196040 next;
652             }
653              
654             # Structual closes, and __DATA__ and __END__ tags implicitly
655             # end every type of statement
656 300294 100 66     450170 if (
657             $Token->__LEXER__closes
658             or
659             $Token->isa('PPI::Token::Separator')
660             ) {
661             # Rollback and end the statement
662 25325         54968 $self->_update_features( $Statement );
663 25325         42947 return $self->_rollback( $Token );
664             }
665              
666             # Normal statements never implicitly end
667 274969 100       480222 unless ( $Statement->__LEXER__normal ) {
668             # Have we hit an implicit end to the statement
669 25205 100       44770 unless ( $self->_continues( $Statement, $Token ) ) {
670             # Rollback and finish the statement
671 4532         12001 $self->_update_features( $Statement );
672 4532         9206 return $self->_rollback( $Token );
673             }
674             }
675              
676             # Any normal character just gets added
677 270437 100       516627 unless ( $Token->isa('PPI::Token::Structure') ) {
678 213558         338818 $self->_add_element( $Statement, $Token );
679 213558         289303 next;
680             }
681              
682             # Handle normal statement terminators
683 56879 100       85488 if ( $Token->content eq ';' ) {
684 26789         50006 $self->_add_element( $Statement, $Token );
685 26789         57055 $self->_update_features( $Statement );
686 26789         37543 return 1;
687             }
688              
689             # Which leaves us with a new structure
690              
691             # Determine the class for the structure and create it
692 30090         49003 my $method = $RESOLVE{$Token->content};
693 30090         80053 my $Structure = $self->$method($Statement)->new($Token);
694              
695             # Move the lexing down into the Structure
696 30090         66551 $self->_add_delayed( $Statement );
697 30090         58639 $self->_add_element( $Statement, $Structure );
698 30090         63714 $self->_lex_structure( $Structure );
699             }
700              
701             # Was it an error in the tokenizer?
702 11464 50       16532 unless ( defined $Token ) {
703 0         0 PPI::Exception->throw;
704             }
705              
706             # No, it's just the end of the file...
707             # Roll back any insignificant tokens, they'll get added at the Document level
708 11464         21335 $self->_update_features( $Statement );
709 11464         17694 $self->_rollback;
710             }
711              
712             sub _lex_end {
713 8     8   39 my ($self, $Statement) = @_;
714             # my $self = shift;
715             # my $Statement = _INSTANCE(shift, 'PPI::Statement::End') or die "Bad param 1";
716              
717             # End of the file, EVERYTHING is ours
718 8         11 my $Token;
719 8         26 while ( $Token = $self->_get_token ) {
720             # Inlined $Statement->__add_element($Token);
721             Scalar::Util::weaken(
722 15         68 $_PARENT{Scalar::Util::refaddr $Token} = $Statement
723             );
724 15         15 push @{$Statement->{children}}, $Token;
  15         26  
725             }
726              
727             # Was it an error in the tokenizer?
728 8 50       22 unless ( defined $Token ) {
729 0         0 PPI::Exception->throw;
730             }
731              
732             # No, it's just the end of the file...
733             # Roll back any insignificant tokens, they get added at the Document level
734 8         27 $self->_rollback;
735             }
736              
737             # For many statements, it can be difficult to determine the end-point.
738             # This method takes a statement and the next significant token, and attempts
739             # to determine if the there is a statement boundary between the two, or if
740             # the statement can continue with the token.
741             sub _continues {
742 25205     25205   34973 my ($self, $Statement, $Token) = @_;
743             # my $self = shift;
744             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
745             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
746              
747             # Handle the simple block case
748             # { print 1; }
749 25205 100 100     52047 if (
750             $Statement->schildren == 1
751             and
752             $Statement->schild(0)->isa('PPI::Structure::Block')
753             ) {
754 47         155 return '';
755             }
756              
757             # Alrighty then, there are six implied-end statement types:
758             # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, ::When,
759             # and ::Package statements.
760 25158 50       49351 return 1
761             if ref $Statement !~ /\b(?:Scheduled|Sub|Compound|Given|When|Package)$/;
762              
763             # Of these six, ::Scheduled, ::Sub, ::Given, and ::When follow the same
764             # simple rule and can be handled first. The block form of ::Package
765             # follows the rule, too. (The non-block form of ::Package
766             # requires a statement terminator, and thus doesn't need to have
767             # an implied end detected.)
768 25158         44859 my @part = $Statement->schildren;
769 25158         29372 my $LastChild = $part[-1];
770             # If the last significant element of the statement is a block,
771             # then an implied-end statement is done, no questions asked.
772 25158 100       108242 return !$LastChild->isa('PPI::Structure::Block')
773             if !$Statement->isa('PPI::Statement::Compound');
774              
775             # Now we get to compound statements, which kind of suck (to lex).
776             # However, of them all, the 'if' type, which includes unless, are
777             # relatively easy to handle compared to the others.
778 5557         16865 my $type = $Statement->type;
779 5557 100       11103 if ( $type eq 'if' ) {
780             # This should be one of the following
781             # if (EXPR) BLOCK
782             # if (EXPR) BLOCK else BLOCK
783             # if (EXPR) BLOCK elsif (EXPR) BLOCK ... else BLOCK
784              
785             # We only implicitly end on a block
786 3438 100       8539 unless ( $LastChild->isa('PPI::Structure::Block') ) {
787             # if (EXPR) ...
788             # if (EXPR) BLOCK else ...
789             # if (EXPR) BLOCK elsif (EXPR) BLOCK ...
790 2376         5731 return 1;
791             }
792              
793             # If the token before the block is an 'else',
794             # it's over, no matter what.
795 1062         2296 my $NextLast = $Statement->schild(-2);
796 1062 50 66     7967 if (
      66        
      66        
797             $NextLast
798             and
799             $NextLast->isa('PPI::Token')
800             and
801             $NextLast->isa('PPI::Token::Word')
802             and
803             $NextLast->content eq 'else'
804             ) {
805 73         262 return '';
806             }
807              
808             # Otherwise, we continue for 'elsif' or 'else' only.
809 989 100 100     4506 if (
      100        
810             $Token->isa('PPI::Token::Word')
811             and (
812             $Token->content eq 'else'
813             or
814             $Token->content eq 'elsif'
815             )
816             ) {
817 303         1111 return 1;
818             }
819              
820 686         2349 return '';
821             }
822              
823 2119 100       4492 if ( $type eq 'label' ) {
824             # We only have the label so far, could be any of
825             # LABEL while (EXPR) BLOCK
826             # LABEL while (EXPR) BLOCK continue BLOCK
827             # LABEL for (EXPR; EXPR; EXPR) BLOCK
828             # LABEL foreach VAR (LIST) BLOCK
829             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
830             # LABEL BLOCK continue BLOCK
831              
832             # Handle cases with a word after the label
833 360 100 100     1849 if (
834             $Token->isa('PPI::Token::Word')
835             and
836             $Token->content =~ /^(?:while|until|for|foreach)$/
837             ) {
838 38         111 return 1;
839             }
840              
841             # Handle labelled blocks
842 322 100 100     1610 if ( $Token->isa('PPI::Token::Structure') && $Token->content eq '{' ) {
843 243         770 return 1;
844             }
845              
846 79         190 return '';
847             }
848              
849             # Handle the common "after round braces" case
850 1759 100 100     7648 if ( $LastChild->isa('PPI::Structure') and $LastChild->braces eq '()' ) {
851             # LABEL while (EXPR) ...
852             # LABEL while (EXPR) ...
853             # LABEL for (EXPR; EXPR; EXPR) ...
854             # LABEL for VAR (LIST) ...
855             # LABEL foreach VAR (LIST) ...
856             # Only a block will do
857 384   33     1743 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
858             }
859              
860 1375 100       2580 if ( $type eq 'for' ) {
861             # LABEL for (EXPR; EXPR; EXPR) BLOCK
862 143 100 66     544 if (
    50          
    0          
863             $LastChild->isa('PPI::Token::Word')
864             and
865             $LastChild->content =~ /^for(?:each)?\z/
866             ) {
867             # LABEL for ...
868 130 100 66     885 if (
      100        
869             (
870             $Token->isa('PPI::Token::Structure')
871             and
872             $Token->content eq '('
873             )
874             or
875             $Token->isa('PPI::Token::QuoteLike::Words')
876             ) {
877 21         68 return 1;
878             }
879              
880 109 50       308 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
881             # LABEL for VAR QW{} ...
882             # LABEL foreach VAR QW{} ...
883             # Only a block will do
884 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
885             }
886              
887             # In this case, we can also behave like a foreach
888 109         184 $type = 'foreach';
889              
890             } elsif ( $LastChild->isa('PPI::Structure::Block') ) {
891             # LABEL for (EXPR; EXPR; EXPR) BLOCK
892             # That's it, nothing can continue
893 13         34 return '';
894              
895             } elsif ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
896             # LABEL for VAR QW{} ...
897             # LABEL foreach VAR QW{} ...
898             # Only a block will do
899 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
900             }
901             }
902              
903             # Handle the common continue case
904 1341 100 100     4580 if ( $LastChild->isa('PPI::Token::Word') and $LastChild->content eq 'continue' ) {
905             # LABEL while (EXPR) BLOCK continue ...
906             # LABEL foreach VAR (LIST) BLOCK continue ...
907             # LABEL BLOCK continue ...
908             # Only a block will do
909 6   33     35 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
910             }
911              
912 1335 50 66     2484 if ( $type eq 'try' and $LastChild->presumed_features->{try} ) {
913 6 100       23 return 1 if not $LastChild->isa('PPI::Structure::Block');
914              
915 2         5 my $NextLast = $Statement->schild(-2);
916 2 50 33     29 return ''
      33        
      33        
917             if $NextLast
918             and $NextLast->isa('PPI::Token')
919             and $NextLast->isa('PPI::Token::Word')
920             and $NextLast->content eq 'catch';
921              
922 2 50 33     16 return 1 #
923             if $Token->isa('PPI::Token::Word') and $Token->content eq 'catch';
924              
925 0         0 return '';
926             }
927              
928             # Handle the common continuable block case
929 1329 100       3730 if ( $LastChild->isa('PPI::Structure::Block') ) {
930             # LABEL while (EXPR) BLOCK
931             # LABEL while (EXPR) BLOCK ...
932             # LABEL for (EXPR; EXPR; EXPR) BLOCK
933             # LABEL foreach VAR (LIST) BLOCK
934             # LABEL foreach VAR (LIST) BLOCK ...
935             # LABEL BLOCK ...
936             # Is this the block for a continue?
937 464 100 66     3323 if ( _INSTANCE($part[-2], 'PPI::Token::Word') and $part[-2]->content eq 'continue' ) {
938             # LABEL while (EXPR) BLOCK continue BLOCK
939             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
940             # LABEL BLOCK continue BLOCK
941             # That's it, nothing can continue this
942 6         26 return '';
943             }
944              
945             # Only a continue will do
946 458   100     2587 return $Token->isa('PPI::Token::Word') && $Token->content eq 'continue';
947             }
948              
949 865 50       1525 if ( $type eq 'block' ) {
950             # LABEL BLOCK continue BLOCK
951             # Every possible case is covered in the common cases above
952             }
953              
954 865 100       1412 if ( $type eq 'while' ) {
955             # LABEL while (EXPR) BLOCK
956             # LABEL while (EXPR) BLOCK continue BLOCK
957             # LABEL until (EXPR) BLOCK
958             # LABEL until (EXPR) BLOCK continue BLOCK
959             # The only case not covered is the while ...
960 157 50 66     720 if (
      66        
961             $LastChild->isa('PPI::Token::Word')
962             and (
963             $LastChild->content eq 'while'
964             or
965             $LastChild->content eq 'until'
966             )
967             ) {
968             # LABEL while ...
969             # LABEL until ...
970             # Only a condition structure will do
971 157   33     699 return $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
972             }
973             }
974              
975 708 50       1073 if ( $type eq 'foreach' ) {
976             # LABEL foreach VAR (LIST) BLOCK
977             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
978             # The only two cases that have not been covered already are
979             # 'foreach ...' and 'foreach VAR ...'
980              
981 708 100       1836 if ( $LastChild->isa('PPI::Token::Symbol') ) {
982             # LABEL foreach my $scalar ...
983             # Open round brace, or a quotewords
984 210 100 66     1155 return 1 if $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
985 16 50       56 return 1 if $Token->isa('PPI::Token::QuoteLike::Words');
986 0         0 return '';
987             }
988              
989 498 100 100     811 if ( $LastChild->content eq 'foreach' or $LastChild->content eq 'for' ) {
990             # There are three possibilities here
991 281 100 100     1268 if (
    100 100        
    100 66        
    100          
992             $Token->isa('PPI::Token::Word')
993             and (
994             ($STATEMENT_CLASSES{ $Token->content } || '')
995             eq
996             'PPI::Statement::Variable'
997             )
998             ) {
999             # VAR == 'my ...'
1000 196         620 return 1;
1001             } elsif ( $Token->content =~ /^\$/ ) {
1002             # VAR == '$scalar'
1003 34         110 return 1;
1004             } elsif ( $Token->isa('PPI::Token::Structure') and $Token->content eq '(' ) {
1005 42         168 return 1;
1006             } elsif ( $Token->isa('PPI::Token::QuoteLike::Words') ) {
1007 6         17 return 1;
1008             } else {
1009 3         11 return '';
1010             }
1011             }
1012              
1013 217 100 100     451 if (
1014             ($STATEMENT_CLASSES{ $LastChild->content } || '')
1015             eq
1016             'PPI::Statement::Variable'
1017             ) {
1018             # LABEL foreach my ...
1019             # Only a scalar will do
1020 192         1789 return $Token->content =~ /^\$/;
1021             }
1022              
1023             # Handle the rare for my $foo qw{bar} ... case
1024 25 50       57 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
1025             # LABEL for VAR QW ...
1026             # LABEL foreach VAR QW ...
1027             # Only a block will do
1028 25   33     100 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
1029             }
1030             }
1031              
1032             # Something we don't know about... what could it be
1033 0         0 PPI::Exception->throw("Illegal state in '$type' compound statement");
1034             }
1035              
1036              
1037              
1038              
1039              
1040             #####################################################################
1041             # Lex Methods - Structure Object
1042              
1043             # Given a parent element, and a ( token to open a structure, determine
1044             # the class that the structure should be.
1045             sub _round {
1046 11878     11878   17656 my ($self, $Parent) = @_;
1047             # my $self = shift;
1048             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1049              
1050             # Get the last significant element in the parent
1051 11878         32438 my $Element = $Parent->schild(-1);
1052 11878 100       49762 if ( _INSTANCE($Element, 'PPI::Token::Word') ) {
1053             # Can it be determined because it is a keyword?
1054 10216         17286 my $rclass = $ROUND{$Element->content};
1055 10216 100       23205 return $rclass if $rclass;
1056             }
1057              
1058             # If we are part of a for or foreach statement, we are a ForLoop
1059 10537 100       83413 if ( $Parent->isa('PPI::Statement::Compound') ) {
    100          
    100          
    100          
1060 195 100       415 if ( $Parent->type =~ /^for(?:each)?$/ ) {
1061 194         1006 return 'PPI::Structure::For';
1062             }
1063             } elsif ( $Parent->isa('PPI::Statement::Given') ) {
1064 3         30 return 'PPI::Structure::Given';
1065             } elsif ( $Parent->isa('PPI::Statement::When') ) {
1066 3         25 return 'PPI::Structure::When';
1067             } elsif ( $Parent->isa('PPI::Statement::Sub') ) {
1068 156         514 return 'PPI::Structure::Signature';
1069             }
1070              
1071             # Otherwise, it must be a list
1072              
1073             # If the previous element is -> then we mark it as a dereference
1074 10181 100 100     41143 if ( _INSTANCE($Element, 'PPI::Token::Operator') and $Element->content eq '->' ) {
1075 11         54 $Element->{_dereference} = 1;
1076             }
1077              
1078             'PPI::Structure::List'
1079 10181         31985 }
1080              
1081             # Given a parent element, and a [ token to open a structure, determine
1082             # the class that the structure should be.
1083             sub _square {
1084 6723     6723   9970 my ($self, $Parent) = @_;
1085             # my $self = shift;
1086             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1087              
1088             # Get the last significant element in the parent
1089 6723         15617 my $Element = $Parent->schild(-1);
1090              
1091             # Is this a subscript, like $foo[1] or $foo{expr}
1092            
1093 6723 100       14172 if ( $Element ) {
1094 6474 100 100     20542 if ( $Element->isa('PPI::Token::Operator') and $Element->content eq '->' ) {
1095             # $foo->[]
1096 4060         6415 $Element->{_dereference} = 1;
1097 4060         11626 return 'PPI::Structure::Subscript';
1098             }
1099 2414 100       7536 if ( $Element->isa('PPI::Structure::Subscript') ) {
1100             # $foo{}[]
1101 13         48 return 'PPI::Structure::Subscript';
1102             }
1103 2401 100 100     8279 if ( $Element->isa('PPI::Token::Symbol') and $Element->content =~ /^(?:\$|\@)/ ) {
1104             # $foo[], @foo[]
1105 732         2607 return 'PPI::Structure::Subscript';
1106             }
1107 1669 100 100     6098 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%)/ ) {
1108 45         85 my $prior = $Parent->schild(-2);
1109 45 100 100     228 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1110             # Postfix dereference: ->@[...] ->%[...]
1111 2         11 return 'PPI::Structure::Subscript';
1112             }
1113             }
1114             # FIXME - More cases to catch
1115             }
1116              
1117             # Otherwise, we assume that it's an anonymous arrayref constructor
1118 1916         6618 'PPI::Structure::Constructor';
1119             }
1120              
1121             # Keyword -> Structure class maps
1122             my %CURLY_CLASSES = (
1123             # Blocks
1124             'sub' => 'PPI::Structure::Block',
1125             'grep' => 'PPI::Structure::Block',
1126             'map' => 'PPI::Structure::Block',
1127             'sort' => 'PPI::Structure::Block',
1128             'do' => 'PPI::Structure::Block',
1129             # rely on 'continue' + block being handled elsewhere
1130             # rely on 'eval' + block being handled elsewhere
1131              
1132             # Hash constructors
1133             'scalar' => 'PPI::Structure::Constructor',
1134             '=' => 'PPI::Structure::Constructor',
1135             '||=' => 'PPI::Structure::Constructor',
1136             '&&=' => 'PPI::Structure::Constructor',
1137             '//=' => 'PPI::Structure::Constructor',
1138             '||' => 'PPI::Structure::Constructor',
1139             '&&' => 'PPI::Structure::Constructor',
1140             '//' => 'PPI::Structure::Constructor',
1141             '?' => 'PPI::Structure::Constructor',
1142             ':' => 'PPI::Structure::Constructor',
1143             ',' => 'PPI::Structure::Constructor',
1144             '=>' => 'PPI::Structure::Constructor',
1145             '+' => 'PPI::Structure::Constructor', # per perlref
1146             'return' => 'PPI::Structure::Constructor', # per perlref
1147             'bless' => 'PPI::Structure::Constructor', # pragmatic --
1148             # perlfunc says first arg is a reference, and
1149             # bless {; ... } fails to compile.
1150             );
1151              
1152             my @CURLY_LOOKAHEAD_CLASSES = (
1153             {}, # not used
1154             {
1155             ';' => 'PPI::Structure::Block', # per perlref
1156             '}' => 'PPI::Structure::Constructor',
1157             },
1158             {
1159             '=>' => 'PPI::Structure::Constructor',
1160             },
1161             );
1162              
1163              
1164             # Given a parent element, and a { token to open a structure, determine
1165             # the class that the structure should be.
1166             sub _curly {
1167 11489     11489   19145 my ($self, $Parent) = @_;
1168             # my $self = shift;
1169             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1170              
1171             # Get the last significant element in the parent
1172 11489         24642 my $Element = $Parent->schild(-1);
1173 11489 100       33500 my $content = $Element ? $Element->content : '';
1174              
1175             # Is this a subscript, like $foo[1] or $foo{expr}
1176 11489 100       24785 if ( $Element ) {
1177 10838 100 66     28103 if ( $content eq '->' and $Element->isa('PPI::Token::Operator') ) {
1178             # $foo->{}
1179 2111         4154 $Element->{_dereference} = 1;
1180 2111         7311 return 'PPI::Structure::Subscript';
1181             }
1182 8727 100       25968 if ( $Element->isa('PPI::Structure::Subscript') ) {
1183             # $foo[]{}
1184 73         212 return 'PPI::Structure::Subscript';
1185             }
1186 8654 100 100     31747 if ( $content =~ /^(?:\$|\@)/ and $Element->isa('PPI::Token::Symbol') ) {
1187             # $foo{}, @foo{}
1188 574         2065 return 'PPI::Structure::Subscript';
1189             }
1190 8080 100 100     28958 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%|\*)/ ) {
1191 321         722 my $prior = $Parent->schild(-2);
1192 321 100 100     1755 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1193             # Postfix dereference: ->@{...} ->%{...} ->*{...}
1194 3         13 return 'PPI::Structure::Subscript';
1195             }
1196             }
1197 8077 100       22776 if ( $Element->isa('PPI::Structure::Block') ) {
1198             # deference - ${$hash_ref}{foo}
1199             # or even ${burfle}{foo}
1200             # hash slice - @{$hash_ref}{'foo', 'bar'}
1201 4 50       11 if ( my $prior = $Parent->schild(-2) ) {
1202 4         16 my $prior_content = $prior->content();
1203 4 50 66     39 $prior->isa( 'PPI::Token::Cast' )
      66        
1204             and ( $prior_content eq '@' ||
1205             $prior_content eq '$' )
1206             and return 'PPI::Structure::Subscript';
1207             }
1208             }
1209              
1210             # Are we the last argument of sub?
1211             # E.g.: 'sub foo {}', 'sub foo ($) {}'
1212 8075 100       29063 return 'PPI::Structure::Block' if $Parent->isa('PPI::Statement::Sub');
1213              
1214             # Are we the second or third argument of package?
1215             # E.g.: 'package Foo {}' or 'package Foo v1.2.3 {}'
1216 5614 100       22303 return 'PPI::Structure::Block'
1217             if $Parent->isa('PPI::Statement::Package');
1218              
1219 4321 100       10705 if ( $CURLY_CLASSES{$content} ) {
1220             # Known type
1221 905         3675 return $CURLY_CLASSES{$content};
1222             }
1223             }
1224              
1225             # Are we in a compound statement
1226 4067 100       11478 if ( $Parent->isa('PPI::Statement::Compound') ) {
1227             # We will only encounter blocks in compound statements
1228 1922         6035 return 'PPI::Structure::Block';
1229             }
1230              
1231             # Are we the second or third argument of use
1232 2145 100       6597 if ( $Parent->isa('PPI::Statement::Include') ) {
1233 53 50 33     142 if ( $Parent->schildren == 2 ||
      66        
1234             $Parent->schildren == 3 &&
1235             $Parent->schild(2)->isa('PPI::Token::Number')
1236             ) {
1237             # This is something like use constant { ... };
1238 53         195 return 'PPI::Structure::Constructor';
1239             }
1240             }
1241              
1242             # Unless we are at the start of the statement, everything else should be a block
1243             ### FIXME This is possibly a bad choice, but will have to do for now.
1244 2092 100       8043 return 'PPI::Structure::Block' if $Element;
1245              
1246 651 100 66     3146 if (
1247             $Parent->isa('PPI::Statement')
1248             and
1249             _INSTANCE($Parent->parent, 'PPI::Structure::List')
1250             ) {
1251 172         371 my $function = $Parent->parent->parent->schild(-2);
1252              
1253             # Special case: Are we the param of a core function
1254             # i.e. map({ $_ => 1 } @foo)
1255 172 100 100     615 return 'PPI::Structure::Block'
1256             if $function and $function->content =~ /^(?:map|grep|sort|eval|do)$/;
1257              
1258             # If not part of a block print, list-embedded curlies are most likely constructors
1259 78 100 100     334 return 'PPI::Structure::Constructor'
1260             if not $function or $function->content !~ /^(?:print|say)$/;
1261             }
1262              
1263             # We need to scan ahead.
1264 485         765 my $Next;
1265 485         678 my $position = 0;
1266 485         737 my @delayed;
1267 485         1090 while ( $Next = $self->_get_token ) {
1268 1190 100       2810 unless ( $Next->significant ) {
1269 190         298 push @delayed, $Next;
1270 190         359 next;
1271             }
1272              
1273             # If we are off the end of the lookahead array,
1274 1000 100       2753 if ( ++$position >= @CURLY_LOOKAHEAD_CLASSES ) {
    100          
1275             # default to block.
1276 130         581 $self->_buffer( splice(@delayed), $Next );
1277 130         227 last;
1278             # If the content at this position is known
1279             } elsif ( my $class = $CURLY_LOOKAHEAD_CLASSES[$position]
1280             {$Next->content} ) {
1281             # return the associated class.
1282 268         780 $self->_buffer( splice(@delayed), $Next );
1283 268         1377 return $class;
1284             }
1285              
1286             # Delay and continue
1287 602         1368 push @delayed, $Next;
1288             }
1289              
1290             # Hit the end of the document, or bailed out, go with block
1291 217         562 $self->_buffer( splice(@delayed) );
1292 217 50       522 if ( ref $Parent eq 'PPI::Statement' ) {
1293 217         375 bless $Parent, 'PPI::Statement::Compound';
1294             }
1295 217         892 return 'PPI::Structure::Block';
1296             }
1297              
1298              
1299             sub _lex_structure {
1300 30090     30090   39559 my ($self, $Structure) = @_;
1301             # my $self = shift;
1302             # my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1";
1303              
1304 30090   100     29423 push @{$self->{features_stack}}, $self->{features_stack}[-1] || {};
  30090         74518  
1305              
1306             # Start the processing loop
1307 30090         35488 my $Token;
1308 30090         43781 while ( ref($Token = $self->_get_token) ) {
1309             # Is this a direct type token
1310 116178 100       229152 unless ( $Token->significant ) {
1311 50070         57661 push @{$self->{delayed}}, $Token;
  50070         75643  
1312             # $self->_delay_element( $Token );
1313 50070         79422 next;
1314             }
1315              
1316             # Anything other than a Structure starts a Statement
1317 66108 100       176312 unless ( $Token->isa('PPI::Token::Structure') ) {
1318             # Because _statement may well delay and rollback itself,
1319             # we need to add the delayed tokens early
1320 37386         69447 $self->_add_delayed( $Structure );
1321              
1322             # Determine the class for the Statement and create it
1323 37386         67406 my $Statement = $self->_statement($Structure, $Token)->new($Token);
1324              
1325             # Move the lexing down into the Statement
1326 37386         82771 $self->_add_element( $Structure, $Statement );
1327 37386         82656 $self->_lex_statement( $Statement );
1328              
1329 37386         67550 next;
1330             }
1331              
1332             # Is this the opening of another structure directly inside us?
1333 28722 100       50510 if ( $Token->__LEXER__opens ) {
1334             # Rollback the Token, and recurse into the statement
1335 470         1457 $self->_rollback( $Token );
1336 470         1397 my $Statement = PPI::Statement->new;
1337 470         1129 $self->_add_element( $Structure, $Statement );
1338 470         1174 $self->_lex_statement( $Statement );
1339 470         951 next;
1340             }
1341              
1342             # Is this the close of a structure ( which would be an error )
1343 28252 100       47884 if ( $Token->__LEXER__closes ) {
1344 28197         30319 pop @{$self->{features_stack}};
  28197         40551  
1345              
1346             # Is this OUR closing structure
1347 28197 100       50052 if ( $Token->content eq $Structure->start->__LEXER__opposite ) {
1348             # Add any delayed tokens, and the finishing token (the ugly way)
1349 27468         53503 $self->_add_delayed( $Structure );
1350 27468         44464 $Structure->{finish} = $Token;
1351             Scalar::Util::weaken(
1352 27468         68417 $_PARENT{Scalar::Util::refaddr $Token} = $Structure
1353             );
1354              
1355             # Confirm that ForLoop structures are actually so, and
1356             # aren't really a list.
1357 27468 100       76517 if ( $Structure->isa('PPI::Structure::For') ) {
1358 232 100       805 if ( 2 > scalar grep {
1359 594         1787 $_->isa('PPI::Statement')
1360             } $Structure->children ) {
1361 211         403 bless($Structure, 'PPI::Structure::List');
1362             }
1363             }
1364 27468         66564 return 1;
1365             }
1366              
1367             # Unmatched closing brace.
1368             # Either they typed the wrong thing, or haven't put
1369             # one at all. Either way it's an error we need to
1370             # somehow handle gracefully. For now, we'll treat it
1371             # as implicitly ending the structure. This causes the
1372             # least damage across the various reasons why this
1373             # might have happened.
1374 729         1371 return $self->_rollback( $Token );
1375             }
1376              
1377             # It's a semi-colon on its own, just inside the block.
1378             # This is a null statement.
1379             $self->_add_element(
1380 55         226 $Structure,
1381             PPI::Statement::Null->new($Token),
1382             );
1383             }
1384              
1385             # Is this an error
1386 1893 50       2786 unless ( defined $Token ) {
1387 0         0 PPI::Exception->throw;
1388             }
1389              
1390 1893         1847 pop @{$self->{features_stack}};
  1893         2624  
1391              
1392             # No, it's just the end of file.
1393             # Add any insignificant trailing tokens.
1394 1893         3796 $self->_add_delayed( $Structure );
1395             }
1396              
1397              
1398              
1399              
1400              
1401             #####################################################################
1402             # Support Methods
1403              
1404             # Get the next token for processing, handling buffering
1405             sub _get_token {
1406 683099 100   683099   615382 shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token;
  683099         1613825  
1407             }
1408              
1409             # Old long version of the above
1410             # my $self = shift;
1411             # # First from the buffer
1412             # if ( @{$self->{buffer}} ) {
1413             # return shift @{$self->{buffer}};
1414             # }
1415             #
1416             # # Then from the Tokenizer
1417             # $self->{Tokenizer}->get_token;
1418             # }
1419              
1420             # Delay the addition of insignificant elements.
1421             # This ended up being inlined.
1422             # sub _delay_element {
1423             # my $self = shift;
1424             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 1";
1425             # push @{ $_[0]->{delayed} }, $_[1];
1426             # }
1427              
1428             # Add an Element to a Node, including any delayed Elements
1429             sub _add_element {
1430 362467     362467   442071 my ($self, $Parent, $Element) = @_;
1431             # my $self = shift;
1432             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1433             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 2";
1434              
1435             # Handle a special case, where a statement is not fully resolved
1436 362467 100 100     629771 if ( ref $Parent eq 'PPI::Statement'
1437             and my $first = $Parent->schild(0) ) {
1438 77136 50 33     198390 if ( $first->isa('PPI::Token::Label')
1439             and !(my $second = $Parent->schild(1)) ) {
1440 0         0 my $new_class = $STATEMENT_CLASSES{$second->content};
1441             # It's a labelled statement
1442 0 0       0 bless $Parent, $new_class if $new_class;
1443             }
1444             }
1445              
1446             # Add first the delayed, from the front, then the passed element
1447 362467         337560 foreach my $el ( @{$self->{delayed}} ) {
  362467         460805  
1448             Scalar::Util::weaken(
1449 115568         295600 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1450             );
1451             # Inlined $Parent->__add_element($el);
1452             }
1453             Scalar::Util::weaken(
1454 362467         858994 $_PARENT{Scalar::Util::refaddr $Element} = $Parent
1455             );
1456 362467         338554 push @{$Parent->{children}}, @{$self->{delayed}}, $Element;
  362467         399014  
  362467         544170  
1457              
1458             # Clear the delayed elements
1459 362467         501011 $self->{delayed} = [];
1460             }
1461              
1462             # Specifically just add any delayed tokens, if any.
1463             sub _add_delayed {
1464 142981     142981   185997 my ($self, $Parent) = @_;
1465             # my $self = shift;
1466             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1467              
1468             # Add any delayed
1469 142981         144174 foreach my $el ( @{$self->{delayed}} ) {
  142981         220719  
1470             Scalar::Util::weaken(
1471 61351         169724 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1472             );
1473             # Inlined $Parent->__add_element($el);
1474             }
1475 142981         147122 push @{$Parent->{children}}, @{$self->{delayed}};
  142981         179325  
  142981         192048  
1476              
1477             # Clear the delayed elements
1478 142981         221530 $self->{delayed} = [];
1479             }
1480              
1481             # Rollback the delayed tokens, plus any passed. Once all the tokens
1482             # have been moved back on to the buffer, the order should be.
1483             # <--- @{$self->{delayed}}, @_, @{$self->{buffer}} <----
1484             sub _rollback {
1485 61246     61246   70341 my $self = shift;
1486              
1487             # First, put any passed objects back
1488 61246 100       90559 if ( @_ ) {
1489 49774         52941 unshift @{$self->{buffer}}, splice @_;
  49774         102192  
1490             }
1491              
1492             # Then, put back anything delayed
1493 61246 100       66460 if ( @{$self->{delayed}} ) {
  61246         99683  
1494 29449         31855 unshift @{$self->{buffer}}, splice @{$self->{delayed}};
  29449         37842  
  29449         43119  
1495             }
1496              
1497 61246         92857 1;
1498             }
1499              
1500             # Partial rollback, just return a single list to the buffer
1501             sub _buffer {
1502 615     615   737 my $self = shift;
1503              
1504             # Put any passed objects back
1505 615 100       1281 if ( @_ ) {
1506 473         741 unshift @{$self->{buffer}}, splice @_;
  473         1136  
1507             }
1508              
1509 615         774 1;
1510             }
1511              
1512              
1513              
1514              
1515              
1516             #####################################################################
1517             # Error Handling
1518              
1519             # Set the error message
1520             sub _error {
1521 3     3   9 $errstr = "Lexer failed: $_[1]";
1522 3         16 undef;
1523             }
1524              
1525             # Clear the error message.
1526             # Returns the object as a convenience.
1527             sub _clear {
1528 16843     16843   26752 $errstr = '';
1529 16843         26922 $_[0];
1530             }
1531              
1532             =pod
1533              
1534             =head2 errstr
1535              
1536             For any error that occurs, you can use the C, as either
1537             a static or object method, to access the error message.
1538              
1539             If no error occurs for any particular action, C will return false.
1540              
1541             =cut
1542              
1543             sub errstr {
1544 6     6 1 22 $errstr;
1545             }
1546              
1547              
1548              
1549              
1550              
1551             #####################################################################
1552             # PDOM Extensions
1553             #
1554             # This is something of a future expansion... ignore it for now :)
1555             #
1556             # use PPI::Statement::Sub ();
1557             #
1558             # sub PPI::Statement::Sub::__LEXER__normal { '' }
1559              
1560             1;
1561              
1562             =pod
1563              
1564             =head1 TO DO
1565              
1566             - Add optional support for some of the more common source filters
1567              
1568             - Some additional checks for blessing things into various Statement
1569             and Structure subclasses.
1570              
1571             =head1 SUPPORT
1572              
1573             See the L in the main module.
1574              
1575             =head1 AUTHOR
1576              
1577             Adam Kennedy Eadamk@cpan.orgE
1578              
1579             =head1 COPYRIGHT
1580              
1581             Copyright 2001 - 2011 Adam Kennedy.
1582              
1583             This program is free software; you can redistribute
1584             it and/or modify it under the same terms as Perl itself.
1585              
1586             The full text of the license can be found in the
1587             LICENSE file included with this module.
1588              
1589             =cut