File Coverage

blib/lib/PPI/Lexer.pm
Criterion Covered Total %
statement 448 474 94.5
branch 262 302 86.7
condition 162 212 76.4
subroutine 29 29 100.0
pod 5 6 83.3
total 906 1023 88.5


line stmt bran cond sub pod time code
1             package PPI::Lexer;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Lexer - The PPI Lexer
8              
9             =head1 SYNOPSIS
10              
11             use PPI;
12            
13             # Create a new Lexer
14             my $Lexer = PPI::Lexer->new;
15            
16             # Build a PPI::Document object from a Token stream
17             my $Tokenizer = PPI::Tokenizer->load('My/Module.pm');
18             my $Document = $Lexer->lex_tokenizer($Tokenizer);
19            
20             # Build a PPI::Document object for some raw source
21             my $source = "print 'Hello World!'; kill(Humans->all);";
22             $Document = $Lexer->lex_source($source);
23            
24             # Build a PPI::Document object for a particular file name
25             $Document = $Lexer->lex_file('My/Module.pm');
26              
27             =head1 DESCRIPTION
28              
29             The is the L Lexer. In the larger scheme of things, its job is to take
30             token streams, in a variety of forms, and "lex" them into nested structures.
31              
32             Pretty much everything in this module happens behind the scenes at this
33             point. In fact, at the moment you don't really need to instantiate the lexer
34             at all, the three main methods will auto-instantiate themselves a
35             C object as needed.
36              
37             All methods do a one-shot "lex this and give me a L object".
38              
39             In fact, if you are reading this, what you B want to do is to
40             just "load a document", in which case you can do this in a much more
41             direct and concise manner with one of the following.
42              
43             use PPI;
44            
45             $Document = PPI::Document->load( $filename );
46             $Document = PPI::Document->new( $string );
47              
48             See L for more details.
49              
50             For more unusual tasks, by all means forge onwards.
51              
52             =head1 METHODS
53              
54             =cut
55              
56 66     66   2788 use strict;
  66         556  
  66         3091  
57 66     66   399 use Scalar::Util ();
  66         132  
  66         1707  
58 66     66   289 use Params::Util qw{_STRING _INSTANCE};
  66         125  
  66         4288  
59 66     66   387 use PPI ();
  66         115  
  66         1108  
60 66     66   393 use PPI::Exception ();
  66         114  
  66         1635  
61 66     66   295 use PPI::Singletons '%_PARENT';
  66         112  
  66         455888  
62              
63             our $VERSION = '1.28401'; # TRIAL
64              
65             our $errstr = "";
66              
67             # Keyword -> Structure class maps
68             my %ROUND = (
69             # Conditions
70             'if' => 'PPI::Structure::Condition',
71             'elsif' => 'PPI::Structure::Condition',
72             'unless' => 'PPI::Structure::Condition',
73             'while' => 'PPI::Structure::Condition',
74             'until' => 'PPI::Structure::Condition',
75              
76             # For(each)
77             'for' => 'PPI::Structure::For',
78             'foreach' => 'PPI::Structure::For',
79             );
80              
81             # Opening brace to refining method
82             my %RESOLVE = (
83             '(' => '_round',
84             '[' => '_square',
85             '{' => '_curly',
86             );
87              
88             # Allows for experimental overriding of the tokenizer
89             our $X_TOKENIZER = "PPI::Tokenizer";
90 16799     16799 0 123624 sub X_TOKENIZER { $X_TOKENIZER }
91              
92              
93              
94              
95              
96             #####################################################################
97             # Constructor
98              
99             =pod
100              
101             =head2 new
102              
103             The C constructor creates a new C object. The object itself
104             is merely used to hold various buffers and state data during the lexing
105             process, and holds no significant data between -Elex_xxxxx calls.
106              
107             Returns a new C object
108              
109             =cut
110              
111             sub new {
112 16800     16800 1 69795 my $class = shift->_clear;
113 16800         146594 bless {
114             Tokenizer => undef, # Where we store the tokenizer for a run
115             buffer => [], # The input token buffer
116             delayed => [], # The "delayed insignificant tokens" buffer
117             features => [], # Stack of features in scope
118             }, $class;
119             }
120              
121              
122              
123              
124              
125             #####################################################################
126             # Main Lexing Methods
127              
128             =pod
129              
130             =head2 lex_file $filename
131              
132             The C method takes a filename as argument. It then loads the file,
133             creates a L for the content and lexes the token stream
134             produced by the tokenizer. Basically, a sort of all-in-one method for
135             getting a L object from a file name.
136              
137             Additional arguments are passed to the tokenizer as a hash.
138              
139             Returns a L object, or C on error.
140              
141             =cut
142              
143             sub lex_file {
144 507 100   507 1 3121 my $self = ref $_[0] ? shift : shift->new;
145 507         2452 my $file = _STRING(shift);
146 507 100       2126 unless ( defined $file ) {
147 1         6 return $self->_error("Did not pass a filename to PPI::Lexer::lex_file");
148             }
149 506         1934 my %args = @_;
150              
151             # Create the Tokenizer
152 506         1227 my $Tokenizer = eval {
153 506         1745 X_TOKENIZER->new($file);
154             };
155 506 50       4551 if ( _INSTANCE($@, 'PPI::Exception') ) {
    50          
156 0         0 return $self->_error( $@->message );
157             } elsif ( $@ ) {
158 0         0 return $self->_error( $errstr );
159             }
160              
161 506         3158 $self->lex_tokenizer( $Tokenizer, %args );
162             }
163              
164             =pod
165              
166             =head2 lex_source $string
167              
168             The C method takes a normal scalar string as argument. It
169             creates a L object for the string, and then lexes the
170             resulting token stream.
171              
172             Additional arguments are passed to the tokenizer as a hash.
173              
174             Returns a L object, or C on error.
175              
176             =cut
177              
178             sub lex_source {
179 16293 50   16293 1 1335676 my $self = ref $_[0] ? shift : shift->new;
180 16293         44888 my $source = shift;
181 16293 50 33     114345 unless ( defined $source and not ref $source ) {
182 0         0 return $self->_error("Did not pass a string to PPI::Lexer::lex_source");
183             }
184 16293         49135 my %args = @_;
185              
186             # Create the Tokenizer and hand off to the next method
187 16293         34732 my $Tokenizer = eval {
188 16293         58022 X_TOKENIZER->new(\$source);
189             };
190 16293 50       99476 if ( _INSTANCE($@, 'PPI::Exception') ) {
    50          
191 0         0 return $self->_error( $@->message );
192             } elsif ( $@ ) {
193 0         0 return $self->_error( $errstr );
194             }
195              
196 16293         82947 $self->lex_tokenizer( $Tokenizer, %args );
197             }
198              
199             =pod
200              
201             =head2 lex_tokenizer $Tokenizer
202              
203             The C takes as argument a L object. It
204             lexes the token stream from the tokenizer into a L object.
205              
206             Additional arguments are set on the L produced.
207              
208             Returns a L object, or C on error.
209              
210             =cut
211              
212             sub lex_tokenizer {
213 16799 50   16799 1 59526 my $self = ref $_[0] ? shift : shift->new;
214 16799         132794 my $Tokenizer = _INSTANCE(shift, 'PPI::Tokenizer');
215 16799 50       58848 return $self->_error(
216             "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer"
217             ) unless $Tokenizer;
218 16799         44330 my %args = @_;
219              
220             # Create the empty document
221 16799         70513 my $Document = PPI::Document->new;
222 16799         103717 ref($Document)->_setattr( $Document, %args );
223 16799         87262 $Tokenizer->_document($Document);
224 16799 100       70851 if (my $feat = $Document->feature_mods) {
225 18         35 push @{$self->{features}}, $feat;
  18         46  
226 18         61 $Tokenizer->_features($feat);
227             }
228              
229             # Lex the token stream into the document
230 16799         50304 $self->{Tokenizer} = $Tokenizer;
231 16799 100       33886 if ( !eval { $self->_lex_document($Document); 1 } ) {
  16799         62297  
  16798         55903  
232             # If an error occurs DESTROY the partially built document.
233 1         6 $Tokenizer->_document(undef);
234 1         6 undef $Document;
235 1 50       9 if ( _INSTANCE($@, 'PPI::Exception') ) {
236 1         5 return $self->_error( $@->message );
237             } else {
238 0         0 return $self->_error( $errstr );
239             }
240             }
241              
242 16798         213867 return $Document;
243             }
244              
245              
246              
247              
248              
249             #####################################################################
250             # Lex Methods - Document Object
251              
252             sub _lex_document {
253 16799     16799   45114 my ($self, $Document) = @_;
254             # my $self = shift;
255             # my $Document = _INSTANCE(shift, 'PPI::Document') or return undef;
256              
257             # Start the processing loop
258 16799         30386 my $Token;
259 16799         65945 while ( ref($Token = $self->_get_token) ) {
260             # Add insignificant tokens directly beneath us
261 53926 100       179376 unless ( $Token->significant ) {
262 21304         71981 $self->_add_element( $Document, $Token );
263 21304         51274 next;
264             }
265              
266 32622 100       121503 if ( $Token->content eq ';' ) {
267             # It's a semi-colon on its own.
268             # We call this a null statement.
269 463         3307 $self->_add_element(
270             $Document,
271             PPI::Statement::Null->new($Token),
272             );
273 463         1698 next;
274             }
275              
276             # Handle anything other than a structural element
277 32159 100       101269 unless ( ref $Token eq 'PPI::Token::Structure' ) {
278             # Determine the class for the Statement, and create it
279 29079         112958 my $Statement = $self->_statement($Document, $Token)->new($Token);
280              
281             # Move the lexing down into the statement
282 29079         121077 $self->_add_delayed( $Document );
283 29079         102423 $self->_add_element( $Document, $Statement );
284 29079         115578 $self->_lex_statement( $Statement );
285              
286 29079         108679 next;
287             }
288              
289             # Is this the opening of a structure?
290 3080 100       9268 if ( $Token->__LEXER__opens ) {
291             # This should actually have a Statement instead
292 936         4731 $self->_rollback( $Token );
293 936         5557 my $Statement = PPI::Statement->new;
294 936         4067 $self->_add_element( $Document, $Statement );
295 936         3527 $self->_lex_statement( $Statement );
296 936         3247 next;
297             }
298              
299             # Is this the close of a structure.
300 2144 50       6952 if ( $Token->__LEXER__closes ) {
301             # Because we are at the top of the tree, this is an error.
302             # This means either a mis-parsing, or a mistake in the code.
303             # To handle this, we create a "Naked Close" statement
304 2144         10638 $self->_add_element( $Document,
305             PPI::Statement::UnmatchedBrace->new($Token)
306             );
307 2144         5806 next;
308             }
309              
310             # Shouldn't be able to get here
311 0         0 PPI::Exception->throw('Lexer reached an illegal state');
312             }
313              
314             # Did we leave the main loop because of a Tokenizer error?
315 16798 50       53120 unless ( defined $Token ) {
316 0 0       0 my $errstr = $self->{Tokenizer} ? $self->{Tokenizer}->errstr : '';
317 0   0     0 $errstr ||= 'Unknown Tokenizer Error';
318 0         0 PPI::Exception->throw($errstr);
319             }
320              
321             # No error, it's just the end of file.
322             # Add any insignificant trailing tokens.
323 16798         59650 $self->_add_delayed( $Document );
324              
325             # If the Tokenizer has any v6 blocks to attach, do so now.
326             # Checking once at the end is faster than adding a special
327             # case check for every statement parsed.
328 16798         59258 my $perl6 = $self->{Tokenizer}->{'perl6'};
329 16798 100       43408 if ( @$perl6 ) {
330 2         15 my $includes = $Document->find( 'PPI::Statement::Include::Perl6' );
331 2         8 foreach my $include ( @$includes ) {
332 2 50       6 unless ( @$perl6 ) {
333 0         0 PPI::Exception->throw('Failed to find a perl6 section');
334             }
335 2         9 $include->{perl6} = shift @$perl6;
336             }
337             }
338              
339 16798         35851 return 1;
340             }
341              
342              
343              
344              
345              
346             #####################################################################
347             # Lex Methods - Statement Object
348              
349             # Keyword -> Statement Subclass
350             my %STATEMENT_CLASSES = (
351             # Things that affect the timing of execution
352             'BEGIN' => 'PPI::Statement::Scheduled',
353             'CHECK' => 'PPI::Statement::Scheduled',
354             'UNITCHECK' => 'PPI::Statement::Scheduled',
355             'INIT' => 'PPI::Statement::Scheduled',
356             'END' => 'PPI::Statement::Scheduled',
357              
358             # Special subroutines for which 'sub' is optional
359             'AUTOLOAD' => 'PPI::Statement::Sub',
360             'DESTROY' => 'PPI::Statement::Sub',
361              
362             # Loading and context statement
363             'package' => 'PPI::Statement::Package',
364             # 'use' => 'PPI::Statement::Include',
365             'no' => 'PPI::Statement::Include',
366             'require' => 'PPI::Statement::Include',
367              
368             # Various declarations
369             'my' => 'PPI::Statement::Variable',
370             'local' => 'PPI::Statement::Variable',
371             'our' => 'PPI::Statement::Variable',
372             'state' => 'PPI::Statement::Variable',
373             # Statements starting with 'sub' could be any one of...
374             # 'sub' => 'PPI::Statement::Sub',
375             # 'sub' => 'PPI::Statement::Scheduled',
376             # 'sub' => 'PPI::Statement',
377              
378             # Compound statement
379             'if' => 'PPI::Statement::Compound',
380             'unless' => 'PPI::Statement::Compound',
381             'for' => 'PPI::Statement::Compound',
382             'foreach' => 'PPI::Statement::Compound',
383             'while' => 'PPI::Statement::Compound',
384             'until' => 'PPI::Statement::Compound',
385              
386             # Switch statement
387             'given' => 'PPI::Statement::Given',
388             'when' => 'PPI::Statement::When',
389             'default' => 'PPI::Statement::When',
390              
391             # Various ways of breaking out of scope
392             'redo' => 'PPI::Statement::Break',
393             'next' => 'PPI::Statement::Break',
394             'last' => 'PPI::Statement::Break',
395             'return' => 'PPI::Statement::Break',
396             'goto' => 'PPI::Statement::Break',
397              
398             # Special sections of the file
399             '__DATA__' => 'PPI::Statement::Data',
400             '__END__' => 'PPI::Statement::End',
401             );
402              
403             sub _statement {
404 55296     55296   131213 my ($self, $Parent, $Token) = @_;
405             # my $self = shift;
406             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
407             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
408              
409             # Check for things like ( parent => ... )
410 55296 100 100     493579 if (
411             $Parent->isa('PPI::Structure::List')
412             or
413             $Parent->isa('PPI::Structure::Constructor')
414             ) {
415 7926 100       35524 if ( $Token->isa('PPI::Token::Word') ) {
416             # Is the next significant token a =>
417             # Read ahead to the next significant token
418 1995         4045 my $Next;
419 1995         5751 while ( $Next = $self->_get_token ) {
420 2834 100       10132 unless ( $Next->significant ) {
421 887         2422 push @{$self->{delayed}}, $Next;
  887         2571  
422             # $self->_delay_element( $Next );
423 887         2343 next;
424             }
425              
426             # Got the next token
427 1947 100 100     13330 if (
428             $Next->isa('PPI::Token::Operator')
429             and
430             $Next->content eq '=>'
431             ) {
432             # Is an ordinary expression
433 945         3315 $self->_rollback( $Next );
434 945         5925 return 'PPI::Statement::Expression';
435             } else {
436 1002         2212 last;
437             }
438             }
439              
440             # Rollback and continue
441 1050         3438 $self->_rollback( $Next );
442             }
443             }
444              
445 54351         113239 my $is_lexsub = 0;
446              
447             # Is it a token in our known classes list
448 54351         152954 my $content = $Token->content;
449             my $class =
450             $content eq 'try' && ($self->{features}[-1] || {})->{try}
451             ? 'PPI::Statement::Compound'
452 54351 100 100     230387 : $STATEMENT_CLASSES{$content};
453              
454 54351 100       127959 if ( $class ) {
455             # Is the next significant token a =>
456             # Read ahead to the next significant token
457 9930         18430 my $Next;
458 9930         28844 while ( $Next = $self->_get_token ) {
459 19484 100       67729 if ( !$Next->significant ) {
460 9601         19874 push @{$self->{delayed}}, $Next;
  9601         30242  
461 9601         27292 next;
462             }
463              
464             # Scheduled block must be followed by left curly or
465             # semicolon. Otherwise we have something else (e.g.
466             # open( CHECK, ... );
467 9883 100 66     34782 if (
      100        
468             'PPI::Statement::Scheduled' eq $class
469             and not ( $Next->isa( 'PPI::Token::Structure' )
470             and $Next->content =~ m/\A[{;]\z/ ) # }
471             ) {
472 1         3 $class = undef;
473 1         2 last;
474             }
475              
476             # Lexical subroutine
477 9882 100 100     69802 if (
      66        
478             $content =~ /^(?:my|our|state)\z/
479             and $Next->isa( 'PPI::Token::Word' ) and $Next->content eq 'sub'
480             ) {
481             # This should be PPI::Statement::Sub rather than PPI::Statement::Variable
482 7         16 $class = undef;
483 7         11 $is_lexsub = 1;
484 7         14 last;
485             }
486              
487             last if
488 9875 100 100     74838 !$Next->isa( 'PPI::Token::Operator' ) or $Next->content ne '=>';
489              
490             # Got the next token
491             # Is an ordinary expression
492 21         72 $self->_rollback( $Next );
493 21         153 return 'PPI::Statement';
494             }
495              
496             # Rollback and continue
497 9909         37625 $self->_rollback( $Next );
498             }
499              
500             # Handle potential barewords for subscripts
501 54330 100       229150 if ( $Parent->isa('PPI::Structure::Subscript') ) {
502             # Fast obvious case, just an expression
503 3903 100 100     13351 unless ( $class and $class->isa('PPI::Statement::Expression') ) {
504 3780         18908 return 'PPI::Statement::Expression';
505             }
506              
507             # This is something like "my" or "our" etc... more subtle.
508             # Check if the next token is a closing curly brace.
509             # This means we are something like $h{my}
510 123         260 my $Next;
511 123         329 while ( $Next = $self->_get_token ) {
512 119 50       447 unless ( $Next->significant ) {
513 0         0 push @{$self->{delayed}}, $Next;
  0         0  
514             # $self->_delay_element( $Next );
515 0         0 next;
516             }
517              
518             # Found the next significant token.
519             # Is it a closing curly brace?
520 119 50       335 if ( $Next->content eq '}' ) {
521 119         319 $self->_rollback( $Next );
522 119         688 return 'PPI::Statement::Expression';
523             } else {
524 0         0 $self->_rollback( $Next );
525 0         0 return $class;
526             }
527             }
528              
529             # End of file... this means it is something like $h{our
530             # which is probably going to be $h{our} ... I think
531 4         15 $self->_rollback( $Next );
532 4         23 return 'PPI::Statement::Expression';
533             }
534              
535             # If it's a token in our list, use that class
536 50427 100       177873 return $class if $class;
537              
538             # Handle the more in-depth sub detection
539 40679 100 100     185557 if ( $is_lexsub || $content eq 'sub' ) {
540             # Read ahead to the next significant token
541 3291         6825 my $Next;
542 3291         10245 while ( $Next = $self->_get_token ) {
543 6519 100       21087 unless ( $Next->significant ) {
544 3252         7447 push @{$self->{delayed}}, $Next;
  3252         10140  
545             # $self->_delay_element( $Next );
546 3252         9044 next;
547             }
548              
549             # Got the next significant token
550 3267         11835 my $sclass = $STATEMENT_CLASSES{$Next->content};
551 3267 100 100     13447 if ( $sclass and $sclass eq 'PPI::Statement::Scheduled' ) {
552 28         101 $self->_rollback( $Next );
553 28         241 return 'PPI::Statement::Scheduled';
554             }
555 3239 100       14456 if ( $Next->isa('PPI::Token::Word') ) {
556 3130         19498 $self->_rollback( $Next );
557 3130         26749 return 'PPI::Statement::Sub';
558             }
559              
560             ### Comment out these two, as they would return PPI::Statement anyway
561             # if ( $content eq '{' ) {
562             # Anonymous sub at start of statement
563             # return 'PPI::Statement';
564             # }
565             #
566             # if ( $Next->isa('PPI::Token::Prototype') ) {
567             # Anonymous sub at start of statement
568             # return 'PPI::Statement';
569             # }
570              
571             # PPI::Statement is the safest fall-through
572 109         650 $self->_rollback( $Next );
573 109         775 return 'PPI::Statement';
574             }
575              
576             # End of file... PPI::Statement::Sub is the most likely
577 24         89 $self->_rollback( $Next );
578 24         179 return 'PPI::Statement::Sub';
579             }
580              
581 37388 100       102681 if ( $content eq 'use' ) {
582             # Add a special case for "use v6" lines.
583 2268         3784 my $Next;
584 2268         6866 while ( $Next = $self->_get_token ) {
585 4531 100       14098 unless ( $Next->significant ) {
586 2265         3641 push @{$self->{delayed}}, $Next;
  2265         6494  
587             # $self->_delay_element( $Next );
588 2265         5407 next;
589             }
590              
591             # Found the next significant token.
592 2266 100 66     15715 if (
    100          
593             $Next->isa('PPI::Token::Operator')
594             and
595             $Next->content eq '=>'
596             ) {
597             # Is an ordinary expression
598 1         3 $self->_rollback( $Next );
599 1         5 return 'PPI::Statement';
600             # Is it a v6 use?
601             } elsif ( $Next->content eq 'v6' ) {
602 2         10 $self->_rollback( $Next );
603 2         37 return 'PPI::Statement::Include::Perl6';
604             } else {
605 2263         8501 $self->_rollback( $Next );
606 2263         16793 return 'PPI::Statement::Include';
607             }
608             }
609              
610             # End of file... this means it is an incomplete use
611             # line, just treat it as a normal include.
612 2         8 $self->_rollback( $Next );
613 2         24 return 'PPI::Statement::Include';
614             }
615              
616             # If our parent is a Condition, we are an Expression
617 35120 100       138201 if ( $Parent->isa('PPI::Structure::Condition') ) {
618 1265         7330 return 'PPI::Statement::Expression';
619             }
620              
621             # If our parent is a List, we are also an expression
622 33855 100       122853 if ( $Parent->isa('PPI::Structure::List') ) {
623 5123         30431 return 'PPI::Statement::Expression';
624             }
625              
626             # Switch statements use expressions, as well.
627 28732 100 100     225165 if (
628             $Parent->isa('PPI::Structure::Given')
629             or
630             $Parent->isa('PPI::Structure::When')
631             ) {
632 6         79 return 'PPI::Statement::Expression';
633             }
634              
635 28726 100       246093 if ( _INSTANCE($Token, 'PPI::Token::Label') ) {
636 379         2982 return 'PPI::Statement::Compound';
637             }
638              
639             # Beyond that, I have no idea for the moment.
640             # Just keep adding more conditions above this.
641 28347         186733 return 'PPI::Statement';
642             }
643              
644             sub _update_features {
645 56679     56679   117120 my ($self, $statement) = @_;
646              
647 56679 100 100     210439 if (ref $statement eq 'PPI::Statement::Include' && (my $feat = $statement->feature_mods)) {
648 17 100       32 push @{$self->{features}}, {} unless @{$self->{features}};
  16         52  
  17         112  
649 17         41 my $current_features = $self->{features}[-1];
650             $self->{Tokenizer}->_features(
651 17         139 $self->{features}[-1] = { %$current_features, %$feat }
652             );
653             }
654             }
655              
656             sub _lex_statement {
657 56687     56687   127487 my ($self, $Statement) = @_;
658             # my $self = shift;
659             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
660              
661             # Handle some special statements
662 56687 100       301069 if ( $Statement->isa('PPI::Statement::End') ) {
663 8         31 return $self->_lex_end( $Statement );
664             }
665              
666             # Begin processing tokens
667 56679         94946 my $Token;
668 56679         157732 while ( ref( $Token = $self->_get_token ) ) {
669             # Delay whitespace and comment tokens
670 264419 100       761952 unless ( $Token->significant ) {
671 93414         140305 push @{$self->{delayed}}, $Token;
  93414         204741  
672             # $self->_delay_element( $Token );
673 93414         212061 next;
674             }
675              
676             # Structual closes, and __DATA__ and __END__ tags implicitly
677             # end every type of statement
678 171005 100 66     503060 if (
679             $Token->__LEXER__closes
680             or
681             $Token->isa('PPI::Token::Separator')
682             ) {
683             # Rollback and end the statement
684 17833         63643 $self->_update_features( $Statement );
685 17833         51646 return $self->_rollback( $Token );
686             }
687              
688             # Normal statements never implicitly end
689 153172 100       503119 unless ( $Statement->__LEXER__normal ) {
690             # Have we hit an implicit end to the statement
691 24751 100       75038 unless ( $self->_continues( $Statement, $Token ) ) {
692             # Rollback and finish the statement
693 4409         18687 $self->_update_features( $Statement );
694 4409         14337 return $self->_rollback( $Token );
695             }
696             }
697              
698             # Any normal character just gets added
699 148763 100       503911 unless ( $Token->isa('PPI::Token::Structure') ) {
700 103252         300427 $self->_add_element( $Statement, $Token );
701 103252         255173 next;
702             }
703              
704             # Handle normal statement terminators
705 45511 100       115174 if ( $Token->content eq ';' ) {
706 23038         77255 $self->_add_element( $Statement, $Token );
707 23038         80073 $self->_update_features( $Statement );
708 23038         46982 return 1;
709             }
710              
711             # Which leaves us with a new structure
712              
713             # Determine the class for the structure and create it
714 22473         57489 my $method = $RESOLVE{$Token->content};
715 22473         102866 my $Structure = $self->$method($Statement)->new($Token);
716              
717             # Move the lexing down into the Structure
718 22473         92905 $self->_add_delayed( $Statement );
719 22473         66535 $self->_add_element( $Statement, $Structure );
720 22473         77993 $self->_lex_structure( $Structure );
721             }
722              
723             # Was it an error in the tokenizer?
724 11399 50       29665 unless ( defined $Token ) {
725 0         0 PPI::Exception->throw;
726             }
727              
728             # No, it's just the end of the file...
729             # Roll back any insignificant tokens, they'll get added at the Document level
730 11399         40685 $self->_update_features( $Statement );
731 11399         31448 $self->_rollback;
732             }
733              
734             sub _lex_end {
735 8     8   18 my ($self, $Statement) = @_;
736             # my $self = shift;
737             # my $Statement = _INSTANCE(shift, 'PPI::Statement::End') or die "Bad param 1";
738              
739             # End of the file, EVERYTHING is ours
740 8         15 my $Token;
741 8         20 while ( $Token = $self->_get_token ) {
742             # Inlined $Statement->__add_element($Token);
743             Scalar::Util::weaken(
744 15         47 $_PARENT{Scalar::Util::refaddr $Token} = $Statement
745             );
746 15         23 push @{$Statement->{children}}, $Token;
  15         43  
747             }
748              
749             # Was it an error in the tokenizer?
750 8 50       24 unless ( defined $Token ) {
751 0         0 PPI::Exception->throw;
752             }
753              
754             # No, it's just the end of the file...
755             # Roll back any insignificant tokens, they get added at the Document level
756 8         34 $self->_rollback;
757             }
758              
759             # For many statements, it can be difficult to determine the end-point.
760             # This method takes a statement and the next significant token, and attempts
761             # to determine if the there is a statement boundary between the two, or if
762             # the statement can continue with the token.
763             sub _continues {
764 24751     24751   52668 my ($self, $Statement, $Token) = @_;
765             # my $self = shift;
766             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
767             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
768              
769             # Handle the simple block case
770             # { print 1; }
771 24751 100 100     95695 if (
772             $Statement->schildren == 1
773             and
774             $Statement->schild(0)->isa('PPI::Structure::Block')
775             ) {
776 44         233 return '';
777             }
778              
779             # Alrighty then, there are six implied-end statement types:
780             # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, ::When,
781             # and ::Package statements.
782 24707 50       92894 return 1
783             if ref $Statement !~ /\b(?:Scheduled|Sub|Compound|Given|When|Package)$/;
784              
785             # Of these six, ::Scheduled, ::Sub, ::Given, and ::When follow the same
786             # simple rule and can be handled first. The block form of ::Package
787             # follows the rule, too. (The non-block form of ::Package
788             # requires a statement terminator, and thus doesn't need to have
789             # an implied end detected.)
790 24707         76600 my @part = $Statement->schildren;
791 24707         47505 my $LastChild = $part[-1];
792             # If the last significant element of the statement is a block,
793             # then an implied-end statement is done, no questions asked.
794 24707 100       187135 return !$LastChild->isa('PPI::Structure::Block')
795             if !$Statement->isa('PPI::Statement::Compound');
796              
797             # Now we get to compound statements, which kind of suck (to lex).
798             # However, of them all, the 'if' type, which includes unless, are
799             # relatively easy to handle compared to the others.
800 5598         23300 my $type = $Statement->type;
801 5598 100       13852 if ( $type eq 'if' ) {
802             # This should be one of the following
803             # if (EXPR) BLOCK
804             # if (EXPR) BLOCK else BLOCK
805             # if (EXPR) BLOCK elsif (EXPR) BLOCK ... else BLOCK
806              
807             # We only implicitly end on a block
808 3499 100       13697 unless ( $LastChild->isa('PPI::Structure::Block') ) {
809             # if (EXPR) ...
810             # if (EXPR) BLOCK else ...
811             # if (EXPR) BLOCK elsif (EXPR) BLOCK ...
812 2423         10704 return 1;
813             }
814              
815             # If the token before the block is an 'else',
816             # it's over, no matter what.
817 1076         3376 my $NextLast = $Statement->schild(-2);
818 1076 50 66     11379 if (
      66        
      66        
819             $NextLast
820             and
821             $NextLast->isa('PPI::Token')
822             and
823             $NextLast->isa('PPI::Token::Word')
824             and
825             $NextLast->content eq 'else'
826             ) {
827 72         408 return '';
828             }
829              
830             # Otherwise, we continue for 'elsif' or 'else' only.
831 1004 100 100     5946 if (
      100        
832             $Token->isa('PPI::Token::Word')
833             and (
834             $Token->content eq 'else'
835             or
836             $Token->content eq 'elsif'
837             )
838             ) {
839 310         1700 return 1;
840             }
841              
842 694         3538 return '';
843             }
844              
845 2099 100       5568 if ( $type eq 'label' ) {
846             # We only have the label so far, could be any of
847             # LABEL while (EXPR) BLOCK
848             # LABEL while (EXPR) BLOCK continue BLOCK
849             # LABEL for (EXPR; EXPR; EXPR) BLOCK
850             # LABEL foreach VAR (LIST) BLOCK
851             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
852             # LABEL BLOCK continue BLOCK
853              
854             # Handle cases with a word after the label
855 356 100 100     2596 if (
856             $Token->isa('PPI::Token::Word')
857             and
858             $Token->content =~ /^(?:while|until|for|foreach)$/
859             ) {
860 38         118 return 1;
861             }
862              
863             # Handle labelled blocks
864 318 100 100     2146 if ( $Token->isa('PPI::Token::Structure') && $Token->content eq '{' ) {
865 240         1273 return 1;
866             }
867              
868 78         291 return '';
869             }
870              
871             # Handle the common "after round braces" case
872 1743 100 100     10730 if ( $LastChild->isa('PPI::Structure') and $LastChild->braces eq '()' ) {
873             # LABEL while (EXPR) ...
874             # LABEL while (EXPR) ...
875             # LABEL for (EXPR; EXPR; EXPR) ...
876             # LABEL for VAR (LIST) ...
877             # LABEL foreach VAR (LIST) ...
878             # Only a block will do
879 381   33     2174 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
880             }
881              
882 1362 100       4485 if ( $type eq 'for' ) {
883             # LABEL for (EXPR; EXPR; EXPR) BLOCK
884 140 100 66     881 if (
    50          
    0          
885             $LastChild->isa('PPI::Token::Word')
886             and
887             $LastChild->content =~ /^for(?:each)?\z/
888             ) {
889             # LABEL for ...
890 127 100 66     1274 if (
      100        
891             (
892             $Token->isa('PPI::Token::Structure')
893             and
894             $Token->content eq '('
895             )
896             or
897             $Token->isa('PPI::Token::QuoteLike::Words')
898             ) {
899 21         109 return 1;
900             }
901              
902 106 50       414 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
903             # LABEL for VAR QW{} ...
904             # LABEL foreach VAR QW{} ...
905             # Only a block will do
906 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
907             }
908              
909             # In this case, we can also behave like a foreach
910 106         245 $type = 'foreach';
911              
912             } elsif ( $LastChild->isa('PPI::Structure::Block') ) {
913             # LABEL for (EXPR; EXPR; EXPR) BLOCK
914             # That's it, nothing can continue
915 13         52 return '';
916              
917             } elsif ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
918             # LABEL for VAR QW{} ...
919             # LABEL foreach VAR QW{} ...
920             # Only a block will do
921 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
922             }
923             }
924              
925             # Handle the common continue case
926 1328 100 100     6109 if ( $LastChild->isa('PPI::Token::Word') and $LastChild->content eq 'continue' ) {
927             # LABEL while (EXPR) BLOCK continue ...
928             # LABEL foreach VAR (LIST) BLOCK continue ...
929             # LABEL BLOCK continue ...
930             # Only a block will do
931 6   33     50 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
932             }
933              
934 1322 50 66     3718 if ( $type eq 'try' and $LastChild->presumed_features->{try} ) {
935 6 100       43 return 1 if not $LastChild->isa('PPI::Structure::Block');
936              
937 2         10 my $NextLast = $Statement->schild(-2);
938 2 50 33     26 return ''
      33        
      33        
939             if $NextLast
940             and $NextLast->isa('PPI::Token')
941             and $NextLast->isa('PPI::Token::Word')
942             and $NextLast->content eq 'catch';
943              
944 2 50 33     28 return 1 #
945             if $Token->isa('PPI::Token::Word') and $Token->content eq 'catch';
946              
947 0         0 return '';
948             }
949              
950             # Handle the common continuable block case
951 1316 100       5022 if ( $LastChild->isa('PPI::Structure::Block') ) {
952             # LABEL while (EXPR) BLOCK
953             # LABEL while (EXPR) BLOCK ...
954             # LABEL for (EXPR; EXPR; EXPR) BLOCK
955             # LABEL foreach VAR (LIST) BLOCK
956             # LABEL foreach VAR (LIST) BLOCK ...
957             # LABEL BLOCK ...
958             # Is this the block for a continue?
959 460 100 66     4646 if ( _INSTANCE($part[-2], 'PPI::Token::Word') and $part[-2]->content eq 'continue' ) {
960             # LABEL while (EXPR) BLOCK continue BLOCK
961             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
962             # LABEL BLOCK continue BLOCK
963             # That's it, nothing can continue this
964 6         27 return '';
965             }
966              
967             # Only a continue will do
968 454   100     3296 return $Token->isa('PPI::Token::Word') && $Token->content eq 'continue';
969             }
970              
971 856 50       1946 if ( $type eq 'block' ) {
972             # LABEL BLOCK continue BLOCK
973             # Every possible case is covered in the common cases above
974             }
975              
976 856 100       1950 if ( $type eq 'while' ) {
977             # LABEL while (EXPR) BLOCK
978             # LABEL while (EXPR) BLOCK continue BLOCK
979             # LABEL until (EXPR) BLOCK
980             # LABEL until (EXPR) BLOCK continue BLOCK
981             # The only case not covered is the while ...
982 157 50 66     1011 if (
      66        
983             $LastChild->isa('PPI::Token::Word')
984             and (
985             $LastChild->content eq 'while'
986             or
987             $LastChild->content eq 'until'
988             )
989             ) {
990             # LABEL while ...
991             # LABEL until ...
992             # Only a condition structure will do
993 157   33     895 return $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
994             }
995             }
996              
997 699 50       1671 if ( $type eq 'foreach' ) {
998             # LABEL foreach VAR (LIST) BLOCK
999             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
1000             # The only two cases that have not been covered already are
1001             # 'foreach ...' and 'foreach VAR ...'
1002              
1003 699 100       2415 if ( $LastChild->isa('PPI::Token::Symbol') ) {
1004             # LABEL foreach my $scalar ...
1005             # Open round brace, or a quotewords
1006 207 100 66     1577 return 1 if $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
1007 16 50       108 return 1 if $Token->isa('PPI::Token::QuoteLike::Words');
1008 0         0 return '';
1009             }
1010              
1011 492 100 100     1121 if ( $LastChild->content eq 'foreach' or $LastChild->content eq 'for' ) {
1012             # There are three possibilities here
1013 278 100 100     1499 if (
    100 100        
    100 66        
    100          
1014             $Token->isa('PPI::Token::Word')
1015             and (
1016             ($STATEMENT_CLASSES{ $Token->content } || '')
1017             eq
1018             'PPI::Statement::Variable'
1019             )
1020             ) {
1021             # VAR == 'my ...'
1022 193         921 return 1;
1023             } elsif ( $Token->content =~ /^\$/ ) {
1024             # VAR == '$scalar'
1025 34         138 return 1;
1026             } elsif ( $Token->isa('PPI::Token::Structure') and $Token->content eq '(' ) {
1027 42         201 return 1;
1028             } elsif ( $Token->isa('PPI::Token::QuoteLike::Words') ) {
1029 6         28 return 1;
1030             } else {
1031 3         18 return '';
1032             }
1033             }
1034              
1035 214 100 100     1995 if (
1036             ($STATEMENT_CLASSES{ $LastChild->content } || '')
1037             eq
1038             'PPI::Statement::Variable'
1039             ) {
1040             # LABEL foreach my ...
1041             # Only a scalar will do
1042 189         621 return $Token->content =~ /^\$/;
1043             }
1044              
1045             # Handle the rare for my $foo qw{bar} ... case
1046 25 50       103 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
1047             # LABEL for VAR QW ...
1048             # LABEL foreach VAR QW ...
1049             # Only a block will do
1050 25   33     115 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
1051             }
1052             }
1053              
1054             # Something we don't know about... what could it be
1055 0         0 PPI::Exception->throw("Illegal state in '$type' compound statement");
1056             }
1057              
1058              
1059              
1060              
1061              
1062             #####################################################################
1063             # Lex Methods - Structure Object
1064              
1065             # Given a parent element, and a ( token to open a structure, determine
1066             # the class that the structure should be.
1067             sub _round {
1068 8072     8072   18213 my ($self, $Parent) = @_;
1069             # my $self = shift;
1070             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1071              
1072             # Get the last significant element in the parent
1073 8072         31881 my $Element = $Parent->schild(-1);
1074 8072 100       66866 if ( _INSTANCE($Element, 'PPI::Token::Word') ) {
1075             # Can it be determined because it is a keyword?
1076 6466         18374 my $rclass = $ROUND{$Element->content};
1077 6466 100       23995 return $rclass if $rclass;
1078             }
1079              
1080             # If we are part of a for or foreach statement, we are a ForLoop
1081 6708 100       96883 if ( $Parent->isa('PPI::Statement::Compound') ) {
    100          
    100          
    100          
1082 192 100       587 if ( $Parent->type =~ /^for(?:each)?$/ ) {
1083 191         1391 return 'PPI::Structure::For';
1084             }
1085             } elsif ( $Parent->isa('PPI::Statement::Given') ) {
1086 3         49 return 'PPI::Structure::Given';
1087             } elsif ( $Parent->isa('PPI::Statement::When') ) {
1088 3         42 return 'PPI::Structure::When';
1089             } elsif ( $Parent->isa('PPI::Statement::Sub') ) {
1090 34         200 return 'PPI::Structure::Signature';
1091             }
1092              
1093             # Otherwise, it must be a list
1094              
1095             # If the previous element is -> then we mark it as a dereference
1096 6477 100 100     41718 if ( _INSTANCE($Element, 'PPI::Token::Operator') and $Element->content eq '->' ) {
1097 10         38 $Element->{_dereference} = 1;
1098             }
1099              
1100             'PPI::Structure::List'
1101 6477         36244 }
1102              
1103             # Given a parent element, and a [ token to open a structure, determine
1104             # the class that the structure should be.
1105             sub _square {
1106 3035     3035   7594 my ($self, $Parent) = @_;
1107             # my $self = shift;
1108             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1109              
1110             # Get the last significant element in the parent
1111 3035         11951 my $Element = $Parent->schild(-1);
1112              
1113             # Is this a subscript, like $foo[1] or $foo{expr}
1114            
1115 3035 100       10527 if ( $Element ) {
1116 2811 100 100     16777 if ( $Element->isa('PPI::Token::Operator') and $Element->content eq '->' ) {
1117             # $foo->[]
1118 400         1092 $Element->{_dereference} = 1;
1119 400         2221 return 'PPI::Structure::Subscript';
1120             }
1121 2411 100       11070 if ( $Element->isa('PPI::Structure::Subscript') ) {
1122             # $foo{}[]
1123 13         55 return 'PPI::Structure::Subscript';
1124             }
1125 2398 100 100     12098 if ( $Element->isa('PPI::Token::Symbol') and $Element->content =~ /^(?:\$|\@)/ ) {
1126             # $foo[], @foo[]
1127 733         4409 return 'PPI::Structure::Subscript';
1128             }
1129 1665 100 100     8593 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%)/ ) {
1130 46         138 my $prior = $Parent->schild(-2);
1131 46 100 100     308 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1132             # Postfix dereference: ->@[...] ->%[...]
1133 2         12 return 'PPI::Structure::Subscript';
1134             }
1135             }
1136             # FIXME - More cases to catch
1137             }
1138              
1139             # Otherwise, we assume that it's an anonymous arrayref constructor
1140 1887         10559 'PPI::Structure::Constructor';
1141             }
1142              
1143             # Keyword -> Structure class maps
1144             my %CURLY_CLASSES = (
1145             # Blocks
1146             'sub' => 'PPI::Structure::Block',
1147             'grep' => 'PPI::Structure::Block',
1148             'map' => 'PPI::Structure::Block',
1149             'sort' => 'PPI::Structure::Block',
1150             'do' => 'PPI::Structure::Block',
1151             # rely on 'continue' + block being handled elsewhere
1152             # rely on 'eval' + block being handled elsewhere
1153              
1154             # Hash constructors
1155             'scalar' => 'PPI::Structure::Constructor',
1156             '=' => 'PPI::Structure::Constructor',
1157             '||=' => 'PPI::Structure::Constructor',
1158             '&&=' => 'PPI::Structure::Constructor',
1159             '//=' => 'PPI::Structure::Constructor',
1160             '||' => 'PPI::Structure::Constructor',
1161             '&&' => 'PPI::Structure::Constructor',
1162             '//' => 'PPI::Structure::Constructor',
1163             '?' => 'PPI::Structure::Constructor',
1164             ':' => 'PPI::Structure::Constructor',
1165             ',' => 'PPI::Structure::Constructor',
1166             '=>' => 'PPI::Structure::Constructor',
1167             '+' => 'PPI::Structure::Constructor', # per perlref
1168             'return' => 'PPI::Structure::Constructor', # per perlref
1169             'bless' => 'PPI::Structure::Constructor', # pragmatic --
1170             # perlfunc says first arg is a reference, and
1171             # bless {; ... } fails to compile.
1172             );
1173              
1174             my @CURLY_LOOKAHEAD_CLASSES = (
1175             {}, # not used
1176             {
1177             ';' => 'PPI::Structure::Block', # per perlref
1178             '}' => 'PPI::Structure::Constructor',
1179             },
1180             {
1181             '=>' => 'PPI::Structure::Constructor',
1182             },
1183             );
1184              
1185              
1186             # Given a parent element, and a { token to open a structure, determine
1187             # the class that the structure should be.
1188             sub _curly {
1189 11366     11366   30094 my ($self, $Parent) = @_;
1190             # my $self = shift;
1191             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1192              
1193             # Get the last significant element in the parent
1194 11366         40640 my $Element = $Parent->schild(-1);
1195 11366 100       56136 my $content = $Element ? $Element->content : '';
1196              
1197             # Is this a subscript, like $foo[1] or $foo{expr}
1198 11366 100       37194 if ( $Element ) {
1199 10723 100 66     41260 if ( $content eq '->' and $Element->isa('PPI::Token::Operator') ) {
1200             # $foo->{}
1201 2111         6007 $Element->{_dereference} = 1;
1202 2111         11312 return 'PPI::Structure::Subscript';
1203             }
1204 8612 100       46748 if ( $Element->isa('PPI::Structure::Subscript') ) {
1205             # $foo[]{}
1206 73         307 return 'PPI::Structure::Subscript';
1207             }
1208 8539 100 100     50330 if ( $content =~ /^(?:\$|\@)/ and $Element->isa('PPI::Token::Symbol') ) {
1209             # $foo{}, @foo{}
1210 573         4334 return 'PPI::Structure::Subscript';
1211             }
1212 7966 100 100     47449 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%|\*)/ ) {
1213 326         1170 my $prior = $Parent->schild(-2);
1214 326 100 100     2756 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1215             # Postfix dereference: ->@{...} ->%{...} ->*{...}
1216 3         15 return 'PPI::Structure::Subscript';
1217             }
1218             }
1219 7963 100       34480 if ( $Element->isa('PPI::Structure::Block') ) {
1220             # deference - ${$hash_ref}{foo}
1221             # or even ${burfle}{foo}
1222             # hash slice - @{$hash_ref}{'foo', 'bar'}
1223 4 50       20 if ( my $prior = $Parent->schild(-2) ) {
1224 4         17 my $prior_content = $prior->content();
1225 4 50 66     39 $prior->isa( 'PPI::Token::Cast' )
      66        
1226             and ( $prior_content eq '@' ||
1227             $prior_content eq '$' )
1228             and return 'PPI::Structure::Subscript';
1229             }
1230             }
1231              
1232             # Are we the last argument of sub?
1233             # E.g.: 'sub foo {}', 'sub foo ($) {}'
1234 7961 100       64470 return 'PPI::Structure::Block' if $Parent->isa('PPI::Statement::Sub');
1235              
1236             # Are we the second or third argument of package?
1237             # E.g.: 'package Foo {}' or 'package Foo v1.2.3 {}'
1238 5624 100       37612 return 'PPI::Structure::Block'
1239             if $Parent->isa('PPI::Statement::Package');
1240              
1241 4331 100       19636 if ( $CURLY_CLASSES{$content} ) {
1242             # Known type
1243 893         6195 return $CURLY_CLASSES{$content};
1244             }
1245             }
1246              
1247             # Are we in a compound statement
1248 4081 100       19662 if ( $Parent->isa('PPI::Statement::Compound') ) {
1249             # We will only encounter blocks in compound statements
1250 1940         13056 return 'PPI::Structure::Block';
1251             }
1252              
1253             # Are we the second or third argument of use
1254 2141 100       11479 if ( $Parent->isa('PPI::Statement::Include') ) {
1255 53 50 33     266 if ( $Parent->schildren == 2 ||
      66        
1256             $Parent->schildren == 3 &&
1257             $Parent->schild(2)->isa('PPI::Token::Number')
1258             ) {
1259             # This is something like use constant { ... };
1260 53         416 return 'PPI::Structure::Constructor';
1261             }
1262             }
1263              
1264             # Unless we are at the start of the statement, everything else should be a block
1265             ### FIXME This is possibly a bad choice, but will have to do for now.
1266 2088 100       13734 return 'PPI::Structure::Block' if $Element;
1267              
1268 643 100 66     5411 if (
1269             $Parent->isa('PPI::Statement')
1270             and
1271             _INSTANCE($Parent->parent, 'PPI::Structure::List')
1272             ) {
1273 165         634 my $function = $Parent->parent->parent->schild(-2);
1274              
1275             # Special case: Are we the param of a core function
1276             # i.e. map({ $_ => 1 } @foo)
1277 165 100 100     1204 return 'PPI::Structure::Block'
1278             if $function and $function->content =~ /^(?:map|grep|sort|eval|do)$/;
1279              
1280             # If not part of a block print, list-embedded curlies are most likely constructors
1281 71 100 100     446 return 'PPI::Structure::Constructor'
1282             if not $function or $function->content !~ /^(?:print|say)$/;
1283             }
1284              
1285             # We need to scan ahead.
1286 484         1525 my $Next;
1287 484         873 my $position = 0;
1288 484         1074 my @delayed;
1289 484         1359 while ( $Next = $self->_get_token ) {
1290 1195 100       4016 unless ( $Next->significant ) {
1291 194         427 push @delayed, $Next;
1292 194         501 next;
1293             }
1294              
1295             # If we are off the end of the lookahead array,
1296 1001 100       4547 if ( ++$position >= @CURLY_LOOKAHEAD_CLASSES ) {
    100          
1297             # default to block.
1298 131         705 $self->_buffer( splice(@delayed), $Next );
1299 131         300 last;
1300             # If the content at this position is known
1301             } elsif ( my $class = $CURLY_LOOKAHEAD_CLASSES[$position]
1302             {$Next->content} ) {
1303             # return the associated class.
1304 265         1427 $self->_buffer( splice(@delayed), $Next );
1305 265         2335 return $class;
1306             }
1307              
1308             # Delay and continue
1309 605         1820 push @delayed, $Next;
1310             }
1311              
1312             # Hit the end of the document, or bailed out, go with block
1313 219         903 $self->_buffer( splice(@delayed) );
1314 219 50       831 if ( ref $Parent eq 'PPI::Statement' ) {
1315 219         576 bless $Parent, 'PPI::Statement::Compound';
1316             }
1317 219         1327 return 'PPI::Structure::Block';
1318             }
1319              
1320              
1321             sub _lex_structure {
1322 22473     22473   45264 my ($self, $Structure) = @_;
1323             # my $self = shift;
1324             # my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1";
1325              
1326 22473   100     36450 push @{$self->{features}}, $self->{features}[-1] || {};
  22473         110898  
1327              
1328             # Start the processing loop
1329 22473         40732 my $Token;
1330 22473         53165 while ( ref($Token = $self->_get_token) ) {
1331             # Is this a direct type token
1332 90081 100       286991 unless ( $Token->significant ) {
1333 42751         77746 push @{$self->{delayed}}, $Token;
  42751         97518  
1334             # $self->_delay_element( $Token );
1335 42751         100346 next;
1336             }
1337              
1338             # Anything other than a Structure starts a Statement
1339 47330 100       214679 unless ( $Token->isa('PPI::Token::Structure') ) {
1340             # Because _statement may well delay and rollback itself,
1341             # we need to add the delayed tokens early
1342 26217         80924 $self->_add_delayed( $Structure );
1343              
1344             # Determine the class for the Statement and create it
1345 26217         81239 my $Statement = $self->_statement($Structure, $Token)->new($Token);
1346              
1347             # Move the lexing down into the Statement
1348 26217         99935 $self->_add_element( $Structure, $Statement );
1349 26217         99647 $self->_lex_statement( $Statement );
1350              
1351 26217         83637 next;
1352             }
1353              
1354             # Is this the opening of another structure directly inside us?
1355 21113 100       59527 if ( $Token->__LEXER__opens ) {
1356             # Rollback the Token, and recurse into the statement
1357 455         1842 $self->_rollback( $Token );
1358 455         3197 my $Statement = PPI::Statement->new;
1359 455         1667 $self->_add_element( $Structure, $Statement );
1360 455         1687 $self->_lex_statement( $Statement );
1361 455         1474 next;
1362             }
1363              
1364             # Is this the close of a structure ( which would be an error )
1365 20658 100       59722 if ( $Token->__LEXER__closes ) {
1366 20607         31655 pop @{$self->{features}};
  20607         47175  
1367              
1368             # Is this OUR closing structure
1369 20607 100       66003 if ( $Token->content eq $Structure->start->__LEXER__opposite ) {
1370             # Add any delayed tokens, and the finishing token (the ugly way)
1371 19939         61713 $self->_add_delayed( $Structure );
1372 19939         55422 $Structure->{finish} = $Token;
1373             Scalar::Util::weaken(
1374 19939         76556 $_PARENT{Scalar::Util::refaddr $Token} = $Structure
1375             );
1376              
1377             # Confirm that ForLoop structures are actually so, and
1378             # aren't really a list.
1379 19939 100       100245 if ( $Structure->isa('PPI::Structure::For') ) {
1380 229 100       1104 if ( 2 > scalar grep {
1381 585         2438 $_->isa('PPI::Statement')
1382             } $Structure->children ) {
1383 208         550 bless($Structure, 'PPI::Structure::List');
1384             }
1385             }
1386 19939         88572 return 1;
1387             }
1388              
1389             # Unmatched closing brace.
1390             # Either they typed the wrong thing, or haven't put
1391             # one at all. Either way it's an error we need to
1392             # somehow handle gracefully. For now, we'll treat it
1393             # as implicitly ending the structure. This causes the
1394             # least damage across the various reasons why this
1395             # might have happened.
1396 668         2162 return $self->_rollback( $Token );
1397             }
1398              
1399             # It's a semi-colon on its own, just inside the block.
1400             # This is a null statement.
1401             $self->_add_element(
1402 51         353 $Structure,
1403             PPI::Statement::Null->new($Token),
1404             );
1405             }
1406              
1407             # Is this an error
1408 1866 50       5887 unless ( defined $Token ) {
1409 0         0 PPI::Exception->throw;
1410             }
1411              
1412 1866         2948 pop @{$self->{features}};
  1866         5091  
1413              
1414             # No, it's just the end of file.
1415             # Add any insignificant trailing tokens.
1416 1866         6302 $self->_add_delayed( $Structure );
1417             }
1418              
1419              
1420              
1421              
1422              
1423             #####################################################################
1424             # Support Methods
1425              
1426             # Get the next token for processing, handling buffering
1427             sub _get_token {
1428 473408 100   473408   688477 shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token;
  473408         2136335  
1429             }
1430              
1431             # Old long version of the above
1432             # my $self = shift;
1433             # # First from the buffer
1434             # if ( @{$self->{buffer}} ) {
1435             # return shift @{$self->{buffer}};
1436             # }
1437             #
1438             # # Then from the Tokenizer
1439             # $self->{Tokenizer}->get_token;
1440             # }
1441              
1442             # Delay the addition of insignificant elements.
1443             # This ended up being inlined.
1444             # sub _delay_element {
1445             # my $self = shift;
1446             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 1";
1447             # push @{ $_[0]->{delayed} }, $_[1];
1448             # }
1449              
1450             # Add an Element to a Node, including any delayed Elements
1451             sub _add_element {
1452 229412     229412   454733 my ($self, $Parent, $Element) = @_;
1453             # my $self = shift;
1454             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1455             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 2";
1456              
1457             # Handle a special case, where a statement is not fully resolved
1458 229412 100 100     774154 if ( ref $Parent eq 'PPI::Statement'
1459             and my $first = $Parent->schild(0) ) {
1460 69727 50 33     312037 if ( $first->isa('PPI::Token::Label')
1461             and !(my $second = $Parent->schild(1)) ) {
1462 0         0 my $new_class = $STATEMENT_CLASSES{$second->content};
1463             # It's a labelled statement
1464 0 0       0 bless $Parent, $new_class if $new_class;
1465             }
1466             }
1467              
1468             # Add first the delayed, from the front, then the passed element
1469 229412         351406 foreach my $el ( @{$self->{delayed}} ) {
  229412         511214  
1470             Scalar::Util::weaken(
1471 62209         277380 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1472             );
1473             # Inlined $Parent->__add_element($el);
1474             }
1475             Scalar::Util::weaken(
1476 229412         807561 $_PARENT{Scalar::Util::refaddr $Element} = $Parent
1477             );
1478 229412         323993 push @{$Parent->{children}}, @{$self->{delayed}}, $Element;
  229412         412615  
  229412         541510  
1479              
1480             # Clear the delayed elements
1481 229412         541659 $self->{delayed} = [];
1482             }
1483              
1484             # Specifically just add any delayed tokens, if any.
1485             sub _add_delayed {
1486 116372     116372   237386 my ($self, $Parent) = @_;
1487             # my $self = shift;
1488             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1489              
1490             # Add any delayed
1491 116372         168600 foreach my $el ( @{$self->{delayed}} ) {
  116372         311853  
1492             Scalar::Util::weaken(
1493 53946         234526 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1494             );
1495             # Inlined $Parent->__add_element($el);
1496             }
1497 116372         192684 push @{$Parent->{children}}, @{$self->{delayed}};
  116372         250392  
  116372         254870  
1498              
1499             # Clear the delayed elements
1500 116372         296989 $self->{delayed} = [];
1501             }
1502              
1503             # Rollback the delayed tokens, plus any passed. Once all the tokens
1504             # have been moved back on to the buffer, the order should be.
1505             # <--- @{$self->{delayed}}, @_, @{$self->{buffer}} <----
1506             sub _rollback {
1507 53315     53315   90077 my $self = shift;
1508              
1509             # First, put any passed objects back
1510 53315 100       131686 if ( @_ ) {
1511 41908         66097 unshift @{$self->{buffer}}, splice @_;
  41908         147411  
1512             }
1513              
1514             # Then, put back anything delayed
1515 53315 100       80625 if ( @{$self->{delayed}} ) {
  53315         148070  
1516 29211         48059 unshift @{$self->{buffer}}, splice @{$self->{delayed}};
  29211         58678  
  29211         74302  
1517             }
1518              
1519 53315         122304 1;
1520             }
1521              
1522             # Partial rollback, just return a single list to the buffer
1523             sub _buffer {
1524 615     615   1101 my $self = shift;
1525              
1526             # Put any passed objects back
1527 615 100       2719 if ( @_ ) {
1528 472         818 unshift @{$self->{buffer}}, splice @_;
  472         1690  
1529             }
1530              
1531 615         1055 1;
1532             }
1533              
1534              
1535              
1536              
1537              
1538             #####################################################################
1539             # Error Handling
1540              
1541             # Set the error message
1542             sub _error {
1543 2     2   6 $errstr = $_[1];
1544 2         15 undef;
1545             }
1546              
1547             # Clear the error message.
1548             # Returns the object as a convenience.
1549             sub _clear {
1550 16801     16801   45541 $errstr = '';
1551 16801         39350 $_[0];
1552             }
1553              
1554             =pod
1555              
1556             =head2 errstr
1557              
1558             For any error that occurs, you can use the C, as either
1559             a static or object method, to access the error message.
1560              
1561             If no error occurs for any particular action, C will return false.
1562              
1563             =cut
1564              
1565             sub errstr {
1566 2     2 1 16 $errstr;
1567             }
1568              
1569              
1570              
1571              
1572              
1573             #####################################################################
1574             # PDOM Extensions
1575             #
1576             # This is something of a future expansion... ignore it for now :)
1577             #
1578             # use PPI::Statement::Sub ();
1579             #
1580             # sub PPI::Statement::Sub::__LEXER__normal { '' }
1581              
1582             1;
1583              
1584             =pod
1585              
1586             =head1 TO DO
1587              
1588             - Add optional support for some of the more common source filters
1589              
1590             - Some additional checks for blessing things into various Statement
1591             and Structure subclasses.
1592              
1593             =head1 SUPPORT
1594              
1595             See the L in the main module.
1596              
1597             =head1 AUTHOR
1598              
1599             Adam Kennedy Eadamk@cpan.orgE
1600              
1601             =head1 COPYRIGHT
1602              
1603             Copyright 2001 - 2011 Adam Kennedy.
1604              
1605             This program is free software; you can redistribute
1606             it and/or modify it under the same terms as Perl itself.
1607              
1608             The full text of the license can be found in the
1609             LICENSE file included with this module.
1610              
1611             =cut