File Coverage

blib/lib/PPI/Lexer.pm
Criterion Covered Total %
statement 414 439 94.3
branch 246 284 86.6
condition 148 189 78.3
subroutine 28 28 100.0
pod 5 6 83.3
total 841 946 88.9


line stmt bran cond sub pod time code
1             package PPI::Lexer;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Lexer - The PPI Lexer
8              
9             =head1 SYNOPSIS
10              
11             use PPI;
12            
13             # Create a new Lexer
14             my $Lexer = PPI::Lexer->new;
15            
16             # Build a PPI::Document object from a Token stream
17             my $Tokenizer = PPI::Tokenizer->load('My/Module.pm');
18             my $Document = $Lexer->lex_tokenizer($Tokenizer);
19            
20             # Build a PPI::Document object for some raw source
21             my $source = "print 'Hello World!'; kill(Humans->all);";
22             $Document = $Lexer->lex_source($source);
23            
24             # Build a PPI::Document object for a particular file name
25             $Document = $Lexer->lex_file('My/Module.pm');
26              
27             =head1 DESCRIPTION
28              
29             The is the L Lexer. In the larger scheme of things, its job is to take
30             token streams, in a variety of forms, and "lex" them into nested structures.
31              
32             Pretty much everything in this module happens behind the scenes at this
33             point. In fact, at the moment you don't really need to instantiate the lexer
34             at all, the three main methods will auto-instantiate themselves a
35             C object as needed.
36              
37             All methods do a one-shot "lex this and give me a L object".
38              
39             In fact, if you are reading this, what you B want to do is to
40             just "load a document", in which case you can do this in a much more
41             direct and concise manner with one of the following.
42              
43             use PPI;
44            
45             $Document = PPI::Document->load( $filename );
46             $Document = PPI::Document->new( $string );
47              
48             See L for more details.
49              
50             For more unusual tasks, by all means forge onwards.
51              
52             =head1 METHODS
53              
54             =cut
55              
56 64     64   491 use strict;
  64         137  
  64         1964  
57 64     64   354 use Scalar::Util ();
  64         155  
  64         1220  
58 64     64   300 use Params::Util qw{_STRING _INSTANCE};
  64         131  
  64         2820  
59 64     64   445 use PPI ();
  64         131  
  64         932  
60 64     64   374 use PPI::Exception ();
  64         156  
  64         1564  
61 64     64   426 use PPI::Singletons '%_PARENT';
  64         153  
  64         344431  
62              
63             our $VERSION = '1.277';
64              
65             our $errstr = "";
66              
67             # Keyword -> Structure class maps
68             my %ROUND = (
69             # Conditions
70             'if' => 'PPI::Structure::Condition',
71             'elsif' => 'PPI::Structure::Condition',
72             'unless' => 'PPI::Structure::Condition',
73             'while' => 'PPI::Structure::Condition',
74             'until' => 'PPI::Structure::Condition',
75              
76             # For(each)
77             'for' => 'PPI::Structure::For',
78             'foreach' => 'PPI::Structure::For',
79             );
80              
81             # Opening brace to refining method
82             my %RESOLVE = (
83             '(' => '_round',
84             '[' => '_square',
85             '{' => '_curly',
86             );
87              
88             # Allows for experimental overriding of the tokenizer
89             our $X_TOKENIZER = "PPI::Tokenizer";
90 16766     16766 0 64315 sub X_TOKENIZER { $X_TOKENIZER }
91              
92              
93              
94              
95              
96             #####################################################################
97             # Constructor
98              
99             =pod
100              
101             =head2 new
102              
103             The C constructor creates a new C object. The object itself
104             is merely used to hold various buffers and state data during the lexing
105             process, and holds no significant data between -Elex_xxxxx calls.
106              
107             Returns a new C object
108              
109             =cut
110              
111             sub new {
112 16767     16767 1 35921 my $class = shift->_clear;
113 16767         74718 bless {
114             Tokenizer => undef, # Where we store the tokenizer for a run
115             buffer => [], # The input token buffer
116             delayed => [], # The "delayed insignificant tokens" buffer
117             }, $class;
118             }
119              
120              
121              
122              
123              
124             #####################################################################
125             # Main Lexing Methods
126              
127             =pod
128              
129             =head2 lex_file $filename
130              
131             The C method takes a filename as argument. It then loads the file,
132             creates a L for the content and lexes the token stream
133             produced by the tokenizer. Basically, a sort of all-in-one method for
134             getting a L object from a file name.
135              
136             Returns a L object, or C on error.
137              
138             =cut
139              
140             sub lex_file {
141 503 100   503 1 2110 my $self = ref $_[0] ? shift : shift->new;
142 503         2076 my $file = _STRING(shift);
143 503 100       1399 unless ( defined $file ) {
144 1         3 return $self->_error("Did not pass a filename to PPI::Lexer::lex_file");
145             }
146              
147             # Create the Tokenizer
148 502         1164 my $Tokenizer = eval {
149 502         1341 X_TOKENIZER->new($file);
150             };
151 502 50       2970 if ( _INSTANCE($@, 'PPI::Exception') ) {
    50          
152 0         0 return $self->_error( $@->message );
153             } elsif ( $@ ) {
154 0         0 return $self->_error( $errstr );
155             }
156              
157 502         2025 $self->lex_tokenizer( $Tokenizer );
158             }
159              
160             =pod
161              
162             =head2 lex_source $string
163              
164             The C method takes a normal scalar string as argument. It
165             creates a L object for the string, and then lexes the
166             resulting token stream.
167              
168             Returns a L object, or C on error.
169              
170             =cut
171              
172             sub lex_source {
173 16264 50   16264 1 256129 my $self = ref $_[0] ? shift : shift->new;
174 16264         27771 my $source = shift;
175 16264 50 33     69876 unless ( defined $source and not ref $source ) {
176 0         0 return $self->_error("Did not pass a string to PPI::Lexer::lex_source");
177             }
178              
179             # Create the Tokenizer and hand off to the next method
180 16264         24570 my $Tokenizer = eval {
181 16264         31507 X_TOKENIZER->new(\$source);
182             };
183 16264 50       61554 if ( _INSTANCE($@, 'PPI::Exception') ) {
    50          
184 0         0 return $self->_error( $@->message );
185             } elsif ( $@ ) {
186 0         0 return $self->_error( $errstr );
187             }
188              
189 16264         38239 $self->lex_tokenizer( $Tokenizer );
190             }
191              
192             =pod
193              
194             =head2 lex_tokenizer $Tokenizer
195              
196             The C takes as argument a L object. It
197             lexes the token stream from the tokenizer into a L object.
198              
199             Returns a L object, or C on error.
200              
201             =cut
202              
203             sub lex_tokenizer {
204 16766 50   16766 1 37071 my $self = ref $_[0] ? shift : shift->new;
205 16766         88859 my $Tokenizer = _INSTANCE(shift, 'PPI::Tokenizer');
206 16766 50       40727 return $self->_error(
207             "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer"
208             ) unless $Tokenizer;
209              
210             # Create the empty document
211 16766         49830 my $Document = PPI::Document->new;
212              
213             # Lex the token stream into the document
214 16766         27360 $self->{Tokenizer} = $Tokenizer;
215 16766 100       24531 if ( !eval { $self->_lex_document($Document); 1 } ) {
  16766         41296  
  16765         33815  
216             # If an error occurs DESTROY the partially built document.
217 1         4 undef $Document;
218 1 50       10 if ( _INSTANCE($@, 'PPI::Exception') ) {
219 1         5 return $self->_error( $@->message );
220             } else {
221 0         0 return $self->_error( $errstr );
222             }
223             }
224              
225 16765         112269 return $Document;
226             }
227              
228              
229              
230              
231              
232             #####################################################################
233             # Lex Methods - Document Object
234              
235             sub _lex_document {
236 16766     16766   32032 my ($self, $Document) = @_;
237             # my $self = shift;
238             # my $Document = _INSTANCE(shift, 'PPI::Document') or return undef;
239              
240             # Start the processing loop
241 16766         22344 my $Token;
242 16766         36106 while ( ref($Token = $self->_get_token) ) {
243             # Add insignificant tokens directly beneath us
244 53194 100       141320 unless ( $Token->significant ) {
245 20596         49195 $self->_add_element( $Document, $Token );
246 20596         39521 next;
247             }
248              
249 32598 100       77290 if ( $Token->content eq ';' ) {
250             # It's a semi-colon on its own.
251             # We call this a null statement.
252 472         2073 $self->_add_element(
253             $Document,
254             PPI::Statement::Null->new($Token),
255             );
256 472         1180 next;
257             }
258              
259             # Handle anything other than a structural element
260 32126 100       78687 unless ( ref $Token eq 'PPI::Token::Structure' ) {
261             # Determine the class for the Statement, and create it
262 28947         71499 my $Statement = $self->_statement($Document, $Token)->new($Token);
263              
264             # Move the lexing down into the statement
265 28947         80416 $self->_add_delayed( $Document );
266 28947         72002 $self->_add_element( $Document, $Statement );
267 28947         69404 $self->_lex_statement( $Statement );
268              
269 28947         67875 next;
270             }
271              
272             # Is this the opening of a structure?
273 3179 100       7651 if ( $Token->__LEXER__opens ) {
274             # This should actually have a Statement instead
275 978         2938 $self->_rollback( $Token );
276 978         3063 my $Statement = PPI::Statement->new;
277 978         2694 $self->_add_element( $Document, $Statement );
278 978         2948 $self->_lex_statement( $Statement );
279 978         2307 next;
280             }
281              
282             # Is this the close of a structure.
283 2201 50       5197 if ( $Token->__LEXER__closes ) {
284             # Because we are at the top of the tree, this is an error.
285             # This means either a mis-parsing, or a mistake in the code.
286             # To handle this, we create a "Naked Close" statement
287 2201         7894 $self->_add_element( $Document,
288             PPI::Statement::UnmatchedBrace->new($Token)
289             );
290 2201         5174 next;
291             }
292              
293             # Shouldn't be able to get here
294 0         0 PPI::Exception->throw('Lexer reached an illegal state');
295             }
296              
297             # Did we leave the main loop because of a Tokenizer error?
298 16765 50       35494 unless ( defined $Token ) {
299 0 0       0 my $errstr = $self->{Tokenizer} ? $self->{Tokenizer}->errstr : '';
300 0   0     0 $errstr ||= 'Unknown Tokenizer Error';
301 0         0 PPI::Exception->throw($errstr);
302             }
303              
304             # No error, it's just the end of file.
305             # Add any insignificant trailing tokens.
306 16765         42075 $self->_add_delayed( $Document );
307              
308             # If the Tokenizer has any v6 blocks to attach, do so now.
309             # Checking once at the end is faster than adding a special
310             # case check for every statement parsed.
311 16765         29965 my $perl6 = $self->{Tokenizer}->{'perl6'};
312 16765 100       34804 if ( @$perl6 ) {
313 2         15 my $includes = $Document->find( 'PPI::Statement::Include::Perl6' );
314 2         7 foreach my $include ( @$includes ) {
315 2 50       7 unless ( @$perl6 ) {
316 0         0 PPI::Exception->throw('Failed to find a perl6 section');
317             }
318 2         7 $include->{perl6} = shift @$perl6;
319             }
320             }
321              
322 16765         26159 return 1;
323             }
324              
325              
326              
327              
328              
329             #####################################################################
330             # Lex Methods - Statement Object
331              
332             # Keyword -> Statement Subclass
333             my %STATEMENT_CLASSES = (
334             # Things that affect the timing of execution
335             'BEGIN' => 'PPI::Statement::Scheduled',
336             'CHECK' => 'PPI::Statement::Scheduled',
337             'UNITCHECK' => 'PPI::Statement::Scheduled',
338             'INIT' => 'PPI::Statement::Scheduled',
339             'END' => 'PPI::Statement::Scheduled',
340              
341             # Special subroutines for which 'sub' is optional
342             'AUTOLOAD' => 'PPI::Statement::Sub',
343             'DESTROY' => 'PPI::Statement::Sub',
344              
345             # Loading and context statement
346             'package' => 'PPI::Statement::Package',
347             # 'use' => 'PPI::Statement::Include',
348             'no' => 'PPI::Statement::Include',
349             'require' => 'PPI::Statement::Include',
350              
351             # Various declarations
352             'my' => 'PPI::Statement::Variable',
353             'local' => 'PPI::Statement::Variable',
354             'our' => 'PPI::Statement::Variable',
355             'state' => 'PPI::Statement::Variable',
356             # Statements starting with 'sub' could be any one of...
357             # 'sub' => 'PPI::Statement::Sub',
358             # 'sub' => 'PPI::Statement::Scheduled',
359             # 'sub' => 'PPI::Statement',
360              
361             # Compound statement
362             'if' => 'PPI::Statement::Compound',
363             'unless' => 'PPI::Statement::Compound',
364             'for' => 'PPI::Statement::Compound',
365             'foreach' => 'PPI::Statement::Compound',
366             'while' => 'PPI::Statement::Compound',
367             'until' => 'PPI::Statement::Compound',
368              
369             # Switch statement
370             'given' => 'PPI::Statement::Given',
371             'when' => 'PPI::Statement::When',
372             'default' => 'PPI::Statement::When',
373              
374             # Various ways of breaking out of scope
375             'redo' => 'PPI::Statement::Break',
376             'next' => 'PPI::Statement::Break',
377             'last' => 'PPI::Statement::Break',
378             'return' => 'PPI::Statement::Break',
379             'goto' => 'PPI::Statement::Break',
380              
381             # Special sections of the file
382             '__DATA__' => 'PPI::Statement::Data',
383             '__END__' => 'PPI::Statement::End',
384             );
385              
386             sub _statement {
387 54401     54401   99665 my ($self, $Parent, $Token) = @_;
388             # my $self = shift;
389             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
390             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
391              
392             # Check for things like ( parent => ... )
393 54401 100 100     352038 if (
394             $Parent->isa('PPI::Structure::List')
395             or
396             $Parent->isa('PPI::Structure::Constructor')
397             ) {
398 7641 100       26095 if ( $Token->isa('PPI::Token::Word') ) {
399             # Is the next significant token a =>
400             # Read ahead to the next significant token
401 1920         3235 my $Next;
402 1920         4008 while ( $Next = $self->_get_token ) {
403 2688 100       7896 unless ( $Next->significant ) {
404 816         1412 push @{$self->{delayed}}, $Next;
  816         1837  
405             # $self->_delay_element( $Next );
406 816         1609 next;
407             }
408              
409             # Got the next token
410 1872 100 100     8795 if (
411             $Next->isa('PPI::Token::Operator')
412             and
413             $Next->content eq '=>'
414             ) {
415             # Is an ordinary expression
416 891         2241 $self->_rollback( $Next );
417 891         4009 return 'PPI::Statement::Expression';
418             } else {
419 981         1647 last;
420             }
421             }
422              
423             # Rollback and continue
424 1029         2398 $self->_rollback( $Next );
425             }
426             }
427              
428 53510         84006 my $is_lexsub = 0;
429              
430             # Is it a token in our known classes list
431 53510         125322 my $class = $STATEMENT_CLASSES{$Token->content};
432 53510 100       112536 if ( $class ) {
433             # Is the next significant token a =>
434             # Read ahead to the next significant token
435 9645         14214 my $Next;
436 9645         18625 while ( $Next = $self->_get_token ) {
437 18923 100       47880 if ( !$Next->significant ) {
438 9325         12884 push @{$self->{delayed}}, $Next;
  9325         18221  
439 9325         17404 next;
440             }
441              
442             # Scheduled block must be followed by left curly or
443             # semicolon. Otherwise we have something else (e.g.
444             # open( CHECK, ... );
445 9598 100 66     24371 if (
      100        
446             'PPI::Statement::Scheduled' eq $class
447             and not ( $Next->isa( 'PPI::Token::Structure' )
448             and $Next->content =~ m/\A[{;]\z/ ) # }
449             ) {
450 1         6 $class = undef;
451 1         2 last;
452             }
453              
454             # Lexical subroutine
455 9597 100 100     19656 if (
      66        
456             $Token->content =~ /^(?:my|our|state)$/
457             and $Next->isa( 'PPI::Token::Word' ) and $Next->content eq 'sub'
458             ) {
459             # This should be PPI::Statement::Sub rather than PPI::Statement::Variable
460 7         14 $class = undef;
461 7         11 $is_lexsub = 1;
462 7         13 last;
463             }
464              
465             last if
466 9590 100 100     45081 !$Next->isa( 'PPI::Token::Operator' ) or $Next->content ne '=>';
467              
468             # Got the next token
469             # Is an ordinary expression
470 21         55 $self->_rollback( $Next );
471 21         76 return 'PPI::Statement';
472             }
473              
474             # Rollback and continue
475 9624         20965 $self->_rollback( $Next );
476             }
477              
478             # Handle potential barewords for subscripts
479 53489 100       167066 if ( $Parent->isa('PPI::Structure::Subscript') ) {
480             # Fast obvious case, just an expression
481 3845 100 100     11155 unless ( $class and $class->isa('PPI::Statement::Expression') ) {
482 3722         13558 return 'PPI::Statement::Expression';
483             }
484              
485             # This is something like "my" or "our" etc... more subtle.
486             # Check if the next token is a closing curly brace.
487             # This means we are something like $h{my}
488 123         167 my $Next;
489 123         230 while ( $Next = $self->_get_token ) {
490 119 50       299 unless ( $Next->significant ) {
491 0         0 push @{$self->{delayed}}, $Next;
  0         0  
492             # $self->_delay_element( $Next );
493 0         0 next;
494             }
495              
496             # Found the next significant token.
497             # Is it a closing curly brace?
498 119 50       241 if ( $Next->content eq '}' ) {
499 119         257 $self->_rollback( $Next );
500 119         527 return 'PPI::Statement::Expression';
501             } else {
502 0         0 $self->_rollback( $Next );
503 0         0 return $class;
504             }
505             }
506              
507             # End of file... this means it is something like $h{our
508             # which is probably going to be $h{our} ... I think
509 4         15 $self->_rollback( $Next );
510 4         17 return 'PPI::Statement::Expression';
511             }
512              
513             # If it's a token in our list, use that class
514 49644 100       123777 return $class if $class;
515              
516             # Handle the more in-depth sub detection
517 40181 100 100     106120 if ( $is_lexsub || $Token->content eq 'sub' ) {
518             # Read ahead to the next significant token
519 3219         5347 my $Next;
520 3219         7017 while ( $Next = $self->_get_token ) {
521 6375 100       16972 unless ( $Next->significant ) {
522 3180         4704 push @{$self->{delayed}}, $Next;
  3180         6772  
523             # $self->_delay_element( $Next );
524 3180         5988 next;
525             }
526              
527             # Got the next significant token
528 3195         7757 my $sclass = $STATEMENT_CLASSES{$Next->content};
529 3195 100 100     9097 if ( $sclass and $sclass eq 'PPI::Statement::Scheduled' ) {
530 28         86 $self->_rollback( $Next );
531 28         149 return 'PPI::Statement::Scheduled';
532             }
533 3167 100       9532 if ( $Next->isa('PPI::Token::Word') ) {
534 3060         7354 $self->_rollback( $Next );
535 3060         15242 return 'PPI::Statement::Sub';
536             }
537              
538             ### Comment out these two, as they would return PPI::Statement anyway
539             # if ( $content eq '{' ) {
540             # Anonymous sub at start of statement
541             # return 'PPI::Statement';
542             # }
543             #
544             # if ( $Next->isa('PPI::Token::Prototype') ) {
545             # Anonymous sub at start of statement
546             # return 'PPI::Statement';
547             # }
548              
549             # PPI::Statement is the safest fall-through
550 107         377 $self->_rollback( $Next );
551 107         519 return 'PPI::Statement';
552             }
553              
554             # End of file... PPI::Statement::Sub is the most likely
555 24         84 $self->_rollback( $Next );
556 24         192 return 'PPI::Statement::Sub';
557             }
558              
559 36962 100       77303 if ( $Token->content eq 'use' ) {
560             # Add a special case for "use v6" lines.
561 2238         3308 my $Next;
562 2238         4774 while ( $Next = $self->_get_token ) {
563 4471 100       12117 unless ( $Next->significant ) {
564 2235         3226 push @{$self->{delayed}}, $Next;
  2235         4889  
565             # $self->_delay_element( $Next );
566 2235         4456 next;
567             }
568              
569             # Found the next significant token.
570 2236 100 66     12240 if (
    100          
571             $Next->isa('PPI::Token::Operator')
572             and
573             $Next->content eq '=>'
574             ) {
575             # Is an ordinary expression
576 1         6 $self->_rollback( $Next );
577 1         7 return 'PPI::Statement';
578             # Is it a v6 use?
579             } elsif ( $Next->content eq 'v6' ) {
580 2         7 $self->_rollback( $Next );
581 2         25 return 'PPI::Statement::Include::Perl6';
582             } else {
583 2233         6132 $self->_rollback( $Next );
584 2233         11154 return 'PPI::Statement::Include';
585             }
586             }
587              
588             # End of file... this means it is an incomplete use
589             # line, just treat it as a normal include.
590 2         12 $self->_rollback( $Next );
591 2         23 return 'PPI::Statement::Include';
592             }
593              
594             # If our parent is a Condition, we are an Expression
595 34724 100       103389 if ( $Parent->isa('PPI::Structure::Condition') ) {
596 1220         5103 return 'PPI::Statement::Expression';
597             }
598              
599             # If our parent is a List, we are also an expression
600 33504 100       86975 if ( $Parent->isa('PPI::Structure::List') ) {
601 4965         22589 return 'PPI::Statement::Expression';
602             }
603              
604             # Switch statements use expressions, as well.
605 28539 100 100     146882 if (
606             $Parent->isa('PPI::Structure::Given')
607             or
608             $Parent->isa('PPI::Structure::When')
609             ) {
610 6         85 return 'PPI::Statement::Expression';
611             }
612              
613 28533 100       173675 if ( _INSTANCE($Token, 'PPI::Token::Label') ) {
614 357         2078 return 'PPI::Statement::Compound';
615             }
616              
617             # Beyond that, I have no idea for the moment.
618             # Just keep adding more conditions above this.
619 28176         109234 return 'PPI::Statement';
620             }
621              
622             sub _lex_statement {
623 55827     55827   95383 my ($self, $Statement) = @_;
624             # my $self = shift;
625             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
626              
627             # Handle some special statements
628 55827 100       204134 if ( $Statement->isa('PPI::Statement::End') ) {
629 8         172 return $self->_lex_end( $Statement );
630             }
631              
632             # Begin processing tokens
633 55819         75201 my $Token;
634 55819         101024 while ( ref( $Token = $self->_get_token ) ) {
635             # Delay whitespace and comment tokens
636 254592 100       598238 unless ( $Token->significant ) {
637 89457         117133 push @{$self->{delayed}}, $Token;
  89457         161794  
638             # $self->_delay_element( $Token );
639 89457         156712 next;
640             }
641              
642             # Structual closes, and __DATA__ and __END__ tags implicitly
643             # end every type of statement
644 165135 100 66     357574 if (
645             $Token->__LEXER__closes
646             or
647             $Token->isa('PPI::Token::Separator')
648             ) {
649             # Rollback and end the statement
650 17555         43654 return $self->_rollback( $Token );
651             }
652              
653             # Normal statements never implicitly end
654 147580 100       391176 unless ( $Statement->__LEXER__normal ) {
655             # Have we hit an implicit end to the statement
656 24328 100       54459 unless ( $self->_continues( $Statement, $Token ) ) {
657             # Rollback and finish the statement
658 4322         11522 return $self->_rollback( $Token );
659             }
660             }
661              
662             # Any normal character just gets added
663 143258 100       378189 unless ( $Token->isa('PPI::Token::Structure') ) {
664 98718         219867 $self->_add_element( $Statement, $Token );
665 98718         190545 next;
666             }
667              
668             # Handle normal statement terminators
669 44540 100       92113 if ( $Token->content eq ';' ) {
670 22670         56797 $self->_add_element( $Statement, $Token );
671 22670         38975 return 1;
672             }
673              
674             # Which leaves us with a new structure
675              
676             # Determine the class for the structure and create it
677 21870         48874 my $method = $RESOLVE{$Token->content};
678 21870         64853 my $Structure = $self->$method($Statement)->new($Token);
679              
680             # Move the lexing down into the Structure
681 21870         62367 $self->_add_delayed( $Statement );
682 21870         56809 $self->_add_element( $Statement, $Structure );
683 21870         53486 $self->_lex_structure( $Structure );
684             }
685              
686             # Was it an error in the tokenizer?
687 11272 50       23350 unless ( defined $Token ) {
688 0         0 PPI::Exception->throw;
689             }
690              
691             # No, it's just the end of the file...
692             # Roll back any insignificant tokens, they'll get added at the Document level
693 11272         21287 $self->_rollback;
694             }
695              
696             sub _lex_end {
697 8     8   32 my ($self, $Statement) = @_;
698             # my $self = shift;
699             # my $Statement = _INSTANCE(shift, 'PPI::Statement::End') or die "Bad param 1";
700              
701             # End of the file, EVERYTHING is ours
702 8         16 my $Token;
703 8         22 while ( $Token = $self->_get_token ) {
704             # Inlined $Statement->__add_element($Token);
705             Scalar::Util::weaken(
706 15         93 $_PARENT{Scalar::Util::refaddr $Token} = $Statement
707             );
708 15         21 push @{$Statement->{children}}, $Token;
  15         35  
709             }
710              
711             # Was it an error in the tokenizer?
712 8 50       50 unless ( defined $Token ) {
713 0         0 PPI::Exception->throw;
714             }
715              
716             # No, it's just the end of the file...
717             # Roll back any insignificant tokens, they get added at the Document level
718 8         24 $self->_rollback;
719             }
720              
721             # For many statements, it can be difficult to determine the end-point.
722             # This method takes a statement and the next significant token, and attempts
723             # to determine if the there is a statement boundary between the two, or if
724             # the statement can continue with the token.
725             sub _continues {
726 24328     24328   40956 my ($self, $Statement, $Token) = @_;
727             # my $self = shift;
728             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
729             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
730              
731             # Handle the simple block case
732             # { print 1; }
733 24328 100 100     57245 if (
734             $Statement->schildren == 1
735             and
736             $Statement->schild(0)->isa('PPI::Structure::Block')
737             ) {
738 53         251 return '';
739             }
740              
741             # Alrighty then, there are six implied-end statement types:
742             # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, ::When,
743             # and ::Package statements.
744 24275 50       60880 return 1
745             if ref $Statement !~ /\b(?:Scheduled|Sub|Compound|Given|When|Package)$/;
746              
747             # Of these six, ::Scheduled, ::Sub, ::Given, and ::When follow the same
748             # simple rule and can be handled first. The block form of ::Package
749             # follows the rule, too. (The non-block form of ::Package
750             # requires a statement terminator, and thus doesn't need to have
751             # an implied end detected.)
752 24275         62783 my @part = $Statement->schildren;
753 24275         39917 my $LastChild = $part[-1];
754             # If the last significant element of the statement is a block,
755             # then an implied-end statement is done, no questions asked.
756 24275 100       127690 return !$LastChild->isa('PPI::Structure::Block')
757             if !$Statement->isa('PPI::Statement::Compound');
758              
759             # Now we get to compound statements, which kind of suck (to lex).
760             # However, of them all, the 'if' type, which includes unless, are
761             # relatively easy to handle compared to the others.
762 5387         16601 my $type = $Statement->type;
763 5387 100       12274 if ( $type eq 'if' ) {
764             # This should be one of the following
765             # if (EXPR) BLOCK
766             # if (EXPR) BLOCK else BLOCK
767             # if (EXPR) BLOCK elsif (EXPR) BLOCK ... else BLOCK
768              
769             # We only implicitly end on a block
770 3376 100       10390 unless ( $LastChild->isa('PPI::Structure::Block') ) {
771             # if (EXPR) ...
772             # if (EXPR) BLOCK else ...
773             # if (EXPR) BLOCK elsif (EXPR) BLOCK ...
774 2335         6953 return 1;
775             }
776              
777             # If the token before the block is an 'else',
778             # it's over, no matter what.
779 1041         3045 my $NextLast = $Statement->schild(-2);
780 1041 50 66     8701 if (
      66        
      66        
781             $NextLast
782             and
783             $NextLast->isa('PPI::Token')
784             and
785             $NextLast->isa('PPI::Token::Word')
786             and
787             $NextLast->content eq 'else'
788             ) {
789 74         403 return '';
790             }
791              
792             # Otherwise, we continue for 'elsif' or 'else' only.
793 967 100 100     4641 if (
      100        
794             $Token->isa('PPI::Token::Word')
795             and (
796             $Token->content eq 'else'
797             or
798             $Token->content eq 'elsif'
799             )
800             ) {
801 300         1223 return 1;
802             }
803              
804 667         2413 return '';
805             }
806              
807 2011 100       4714 if ( $type eq 'label' ) {
808             # We only have the label so far, could be any of
809             # LABEL while (EXPR) BLOCK
810             # LABEL while (EXPR) BLOCK continue BLOCK
811             # LABEL for (EXPR; EXPR; EXPR) BLOCK
812             # LABEL foreach VAR (LIST) BLOCK
813             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
814             # LABEL BLOCK continue BLOCK
815              
816             # Handle cases with a word after the label
817 333 100 100     1987 if (
818             $Token->isa('PPI::Token::Word')
819             and
820             $Token->content =~ /^(?:while|until|for|foreach)$/
821             ) {
822 38         140 return 1;
823             }
824              
825             # Handle labelled blocks
826 295 100 100     1388 if ( $Token->isa('PPI::Token::Structure') && $Token->content eq '{' ) {
827 210         788 return 1;
828             }
829              
830 85         306 return '';
831             }
832              
833             # Handle the common "after round braces" case
834 1678 100 100     8120 if ( $LastChild->isa('PPI::Structure') and $LastChild->braces eq '()' ) {
835             # LABEL while (EXPR) ...
836             # LABEL while (EXPR) ...
837             # LABEL for (EXPR; EXPR; EXPR) ...
838             # LABEL for VAR (LIST) ...
839             # LABEL foreach VAR (LIST) ...
840             # Only a block will do
841 371   33     2007 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
842             }
843              
844 1307 100       2984 if ( $type eq 'for' ) {
845             # LABEL for (EXPR; EXPR; EXPR) BLOCK
846 140 100 66     700 if (
    50          
    0          
847             $LastChild->isa('PPI::Token::Word')
848             and
849             $LastChild->content =~ /^for(?:each)?\z/
850             ) {
851             # LABEL for ...
852 127 100 66     1025 if (
      100        
853             (
854             $Token->isa('PPI::Token::Structure')
855             and
856             $Token->content eq '('
857             )
858             or
859             $Token->isa('PPI::Token::QuoteLike::Words')
860             ) {
861 21         84 return 1;
862             }
863              
864 106 50       338 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
865             # LABEL for VAR QW{} ...
866             # LABEL foreach VAR QW{} ...
867             # Only a block will do
868 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
869             }
870              
871             # In this case, we can also behave like a foreach
872 106         186 $type = 'foreach';
873              
874             } elsif ( $LastChild->isa('PPI::Structure::Block') ) {
875             # LABEL for (EXPR; EXPR; EXPR) BLOCK
876             # That's it, nothing can continue
877 13         46 return '';
878              
879             } elsif ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
880             # LABEL for VAR QW{} ...
881             # LABEL foreach VAR QW{} ...
882             # Only a block will do
883 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
884             }
885             }
886              
887             # Handle the common continue case
888 1273 100 100     5227 if ( $LastChild->isa('PPI::Token::Word') and $LastChild->content eq 'continue' ) {
889             # LABEL while (EXPR) BLOCK continue ...
890             # LABEL foreach VAR (LIST) BLOCK continue ...
891             # LABEL BLOCK continue ...
892             # Only a block will do
893 6   33     85 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
894             }
895              
896             # Handle the common continuable block case
897 1267 100       3961 if ( $LastChild->isa('PPI::Structure::Block') ) {
898             # LABEL while (EXPR) BLOCK
899             # LABEL while (EXPR) BLOCK ...
900             # LABEL for (EXPR; EXPR; EXPR) BLOCK
901             # LABEL foreach VAR (LIST) BLOCK
902             # LABEL foreach VAR (LIST) BLOCK ...
903             # LABEL BLOCK ...
904             # Is this the block for a continue?
905 422 100 66     3453 if ( _INSTANCE($part[-2], 'PPI::Token::Word') and $part[-2]->content eq 'continue' ) {
906             # LABEL while (EXPR) BLOCK continue BLOCK
907             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
908             # LABEL BLOCK continue BLOCK
909             # That's it, nothing can continue this
910 6         24 return '';
911             }
912              
913             # Only a continue will do
914 416   100     2531 return $Token->isa('PPI::Token::Word') && $Token->content eq 'continue';
915             }
916              
917 845 50       1784 if ( $type eq 'block' ) {
918             # LABEL BLOCK continue BLOCK
919             # Every possible case is covered in the common cases above
920             }
921              
922 845 100       1740 if ( $type eq 'while' ) {
923             # LABEL while (EXPR) BLOCK
924             # LABEL while (EXPR) BLOCK continue BLOCK
925             # LABEL until (EXPR) BLOCK
926             # LABEL until (EXPR) BLOCK continue BLOCK
927             # The only case not covered is the while ...
928 149 50 66     755 if (
      66        
929             $LastChild->isa('PPI::Token::Word')
930             and (
931             $LastChild->content eq 'while'
932             or
933             $LastChild->content eq 'until'
934             )
935             ) {
936             # LABEL while ...
937             # LABEL until ...
938             # Only a condition structure will do
939 149   33     742 return $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
940             }
941             }
942              
943 696 50       1442 if ( $type eq 'foreach' ) {
944             # LABEL foreach VAR (LIST) BLOCK
945             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
946             # The only two cases that have not been covered already are
947             # 'foreach ...' and 'foreach VAR ...'
948              
949 696 100       2123 if ( $LastChild->isa('PPI::Token::Symbol') ) {
950             # LABEL foreach my $scalar ...
951             # Open round brace, or a quotewords
952 206 100 66     1408 return 1 if $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
953 16 50       99 return 1 if $Token->isa('PPI::Token::QuoteLike::Words');
954 0         0 return '';
955             }
956              
957 490 100 100     1115 if ( $LastChild->content eq 'foreach' or $LastChild->content eq 'for' ) {
958             # There are three possibilities here
959 277 100 100     1380 if (
    100 100        
    100 66        
    100          
960             $Token->isa('PPI::Token::Word')
961             and (
962             ($STATEMENT_CLASSES{ $Token->content } || '')
963             eq
964             'PPI::Statement::Variable'
965             )
966             ) {
967             # VAR == 'my ...'
968 192         715 return 1;
969             } elsif ( $Token->content =~ /^\$/ ) {
970             # VAR == '$scalar'
971 34         125 return 1;
972             } elsif ( $Token->isa('PPI::Token::Structure') and $Token->content eq '(' ) {
973 42         181 return 1;
974             } elsif ( $Token->isa('PPI::Token::QuoteLike::Words') ) {
975 6         25 return 1;
976             } else {
977 3         11 return '';
978             }
979             }
980              
981 213 100 100     742 if (
982             ($STATEMENT_CLASSES{ $LastChild->content } || '')
983             eq
984             'PPI::Statement::Variable'
985             ) {
986             # LABEL foreach my ...
987             # Only a scalar will do
988 188         483 return $Token->content =~ /^\$/;
989             }
990              
991             # Handle the rare for my $foo qw{bar} ... case
992 25 50       83 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
993             # LABEL for VAR QW ...
994             # LABEL foreach VAR QW ...
995             # Only a block will do
996 25   33     122 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
997             }
998             }
999              
1000             # Something we don't know about... what could it be
1001 0         0 PPI::Exception->throw("Illegal state in '$type' compound statement");
1002             }
1003              
1004              
1005              
1006              
1007              
1008             #####################################################################
1009             # Lex Methods - Structure Object
1010              
1011             # Given a parent element, and a ( token to open a structure, determine
1012             # the class that the structure should be.
1013             sub _round {
1014 7834     7834   14830 my ($self, $Parent) = @_;
1015             # my $self = shift;
1016             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1017              
1018             # Get the last significant element in the parent
1019 7834         22530 my $Element = $Parent->schild(-1);
1020 7834 100       47107 if ( _INSTANCE($Element, 'PPI::Token::Word') ) {
1021             # Can it be determined because it is a keyword?
1022 6230         15281 my $rclass = $ROUND{$Element->content};
1023 6230 100       17721 return $rclass if $rclass;
1024             }
1025              
1026             # If we are part of a for or foreach statement, we are a ForLoop
1027 6523 100       46039 if ( $Parent->isa('PPI::Statement::Compound') ) {
    100          
    100          
1028 190 50       663 if ( $Parent->type =~ /^for(?:each)?$/ ) {
1029 190         912 return 'PPI::Structure::For';
1030             }
1031             } elsif ( $Parent->isa('PPI::Statement::Given') ) {
1032 3         37 return 'PPI::Structure::Given';
1033             } elsif ( $Parent->isa('PPI::Statement::When') ) {
1034 3         29 return 'PPI::Structure::When';
1035             }
1036              
1037             # Otherwise, it must be a list
1038              
1039             # If the previous element is -> then we mark it as a dereference
1040 6327 100 100     33578 if ( _INSTANCE($Element, 'PPI::Token::Operator') and $Element->content eq '->' ) {
1041 6         23 $Element->{_dereference} = 1;
1042             }
1043              
1044             'PPI::Structure::List'
1045 6327         23237 }
1046              
1047             # Given a parent element, and a [ token to open a structure, determine
1048             # the class that the structure should be.
1049             sub _square {
1050 2974     2974   6211 my ($self, $Parent) = @_;
1051             # my $self = shift;
1052             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1053              
1054             # Get the last significant element in the parent
1055 2974         8160 my $Element = $Parent->schild(-1);
1056              
1057             # Is this a subscript, like $foo[1] or $foo{expr}
1058            
1059 2974 100       9342 if ( $Element ) {
1060 2729 100 100     11823 if ( $Element->isa('PPI::Token::Operator') and $Element->content eq '->' ) {
1061             # $foo->[]
1062 400         970 $Element->{_dereference} = 1;
1063 400         1650 return 'PPI::Structure::Subscript';
1064             }
1065 2329 100       8403 if ( $Element->isa('PPI::Structure::Subscript') ) {
1066             # $foo{}[]
1067 25         102 return 'PPI::Structure::Subscript';
1068             }
1069 2304 100 100     9146 if ( $Element->isa('PPI::Token::Symbol') and $Element->content =~ /^(?:\$|\@)/ ) {
1070             # $foo[], @foo[]
1071 745         3051 return 'PPI::Structure::Subscript';
1072             }
1073 1559 100 100     6162 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%)/ ) {
1074 49         164 my $prior = $Parent->schild(-2);
1075 49 100 100     324 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1076             # Postfix dereference: ->@[...] ->%[...]
1077 2         13 return 'PPI::Structure::Subscript';
1078             }
1079             }
1080             # FIXME - More cases to catch
1081             }
1082              
1083             # Otherwise, we assume that it's an anonymous arrayref constructor
1084 1802         6643 'PPI::Structure::Constructor';
1085             }
1086              
1087             # Keyword -> Structure class maps
1088             my %CURLY_CLASSES = (
1089             # Blocks
1090             'sub' => 'PPI::Structure::Block',
1091             'grep' => 'PPI::Structure::Block',
1092             'map' => 'PPI::Structure::Block',
1093             'sort' => 'PPI::Structure::Block',
1094             'do' => 'PPI::Structure::Block',
1095             # rely on 'continue' + block being handled elsewhere
1096             # rely on 'eval' + block being handled elsewhere
1097              
1098             # Hash constructors
1099             'scalar' => 'PPI::Structure::Constructor',
1100             '=' => 'PPI::Structure::Constructor',
1101             '||=' => 'PPI::Structure::Constructor',
1102             '&&=' => 'PPI::Structure::Constructor',
1103             '//=' => 'PPI::Structure::Constructor',
1104             '||' => 'PPI::Structure::Constructor',
1105             '&&' => 'PPI::Structure::Constructor',
1106             '//' => 'PPI::Structure::Constructor',
1107             '?' => 'PPI::Structure::Constructor',
1108             ':' => 'PPI::Structure::Constructor',
1109             ',' => 'PPI::Structure::Constructor',
1110             '=>' => 'PPI::Structure::Constructor',
1111             '+' => 'PPI::Structure::Constructor', # per perlref
1112             'return' => 'PPI::Structure::Constructor', # per perlref
1113             'bless' => 'PPI::Structure::Constructor', # pragmatic --
1114             # perlfunc says first arg is a reference, and
1115             # bless {; ... } fails to compile.
1116             );
1117              
1118             my @CURLY_LOOKAHEAD_CLASSES = (
1119             {}, # not used
1120             {
1121             ';' => 'PPI::Structure::Block', # per perlref
1122             '}' => 'PPI::Structure::Constructor',
1123             },
1124             {
1125             '=>' => 'PPI::Structure::Constructor',
1126             },
1127             );
1128              
1129              
1130             # Given a parent element, and a { token to open a structure, determine
1131             # the class that the structure should be.
1132             sub _curly {
1133 11062     11062   21046 my ($self, $Parent) = @_;
1134             # my $self = shift;
1135             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1136              
1137             # Get the last significant element in the parent
1138 11062         26840 my $Element = $Parent->schild(-1);
1139 11062 100       40692 my $content = $Element ? $Element->content : '';
1140              
1141             # Is this a subscript, like $foo[1] or $foo{expr}
1142 11062 100       28848 if ( $Element ) {
1143 10410 100 66     30607 if ( $content eq '->' and $Element->isa('PPI::Token::Operator') ) {
1144             # $foo->{}
1145 2058         4774 $Element->{_dereference} = 1;
1146 2058         8143 return 'PPI::Structure::Subscript';
1147             }
1148 8352 100       29311 if ( $Element->isa('PPI::Structure::Subscript') ) {
1149             # $foo[]{}
1150 79         290 return 'PPI::Structure::Subscript';
1151             }
1152 8273 100 100     32611 if ( $content =~ /^(?:\$|\@)/ and $Element->isa('PPI::Token::Symbol') ) {
1153             # $foo{}, @foo{}
1154 541         2400 return 'PPI::Structure::Subscript';
1155             }
1156 7732 100 100     30437 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%|\*)/ ) {
1157 297         872 my $prior = $Parent->schild(-2);
1158 297 100 100     2383 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1159             # Postfix dereference: ->@{...} ->%{...} ->*{...}
1160 3         13 return 'PPI::Structure::Subscript';
1161             }
1162             }
1163 7729 100       23024 if ( $Element->isa('PPI::Structure::Block') ) {
1164             # deference - ${$hash_ref}{foo}
1165             # or even ${burfle}{foo}
1166             # hash slice - @{$hash_ref}{'foo', 'bar'}
1167 4 50       30 if ( my $prior = $Parent->schild(-2) ) {
1168 4         26 my $prior_content = $prior->content();
1169 4 50 66     36 $prior->isa( 'PPI::Token::Cast' )
      66        
1170             and ( $prior_content eq '@' ||
1171             $prior_content eq '$' )
1172             and return 'PPI::Structure::Subscript';
1173             }
1174             }
1175              
1176             # Are we the last argument of sub?
1177             # E.g.: 'sub foo {}', 'sub foo ($) {}'
1178 7727 100       28830 return 'PPI::Structure::Block' if $Parent->isa('PPI::Statement::Sub');
1179              
1180             # Are we the second or third argument of package?
1181             # E.g.: 'package Foo {}' or 'package Foo v1.2.3 {}'
1182 5458 100       20164 return 'PPI::Structure::Block'
1183             if $Parent->isa('PPI::Statement::Package');
1184              
1185 4165 100       11915 if ( $CURLY_CLASSES{$content} ) {
1186             # Known type
1187 829         3751 return $CURLY_CLASSES{$content};
1188             }
1189             }
1190              
1191             # Are we in a compound statement
1192 3988 100       12274 if ( $Parent->isa('PPI::Statement::Compound') ) {
1193             # We will only encounter blocks in compound statements
1194 1854         6309 return 'PPI::Structure::Block';
1195             }
1196              
1197             # Are we the second or third argument of use
1198 2134 100       7417 if ( $Parent->isa('PPI::Statement::Include') ) {
1199 53 50 33     187 if ( $Parent->schildren == 2 ||
      66        
1200             $Parent->schildren == 3 &&
1201             $Parent->schild(2)->isa('PPI::Token::Number')
1202             ) {
1203             # This is something like use constant { ... };
1204 53         229 return 'PPI::Structure::Constructor';
1205             }
1206             }
1207              
1208             # Unless we are at the start of the statement, everything else should be a block
1209             ### FIXME This is possibly a bad choice, but will have to do for now.
1210 2081 100       8286 return 'PPI::Structure::Block' if $Element;
1211              
1212 652 100 66     3367 if (
1213             $Parent->isa('PPI::Statement')
1214             and
1215             _INSTANCE($Parent->parent, 'PPI::Structure::List')
1216             ) {
1217 161         490 my $function = $Parent->parent->parent->schild(-2);
1218              
1219             # Special case: Are we the param of a core function
1220             # i.e. map({ $_ => 1 } @foo)
1221 161 100 100     813 return 'PPI::Structure::Block'
1222             if $function and $function->content =~ /^(?:map|grep|sort|eval|do)$/;
1223              
1224             # If not part of a block print, list-embedded curlies are most likely constructors
1225 67 100 100     374 return 'PPI::Structure::Constructor'
1226             if not $function or $function->content !~ /^(?:print|say)$/;
1227             }
1228              
1229             # We need to scan ahead.
1230 497         1318 my $Next;
1231 497         775 my $position = 0;
1232 497         843 my @delayed;
1233 497         1125 while ( $Next = $self->_get_token ) {
1234 1242 100       3750 unless ( $Next->significant ) {
1235 207         414 push @delayed, $Next;
1236 207         433 next;
1237             }
1238              
1239             # If we are off the end of the lookahead array,
1240 1035 100       3352 if ( ++$position >= @CURLY_LOOKAHEAD_CLASSES ) {
    100          
1241             # default to block.
1242 144         761 $self->_buffer( splice(@delayed), $Next );
1243 144         312 last;
1244             # If the content at this position is known
1245             } elsif ( my $class = $CURLY_LOOKAHEAD_CLASSES[$position]
1246             {$Next->content} ) {
1247             # return the associated class.
1248 269         834 $self->_buffer( splice(@delayed), $Next );
1249 269         1233 return $class;
1250             }
1251              
1252             # Delay and continue
1253 622         1521 push @delayed, $Next;
1254             }
1255              
1256             # Hit the end of the document, or bailed out, go with block
1257 228         721 $self->_buffer( splice(@delayed) );
1258 228 50       829 if ( ref $Parent eq 'PPI::Statement' ) {
1259 228         505 bless $Parent, 'PPI::Statement::Compound';
1260             }
1261 228         1012 return 'PPI::Structure::Block';
1262             }
1263              
1264              
1265             sub _lex_structure {
1266 21870     21870   36503 my ($self, $Structure) = @_;
1267             # my $self = shift;
1268             # my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1";
1269              
1270             # Start the processing loop
1271 21870         30477 my $Token;
1272 21870         38874 while ( ref($Token = $self->_get_token) ) {
1273             # Is this a direct type token
1274 87491 100       212047 unless ( $Token->significant ) {
1275 41426         54269 push @{$self->{delayed}}, $Token;
  41426         74220  
1276             # $self->_delay_element( $Token );
1277 41426         75618 next;
1278             }
1279              
1280             # Anything other than a Structure starts a Statement
1281 46065 100       145789 unless ( $Token->isa('PPI::Token::Structure') ) {
1282             # Because _statement may well delay and rollback itself,
1283             # we need to add the delayed tokens early
1284 25454         59868 $self->_add_delayed( $Structure );
1285              
1286             # Determine the class for the Statement and create it
1287 25454         58807 my $Statement = $self->_statement($Structure, $Token)->new($Token);
1288              
1289             # Move the lexing down into the Statement
1290 25454         69103 $self->_add_element( $Structure, $Statement );
1291 25454         69533 $self->_lex_statement( $Statement );
1292              
1293 25454         58376 next;
1294             }
1295              
1296             # Is this the opening of another structure directly inside us?
1297 20611 100       47309 if ( $Token->__LEXER__opens ) {
1298             # Rollback the Token, and recurse into the statement
1299 448         1689 $self->_rollback( $Token );
1300 448         1624 my $Statement = PPI::Statement->new;
1301 448         1616 $self->_add_element( $Structure, $Statement );
1302 448         1641 $self->_lex_statement( $Statement );
1303 448         1595 next;
1304             }
1305              
1306             # Is this the close of a structure ( which would be an error )
1307 20163 100       46829 if ( $Token->__LEXER__closes ) {
1308             # Is this OUR closing structure
1309 20111 100       46281 if ( $Token->content eq $Structure->start->__LEXER__opposite ) {
1310             # Add any delayed tokens, and the finishing token (the ugly way)
1311 19371         49412 $self->_add_delayed( $Structure );
1312 19371         36742 $Structure->{finish} = $Token;
1313             Scalar::Util::weaken(
1314 19371         98130 $_PARENT{Scalar::Util::refaddr $Token} = $Structure
1315             );
1316              
1317             # Confirm that ForLoop structures are actually so, and
1318             # aren't really a list.
1319 19371 100       64216 if ( $Structure->isa('PPI::Structure::For') ) {
1320 228 100       1224 if ( 2 > scalar grep {
1321 582         2393 $_->isa('PPI::Statement')
1322             } $Structure->children ) {
1323 207         650 bless($Structure, 'PPI::Structure::List');
1324             }
1325             }
1326 19371         58643 return 1;
1327             }
1328              
1329             # Unmatched closing brace.
1330             # Either they typed the wrong thing, or haven't put
1331             # one at all. Either way it's an error we need to
1332             # somehow handle gracefully. For now, we'll treat it
1333             # as implicitly ending the structure. This causes the
1334             # least damage across the various reasons why this
1335             # might have happened.
1336 740         1896 return $self->_rollback( $Token );
1337             }
1338              
1339             # It's a semi-colon on its own, just inside the block.
1340             # This is a null statement.
1341             $self->_add_element(
1342 52         409 $Structure,
1343             PPI::Statement::Null->new($Token),
1344             );
1345             }
1346              
1347             # Is this an error
1348 1759 50       3851 unless ( defined $Token ) {
1349 0         0 PPI::Exception->throw;
1350             }
1351              
1352             # No, it's just the end of file.
1353             # Add any insignificant trailing tokens.
1354 1759         3705 $self->_add_delayed( $Structure );
1355             }
1356              
1357              
1358              
1359              
1360              
1361             #####################################################################
1362             # Support Methods
1363              
1364             # Get the next token for processing, handling buffering
1365             sub _get_token {
1366 459124 100   459124   567576 shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token;
  459124         1593760  
1367             }
1368              
1369             # Old long version of the above
1370             # my $self = shift;
1371             # # First from the buffer
1372             # if ( @{$self->{buffer}} ) {
1373             # return shift @{$self->{buffer}};
1374             # }
1375             #
1376             # # Then from the Tokenizer
1377             # $self->{Tokenizer}->get_token;
1378             # }
1379              
1380             # Delay the addition of insignificant elements.
1381             # This ended up being inlined.
1382             # sub _delay_element {
1383             # my $self = shift;
1384             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 1";
1385             # push @{ $_[0]->{delayed} }, $_[1];
1386             # }
1387              
1388             # Add an Element to a Node, including any delayed Elements
1389             sub _add_element {
1390 222406     222406   356323 my ($self, $Parent, $Element) = @_;
1391             # my $self = shift;
1392             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1393             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 2";
1394              
1395             # Handle a special case, where a statement is not fully resolved
1396 222406 100 100     554846 if ( ref $Parent eq 'PPI::Statement'
1397             and my $first = $Parent->schild(0) ) {
1398 66488 50 33     223467 if ( $first->isa('PPI::Token::Label')
1399             and !(my $second = $Parent->schild(1)) ) {
1400 0         0 my $new_class = $STATEMENT_CLASSES{$second->content};
1401             # It's a labelled statement
1402 0 0       0 bless $Parent, $new_class if $new_class;
1403             }
1404             }
1405              
1406             # Add first the delayed, from the front, then the passed element
1407 222406         275403 foreach my $el ( @{$self->{delayed}} ) {
  222406         407452  
1408             Scalar::Util::weaken(
1409 59134         295749 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1410             );
1411             # Inlined $Parent->__add_element($el);
1412             }
1413             Scalar::Util::weaken(
1414 222406         1017698 $_PARENT{Scalar::Util::refaddr $Element} = $Parent
1415             );
1416 222406         264870 push @{$Parent->{children}}, @{$self->{delayed}}, $Element;
  222406         339323  
  222406         414986  
1417              
1418             # Clear the delayed elements
1419 222406         423297 $self->{delayed} = [];
1420             }
1421              
1422             # Specifically just add any delayed tokens, if any.
1423             sub _add_delayed {
1424 114166     114166   200446 my ($self, $Parent) = @_;
1425             # my $self = shift;
1426             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1427              
1428             # Add any delayed
1429 114166         145710 foreach my $el ( @{$self->{delayed}} ) {
  114166         232718  
1430             Scalar::Util::weaken(
1431 52190         265117 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1432             );
1433             # Inlined $Parent->__add_element($el);
1434             }
1435 114166         158883 push @{$Parent->{children}}, @{$self->{delayed}};
  114166         178936  
  114166         198207  
1436              
1437             # Clear the delayed elements
1438 114166         230852 $self->{delayed} = [];
1439             }
1440              
1441             # Rollback the delayed tokens, plus any passed. Once all the tokens
1442             # have been moved back on to the buffer, the order should be.
1443             # <--- @{$self->{delayed}}, @_, @{$self->{buffer}} <----
1444             sub _rollback {
1445 52468     52468   78207 my $self = shift;
1446              
1447             # First, put any passed objects back
1448 52468 100       106213 if ( @_ ) {
1449 41188         57016 unshift @{$self->{buffer}}, splice @_;
  41188         98283  
1450             }
1451              
1452             # Then, put back anything delayed
1453 52468 100       70213 if ( @{$self->{delayed}} ) {
  52468         108911  
1454 28440         37444 unshift @{$self->{buffer}}, splice @{$self->{delayed}};
  28440         42215  
  28440         51528  
1455             }
1456              
1457 52468         93434 1;
1458             }
1459              
1460             # Partial rollback, just return a single list to the buffer
1461             sub _buffer {
1462 641     641   922 my $self = shift;
1463              
1464             # Put any passed objects back
1465 641 100       1337 if ( @_ ) {
1466 485         682 unshift @{$self->{buffer}}, splice @_;
  485         1308  
1467             }
1468              
1469 641         1009 1;
1470             }
1471              
1472              
1473              
1474              
1475              
1476             #####################################################################
1477             # Error Handling
1478              
1479             # Set the error message
1480             sub _error {
1481 2     2   6 $errstr = $_[1];
1482 2         11 undef;
1483             }
1484              
1485             # Clear the error message.
1486             # Returns the object as a convenience.
1487             sub _clear {
1488 16768     16768   28033 $errstr = '';
1489 16768         30378 $_[0];
1490             }
1491              
1492             =pod
1493              
1494             =head2 errstr
1495              
1496             For any error that occurs, you can use the C, as either
1497             a static or object method, to access the error message.
1498              
1499             If no error occurs for any particular action, C will return false.
1500              
1501             =cut
1502              
1503             sub errstr {
1504 2     2 1 26 $errstr;
1505             }
1506              
1507              
1508              
1509              
1510              
1511             #####################################################################
1512             # PDOM Extensions
1513             #
1514             # This is something of a future expansion... ignore it for now :)
1515             #
1516             # use PPI::Statement::Sub ();
1517             #
1518             # sub PPI::Statement::Sub::__LEXER__normal { '' }
1519              
1520             1;
1521              
1522             =pod
1523              
1524             =head1 TO DO
1525              
1526             - Add optional support for some of the more common source filters
1527              
1528             - Some additional checks for blessing things into various Statement
1529             and Structure subclasses.
1530              
1531             =head1 SUPPORT
1532              
1533             See the L in the main module.
1534              
1535             =head1 AUTHOR
1536              
1537             Adam Kennedy Eadamk@cpan.orgE
1538              
1539             =head1 COPYRIGHT
1540              
1541             Copyright 2001 - 2011 Adam Kennedy.
1542              
1543             This program is free software; you can redistribute
1544             it and/or modify it under the same terms as Perl itself.
1545              
1546             The full text of the license can be found in the
1547             LICENSE file included with this module.
1548              
1549             =cut