File Coverage

blib/lib/PPI/Lexer.pm
Criterion Covered Total %
statement 441 462 95.4
branch 260 296 87.8
condition 158 206 76.7
subroutine 30 30 100.0
pod 5 6 83.3
total 894 1000 89.4


line stmt bran cond sub pod time code
1             package PPI::Lexer;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Lexer - The PPI Lexer
8              
9             =head1 SYNOPSIS
10              
11             use PPI;
12            
13             # Create a new Lexer
14             my $Lexer = PPI::Lexer->new;
15            
16             # Build a PPI::Document object from a Token stream
17             my $Tokenizer = PPI::Tokenizer->load('My/Module.pm');
18             my $Document = $Lexer->lex_tokenizer($Tokenizer);
19            
20             # Build a PPI::Document object for some raw source
21             my $source = "print 'Hello World!'; kill(Humans->all);";
22             $Document = $Lexer->lex_source($source);
23            
24             # Build a PPI::Document object for a particular file name
25             $Document = $Lexer->lex_file('My/Module.pm');
26              
27             =head1 DESCRIPTION
28              
29             The is the L Lexer. In the larger scheme of things, its job is to take
30             token streams, in a variety of forms, and "lex" them into nested structures.
31              
32             Pretty much everything in this module happens behind the scenes at this
33             point. In fact, at the moment you don't really need to instantiate the lexer
34             at all, the three main methods will auto-instantiate themselves a
35             C object as needed.
36              
37             All methods do a one-shot "lex this and give me a L object".
38              
39             In fact, if you are reading this, what you B want to do is to
40             just "load a document", in which case you can do this in a much more
41             direct and concise manner with one of the following.
42              
43             use PPI;
44            
45             $Document = PPI::Document->load( $filename );
46             $Document = PPI::Document->new( $string );
47              
48             See L for more details.
49              
50             For more unusual tasks, by all means forge onwards.
51              
52             =head1 METHODS
53              
54             =cut
55              
56 68     68   2569 use strict;
  68         1068  
  68         2455  
57 68     68   258 use Scalar::Util ();
  68         84  
  68         1445  
58 68     68   201 use Params::Util qw{_STRING _INSTANCE};
  68         82  
  68         2897  
59 68     68   283 use PPI ();
  68         85  
  68         841  
60 68     68   218 use PPI::Exception ();
  68         93  
  68         1123  
61 68     68   316 use PPI::Singletons '%_PARENT';
  68         88  
  68         307870  
62              
63             our $VERSION = '1.290';
64              
65             our $errstr = "";
66              
67             # Keyword -> Structure class maps
68             my %ROUND = (
69             # Conditions
70             'if' => 'PPI::Structure::Condition',
71             'elsif' => 'PPI::Structure::Condition',
72             'unless' => 'PPI::Structure::Condition',
73             'while' => 'PPI::Structure::Condition',
74             'until' => 'PPI::Structure::Condition',
75              
76             # For(each)
77             'for' => 'PPI::Structure::For',
78             'foreach' => 'PPI::Structure::For',
79             );
80              
81             # Opening brace to refining method
82             my %RESOLVE = (
83             '(' => '_round',
84             '[' => '_square',
85             '{' => '_curly',
86             );
87              
88             # Allows for experimental overriding of the tokenizer
89             our $X_TOKENIZER = "PPI::Tokenizer";
90 16846     16846 0 65730 sub X_TOKENIZER { $X_TOKENIZER }
91              
92              
93              
94              
95              
96             #####################################################################
97             # Constructor
98              
99             =pod
100              
101             =head2 new
102              
103             The C constructor creates a new C object. The object itself
104             is merely used to hold various buffers and state data during the lexing
105             process, and holds no significant data between -Elex_xxxxx calls.
106              
107             Returns a new C object
108              
109             =cut
110              
111             sub new {
112 16846     16846 1 39900 my $class = shift->_clear;
113 16846         86573 bless {
114             Tokenizer => undef, # Where we store the tokenizer for a run
115             buffer => [], # The input token buffer
116             delayed => [], # The "delayed insignificant tokens" buffer
117             features_stack => [], # Stack of features in scope
118             }, $class;
119             }
120              
121              
122              
123              
124              
125             #####################################################################
126             # Main Lexing Methods
127              
128             =pod
129              
130             =head2 lex_file $filename
131              
132             The C method takes a filename as argument. It then loads the file,
133             creates a L for the content and lexes the token stream
134             produced by the tokenizer. Basically, a sort of all-in-one method for
135             getting a L object from a file name.
136              
137             Additional arguments are passed to the tokenizer as a hash.
138              
139             Returns a L object, or C on error.
140              
141             =cut
142              
143 521     521 1 1773 sub lex_file { shift->_lex_input(@_) }
144              
145             =pod
146              
147             =head2 lex_source $string
148              
149             The C method takes a normal scalar string as argument. It
150             creates a L object for the string, and then lexes the
151             resulting token stream.
152              
153             Additional arguments are passed to the tokenizer as a hash.
154              
155             Returns a L object, or C on error.
156              
157             =cut
158              
159 16325     16325 1 975048 sub lex_source { shift->_lex_input( \shift, @_ ) }
160              
161             sub _lex_input {
162 16846     16846   32312 my ( $self, $input, %args ) = @_;
163 16846 100       48673 $self = ref $self ? $self : $self->new;
164              
165             # Create the Tokenizer
166 16846         25778 my $Tokenizer = eval { X_TOKENIZER->new($input) };
  16846         30753  
167             return #
168 16846 50       129681 $@
    50          
    100          
169             ? $self->_error( _INSTANCE( $@, 'PPI::Exception' ) ? $@->message : $@ )
170             : !_INSTANCE( $Tokenizer, 'PPI::Tokenizer' )
171             ? $self->_error($Tokenizer)
172             : $self->lex_tokenizer( $Tokenizer, %args );
173             }
174              
175             =pod
176              
177             =head2 lex_tokenizer $Tokenizer
178              
179             The C takes as argument a L object. It
180             lexes the token stream from the tokenizer into a L object.
181              
182             Additional arguments are set on the L produced.
183              
184             Returns a L object, or C on error.
185              
186             =cut
187              
188             sub lex_tokenizer {
189 16844 50   16844 1 33334 my $self = ref $_[0] ? shift : shift->new;
190 16844         40815 my $Tokenizer = _INSTANCE(shift, 'PPI::Tokenizer');
191 16844 50       32549 return $self->_error(
192             "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer"
193             ) unless $Tokenizer;
194 16844         26603 my %args = @_;
195              
196             # Create the empty document
197 16844         42610 my $Document = PPI::Document->new;
198 16844         54823 ref($Document)->_setattr( $Document, %args );
199 16844         41177 $Tokenizer->_document($Document);
200 16844 100       32941 if (my $feat = $Document->feature_mods) {
201 18         17 push @{$self->{features_stack}}, $feat;
  18         35  
202 18         50 $Tokenizer->_features($feat);
203             }
204              
205             # Lex the token stream into the document
206 16844         26254 $self->{Tokenizer} = $Tokenizer;
207 16844 100       20020 if ( !eval { $self->_lex_document($Document); 1 } ) {
  16844         34555  
  16843         28893  
208             # If an error occurs DESTROY the partially built document.
209 1         3 $Tokenizer->_document(undef);
210 1         3 undef $Document;
211 1 50       5 if ( _INSTANCE($@, 'PPI::Exception') ) {
212 1         4 return $self->_error( $@->message );
213             } else {
214 0         0 return $self->_error( $errstr );
215             }
216             }
217              
218 16843         146493 return $Document;
219             }
220              
221              
222              
223              
224              
225             #####################################################################
226             # Lex Methods - Document Object
227              
228             sub _lex_document {
229 16844     16844   26117 my ($self, $Document) = @_;
230             # my $self = shift;
231             # my $Document = _INSTANCE(shift, 'PPI::Document') or return undef;
232              
233             # Start the processing loop
234 16844         18166 my $Token;
235 16844         37284 while ( ref($Token = $self->_get_token) ) {
236             # Add insignificant tokens directly beneath us
237 53876 100       106194 unless ( $Token->significant ) {
238 21205         38217 $self->_add_element( $Document, $Token );
239 21205         28793 next;
240             }
241              
242 32671 100       73302 if ( $Token->content eq ';' ) {
243             # It's a semi-colon on its own.
244             # We call this a null statement.
245 464         1783 $self->_add_element(
246             $Document,
247             PPI::Statement::Null->new($Token),
248             );
249 464         951 next;
250             }
251              
252             # Handle anything other than a structural element
253 32207 100       60775 unless ( ref $Token eq 'PPI::Token::Structure' ) {
254             # Determine the class for the Statement, and create it
255 29202         65959 my $Statement = $self->_statement($Document, $Token)->new($Token);
256              
257             # Move the lexing down into the statement
258 29202         66939 $self->_add_delayed( $Document );
259 29202         58066 $self->_add_element( $Document, $Statement );
260 29202         62673 $self->_lex_statement( $Statement );
261              
262 29202         55018 next;
263             }
264              
265             # Is this the opening of a structure?
266 3005 100       5519 if ( $Token->__LEXER__opens ) {
267             # This should actually have a Statement instead
268 940         2388 $self->_rollback( $Token );
269 940         3146 my $Statement = PPI::Statement->new;
270 940         2368 $self->_add_element( $Document, $Statement );
271 940         2132 $self->_lex_statement( $Statement );
272 940         1738 next;
273             }
274              
275             # Is this the close of a structure.
276 2065 50       3546 if ( $Token->__LEXER__closes ) {
277             # Because we are at the top of the tree, this is an error.
278             # This means either a mis-parsing, or a mistake in the code.
279             # To handle this, we create a "Naked Close" statement
280 2065         5858 $self->_add_element( $Document,
281             PPI::Statement::UnmatchedBrace->new($Token)
282             );
283 2065         3579 next;
284             }
285              
286             # Shouldn't be able to get here
287 0         0 PPI::Exception->throw('Lexer reached an illegal state');
288             }
289              
290             # Did we leave the main loop because of a Tokenizer error?
291 16843 50       26674 unless ( defined $Token ) {
292 0 0       0 my $errstr = $self->{Tokenizer} ? $self->{Tokenizer}->errstr : '';
293 0   0     0 $errstr ||= 'Unknown Tokenizer Error';
294 0         0 PPI::Exception->throw($errstr);
295             }
296              
297             # No error, it's just the end of file.
298             # Add any insignificant trailing tokens.
299 16843         29797 $self->_add_delayed( $Document );
300              
301             # If the Tokenizer has any v6 blocks to attach, do so now.
302             # Checking once at the end is faster than adding a special
303             # case check for every statement parsed.
304 16843         24899 my $perl6 = $self->{Tokenizer}->{'perl6'};
305 16843 100       27006 if ( @$perl6 ) {
306 2         18 my $includes = $Document->find( 'PPI::Statement::Include::Perl6' );
307 2         8 foreach my $include ( @$includes ) {
308 2 50       9 unless ( @$perl6 ) {
309 0         0 PPI::Exception->throw('Failed to find a perl6 section');
310             }
311 2         10 $include->{perl6} = shift @$perl6;
312             }
313             }
314              
315 16843         23396 return 1;
316             }
317              
318              
319              
320              
321              
322             #####################################################################
323             # Lex Methods - Statement Object
324              
325             # Keyword -> Statement Subclass
326             my %STATEMENT_CLASSES = (
327             # Things that affect the timing of execution
328             'BEGIN' => 'PPI::Statement::Scheduled',
329             'CHECK' => 'PPI::Statement::Scheduled',
330             'UNITCHECK' => 'PPI::Statement::Scheduled',
331             'INIT' => 'PPI::Statement::Scheduled',
332             'END' => 'PPI::Statement::Scheduled',
333              
334             # Special subroutines for which 'sub' is optional
335             'AUTOLOAD' => 'PPI::Statement::Sub',
336             'DESTROY' => 'PPI::Statement::Sub',
337              
338             # Loading and context statement
339             'package' => 'PPI::Statement::Package',
340             # 'use' => 'PPI::Statement::Include',
341             'no' => 'PPI::Statement::Include',
342             'require' => 'PPI::Statement::Include',
343              
344             # Various declarations
345             'my' => 'PPI::Statement::Variable',
346             'local' => 'PPI::Statement::Variable',
347             'our' => 'PPI::Statement::Variable',
348             'state' => 'PPI::Statement::Variable',
349             # Statements starting with 'sub' could be any one of...
350             # 'sub' => 'PPI::Statement::Sub',
351             # 'sub' => 'PPI::Statement::Scheduled',
352             # 'sub' => 'PPI::Statement',
353              
354             # Compound statement
355             'if' => 'PPI::Statement::Compound',
356             'unless' => 'PPI::Statement::Compound',
357             'for' => 'PPI::Statement::Compound',
358             'foreach' => 'PPI::Statement::Compound',
359             'while' => 'PPI::Statement::Compound',
360             'until' => 'PPI::Statement::Compound',
361              
362             # Switch statement
363             'given' => 'PPI::Statement::Given',
364             'when' => 'PPI::Statement::When',
365             'default' => 'PPI::Statement::When',
366              
367             # Various ways of breaking out of scope
368             'redo' => 'PPI::Statement::Break',
369             'next' => 'PPI::Statement::Break',
370             'last' => 'PPI::Statement::Break',
371             'return' => 'PPI::Statement::Break',
372             'goto' => 'PPI::Statement::Break',
373              
374             # Special sections of the file
375             '__DATA__' => 'PPI::Statement::Data',
376             '__END__' => 'PPI::Statement::End',
377             );
378              
379             sub _statement {
380 66534     66534   101481 my ($self, $Parent, $Token) = @_;
381             # my $self = shift;
382             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
383             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
384              
385             # Check for things like ( parent => ... )
386 66534 100 100     333222 if (
387             $Parent->isa('PPI::Structure::List')
388             or
389             $Parent->isa('PPI::Structure::Constructor')
390             ) {
391 11724 100       30991 if ( $Token->isa('PPI::Token::Word') ) {
392             # Is the next significant token a =>
393             # Read ahead to the next significant token
394 1999         2456 my $Next;
395 1999         3252 while ( $Next = $self->_get_token ) {
396 2846 100       5847 unless ( $Next->significant ) {
397 893         1071 push @{$self->{delayed}}, $Next;
  893         1448  
398             # $self->_delay_element( $Next );
399 893         1416 next;
400             }
401              
402             # Got the next token
403 1953 100 100     7034 if (
404             $Next->isa('PPI::Token::Operator')
405             and
406             $Next->content eq '=>'
407             ) {
408             # Is an ordinary expression
409 947         1937 $self->_rollback( $Next );
410 947         3558 return 'PPI::Statement::Expression';
411             } else {
412 1006         1338 last;
413             }
414             }
415              
416             # Rollback and continue
417 1052         2167 $self->_rollback( $Next );
418             }
419             }
420              
421 65587         75426 my $is_lexsub = 0;
422              
423             # Is it a token in our known classes list
424 65587         104978 my $content = $Token->content;
425             my $class =
426             ( $content eq 'try' and ( $self->{features_stack}[-1] || {} )->{try} )
427             ? 'PPI::Statement::Compound'
428 65587 100 100     159972 : $STATEMENT_CLASSES{$content};
429              
430 65587 100       100101 if ( $class ) {
431             # Is the next significant token a =>
432             # Read ahead to the next significant token
433 9918         10548 my $Next;
434 9918         16919 while ( $Next = $self->_get_token ) {
435 19467 100       38751 if ( !$Next->significant ) {
436 9596         10080 push @{$self->{delayed}}, $Next;
  9596         15964  
437 9596         14491 next;
438             }
439              
440             # Scheduled block must be followed by left curly or
441             # semicolon. Otherwise we have something else (e.g.
442             # open( CHECK, ... );
443 9871 100 66     21103 if (
      100        
444             'PPI::Statement::Scheduled' eq $class
445             and not ( $Next->isa( 'PPI::Token::Structure' )
446             and $Next->content =~ m/\A[{;]\z/ ) # }
447             ) {
448 1         2 $class = undef;
449 1         1 last;
450             }
451              
452             # Lexical subroutine
453 9870 100 100     45801 if (
      66        
454             $content =~ /^(?:my|our|state)\z/
455             and $Next->isa( 'PPI::Token::Word' ) and $Next->content eq 'sub'
456             ) {
457             # This should be PPI::Statement::Sub rather than PPI::Statement::Variable
458 7         11 $class = undef;
459 7         9 $is_lexsub = 1;
460 7         9 last;
461             }
462              
463             last if
464 9863 100 100     37195 !$Next->isa( 'PPI::Token::Operator' ) or $Next->content ne '=>';
465              
466             # Got the next token
467             # Is an ordinary expression
468 21         52 $self->_rollback( $Next );
469 21         84 return 'PPI::Statement';
470             }
471              
472             # Rollback and continue
473 9897         19845 $self->_rollback( $Next );
474             }
475              
476             # Handle potential barewords for subscripts
477 65566 100       161683 if ( $Parent->isa('PPI::Structure::Subscript') ) {
478             # Fast obvious case, just an expression
479 7567 100 100     14089 unless ( $class and $class->isa('PPI::Statement::Expression') ) {
480 7444         20299 return 'PPI::Statement::Expression';
481             }
482              
483             # This is something like "my" or "our" etc... more subtle.
484             # Check if the next token is a closing curly brace.
485             # This means we are something like $h{my}
486 123         129 my $Next;
487 123         157 while ( $Next = $self->_get_token ) {
488 119 50       242 unless ( $Next->significant ) {
489 0         0 push @{$self->{delayed}}, $Next;
  0         0  
490             # $self->_delay_element( $Next );
491 0         0 next;
492             }
493              
494             # Found the next significant token.
495             # Is it a closing curly brace?
496 119 50       215 if ( $Next->content eq '}' ) {
497 119         225 $self->_rollback( $Next );
498 119         371 return 'PPI::Statement::Expression';
499             } else {
500 0         0 $self->_rollback( $Next );
501 0         0 return $class;
502             }
503             }
504              
505             # End of file... this means it is something like $h{our
506             # which is probably going to be $h{our} ... I think
507 4         9 $self->_rollback( $Next );
508 4         14 return 'PPI::Statement::Expression';
509             }
510              
511             # If it's a token in our list, use that class
512 57999 100       119438 return $class if $class;
513              
514             # Handle the more in-depth sub detection
515 48263 100 100     117132 if ( $is_lexsub || $content eq 'sub' ) {
516             # Read ahead to the next significant token
517 3416         3986 my $Next;
518 3416         6240 while ( $Next = $self->_get_token ) {
519 6769 100       12924 unless ( $Next->significant ) {
520 3377         3662 push @{$self->{delayed}}, $Next;
  3377         6546  
521             # $self->_delay_element( $Next );
522 3377         6016 next;
523             }
524              
525             # Got the next significant token
526 3392         5822 my $sclass = $STATEMENT_CLASSES{$Next->content};
527 3392 100 100     7704 if ( $sclass and $sclass eq 'PPI::Statement::Scheduled' ) {
528 28         63 $self->_rollback( $Next );
529 28         115 return 'PPI::Statement::Scheduled';
530             }
531 3364 100       8372 if ( $Next->isa('PPI::Token::Word') ) {
532 3255         7702 $self->_rollback( $Next );
533 3255         14080 return 'PPI::Statement::Sub';
534             }
535              
536             ### Comment out these two, as they would return PPI::Statement anyway
537             # if ( $content eq '{' ) {
538             # Anonymous sub at start of statement
539             # return 'PPI::Statement';
540             # }
541             #
542             # if ( $Next->isa('PPI::Token::Prototype') ) {
543             # Anonymous sub at start of statement
544             # return 'PPI::Statement';
545             # }
546              
547             # PPI::Statement is the safest fall-through
548 109         264 $self->_rollback( $Next );
549 109         436 return 'PPI::Statement';
550             }
551              
552             # End of file... PPI::Statement::Sub is the most likely
553 24         95 $self->_rollback( $Next );
554 24         133 return 'PPI::Statement::Sub';
555             }
556              
557 44847 100       63159 if ( $content eq 'use' ) {
558             # Add a special case for "use v6" lines.
559 2293         2318 my $Next;
560 2293         4221 while ( $Next = $self->_get_token ) {
561 4581 100       8968 unless ( $Next->significant ) {
562 2290         2739 push @{$self->{delayed}}, $Next;
  2290         4184  
563             # $self->_delay_element( $Next );
564 2290         3733 next;
565             }
566              
567             # Found the next significant token.
568 2291 100 66     9662 if (
    100          
569             $Next->isa('PPI::Token::Operator')
570             and
571             $Next->content eq '=>'
572             ) {
573             # Is an ordinary expression
574 1         2 $self->_rollback( $Next );
575 1         4 return 'PPI::Statement';
576             # Is it a v6 use?
577             } elsif ( $Next->content eq 'v6' ) {
578 2         10 $self->_rollback( $Next );
579 2         56 return 'PPI::Statement::Include::Perl6';
580             } else {
581 2288         5392 $self->_rollback( $Next );
582 2288         11081 return 'PPI::Statement::Include';
583             }
584             }
585              
586             # End of file... this means it is an incomplete use
587             # line, just treat it as a normal include.
588 2         7 $self->_rollback( $Next );
589 2         17 return 'PPI::Statement::Include';
590             }
591              
592             # If our parent is a Condition, we are an Expression
593 42554 100       100686 if ( $Parent->isa('PPI::Structure::Condition') ) {
594 1244         4212 return 'PPI::Statement::Expression';
595             }
596              
597             # If our parent is a List, we are also an expression
598 41310 100       86569 if ( $Parent->isa('PPI::Structure::List') ) {
599 8889         28641 return 'PPI::Statement::Expression';
600             }
601              
602             # Switch statements use expressions, as well.
603 32421 100 100     141436 if (
604             $Parent->isa('PPI::Structure::Given')
605             or
606             $Parent->isa('PPI::Structure::When')
607             ) {
608 6         27 return 'PPI::Statement::Expression';
609             }
610              
611 32415 100       167370 if ( _INSTANCE($Token, 'PPI::Token::Label') ) {
612 379         1802 return 'PPI::Statement::Compound';
613             }
614              
615             # Beyond that, I have no idea for the moment.
616             # Just keep adding more conditions above this.
617 32036         114832 return 'PPI::Statement';
618             }
619              
620             sub _update_features {
621 67931     67931   85365 my ( $self, $statement ) = @_;
622              
623 67931 100       121381 return if ref $statement ne 'PPI::Statement::Include';
624             return unless #
625 3326 100       10229 my $new_features = $statement->feature_mods;
626 18         30 push @{ $self->{features_stack} }, {}
627 19 100       23 if not @{ $self->{features_stack} };
  19         50  
628 19         41 my $current_features = $self->{features_stack}[-1];
629             $self->{Tokenizer}->_features #
630             ( $self->{features_stack}[-1] =
631 19         30 { %{$current_features}, %{$new_features} } );
  19         29  
  19         85  
632             }
633              
634             sub _lex_statement {
635 67939     67939   88189 my ($self, $Statement) = @_;
636             # my $self = shift;
637             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
638              
639             # Handle some special statements
640 67939 100       196815 if ( $Statement->isa('PPI::Statement::End') ) {
641 8         25 return $self->_lex_end( $Statement );
642             }
643              
644             # Begin processing tokens
645 67931         71863 my $Token;
646 67931         102483 while ( ref( $Token = $self->_get_token ) ) {
647             # Delay whitespace and comment tokens
648 447210 100       718208 unless ( $Token->significant ) {
649 147385         132974 push @{$self->{delayed}}, $Token;
  147385         176780  
650             # $self->_delay_element( $Token );
651 147385         183401 next;
652             }
653              
654             # Structual closes, and __DATA__ and __END__ tags implicitly
655             # end every type of statement
656 299825 100 66     438417 if (
657             $Token->__LEXER__closes
658             or
659             $Token->isa('PPI::Token::Separator')
660             ) {
661             # Rollback and end the statement
662 25153         52172 $self->_update_features( $Statement );
663 25153         42048 return $self->_rollback( $Token );
664             }
665              
666             # Normal statements never implicitly end
667 274672 100       472777 unless ( $Statement->__LEXER__normal ) {
668             # Have we hit an implicit end to the statement
669 25212 100       45759 unless ( $self->_continues( $Statement, $Token ) ) {
670             # Rollback and finish the statement
671 4535         11523 $self->_update_features( $Statement );
672 4535         9091 return $self->_rollback( $Token );
673             }
674             }
675              
676             # Any normal character just gets added
677 270137 100       502385 unless ( $Token->isa('PPI::Token::Structure') ) {
678 213290         323905 $self->_add_element( $Statement, $Token );
679 213290         299106 next;
680             }
681              
682             # Handle normal statement terminators
683 56847 100       83898 if ( $Token->content eq ';' ) {
684 26805         47827 $self->_add_element( $Statement, $Token );
685 26805         52889 $self->_update_features( $Statement );
686 26805         37585 return 1;
687             }
688              
689             # Which leaves us with a new structure
690              
691             # Determine the class for the structure and create it
692 30042         51222 my $method = $RESOLVE{$Token->content};
693 30042         75237 my $Structure = $self->$method($Statement)->new($Token);
694              
695             # Move the lexing down into the Structure
696 30042         60575 $self->_add_delayed( $Statement );
697 30042         55132 $self->_add_element( $Statement, $Structure );
698 30042         55803 $self->_lex_structure( $Structure );
699             }
700              
701             # Was it an error in the tokenizer?
702 11438 50       17761 unless ( defined $Token ) {
703 0         0 PPI::Exception->throw;
704             }
705              
706             # No, it's just the end of the file...
707             # Roll back any insignificant tokens, they'll get added at the Document level
708 11438         22881 $self->_update_features( $Statement );
709 11438         17734 $self->_rollback;
710             }
711              
712             sub _lex_end {
713 8     8   15 my ($self, $Statement) = @_;
714             # my $self = shift;
715             # my $Statement = _INSTANCE(shift, 'PPI::Statement::End') or die "Bad param 1";
716              
717             # End of the file, EVERYTHING is ours
718 8         10 my $Token;
719 8         18 while ( $Token = $self->_get_token ) {
720             # Inlined $Statement->__add_element($Token);
721             Scalar::Util::weaken(
722 15         59 $_PARENT{Scalar::Util::refaddr $Token} = $Statement
723             );
724 15         17 push @{$Statement->{children}}, $Token;
  15         23  
725             }
726              
727             # Was it an error in the tokenizer?
728 8 50       23 unless ( defined $Token ) {
729 0         0 PPI::Exception->throw;
730             }
731              
732             # No, it's just the end of the file...
733             # Roll back any insignificant tokens, they get added at the Document level
734 8         15 $self->_rollback;
735             }
736              
737             # For many statements, it can be difficult to determine the end-point.
738             # This method takes a statement and the next significant token, and attempts
739             # to determine if the there is a statement boundary between the two, or if
740             # the statement can continue with the token.
741             sub _continues {
742 25212     25212   32788 my ($self, $Statement, $Token) = @_;
743             # my $self = shift;
744             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
745             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
746              
747             # Handle the simple block case
748             # { print 1; }
749 25212 100 100     48655 if (
750             $Statement->schildren == 1
751             and
752             $Statement->schild(0)->isa('PPI::Structure::Block')
753             ) {
754 49         151 return '';
755             }
756              
757             # Alrighty then, there are six implied-end statement types:
758             # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, ::When,
759             # and ::Package statements.
760 25163 50       49712 return 1
761             if ref $Statement !~ /\b(?:Scheduled|Sub|Compound|Given|When|Package)$/;
762              
763             # Of these six, ::Scheduled, ::Sub, ::Given, and ::When follow the same
764             # simple rule and can be handled first. The block form of ::Package
765             # follows the rule, too. (The non-block form of ::Package
766             # requires a statement terminator, and thus doesn't need to have
767             # an implied end detected.)
768 25163         44428 my @part = $Statement->schildren;
769 25163         29735 my $LastChild = $part[-1];
770             # If the last significant element of the statement is a block,
771             # then an implied-end statement is done, no questions asked.
772 25163 100       108627 return !$LastChild->isa('PPI::Structure::Block')
773             if !$Statement->isa('PPI::Statement::Compound');
774              
775             # Now we get to compound statements, which kind of suck (to lex).
776             # However, of them all, the 'if' type, which includes unless, are
777             # relatively easy to handle compared to the others.
778 5559         14824 my $type = $Statement->type;
779 5559 100       9467 if ( $type eq 'if' ) {
780             # This should be one of the following
781             # if (EXPR) BLOCK
782             # if (EXPR) BLOCK else BLOCK
783             # if (EXPR) BLOCK elsif (EXPR) BLOCK ... else BLOCK
784              
785             # We only implicitly end on a block
786 3444 100       8548 unless ( $LastChild->isa('PPI::Structure::Block') ) {
787             # if (EXPR) ...
788             # if (EXPR) BLOCK else ...
789             # if (EXPR) BLOCK elsif (EXPR) BLOCK ...
790 2380         5735 return 1;
791             }
792              
793             # If the token before the block is an 'else',
794             # it's over, no matter what.
795 1064         2462 my $NextLast = $Statement->schild(-2);
796 1064 50 66     8390 if (
      66        
      66        
797             $NextLast
798             and
799             $NextLast->isa('PPI::Token')
800             and
801             $NextLast->isa('PPI::Token::Word')
802             and
803             $NextLast->content eq 'else'
804             ) {
805 73         270 return '';
806             }
807              
808             # Otherwise, we continue for 'elsif' or 'else' only.
809 991 100 100     3615 if (
      100        
810             $Token->isa('PPI::Token::Word')
811             and (
812             $Token->content eq 'else'
813             or
814             $Token->content eq 'elsif'
815             )
816             ) {
817 303         1068 return 1;
818             }
819              
820 688         2217 return '';
821             }
822              
823 2115 100       3691 if ( $type eq 'label' ) {
824             # We only have the label so far, could be any of
825             # LABEL while (EXPR) BLOCK
826             # LABEL while (EXPR) BLOCK continue BLOCK
827             # LABEL for (EXPR; EXPR; EXPR) BLOCK
828             # LABEL foreach VAR (LIST) BLOCK
829             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
830             # LABEL BLOCK continue BLOCK
831              
832             # Handle cases with a word after the label
833 356 100 100     1627 if (
834             $Token->isa('PPI::Token::Word')
835             and
836             $Token->content =~ /^(?:while|until|for|foreach)$/
837             ) {
838 38         83 return 1;
839             }
840              
841             # Handle labelled blocks
842 318 100 100     1288 if ( $Token->isa('PPI::Token::Structure') && $Token->content eq '{' ) {
843 241         745 return 1;
844             }
845              
846 77         218 return '';
847             }
848              
849             # Handle the common "after round braces" case
850 1759 100 100     6661 if ( $LastChild->isa('PPI::Structure') and $LastChild->braces eq '()' ) {
851             # LABEL while (EXPR) ...
852             # LABEL while (EXPR) ...
853             # LABEL for (EXPR; EXPR; EXPR) ...
854             # LABEL for VAR (LIST) ...
855             # LABEL foreach VAR (LIST) ...
856             # Only a block will do
857 384   33     1541 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
858             }
859              
860 1375 100       2511 if ( $type eq 'for' ) {
861             # LABEL for (EXPR; EXPR; EXPR) BLOCK
862 143 100 66     521 if (
    50          
    0          
863             $LastChild->isa('PPI::Token::Word')
864             and
865             $LastChild->content =~ /^for(?:each)?\z/
866             ) {
867             # LABEL for ...
868 130 100 66     873 if (
      100        
869             (
870             $Token->isa('PPI::Token::Structure')
871             and
872             $Token->content eq '('
873             )
874             or
875             $Token->isa('PPI::Token::QuoteLike::Words')
876             ) {
877 21         60 return 1;
878             }
879              
880 109 50       259 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
881             # LABEL for VAR QW{} ...
882             # LABEL foreach VAR QW{} ...
883             # Only a block will do
884 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
885             }
886              
887             # In this case, we can also behave like a foreach
888 109         231 $type = 'foreach';
889              
890             } elsif ( $LastChild->isa('PPI::Structure::Block') ) {
891             # LABEL for (EXPR; EXPR; EXPR) BLOCK
892             # That's it, nothing can continue
893 13         33 return '';
894              
895             } elsif ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
896             # LABEL for VAR QW{} ...
897             # LABEL foreach VAR QW{} ...
898             # Only a block will do
899 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
900             }
901             }
902              
903             # Handle the common continue case
904 1341 100 100     4133 if ( $LastChild->isa('PPI::Token::Word') and $LastChild->content eq 'continue' ) {
905             # LABEL while (EXPR) BLOCK continue ...
906             # LABEL foreach VAR (LIST) BLOCK continue ...
907             # LABEL BLOCK continue ...
908             # Only a block will do
909 6   33     29 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
910             }
911              
912 1335 50 66     2533 if ( $type eq 'try' and $LastChild->presumed_features->{try} ) {
913 6 100       33 return 1 if not $LastChild->isa('PPI::Structure::Block');
914              
915 2         5 my $NextLast = $Statement->schild(-2);
916 2 50 33     26 return ''
      33        
      33        
917             if $NextLast
918             and $NextLast->isa('PPI::Token')
919             and $NextLast->isa('PPI::Token::Word')
920             and $NextLast->content eq 'catch';
921              
922 2 50 33     13 return 1 #
923             if $Token->isa('PPI::Token::Word') and $Token->content eq 'catch';
924              
925 0         0 return '';
926             }
927              
928             # Handle the common continuable block case
929 1329 100       3307 if ( $LastChild->isa('PPI::Structure::Block') ) {
930             # LABEL while (EXPR) BLOCK
931             # LABEL while (EXPR) BLOCK ...
932             # LABEL for (EXPR; EXPR; EXPR) BLOCK
933             # LABEL foreach VAR (LIST) BLOCK
934             # LABEL foreach VAR (LIST) BLOCK ...
935             # LABEL BLOCK ...
936             # Is this the block for a continue?
937 464 100 66     3087 if ( _INSTANCE($part[-2], 'PPI::Token::Word') and $part[-2]->content eq 'continue' ) {
938             # LABEL while (EXPR) BLOCK continue BLOCK
939             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
940             # LABEL BLOCK continue BLOCK
941             # That's it, nothing can continue this
942 6         15 return '';
943             }
944              
945             # Only a continue will do
946 458   100     2079 return $Token->isa('PPI::Token::Word') && $Token->content eq 'continue';
947             }
948              
949 865 50       1452 if ( $type eq 'block' ) {
950             # LABEL BLOCK continue BLOCK
951             # Every possible case is covered in the common cases above
952             }
953              
954 865 100       1299 if ( $type eq 'while' ) {
955             # LABEL while (EXPR) BLOCK
956             # LABEL while (EXPR) BLOCK continue BLOCK
957             # LABEL until (EXPR) BLOCK
958             # LABEL until (EXPR) BLOCK continue BLOCK
959             # The only case not covered is the while ...
960 157 50 66     663 if (
      66        
961             $LastChild->isa('PPI::Token::Word')
962             and (
963             $LastChild->content eq 'while'
964             or
965             $LastChild->content eq 'until'
966             )
967             ) {
968             # LABEL while ...
969             # LABEL until ...
970             # Only a condition structure will do
971 157   33     670 return $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
972             }
973             }
974              
975 708 50       1074 if ( $type eq 'foreach' ) {
976             # LABEL foreach VAR (LIST) BLOCK
977             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
978             # The only two cases that have not been covered already are
979             # 'foreach ...' and 'foreach VAR ...'
980              
981 708 100       1644 if ( $LastChild->isa('PPI::Token::Symbol') ) {
982             # LABEL foreach my $scalar ...
983             # Open round brace, or a quotewords
984 210 100 66     1061 return 1 if $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
985 16 50       56 return 1 if $Token->isa('PPI::Token::QuoteLike::Words');
986 0         0 return '';
987             }
988              
989 498 100 100     780 if ( $LastChild->content eq 'foreach' or $LastChild->content eq 'for' ) {
990             # There are three possibilities here
991 281 100 100     1046 if (
    100 100        
    100 66        
    100          
992             $Token->isa('PPI::Token::Word')
993             and (
994             ($STATEMENT_CLASSES{ $Token->content } || '')
995             eq
996             'PPI::Statement::Variable'
997             )
998             ) {
999             # VAR == 'my ...'
1000 196         546 return 1;
1001             } elsif ( $Token->content =~ /^\$/ ) {
1002             # VAR == '$scalar'
1003 34         96 return 1;
1004             } elsif ( $Token->isa('PPI::Token::Structure') and $Token->content eq '(' ) {
1005 42         151 return 1;
1006             } elsif ( $Token->isa('PPI::Token::QuoteLike::Words') ) {
1007 6         21 return 1;
1008             } else {
1009 3         9 return '';
1010             }
1011             }
1012              
1013 217 100 100     1743 if (
1014             ($STATEMENT_CLASSES{ $LastChild->content } || '')
1015             eq
1016             'PPI::Statement::Variable'
1017             ) {
1018             # LABEL foreach my ...
1019             # Only a scalar will do
1020 192         373 return $Token->content =~ /^\$/;
1021             }
1022              
1023             # Handle the rare for my $foo qw{bar} ... case
1024 25 50       59 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
1025             # LABEL for VAR QW ...
1026             # LABEL foreach VAR QW ...
1027             # Only a block will do
1028 25   33     128 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
1029             }
1030             }
1031              
1032             # Something we don't know about... what could it be
1033 0         0 PPI::Exception->throw("Illegal state in '$type' compound statement");
1034             }
1035              
1036              
1037              
1038              
1039              
1040             #####################################################################
1041             # Lex Methods - Structure Object
1042              
1043             # Given a parent element, and a ( token to open a structure, determine
1044             # the class that the structure should be.
1045             sub _round {
1046 11856     11856   17629 my ($self, $Parent) = @_;
1047             # my $self = shift;
1048             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1049              
1050             # Get the last significant element in the parent
1051 11856         28586 my $Element = $Parent->schild(-1);
1052 11856 100       49138 if ( _INSTANCE($Element, 'PPI::Token::Word') ) {
1053             # Can it be determined because it is a keyword?
1054 10255         16337 my $rclass = $ROUND{$Element->content};
1055 10255 100       21397 return $rclass if $rclass;
1056             }
1057              
1058             # If we are part of a for or foreach statement, we are a ForLoop
1059 10513 100       81071 if ( $Parent->isa('PPI::Statement::Compound') ) {
    100          
    100          
    100          
1060 195 100       388 if ( $Parent->type =~ /^for(?:each)?$/ ) {
1061 194         864 return 'PPI::Structure::For';
1062             }
1063             } elsif ( $Parent->isa('PPI::Statement::Given') ) {
1064 3         35 return 'PPI::Structure::Given';
1065             } elsif ( $Parent->isa('PPI::Statement::When') ) {
1066 3         24 return 'PPI::Structure::When';
1067             } elsif ( $Parent->isa('PPI::Statement::Sub') ) {
1068 156         517 return 'PPI::Structure::Signature';
1069             }
1070              
1071             # Otherwise, it must be a list
1072              
1073             # If the previous element is -> then we mark it as a dereference
1074 10157 100 100     36763 if ( _INSTANCE($Element, 'PPI::Token::Operator') and $Element->content eq '->' ) {
1075 10         25 $Element->{_dereference} = 1;
1076             }
1077              
1078             'PPI::Structure::List'
1079 10157         31451 }
1080              
1081             # Given a parent element, and a [ token to open a structure, determine
1082             # the class that the structure should be.
1083             sub _square {
1084 6717     6717   10608 my ($self, $Parent) = @_;
1085             # my $self = shift;
1086             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1087              
1088             # Get the last significant element in the parent
1089 6717         15499 my $Element = $Parent->schild(-1);
1090              
1091             # Is this a subscript, like $foo[1] or $foo{expr}
1092            
1093 6717 100       14077 if ( $Element ) {
1094 6480 100 100     19108 if ( $Element->isa('PPI::Token::Operator') and $Element->content eq '->' ) {
1095             # $foo->[]
1096 4060         6476 $Element->{_dereference} = 1;
1097 4060         10862 return 'PPI::Structure::Subscript';
1098             }
1099 2420 100       7152 if ( $Element->isa('PPI::Structure::Subscript') ) {
1100             # $foo{}[]
1101 13         38 return 'PPI::Structure::Subscript';
1102             }
1103 2407 100 100     8088 if ( $Element->isa('PPI::Token::Symbol') and $Element->content =~ /^(?:\$|\@)/ ) {
1104             # $foo[], @foo[]
1105 723         2610 return 'PPI::Structure::Subscript';
1106             }
1107 1684 100 100     5945 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%)/ ) {
1108 41         83 my $prior = $Parent->schild(-2);
1109 41 100 100     160 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1110             # Postfix dereference: ->@[...] ->%[...]
1111 2         10 return 'PPI::Structure::Subscript';
1112             }
1113             }
1114             # FIXME - More cases to catch
1115             }
1116              
1117             # Otherwise, we assume that it's an anonymous arrayref constructor
1118 1919         6355 'PPI::Structure::Constructor';
1119             }
1120              
1121             # Keyword -> Structure class maps
1122             my %CURLY_CLASSES = (
1123             # Blocks
1124             'sub' => 'PPI::Structure::Block',
1125             'grep' => 'PPI::Structure::Block',
1126             'map' => 'PPI::Structure::Block',
1127             'sort' => 'PPI::Structure::Block',
1128             'do' => 'PPI::Structure::Block',
1129             # rely on 'continue' + block being handled elsewhere
1130             # rely on 'eval' + block being handled elsewhere
1131              
1132             # Hash constructors
1133             'scalar' => 'PPI::Structure::Constructor',
1134             '=' => 'PPI::Structure::Constructor',
1135             '||=' => 'PPI::Structure::Constructor',
1136             '&&=' => 'PPI::Structure::Constructor',
1137             '//=' => 'PPI::Structure::Constructor',
1138             '||' => 'PPI::Structure::Constructor',
1139             '&&' => 'PPI::Structure::Constructor',
1140             '//' => 'PPI::Structure::Constructor',
1141             '?' => 'PPI::Structure::Constructor',
1142             ':' => 'PPI::Structure::Constructor',
1143             ',' => 'PPI::Structure::Constructor',
1144             '=>' => 'PPI::Structure::Constructor',
1145             '+' => 'PPI::Structure::Constructor', # per perlref
1146             'return' => 'PPI::Structure::Constructor', # per perlref
1147             'bless' => 'PPI::Structure::Constructor', # pragmatic --
1148             # perlfunc says first arg is a reference, and
1149             # bless {; ... } fails to compile.
1150             );
1151              
1152             my @CURLY_LOOKAHEAD_CLASSES = (
1153             {}, # not used
1154             {
1155             ';' => 'PPI::Structure::Block', # per perlref
1156             '}' => 'PPI::Structure::Constructor',
1157             },
1158             {
1159             '=>' => 'PPI::Structure::Constructor',
1160             },
1161             );
1162              
1163              
1164             # Given a parent element, and a { token to open a structure, determine
1165             # the class that the structure should be.
1166             sub _curly {
1167 11469     11469   17669 my ($self, $Parent) = @_;
1168             # my $self = shift;
1169             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1170              
1171             # Get the last significant element in the parent
1172 11469         23537 my $Element = $Parent->schild(-1);
1173 11469 100       32320 my $content = $Element ? $Element->content : '';
1174              
1175             # Is this a subscript, like $foo[1] or $foo{expr}
1176 11469 100       23470 if ( $Element ) {
1177 10823 100 66     26361 if ( $content eq '->' and $Element->isa('PPI::Token::Operator') ) {
1178             # $foo->{}
1179 2116         3714 $Element->{_dereference} = 1;
1180 2116         7214 return 'PPI::Structure::Subscript';
1181             }
1182 8707 100       26102 if ( $Element->isa('PPI::Structure::Subscript') ) {
1183             # $foo[]{}
1184 73         183 return 'PPI::Structure::Subscript';
1185             }
1186 8634 100 100     29940 if ( $content =~ /^(?:\$|\@)/ and $Element->isa('PPI::Token::Symbol') ) {
1187             # $foo{}, @foo{}
1188 581         2049 return 'PPI::Structure::Subscript';
1189             }
1190 8053 100 100     30019 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%|\*)/ ) {
1191 327         716 my $prior = $Parent->schild(-2);
1192 327 100 100     1752 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1193             # Postfix dereference: ->@{...} ->%{...} ->*{...}
1194 3         12 return 'PPI::Structure::Subscript';
1195             }
1196             }
1197 8050 100       22025 if ( $Element->isa('PPI::Structure::Block') ) {
1198             # deference - ${$hash_ref}{foo}
1199             # or even ${burfle}{foo}
1200             # hash slice - @{$hash_ref}{'foo', 'bar'}
1201 3 50       8 if ( my $prior = $Parent->schild(-2) ) {
1202 3         7 my $prior_content = $prior->content();
1203 3 50 66     19 $prior->isa( 'PPI::Token::Cast' )
      66        
1204             and ( $prior_content eq '@' ||
1205             $prior_content eq '$' )
1206             and return 'PPI::Structure::Subscript';
1207             }
1208             }
1209              
1210             # Are we the last argument of sub?
1211             # E.g.: 'sub foo {}', 'sub foo ($) {}'
1212 8048 100       29244 return 'PPI::Structure::Block' if $Parent->isa('PPI::Statement::Sub');
1213              
1214             # Are we the second or third argument of package?
1215             # E.g.: 'package Foo {}' or 'package Foo v1.2.3 {}'
1216 5586 100       31884 return 'PPI::Structure::Block'
1217             if $Parent->isa('PPI::Statement::Package');
1218              
1219 4293 100       10282 if ( $CURLY_CLASSES{$content} ) {
1220             # Known type
1221 885         3341 return $CURLY_CLASSES{$content};
1222             }
1223             }
1224              
1225             # Are we in a compound statement
1226 4054 100       11147 if ( $Parent->isa('PPI::Statement::Compound') ) {
1227             # We will only encounter blocks in compound statements
1228 1922         5619 return 'PPI::Structure::Block';
1229             }
1230              
1231             # Are we the second or third argument of use
1232 2132 100       6717 if ( $Parent->isa('PPI::Statement::Include') ) {
1233 53 50 33     148 if ( $Parent->schildren == 2 ||
      66        
1234             $Parent->schildren == 3 &&
1235             $Parent->schild(2)->isa('PPI::Token::Number')
1236             ) {
1237             # This is something like use constant { ... };
1238 53         233 return 'PPI::Structure::Constructor';
1239             }
1240             }
1241              
1242             # Unless we are at the start of the statement, everything else should be a block
1243             ### FIXME This is possibly a bad choice, but will have to do for now.
1244 2079 100       7296 return 'PPI::Structure::Block' if $Element;
1245              
1246 646 100 66     3138 if (
1247             $Parent->isa('PPI::Statement')
1248             and
1249             _INSTANCE($Parent->parent, 'PPI::Structure::List')
1250             ) {
1251 177         363 my $function = $Parent->parent->parent->schild(-2);
1252              
1253             # Special case: Are we the param of a core function
1254             # i.e. map({ $_ => 1 } @foo)
1255 177 100 100     644 return 'PPI::Structure::Block'
1256             if $function and $function->content =~ /^(?:map|grep|sort|eval|do)$/;
1257              
1258             # If not part of a block print, list-embedded curlies are most likely constructors
1259 83 100 100     344 return 'PPI::Structure::Constructor'
1260             if not $function or $function->content !~ /^(?:print|say)$/;
1261             }
1262              
1263             # We need to scan ahead.
1264 475         646 my $Next;
1265 475         520 my $position = 0;
1266 475         631 my @delayed;
1267 475         817 while ( $Next = $self->_get_token ) {
1268 1155 100       2215 unless ( $Next->significant ) {
1269 189         279 push @delayed, $Next;
1270 189         323 next;
1271             }
1272              
1273             # If we are off the end of the lookahead array,
1274 966 100       2201 if ( ++$position >= @CURLY_LOOKAHEAD_CLASSES ) {
    100          
1275             # default to block.
1276 120         500 $self->_buffer( splice(@delayed), $Next );
1277 120         185 last;
1278             # If the content at this position is known
1279             } elsif ( my $class = $CURLY_LOOKAHEAD_CLASSES[$position]
1280             {$Next->content} ) {
1281             # return the associated class.
1282 269         620 $self->_buffer( splice(@delayed), $Next );
1283 269         1162 return $class;
1284             }
1285              
1286             # Delay and continue
1287 577         1066 push @delayed, $Next;
1288             }
1289              
1290             # Hit the end of the document, or bailed out, go with block
1291 206         575 $self->_buffer( splice(@delayed) );
1292 206 50       512 if ( ref $Parent eq 'PPI::Statement' ) {
1293 206         321 bless $Parent, 'PPI::Statement::Compound';
1294             }
1295 206         799 return 'PPI::Structure::Block';
1296             }
1297              
1298              
1299             sub _lex_structure {
1300 30042     30042   38404 my ($self, $Structure) = @_;
1301             # my $self = shift;
1302             # my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1";
1303              
1304 30042   100     30083 push @{$self->{features_stack}}, $self->{features_stack}[-1] || {};
  30042         76955  
1305              
1306             # Start the processing loop
1307 30042         32297 my $Token;
1308 30042         44597 while ( ref($Token = $self->_get_token) ) {
1309             # Is this a direct type token
1310 116255 100       217003 unless ( $Token->significant ) {
1311 50240         50527 push @{$self->{delayed}}, $Token;
  50240         71275  
1312             # $self->_delay_element( $Token );
1313 50240         83842 next;
1314             }
1315              
1316             # Anything other than a Structure starts a Statement
1317 66015 100       175879 unless ( $Token->isa('PPI::Token::Structure') ) {
1318             # Because _statement may well delay and rollback itself,
1319             # we need to add the delayed tokens early
1320 37332         70202 $self->_add_delayed( $Structure );
1321              
1322             # Determine the class for the Statement and create it
1323 37332         73871 my $Statement = $self->_statement($Structure, $Token)->new($Token);
1324              
1325             # Move the lexing down into the Statement
1326 37332         77314 $self->_add_element( $Structure, $Statement );
1327 37332         78956 $self->_lex_statement( $Statement );
1328              
1329 37332         68004 next;
1330             }
1331              
1332             # Is this the opening of another structure directly inside us?
1333 28683 100       46970 if ( $Token->__LEXER__opens ) {
1334             # Rollback the Token, and recurse into the statement
1335 465         1124 $self->_rollback( $Token );
1336 465         1388 my $Statement = PPI::Statement->new;
1337 465         1052 $self->_add_element( $Structure, $Statement );
1338 465         1223 $self->_lex_statement( $Statement );
1339 465         1022 next;
1340             }
1341              
1342             # Is this the close of a structure ( which would be an error )
1343 28218 100       47125 if ( $Token->__LEXER__closes ) {
1344 28171         29186 pop @{$self->{features_stack}};
  28171         39553  
1345              
1346             # Is this OUR closing structure
1347 28171 100       48548 if ( $Token->content eq $Structure->start->__LEXER__opposite ) {
1348             # Add any delayed tokens, and the finishing token (the ugly way)
1349 27529         51119 $self->_add_delayed( $Structure );
1350 27529         46668 $Structure->{finish} = $Token;
1351             Scalar::Util::weaken(
1352 27529         62542 $_PARENT{Scalar::Util::refaddr $Token} = $Structure
1353             );
1354              
1355             # Confirm that ForLoop structures are actually so, and
1356             # aren't really a list.
1357 27529 100       79183 if ( $Structure->isa('PPI::Structure::For') ) {
1358 232 100       657 if ( 2 > scalar grep {
1359 594         1778 $_->isa('PPI::Statement')
1360             } $Structure->children ) {
1361 211         347 bless($Structure, 'PPI::Structure::List');
1362             }
1363             }
1364 27529         64680 return 1;
1365             }
1366              
1367             # Unmatched closing brace.
1368             # Either they typed the wrong thing, or haven't put
1369             # one at all. Either way it's an error we need to
1370             # somehow handle gracefully. For now, we'll treat it
1371             # as implicitly ending the structure. This causes the
1372             # least damage across the various reasons why this
1373             # might have happened.
1374 642         1580 return $self->_rollback( $Token );
1375             }
1376              
1377             # It's a semi-colon on its own, just inside the block.
1378             # This is a null statement.
1379             $self->_add_element(
1380 47         243 $Structure,
1381             PPI::Statement::Null->new($Token),
1382             );
1383             }
1384              
1385             # Is this an error
1386 1871 50       3408 unless ( defined $Token ) {
1387 0         0 PPI::Exception->throw;
1388             }
1389              
1390 1871         1911 pop @{$self->{features_stack}};
  1871         2629  
1391              
1392             # No, it's just the end of file.
1393             # Add any insignificant trailing tokens.
1394 1871         3705 $self->_add_delayed( $Structure );
1395             }
1396              
1397              
1398              
1399              
1400              
1401             #####################################################################
1402             # Support Methods
1403              
1404             # Get the next token for processing, handling buffering
1405             sub _get_token {
1406 682663 100   682663   630430 shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token;
  682663         1581615  
1407             }
1408              
1409             # Old long version of the above
1410             # my $self = shift;
1411             # # First from the buffer
1412             # if ( @{$self->{buffer}} ) {
1413             # return shift @{$self->{buffer}};
1414             # }
1415             #
1416             # # Then from the Tokenizer
1417             # $self->{Tokenizer}->get_token;
1418             # }
1419              
1420             # Delay the addition of insignificant elements.
1421             # This ended up being inlined.
1422             # sub _delay_element {
1423             # my $self = shift;
1424             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 1";
1425             # push @{ $_[0]->{delayed} }, $_[1];
1426             # }
1427              
1428             # Add an Element to a Node, including any delayed Elements
1429             sub _add_element {
1430 361857     361857   433339 my ($self, $Parent, $Element) = @_;
1431             # my $self = shift;
1432             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1433             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 2";
1434              
1435             # Handle a special case, where a statement is not fully resolved
1436 361857 100 100     633832 if ( ref $Parent eq 'PPI::Statement'
1437             and my $first = $Parent->schild(0) ) {
1438 77015 50 33     198013 if ( $first->isa('PPI::Token::Label')
1439             and !(my $second = $Parent->schild(1)) ) {
1440 0         0 my $new_class = $STATEMENT_CLASSES{$second->content};
1441             # It's a labelled statement
1442 0 0       0 bless $Parent, $new_class if $new_class;
1443             }
1444             }
1445              
1446             # Add first the delayed, from the front, then the passed element
1447 361857         347694 foreach my $el ( @{$self->{delayed}} ) {
  361857         456950  
1448             Scalar::Util::weaken(
1449 115818         317660 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1450             );
1451             # Inlined $Parent->__add_element($el);
1452             }
1453             Scalar::Util::weaken(
1454 361857         812123 $_PARENT{Scalar::Util::refaddr $Element} = $Parent
1455             );
1456 361857         342963 push @{$Parent->{children}}, @{$self->{delayed}}, $Element;
  361857         405135  
  361857         502870  
1457              
1458             # Clear the delayed elements
1459 361857         488737 $self->{delayed} = [];
1460             }
1461              
1462             # Specifically just add any delayed tokens, if any.
1463             sub _add_delayed {
1464 142819     142819   186883 my ($self, $Parent) = @_;
1465             # my $self = shift;
1466             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1467              
1468             # Add any delayed
1469 142819         137117 foreach my $el ( @{$self->{delayed}} ) {
  142819         215762  
1470             Scalar::Util::weaken(
1471 61564         161680 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1472             );
1473             # Inlined $Parent->__add_element($el);
1474             }
1475 142819         143760 push @{$Parent->{children}}, @{$self->{delayed}};
  142819         177217  
  142819         186036  
1476              
1477             # Clear the delayed elements
1478 142819         223231 $self->{delayed} = [];
1479             }
1480              
1481             # Rollback the delayed tokens, plus any passed. Once all the tokens
1482             # have been moved back on to the buffer, the order should be.
1483             # <--- @{$self->{delayed}}, @_, @{$self->{buffer}} <----
1484             sub _rollback {
1485 60930     60930   67930 my $self = shift;
1486              
1487             # First, put any passed objects back
1488 60930 100       96907 if ( @_ ) {
1489 49484         47900 unshift @{$self->{buffer}}, splice @_;
  49484         97896  
1490             }
1491              
1492             # Then, put back anything delayed
1493 60930 100       62360 if ( @{$self->{delayed}} ) {
  60930         101622  
1494 29486         30640 unshift @{$self->{buffer}}, splice @{$self->{delayed}};
  29486         35798  
  29486         41943  
1495             }
1496              
1497 60930         87231 1;
1498             }
1499              
1500             # Partial rollback, just return a single list to the buffer
1501             sub _buffer {
1502 595     595   728 my $self = shift;
1503              
1504             # Put any passed objects back
1505 595 100       1220 if ( @_ ) {
1506 463         495 unshift @{$self->{buffer}}, splice @_;
  463         1028  
1507             }
1508              
1509 595         725 1;
1510             }
1511              
1512              
1513              
1514              
1515              
1516             #####################################################################
1517             # Error Handling
1518              
1519             # Set the error message
1520             sub _error {
1521 3     3   8 $errstr = "Lexer failed: $_[1]";
1522 3         15 undef;
1523             }
1524              
1525             # Clear the error message.
1526             # Returns the object as a convenience.
1527             sub _clear {
1528 16848     16848   26298 $errstr = '';
1529 16848         27393 $_[0];
1530             }
1531              
1532             =pod
1533              
1534             =head2 errstr
1535              
1536             For any error that occurs, you can use the C, as either
1537             a static or object method, to access the error message.
1538              
1539             If no error occurs for any particular action, C will return false.
1540              
1541             =cut
1542              
1543             sub errstr {
1544 6     6 1 23 $errstr;
1545             }
1546              
1547              
1548              
1549              
1550              
1551             #####################################################################
1552             # PDOM Extensions
1553             #
1554             # This is something of a future expansion... ignore it for now :)
1555             #
1556             # use PPI::Statement::Sub ();
1557             #
1558             # sub PPI::Statement::Sub::__LEXER__normal { '' }
1559              
1560             1;
1561              
1562             =pod
1563              
1564             =head1 TO DO
1565              
1566             - Add optional support for some of the more common source filters
1567              
1568             - Some additional checks for blessing things into various Statement
1569             and Structure subclasses.
1570              
1571             =head1 SUPPORT
1572              
1573             See the L in the main module.
1574              
1575             =head1 AUTHOR
1576              
1577             Adam Kennedy Eadamk@cpan.orgE
1578              
1579             =head1 COPYRIGHT
1580              
1581             Copyright 2001 - 2011 Adam Kennedy.
1582              
1583             This program is free software; you can redistribute
1584             it and/or modify it under the same terms as Perl itself.
1585              
1586             The full text of the license can be found in the
1587             LICENSE file included with this module.
1588              
1589             =cut