File Coverage

blib/lib/PPI/Token/Word.pm
Criterion Covered Total %
statement 102 119 85.7
branch 58 72 80.5
condition 42 45 93.3
subroutine 8 8 100.0
pod 2 2 100.0
total 212 246 86.1


line stmt bran cond sub pod time code
1             package PPI::Token::Word;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Token::Word - The generic "word" Token
8              
9             =head1 INHERITANCE
10              
11             PPI::Token::Word
12             isa PPI::Token
13             isa PPI::Element
14              
15             =head1 DESCRIPTION
16              
17             A C<PPI::Token::Word> object is a PPI-specific representation of several
18             different types of word-like things, and is one of the most common Token
19             classes found in typical documents.
20              
21             Specifically, it includes not only barewords, but also any other valid
22             Perl identifier including non-operator keywords and core functions, and
23             any include C<::> separators inside it, as long as it fits the
24             format of a class, function, etc.
25              
26             =head1 METHODS
27              
28             There are no methods available for C<PPI::Token::Word> beyond those
29             provided by its L<PPI::Token> and L<PPI::Element> parent
30             classes.
31              
32             We expect to add additional methods to help further resolve a Word as
33             a function, method, etc over time. If you need such a thing right
34             now, look at L<Perl::Critic::Utils>.
35              
36             =cut
37              
38 67     67   377 use strict;
  67         98  
  67         1937  
39 67     67   249 use PPI::Token ();
  67         81  
  67         1313  
40 67     67   207 use PPI::Singletons qw' %OPERATOR %QUOTELIKE %KEYWORDS ';
  67         102  
  67         121957  
41              
42             our $VERSION = '1.284';
43              
44             our @ISA = "PPI::Token";
45              
46             =pod
47              
48             =head2 literal
49              
50             Returns the value of the Word as a string. This assumes (often
51             incorrectly) that the Word is a bareword and not a function, method,
52             keyword, etc. This differs from C<content> because C<Foo'Bar> expands
53             to C<Foo::Bar>.
54              
55             =cut
56              
57             sub literal {
58 7     7 1 882 my $self = shift;
59 7         18 my $word = $self->content;
60              
61             # Expand Foo'Bar to Foo::Bar
62 7         16 $word =~ s/\'/::/g;
63              
64 7         23 return $word;
65             }
66              
67             =pod
68              
69             =head2 method_call
70              
71             Answers whether this is the name of a method in a method call. Returns true if
72             yes, false if no, and nothing if unknown.
73              
74             =cut
75              
76             sub method_call {
77 22     22 1 4864 my $self = shift;
78              
79 22         54 my $previous = $self->sprevious_sibling;
80 22 100 100     86 if (
      100        
81             $previous
82             and
83             $previous->isa('PPI::Token::Operator')
84             and
85             $previous->content eq '->'
86             ) {
87 4         14 return 1;
88             }
89              
90 18         36 my $snext = $self->snext_sibling;
91 18 100       39 return 0 unless $snext;
92              
93 15 100 100     138 if (
      100        
      100        
      100        
94             $snext->isa('PPI::Structure::List')
95             or
96             $snext->isa('PPI::Token::Structure')
97             or
98             $snext->isa('PPI::Token::Operator')
99             and (
100             $snext->content eq ','
101             or
102             $snext->content eq '=>'
103             )
104             ) {
105 9         46 return 0;
106             }
107              
108 6 100 100     20 if (
109             $snext->isa('PPI::Token::Word')
110             and
111             $snext->content =~ m< \w :: \z >xms
112             ) {
113 1         3 return 1;
114             }
115              
116 5         17 return;
117             }
118              
119              
120             sub __TOKENIZER__on_char {
121 18     18   47 my $class = shift;
122 18         32 my $t = shift;
123              
124             # Suck in till the end of the bareword
125 18         62 pos $t->{line} = $t->{line_cursor};
126 18 100       143 if ( $t->{line} =~ m/\G(\w+(?:(?:\'|::)\w+)*(?:::)?)/gc ) {
127 11         49 my $word = $1;
128             # Special Case: If we accidentally treat eq'foo' like
129             # the word "eq'foo", then just make 'eq' (or whatever
130             # else is in the %KEYWORDS hash.
131 11 50 66     48 if ( $word =~ /^(\w+)'/ && $KEYWORDS{$1} ) {
132 1         3 $word = $1;
133             }
134 11         28 $t->{token}->{content} .= $word;
135 11         25 $t->{line_cursor} += length $word;
136              
137             }
138              
139             # We might be a subroutine attribute.
140 18 50       60 if ( __current_token_is_attribute($t) ) {
141 0         0 $t->{class} = $t->{token}->set_class( 'Attribute' );
142 0         0 return $t->{class}->__TOKENIZER__commit( $t );
143             }
144              
145 18         83 my $word = $t->{token}->{content};
146 18 50       69 if ( $KEYWORDS{$word} ) {
147             # Check for a Perl keyword that is forced to be a normal word instead
148 0 0       0 if ( $t->__current_token_is_forced_word ) {
149 0         0 $t->{class} = $t->{token}->set_class( 'Word' );
150 0         0 return $t->{class}->__TOKENIZER__on_char( $t );
151             }
152              
153             # Check for a quote like operator. %QUOTELIKE must be subset of %KEYWORDS
154 0 0       0 if ( $QUOTELIKE{$word} ) {
155 0         0 $t->{class} = $t->{token}->set_class( $QUOTELIKE{$word} );
156 0         0 return $t->{class}->__TOKENIZER__on_char( $t );
157             }
158              
159             # Or one of the word operators. %OPERATOR must be subset of %KEYWORDS
160 0 0       0 if ( $OPERATOR{$word} ) {
161 0         0 $t->{class} = $t->{token}->set_class( 'Operator' );
162 0         0 return $t->_finalize_token->__TOKENIZER__on_char( $t );
163             }
164             }
165              
166             # Unless this is a simple identifier, at this point
167             # it has to be a normal bareword
168 18 100       85 if ( $word =~ /\:/ ) {
169 10         23 return $t->_finalize_token->__TOKENIZER__on_char( $t );
170             }
171              
172             # If the NEXT character in the line is a colon, this
173             # is a label.
174 8         25 my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
175 8 50       57 if ( $char eq ':' ) {
    100          
176 0         0 $t->{token}->{content} .= ':';
177 0         0 $t->{line_cursor}++;
178 0         0 $t->{class} = $t->{token}->set_class( 'Label' );
179              
180             # If not a label, '_' on its own is the magic filehandle
181             } elsif ( $word eq '_' ) {
182 1         7 $t->{class} = $t->{token}->set_class( 'Magic' );
183              
184             }
185              
186             # Finalise and process the character again
187 8         29 $t->_finalize_token->__TOKENIZER__on_char( $t );
188             }
189              
190              
191              
192             # We are committed to being a bareword.
193             # Or so we would like to believe.
194             sub __TOKENIZER__commit {
195 56605     56605   84563 my ($class, $t) = @_;
196              
197             # Our current position is the first character of the bareword.
198             # Capture the bareword.
199 56605         150235 pos $t->{line} = $t->{line_cursor};
200 56605 50       302652 unless ( $t->{line} =~ m/\G((?!\d)\w+(?:(?:\'|::)\w+)*(?:::)?)/gc ) {
201             # Programmer error
202 0         0 die sprintf "Fatal error... regex failed to match in '%s' when expected", substr $t->{line}, $t->{line_cursor};
203             }
204              
205             # Special Case: If we accidentally treat eq'foo' like the word "eq'foo",
206             # then unwind it and just make it 'eq' (or the other stringy comparitors)
207 56605         119774 my $word = $1;
208 56605 100 100     119161 if ( $word =~ /^(\w+)'/ && $KEYWORDS{$1} ) {
209 260         460 $word = $1;
210             }
211              
212             # Advance the position one after the end of the bareword
213 56605         76740 $t->{line_cursor} += length $word;
214              
215             # We might be a subroutine attribute.
216 56605 100       101071 if ( __current_token_is_attribute($t) ) {
217 1065         2281 $t->_new_token( 'Attribute', $word );
218             return ($t->{line_cursor} >= $t->{line_length}) ? 0
219 1065 50       3826 : $t->{class}->__TOKENIZER__on_char($t);
220             }
221              
222             # Check for the end of the file
223 55540 100       114102 if ( $word eq '__END__' ) {
224             # Create the token for the __END__ itself
225 8         26 $t->_new_token( 'Separator', $1 );
226 8         24 $t->_finalize_token;
227              
228             # Move into the End zone (heh)
229 8         15 $t->{zone} = 'PPI::Token::End';
230              
231             # Add the rest of the line as a comment, and a whitespace newline
232             # Anything after the __END__ on the line is "ignored". So we must
233             # also ignore it, by turning it into a comment.
234 8         18 my $end_rest = substr( $t->{line}, $t->{line_cursor} );
235 8         15 $t->{line_cursor} = length $t->{line};
236 8 100       32 if ( $end_rest =~ /\n$/ ) {
237 6         15 chomp $end_rest;
238 6 100       20 $t->_new_token( 'Comment', $end_rest ) if length $end_rest;
239 6         17 $t->_new_token( 'Whitespace', "\n" );
240             } else {
241 2 100       7 $t->_new_token( 'Comment', $end_rest ) if length $end_rest;
242             }
243 8         20 $t->_finalize_token;
244              
245 8         26 return 0;
246             }
247              
248             # Check for the data section
249 55532 100       86258 if ( $word eq '__DATA__' ) {
250             # Create the token for the __DATA__ itself
251 6         26 $t->_new_token( 'Separator', "$1" );
252 6         17 $t->_finalize_token;
253              
254             # Move into the Data zone
255 6         11 $t->{zone} = 'PPI::Token::Data';
256              
257             # Add the rest of the line as the Data token
258 6         13 my $data_rest = substr( $t->{line}, $t->{line_cursor} );
259 6         12 $t->{line_cursor} = length $t->{line};
260 6 100       23 if ( $data_rest =~ /\n$/ ) {
261 4         11 chomp $data_rest;
262 4 100       14 $t->_new_token( 'Comment', $data_rest ) if length $data_rest;
263 4         12 $t->_new_token( 'Whitespace', "\n" );
264             } else {
265 2 100       5 $t->_new_token( 'Comment', $data_rest ) if length $data_rest;
266             }
267 6         29 $t->_finalize_token;
268              
269 6         18 return 0;
270             }
271              
272 55526         63290 my $token_class;
273 55526 100 100     267521 if ( $word =~ /\:/ ) {
    100 100        
    100 66        
    100          
274             # Since it's not a simple identifier...
275 1293         2047 $token_class = 'Word';
276              
277             } elsif ( $KEYWORDS{$word} and $t->__current_token_is_forced_word ) {
278 6624         9868 $token_class = 'Word';
279              
280             } elsif ( $QUOTELIKE{$word} ) {
281             # Special Case: A Quote-like operator
282 2611         6892 $t->_new_token( $QUOTELIKE{$word}, $word );
283             return ($t->{line_cursor} >= $t->{line_length}) ? 0
284 2611 50       11276 : $t->{class}->__TOKENIZER__on_char( $t );
285              
286             } elsif ( $OPERATOR{$word} && ($word ne 'x' || $t->_current_x_is_operator) ) {
287             # Word operator
288 1648         2620 $token_class = 'Operator';
289              
290             } else {
291             # Get tokens early to be sure to not disturb state set up by pos and m//gc.
292 43350         76448 my @tokens = $t->_previous_significant_tokens(1);
293              
294             # If the next character is a ':' then it's a label...
295 43350         82495 pos $t->{line} = $t->{line_cursor};
296 43350 100       144051 if ( $t->{line} =~ m/\G(\s*:)(?!:)/gc ) {
    100          
297 1374 100 100     8784 if ( $tokens[0] and $tokens[0]->{content} eq 'sub' ) {
    100 100        
298             # ... UNLESS it's after 'sub' in which
299             # case it is a sub name and an attribute
300             # operator.
301             # We COULD have checked this at the top
302             # level of checks, but this would impose
303             # an additional performance per-word
304             # penalty, and every other case where the
305             # attribute operator doesn't directly
306             # touch the object name already works.
307 624         949 $token_class = 'Word';
308             } elsif ( !($tokens[0] and $tokens[0]->isa('PPI::Token::Operator')) ) {
309 442         1192 $word .= $1;
310 442         856 $t->{line_cursor} += length($1);
311 442         779 $token_class = 'Label';
312             } else {
313 308         549 $token_class = 'Word';
314             }
315             } elsif ( $word eq '_' ) {
316 433         922 $token_class = 'Magic';
317             } else {
318 41543         58740 $token_class = 'Word';
319             }
320             }
321              
322             # Create the new token and finalise
323 52915         118015 $t->_new_token( $token_class, $word );
324 52915 50       97974 if ( $t->{line_cursor} >= $t->{line_length} ) {
325             # End of the line
326 0         0 $t->_finalize_token;
327 0         0 return 0;
328             }
329 52915         85837 $t->_finalize_token->__TOKENIZER__on_char($t);
330             }
331              
332              
333              
334             # Is the current Word really a subroutine attribute?
335             sub __current_token_is_attribute {
336 56623     56623   73876 my ( $t ) = @_;
337 56623         119151 my @tokens = $t->_previous_significant_tokens(1);
338             return (
339             $tokens[0]
340             and (
341             # hint from tokenizer
342             $tokens[0]->{_attribute}
343             # nothing between attribute and us except whitespace
344 56623   66     411224 or $tokens[0]->isa('PPI::Token::Attribute')
345             )
346             );
347             }
348              
349             1;
350              
351             =pod
352              
353             =head1 TO DO
354              
355             - Add C<function>, C<method> etc detector methods
356              
357             =head1 SUPPORT
358              
359             See the L<support section|PPI/SUPPORT> in the main module.
360              
361             =head1 AUTHOR
362              
363             Adam Kennedy E<lt>adamk@cpan.orgE<gt>
364              
365             =head1 COPYRIGHT
366              
367             Copyright 2001 - 2011 Adam Kennedy.
368              
369             This program is free software; you can redistribute
370             it and/or modify it under the same terms as Perl itself.
371              
372             The full text of the license can be found in the
373             LICENSE file included with this module.
374              
375             =cut