File Coverage

blib/lib/PPI/Token/Word.pm

Criterion	Covered	Total	%
statement	100	118	84.7
branch	55	70	78.5
condition	38	42	90.4
subroutine	8	8	100.0
pod	2	2	100.0
total	203	240	84.5

line	stmt	bran	cond	sub	pod	time	code
1							package PPI::Token::Word;
2
3							=pod
4
5							=head1 NAME
6
7							PPI::Token::Word - The generic "word" Token
8
9							=head1 INHERITANCE
10
11							PPI::Token::Word
12							isa PPI::Token
13							isa PPI::Element
14
15							=head1 DESCRIPTION
16
17							A C object is a PPI-specific representation of several
18							different types of word-like things, and is one of the most common Token
19							classes found in typical documents.
20
21							Specifically, it includes not only barewords, but also any other valid
22							Perl identifier including non-operator keywords and core functions, and
23							any include C<::> separators inside it, as long as it fits the
24							format of a class, function, etc.
25
26							=head1 METHODS
27
28							There are no methods available for C beyond those
29							provided by its L and L parent
30							classes.
31
32							We expect to add additional methods to help further resolve a Word as
33							a function, method, etc over time. If you need such a thing right
34							now, look at L.
35
36							=cut
37
38	64			64		363	use strict;
	64					100
	64					1445
39	64			64		255	use PPI::Token ();
	64					101
	64					1060
40	64			64		260	use PPI::Singletons qw' %OPERATOR %QUOTELIKE %KEYWORDS ';
	64					94
	64					91398
41
42							our $VERSION = '1.275';
43
44							our @ISA = "PPI::Token";
45
46							=pod
47
48							=head2 literal
49
50							Returns the value of the Word as a string. This assumes (often
51							incorrectly) that the Word is a bareword and not a function, method,
52							keyword, etc. This differs from C because C expands
53							to C.
54
55							=cut
56
57							sub literal {
58	3			3	1	809	my $self = shift;
59	3					7	my $word = $self->content;
60
61							# Expand Foo'Bar to Foo::Bar
62	3					9	$word =~ s/\'/::/g;
63
64	3					12	return $word;
65							}
66
67							=pod
68
69							=head2 method_call
70
71							Answers whether this is the name of a method in a method call. Returns true if
72							yes, false if no, and nothing if unknown.
73
74							=cut
75
76							sub method_call {
77	22			22	1	3816	my $self = shift;
78
79	22					67	my $previous = $self->sprevious_sibling;
80	22	100	100			87	if (
			100
81							$previous
82							and
83							$previous->isa('PPI::Token::Operator')
84							and
85							$previous->content eq '->'
86							) {
87	4					18	return 1;
88							}
89
90	18					42	my $snext = $self->snext_sibling;
91	18	100				40	return 0 unless $snext;
92
93	15	100	100			163	if (
			100
			100
			100
94							$snext->isa('PPI::Structure::List')
95							or
96							$snext->isa('PPI::Token::Structure')
97							or
98							$snext->isa('PPI::Token::Operator')
99							and (
100							$snext->content eq ','
101							or
102							$snext->content eq '=>'
103							)
104							) {
105	9					33	return 0;
106							}
107
108	6	100	100			29	if (
109							$snext->isa('PPI::Token::Word')
110							and
111							$snext->content =~ m< \w :: \z >xms
112							) {
113	1					375	return 1;
114							}
115
116	5					34	return;
117							}
118
119
120							sub __TOKENIZER__on_char {
121	15			15		31	my $class = shift;
122	15					26	my $t = shift;
123
124							# Suck in till the end of the bareword
125	15					43	pos $t->{line} = $t->{line_cursor};
126	15	100				100	if ( $t->{line} =~ m/\G(\w+(?:(?:\'\|::)\w+)*(?:::)?)/gc ) {
127	8					32	my $word = $1;
128							# Special Case: If we accidentally treat eq'foo' like
129							# the word "eq'foo", then just make 'eq' (or whatever
130							# else is in the %KEYWORDS hash.
131	8	0	33			31	if ( $word =~ /^(\w+)'/ && $KEYWORDS{$1} ) {
132	0					0	$word = $1;
133							}
134	8					23	$t->{token}->{content} .= $word;
135	8					25	$t->{line_cursor} += length $word;
136
137							}
138
139							# We might be a subroutine attribute.
140	15	50				42	if ( __current_token_is_attribute($t) ) {
141	0					0	$t->{class} = $t->{token}->set_class( 'Attribute' );
142	0					0	return $t->{class}->__TOKENIZER__commit( $t );
143							}
144
145	15					74	my $word = $t->{token}->{content};
146	15	50				66	if ( $KEYWORDS{$word} ) {
147							# Check for a Perl keyword that is forced to be a normal word instead
148	0	0				0	if ( $t->__current_token_is_forced_word ) {
149	0					0	$t->{class} = $t->{token}->set_class( 'Word' );
150	0					0	return $t->{class}->__TOKENIZER__on_char( $t );
151							}
152
153							# Check for a quote like operator. %QUOTELIKE must be subset of %KEYWORDS
154	0	0				0	if ( $QUOTELIKE{$word} ) {
155	0					0	$t->{class} = $t->{token}->set_class( $QUOTELIKE{$word} );
156	0					0	return $t->{class}->__TOKENIZER__on_char( $t );
157							}
158
159							# Or one of the word operators. %OPERATOR must be subset of %KEYWORDS
160	0	0				0	if ( $OPERATOR{$word} ) {
161	0					0	$t->{class} = $t->{token}->set_class( 'Operator' );
162	0					0	return $t->_finalize_token->__TOKENIZER__on_char( $t );
163							}
164							}
165
166							# Unless this is a simple identifier, at this point
167							# it has to be a normal bareword
168	15	100				59	if ( $word =~ /\:/ ) {
169	7					18	return $t->_finalize_token->__TOKENIZER__on_char( $t );
170							}
171
172							# If the NEXT character in the line is a colon, this
173							# is a label.
174	8					24	my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
175	8	50				38	if ( $char eq ':' ) {
		100
176	0					0	$t->{token}->{content} .= ':';
177	0					0	$t->{line_cursor}++;
178	0					0	$t->{class} = $t->{token}->set_class( 'Label' );
179
180							# If not a label, '_' on its own is the magic filehandle
181							} elsif ( $word eq '_' ) {
182	1					6	$t->{class} = $t->{token}->set_class( 'Magic' );
183
184							}
185
186							# Finalise and process the character again
187	8					41	$t->_finalize_token->__TOKENIZER__on_char( $t );
188							}
189
190
191
192							# We are committed to being a bareword.
193							# Or so we would like to believe.
194							sub __TOKENIZER__commit {
195	55360			55360		92885	my ($class, $t) = @_;
196
197							# Our current position is the first character of the bareword.
198							# Capture the bareword.
199	55360					118461	pos $t->{line} = $t->{line_cursor};
200	55360	50				262770	unless ( $t->{line} =~ m/\G((?!\d)\w+(?:(?:\'\|::)\w+)*(?:::)?)/gc ) {
201							# Programmer error
202	0					0	die sprintf "Fatal error... regex failed to match in '%s' when expected", substr $t->{line}, $t->{line_cursor};
203							}
204
205							# Special Case: If we accidentally treat eq'foo' like the word "eq'foo",
206							# then unwind it and just make it 'eq' (or the other stringy comparitors)
207	55360					110569	my $word = $1;
208	55360	100	100			106830	if ( $word =~ /^(\w+)'/ && $KEYWORDS{$1} ) {
209	259					547	$word = $1;
210							}
211
212							# Advance the position one after the end of the bareword
213	55360					73959	$t->{line_cursor} += length $word;
214
215							# We might be a subroutine attribute.
216	55360	100				89734	if ( __current_token_is_attribute($t) ) {
217	1062					2460	$t->_new_token( 'Attribute', $word );
218							return ($t->{line_cursor} >= $t->{line_length}) ? 0
219	1062	50				2997	: $t->{class}->__TOKENIZER__on_char($t);
220							}
221
222							# Check for the end of the file
223	54298	100				114015	if ( $word eq '__END__' ) {
224							# Create the token for the __END__ itself
225	8					33	$t->_new_token( 'Separator', $1 );
226	8					23	$t->_finalize_token;
227
228							# Move into the End zone (heh)
229	8					16	$t->{zone} = 'PPI::Token::End';
230
231							# Add the rest of the line as a comment, and a whitespace newline
232							# Anything after the __END__ on the line is "ignored". So we must
233							# also ignore it, by turning it into a comment.
234	8					22	my $end_rest = substr( $t->{line}, $t->{line_cursor} );
235	8					16	$t->{line_cursor} = length $t->{line};
236	8	100				40	if ( $end_rest =~ /\n$/ ) {
237	6					16	chomp $end_rest;
238	6	100				25	$t->_new_token( 'Comment', $end_rest ) if length $end_rest;
239	6					23	$t->_new_token( 'Whitespace', "\n" );
240							} else {
241	2	100				7	$t->_new_token( 'Comment', $end_rest ) if length $end_rest;
242							}
243	8					30	$t->_finalize_token;
244
245	8					27	return 0;
246							}
247
248							# Check for the data section
249	54290	100				83071	if ( $word eq '__DATA__' ) {
250							# Create the token for the __DATA__ itself
251	6					27	$t->_new_token( 'Separator', "$1" );
252	6					18	$t->_finalize_token;
253
254							# Move into the Data zone
255	6					12	$t->{zone} = 'PPI::Token::Data';
256
257							# Add the rest of the line as the Data token
258	6					15	my $data_rest = substr( $t->{line}, $t->{line_cursor} );
259	6					13	$t->{line_cursor} = length $t->{line};
260	6	100				28	if ( $data_rest =~ /\n$/ ) {
261	4					20	chomp $data_rest;
262	4	100				24	$t->_new_token( 'Comment', $data_rest ) if length $data_rest;
263	4					15	$t->_new_token( 'Whitespace', "\n" );
264							} else {
265	2	100				7	$t->_new_token( 'Comment', $data_rest ) if length $data_rest;
266							}
267	6					20	$t->_finalize_token;
268
269	6					20	return 0;
270							}
271
272	54284					57459	my $token_class;
273	54284	100	100			224585	if ( $word =~ /\:/ ) {
		100	100
		100	66
		100
274							# Since it's not a simple identifier...
275	1424					2397	$token_class = 'Word';
276
277							} elsif ( $KEYWORDS{$word} and $t->__current_token_is_forced_word ) {
278	6625					9155	$token_class = 'Word';
279
280							} elsif ( $QUOTELIKE{$word} ) {
281							# Special Case: A Quote-like operator
282	2684					7043	$t->_new_token( $QUOTELIKE{$word}, $word );
283							return ($t->{line_cursor} >= $t->{line_length}) ? 0
284	2684	50				9086	: $t->{class}->__TOKENIZER__on_char( $t );
285
286							} elsif ( $OPERATOR{$word} && ($word ne 'x' \|\| $t->_current_x_is_operator) ) {
287							# Word operator
288	1561					2078	$token_class = 'Operator';
289
290							} else {
291							# Get tokens early to be sure to not disturb state set up by pos and m//gc.
292	41990					77184	my @tokens = $t->_previous_significant_tokens(1);
293
294							# If the next character is a ':' then it's a label...
295	41990					78732	pos $t->{line} = $t->{line_cursor};
296	41990	100				120776	if ( $t->{line} =~ m/\G(\s*:)(?!:)/gc ) {
		100
297	1135	100	100			4721	if ( $tokens[0] and $tokens[0]->{content} eq 'sub' ) {
298							# ... UNLESS it's after 'sub' in which
299							# case it is a sub name and an attribute
300							# operator.
301							# We COULD have checked this at the top
302							# level of checks, but this would impose
303							# an additional performance per-word
304							# penalty, and every other case where the
305							# attribute operator doesn't directly
306							# touch the object name already works.
307	621					992	$token_class = 'Word';
308							} else {
309	514					1106	$word .= $1;
310	514					823	$t->{line_cursor} += length($1);
311	514					848	$token_class = 'Label';
312							}
313							} elsif ( $word eq '_' ) {
314	452					760	$token_class = 'Magic';
315							} else {
316	40403					58417	$token_class = 'Word';
317							}
318							}
319
320							# Create the new token and finalise
321	51600					117221	$t->_new_token( $token_class, $word );
322	51600	50				88870	if ( $t->{line_cursor} >= $t->{line_length} ) {
323							# End of the line
324	0					0	$t->_finalize_token;
325	0					0	return 0;
326							}
327	51600					85689	$t->_finalize_token->__TOKENIZER__on_char($t);
328							}
329
330
331
332							# Is the current Word really a subroutine attribute?
333							sub __current_token_is_attribute {
334	55375			55375		74662	my ( $t ) = @_;
335	55375					106885	my @tokens = $t->_previous_significant_tokens(1);
336							return (
337							$tokens[0]
338							and (
339							# hint from tokenizer
340							$tokens[0]->{_attribute}
341							# nothing between attribute and us except whitespace
342	55375		66			359531	or $tokens[0]->isa('PPI::Token::Attribute')
343							)
344							);
345							}
346
347							1;
348
349							=pod
350
351							=head1 TO DO
352
353							- Add C, C etc detector methods
354
355							=head1 SUPPORT
356
357							See the L in the main module.
358
359							=head1 AUTHOR
360
361							Adam Kennedy Eadamk@cpan.orgE
362
363							=head1 COPYRIGHT
364
365							Copyright 2001 - 2011 Adam Kennedy.
366
367							This program is free software; you can redistribute
368							it and/or modify it under the same terms as Perl itself.
369
370							The full text of the license can be found in the
371							LICENSE file included with this module.
372
373							=cut